【数据分析与可视化】Pandas DataFrame
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
# 在默认浏览器中打开网页
import webbrowser
link = 'https://tiobe.com/tiobe-index/'
webbrowser.open(link)
True
# 从剪贴板读取数据(需先在网页中复制表格)
# 注意:复制的表格每行有 3 个字段,而表头只有 2 列,因此年份被解析为行索引,'Year' 列的内容实际是 medal
df = pd.read_clipboard()
df
|
Year |
Winner |
---|
2019 |
medal |
C |
2018 |
medal |
Python |
2017 |
medal |
C |
2016 |
medal |
Go |
2015 |
medal |
Java |
2014 |
medal |
JavaScript |
2013 |
medal |
Transact-SQL |
2012 |
medal |
Objective-C |
2011 |
medal |
Objective-C |
2010 |
medal |
Python |
2009 |
medal |
Go |
2008 |
medal |
C |
2007 |
medal |
Python |
2006 |
medal |
Ruby |
2005 |
medal |
Java |
2004 |
medal |
PHP |
2003 |
medal |
C++ |
# 查看类型-DataFrame
type(df)
pandas.core.frame.DataFrame
# 查看所有列名
df.columns
Index(['Year', 'Winner'], dtype='object')
# 以属性方式获取某一列的值
df.Winner
2019 C
2018 Python
2017 C
2016 Go
2015 Java
2014 JavaScript
2013 Transact-SQL
2012 Objective-C
2011 Objective-C
2010 Python
2009 Go
2008 C
2007 Python
2006 Ruby
2005 Java
2004 PHP
2003 C++
Name: Winner, dtype: object
# 从原 DataFrame 中提取指定的列,生成一个新的(过滤后的)DataFrame
df_new = DataFrame(df, columns=['Year'])
df_new
|
Year |
---|
2019 |
medal |
2018 |
medal |
2017 |
medal |
2016 |
medal |
2015 |
medal |
2014 |
medal |
2013 |
medal |
2012 |
medal |
2011 |
medal |
2010 |
medal |
2009 |
medal |
2008 |
medal |
2007 |
medal |
2006 |
medal |
2005 |
medal |
2004 |
medal |
2003 |
medal |
# 列名中含有空格时,无法用 df.列名 的属性方式访问,需改用方括号 df['列名'] 读取
df_new['Year']
2019 medal
2018 medal
2017 medal
2016 medal
2015 medal
2014 medal
2013 medal
2012 medal
2011 medal
2010 medal
2009 medal
2008 medal
2007 medal
2006 medal
2005 medal
2004 medal
2003 medal
Name: Year, dtype: object
# 查看其中某一列的类型:Series
type(df_new['Year'])
pandas.core.series.Series
# 从原 DataFrame 中提取指定的列,生成一个新的(过滤后的)DataFrame
# 如果指定的列名在原数据中不存在,该列的值默认全部为 NaN
df_new = DataFrame(df, columns=['Year','Age'])
df_new
|
Year |
Age |
---|
2019 |
medal |
NaN |
2018 |
medal |
NaN |
2017 |
medal |
NaN |
2016 |
medal |
NaN |
2015 |
medal |
NaN |
2014 |
medal |
NaN |
2013 |
medal |
NaN |
2012 |
medal |
NaN |
2011 |
medal |
NaN |
2010 |
medal |
NaN |
2009 |
medal |
NaN |
2008 |
medal |
NaN |
2007 |
medal |
NaN |
2006 |
medal |
NaN |
2005 |
medal |
NaN |
2004 |
medal |
NaN |
2003 |
medal |
NaN |
# 用类似字典的方括号方式给整列赋值(值的个数必须与行数一致,这里共 17 行)
df_new['Age'] = range(0,17)
df_new
|
Year |
Age |
---|
2019 |
medal |
0 |
2018 |
medal |
1 |
2017 |
medal |
2 |
2016 |
medal |
3 |
2015 |
medal |
4 |
2014 |
medal |
5 |
2013 |
medal |
6 |
2012 |
medal |
7 |
2011 |
medal |
8 |
2010 |
medal |
9 |
2009 |
medal |
10 |
2008 |
medal |
11 |
2007 |
medal |
12 |
2006 |
medal |
13 |
2005 |
medal |
14 |
2004 |
medal |
15 |
2003 |
medal |
16 |
df_new['Age'] = np.arange(0,17)
df_new
|
Year |
Age |
---|
2019 |
medal |
0 |
2018 |
medal |
1 |
2017 |
medal |
2 |
2016 |
medal |
3 |
2015 |
medal |
4 |
2014 |
medal |
5 |
2013 |
medal |
6 |
2012 |
medal |
7 |
2011 |
medal |
8 |
2010 |
medal |
9 |
2009 |
medal |
10 |
2008 |
medal |
11 |
2007 |
medal |
12 |
2006 |
medal |
13 |
2005 |
medal |
14 |
2004 |
medal |
15 |
2003 |
medal |
16 |
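# DataFrame 的每一列都是 Series,因此也可以用 Series 给列赋值
# 注意:用 Series 赋值时会按索引对齐。这里 Series 的默认索引是 0~16,与 df_new 的行索引(2019~2003)不匹配,所以 Age 列全部是 NaN
# 若想按位置依次赋值,应指定相同的索引:pd.Series(np.arange(0,17), index=df_new.index)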
df_new['Age'] = pd.Series(np.arange(0,17))
df_new
|
Year |
Age |
---|
2019 |
NaN |
NaN |
2018 |
NaN |
NaN |
2017 |
NaN |
NaN |
2016 |
NaN |
NaN |
2015 |
NaN |
NaN |
2014 |
NaN |
NaN |
2013 |
NaN |
NaN |
2012 |
NaN |
NaN |
2011 |
NaN |
NaN |
2010 |
NaN |
NaN |
2009 |
NaN |
NaN |
2008 |
NaN |
NaN |
2007 |
NaN |
NaN |
2006 |
NaN |
NaN |
2005 |
NaN |
NaN |
2004 |
NaN |
NaN |
2003 |
NaN |
NaN |
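# 用带索引的 Series 赋值:只有索引匹配的行(2019、2018)得到值,其余行为 NaN
# 注意:这种写法会替换整列;由于含有 NaN,该列变为浮点型,所以显示为 18.0、17.0
# 如果只想修改部分值并保留其他行原有的值,可以用 df_new.loc[[2019, 2018], 'Age'] = [18, 17]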
df_new['Age'] = pd.Series([18, 17], index=[2019, 2018])
df_new
|
Year |
Age |
---|
2019 |
NaN |
18.0 |
2018 |
NaN |
17.0 |
2017 |
NaN |
NaN |
2016 |
NaN |
NaN |
2015 |
NaN |
NaN |
2014 |
NaN |
NaN |
2013 |
NaN |
NaN |
2012 |
NaN |
NaN |
2011 |
NaN |
NaN |
2010 |
NaN |
NaN |
2009 |
NaN |
NaN |
2008 |
NaN |
NaN |
2007 |
NaN |
NaN |
2006 |
NaN |
NaN |
2005 |
NaN |
NaN |
2004 |
NaN |
NaN |
2003 |
NaN |
NaN |
版权声明:本文内容由网络用户投稿,版权归原作者所有,本站不拥有其著作权,亦不承担相应法律责任。如果您发现本站中有涉嫌抄袭或描述失实的内容,请联系我们jiasou666@gmail.com 处理,核实后本网站将在24小时内删除侵权内容。
暂时没有评论,来抢沙发吧~