import numpy as np
import pandas as pd
from pandas import Series, DataFrame
# Three-element integer Series labelled 'A', 'B', 'C'; the dict keys become
# the index labels in insertion order.
s1 = pd.Series({'A': 1, 'B': 2, 'C': 3})
s1
A 1
B 2
C 3
dtype: int64
# Four-element integer Series labelled 'B' through 'E'; it shares only the
# labels 'B' and 'C' with s1.
s2 = pd.Series({'B': 4, 'C': 5, 'D': 6, 'E': 7})
s2
B 4
C 5
D 6
E 7
dtype: int64
# Adding two Series aligns them on index labels and adds the matching values.
# Labels present in only one operand ('A', 'D' and 'E' here) have no partner,
# so they yield NaN, and the result is upcast to float64 to hold the NaN.
s1 + s2
A NaN
B 6.0
C 8.0
D NaN
E NaN
dtype: float64
DataFrame运算
# 2x2 integer frame holding 0..3, rows 'A'/'B', columns 'BJ'/'SH'.
df1 = pd.DataFrame(
    data=np.arange(4).reshape((2, 2)),
    index=list('AB'),
    columns=['BJ', 'SH'],
)
df1
# 3x3 integer frame holding 0..8, rows 'A'..'C', columns 'BJ'/'GZ'/'SH'.
df2 = pd.DataFrame(
    data=np.arange(9).reshape((3, 3)),
    index=list('ABC'),
    columns=['BJ', 'GZ', 'SH'],
)
df2
|   | BJ | GZ | SH |
|---|---|---|---|
| A | 0 | 1 | 2 |
| B | 3 | 4 | 5 |
| C | 6 | 7 | 8 |
# DataFrame addition aligns on both row and column labels and adds matching
# cells. Any cell missing from either operand (column 'GZ' and row 'C' are
# absent from df1) becomes NaN in the result.
df1 + df2
|   | BJ | GZ | SH |
|---|---|---|---|
| A | 0.0 | NaN | 3.0 |
| B | 5.0 | NaN | 8.0 |
| C | NaN | NaN | NaN |
# 3x3 frame with one missing value at row 'B', column 'c3'. Built column by
# column: 'c1' and 'c2' hold integers, while 'c3' is float because of the NaN.
df3 = pd.DataFrame(
    {
        'c1': [1, 4, 7],
        'c2': [2, 5, 8],
        'c3': [3, np.nan, 9],
    },
    index=list('ABC'),
)
df3
|   | c1 | c2 | c3 |
|---|---|---|---|
| A | 1 | 2 | 3.0 |
| B | 4 | 5 | NaN |
| C | 7 | 8 | 9.0 |
# Sum down each column (the default when no axis is given); NaN entries are
# skipped rather than propagated, so c3 sums to 3 + 9 = 12.
df3.sum()
c1 12.0
c2 15.0
c3 12.0
dtype: float64
# Reducing a DataFrame with sum() produces a Series, not a DataFrame.
type(df3.sum())
pandas.core.series.Series
# Sum across each row (axis=1); NaN is skipped, so row 'B' gives 4 + 5 = 9.
df3.sum(axis=1)
A 6.0
B 9.0
C 24.0
dtype: float64
# Minimum of each column (the default when no axis is given).
df3.min()
c1 1.0
c2 2.0
c3 3.0
dtype: float64
# Minimum of each row (axis=1).
df3.min(axis=1)
A 1.0
B 4.0
C 7.0
dtype: float64
# Per-column summary statistics: count, mean, std, min, the 25%/50%/75%
# quantiles and max. count excludes NaN, so c3 reports 2 instead of 3.
df3.describe()
|       | c1  | c2  | c3       |
|---|---|---|---|
| count | 3.0 | 3.0 | 2.000000 |
| mean  | 4.0 | 5.0 | 6.000000 |
| std   | 3.0 | 3.0 | 4.242641 |
| min   | 1.0 | 2.0 | 3.000000 |
| 25%   | 2.5 | 3.5 | 4.500000 |
| 50%   | 4.0 | 5.0 | 6.000000 |
| 75%   | 5.5 | 6.5 | 7.500000 |
| max   | 7.0 | 8.0 | 9.000000 |
# Display df3 again; the values shown match the frame as originally built.
df3
|   | c1 | c2 | c3 |
|---|---|---|---|
| A | 1 | 2 | 3.0 |
| B | 4 | 5 | NaN |
| C | 7 | 8 | 9.0 |
暂时没有评论,来抢沙发吧~