import numpy as np
import pandas as pd
from pandas import Series, DataFrame
Series reindex
s1 = Series([1,2,3,4], index=['A','B','C','D'])
s1
A 1
B 2
C 3
D 4
dtype: int64
# reindex新的Series索引 (查看参数快捷键shift+tab)
s1.reindex(index=['A','B','C','D','E'])
A 1.0
B 2.0
C 3.0
D 4.0
E NaN
dtype: float64
# reindex并给空值索引赋值
s1.reindex(index=['A','B','C','D','E'],fill_value=10)
A 1
B 2
C 3
D 4
E 10
dtype: int64
# 测试reindex的赋值方式
s2 = Series(['A','B','C'],index=[1,5,10])
s2
1 A
5 B
10 C
dtype: object
# 重新索引
s2.reindex(index=range(15))
0 NaN
1 A
2 NaN
3 NaN
4 NaN
5 B
6 NaN
7 NaN
8 NaN
9 NaN
10 C
11 NaN
12 NaN
13 NaN
14 NaN
dtype: object
# 函数填充值(method='ffill':按照重新索引前的索引区间,用前一个有效值依次向后填充)
s2.reindex(index=range(15),method='ffill')
0 NaN
1 A
2 A
3 A
4 A
5 B
6 B
7 B
8 B
9 B
10 C
11 C
12 C
13 C
14 C
dtype: object
DataFrame reindex
# 故意漏掉C
df1 = DataFrame(np.random.rand(25).reshape([5,5]), index=['A','B','D','E','F'], columns=['c1','c2','c3','c4','c5'])
df1
|   | c1 | c2 | c3 | c4 | c5 |
|---|---|---|---|---|---|
| A | 0.916976 | 0.277428 | 0.487468 | 0.392332 | 0.906246 |
| B | 0.112718 | 0.000009 | 0.958650 | 0.890877 | 0.640683 |
| D | 0.715841 | 0.857899 | 0.834954 | 0.134856 | 0.982175 |
| E | 0.375207 | 0.925308 | 0.734072 | 0.583107 | 0.677676 |
| F | 0.627784 | 0.818094 | 0.636362 | 0.417960 | 0.063043 |
# reindex恢复C-重新索引行
df1.reindex(['A','B','C','D','E','F'])
|   | c1 | c2 | c3 | c4 | c5 |
|---|---|---|---|---|---|
| A | 0.916976 | 0.277428 | 0.487468 | 0.392332 | 0.906246 |
| B | 0.112718 | 0.000009 | 0.958650 | 0.890877 | 0.640683 |
| C | NaN | NaN | NaN | NaN | NaN |
| D | 0.715841 | 0.857899 | 0.834954 | 0.134856 | 0.982175 |
| E | 0.375207 | 0.925308 | 0.734072 | 0.583107 | 0.677676 |
| F | 0.627784 | 0.818094 | 0.636362 | 0.417960 | 0.063043 |
# reindex-重新索引列
df1.reindex(columns=['c1','c2','c3','c4','c5','c6'])
|   | c1 | c2 | c3 | c4 | c5 | c6 |
|---|---|---|---|---|---|---|
| A | 0.916976 | 0.277428 | 0.487468 | 0.392332 | 0.906246 | NaN |
| B | 0.112718 | 0.000009 | 0.958650 | 0.890877 | 0.640683 | NaN |
| D | 0.715841 | 0.857899 | 0.834954 | 0.134856 | 0.982175 | NaN |
| E | 0.375207 | 0.925308 | 0.734072 | 0.583107 | 0.677676 | NaN |
| F | 0.627784 | 0.818094 | 0.636362 | 0.417960 | 0.063043 | NaN |
# reindex-行列同时重排(新增)
df1.reindex(index=['A','B','C','D','E','F'] ,columns=['c1','c2','c3','c4','c5','c6'])
|   | c1 | c2 | c3 | c4 | c5 | c6 |
|---|---|---|---|---|---|---|
| A | 0.916976 | 0.277428 | 0.487468 | 0.392332 | 0.906246 | NaN |
| B | 0.112718 | 0.000009 | 0.958650 | 0.890877 | 0.640683 | NaN |
| C | NaN | NaN | NaN | NaN | NaN | NaN |
| D | 0.715841 | 0.857899 | 0.834954 | 0.134856 | 0.982175 | NaN |
| E | 0.375207 | 0.925308 | 0.734072 | 0.583107 | 0.677676 | NaN |
| F | 0.627784 | 0.818094 | 0.636362 | 0.417960 | 0.063043 | NaN |
# Series(减少)
s1
A 1
B 2
C 3
D 4
dtype: int64
s1.reindex(index=['A','B'])
A 1
B 2
dtype: int64
# DataFrame reindex-重排行(减少,只保留A、B两行)
df1
|   | c1 | c2 | c3 | c4 | c5 |
|---|---|---|---|---|---|
| A | 0.916976 | 0.277428 | 0.487468 | 0.392332 | 0.906246 |
| B | 0.112718 | 0.000009 | 0.958650 | 0.890877 | 0.640683 |
| D | 0.715841 | 0.857899 | 0.834954 | 0.134856 | 0.982175 |
| E | 0.375207 | 0.925308 | 0.734072 | 0.583107 | 0.677676 |
| F | 0.627784 | 0.818094 | 0.636362 | 0.417960 | 0.063043 |
df1.reindex(['A','B'])
|   | c1 | c2 | c3 | c4 | c5 |
|---|---|---|---|---|---|
| A | 0.916976 | 0.277428 | 0.487468 | 0.392332 | 0.906246 |
| B | 0.112718 | 0.000009 | 0.958650 | 0.890877 | 0.640683 |
# 删掉的方法
s1.drop('A')
B 2
C 3
D 4
dtype: int64
# axis=0 表示按行删除,axis=1 表示按列删除
df1.drop('A',axis=0)
|   | c1 | c2 | c3 | c4 | c5 |
|---|---|---|---|---|---|
| B | 0.112718 | 0.000009 | 0.958650 | 0.890877 | 0.640683 |
| D | 0.715841 | 0.857899 | 0.834954 | 0.134856 | 0.982175 |
| E | 0.375207 | 0.925308 | 0.734072 | 0.583107 | 0.677676 |
| F | 0.627784 | 0.818094 | 0.636362 | 0.417960 | 0.063043 |
df1.drop('c1',axis=1)
|   | c2 | c3 | c4 | c5 |
|---|---|---|---|---|
| A | 0.277428 | 0.487468 | 0.392332 | 0.906246 |
| B | 0.000009 | 0.958650 | 0.890877 | 0.640683 |
| D | 0.857899 | 0.834954 | 0.134856 | 0.982175 |
| E | 0.925308 | 0.734072 | 0.583107 | 0.677676 |
| F | 0.818094 | 0.636362 | 0.417960 | 0.063043 |
暂时没有评论,来抢沙发吧~