【数据分析可视化】透视表
前提要对数据信息之间的关系要了解
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
# 读入excel文件
df = pd.read_excel('/Users/bennyrhys/Desktop/数据分析可视化-数据集/homework/sales-funnel.xlsx')
df
|
Account |
Name |
Rep |
Manager |
Product |
Quantity |
Price |
Status |
---|
0 |
714466 |
Trantow-Barrows |
Craig Booker |
Debra Henley |
CPU |
1 |
30000 |
presented |
1 |
714466 |
Trantow-Barrows |
Craig Booker |
Debra Henley |
Software |
1 |
10000 |
presented |
2 |
714466 |
Trantow-Barrows |
Craig Booker |
Debra Henley |
Maintenance |
2 |
5000 |
pending |
3 |
737550 |
Fritsch, Russel and Anderson |
Craig Booker |
Debra Henley |
CPU |
1 |
35000 |
declined |
4 |
146832 |
Kiehn-Spinka |
Daniel Hilton |
Debra Henley |
CPU |
2 |
65000 |
won |
5 |
218895 |
Kulas Inc |
Daniel Hilton |
Debra Henley |
CPU |
2 |
40000 |
pending |
6 |
218895 |
Kulas Inc |
Daniel Hilton |
Debra Henley |
Software |
1 |
10000 |
presented |
7 |
412290 |
Jerde-Hilpert |
John Smith |
Debra Henley |
Maintenance |
2 |
5000 |
pending |
8 |
740150 |
Barton LLC |
John Smith |
Debra Henley |
CPU |
1 |
35000 |
declined |
9 |
141962 |
Herman LLC |
Cedric Moss |
Fred Anderson |
CPU |
2 |
65000 |
won |
10 |
163416 |
Purdy-Kunde |
Cedric Moss |
Fred Anderson |
CPU |
1 |
30000 |
presented |
11 |
239344 |
Stokes LLC |
Cedric Moss |
Fred Anderson |
Maintenance |
1 |
5000 |
pending |
12 |
239344 |
Stokes LLC |
Cedric Moss |
Fred Anderson |
Software |
1 |
10000 |
presented |
13 |
307599 |
Kassulke, Ondricka and Metz |
Wendy Yule |
Fred Anderson |
Maintenance |
3 |
7000 |
won |
14 |
688981 |
Keeling LLC |
Wendy Yule |
Fred Anderson |
CPU |
5 |
100000 |
won |
15 |
729833 |
Koepp Ltd |
Wendy Yule |
Fred Anderson |
CPU |
2 |
65000 |
declined |
16 |
729833 |
Koepp Ltd |
Wendy Yule |
Fred Anderson |
Monitor |
2 |
5000 |
presented |
# 透视表-去重复(聚合默认 agg平均值)
pd.pivot_table(df, index=['Name'])
|
Account |
Price |
Quantity |
---|
Name |
|
|
|
---|
Barton LLC |
740150 |
35000 |
1.000000 |
Fritsch, Russel and Anderson |
737550 |
35000 |
1.000000 |
Herman LLC |
141962 |
65000 |
2.000000 |
Jerde-Hilpert |
412290 |
5000 |
2.000000 |
Kassulke, Ondricka and Metz |
307599 |
7000 |
3.000000 |
Keeling LLC |
688981 |
100000 |
5.000000 |
Kiehn-Spinka |
146832 |
65000 |
2.000000 |
Koepp Ltd |
729833 |
35000 |
2.000000 |
Kulas Inc |
218895 |
25000 |
1.500000 |
Purdy-Kunde |
163416 |
30000 |
1.000000 |
Stokes LLC |
239344 |
7500 |
1.000000 |
Trantow-Barrows |
714466 |
15000 |
1.333333 |
# 透视表-去重复(聚合默认 agg-sum)
pd.pivot_table(df, index=['Name'], aggfunc='sum')
|
Account |
Price |
Quantity |
---|
Name |
|
|
|
---|
Barton LLC |
740150 |
35000 |
1 |
Fritsch, Russel and Anderson |
737550 |
35000 |
1 |
Herman LLC |
141962 |
65000 |
2 |
Jerde-Hilpert |
412290 |
5000 |
2 |
Kassulke, Ondricka and Metz |
307599 |
7000 |
3 |
Keeling LLC |
688981 |
100000 |
5 |
Kiehn-Spinka |
146832 |
65000 |
2 |
Koepp Ltd |
1459666 |
70000 |
4 |
Kulas Inc |
437790 |
50000 |
3 |
Purdy-Kunde |
163416 |
30000 |
1 |
Stokes LLC |
478688 |
15000 |
2 |
Trantow-Barrows |
2143398 |
45000 |
4 |
# 透视表-多列索引(基本不变)
pd.pivot_table(df, index=['Name','Rep','Manager'])
|
|
|
Account |
Price |
Quantity |
---|
Name |
Rep |
Manager |
|
|
|
---|
Barton LLC |
John Smith |
Debra Henley |
740150 |
35000 |
1.000000 |
Fritsch, Russel and Anderson |
Craig Booker |
Debra Henley |
737550 |
35000 |
1.000000 |
Herman LLC |
Cedric Moss |
Fred Anderson |
141962 |
65000 |
2.000000 |
Jerde-Hilpert |
John Smith |
Debra Henley |
412290 |
5000 |
2.000000 |
Kassulke, Ondricka and Metz |
Wendy Yule |
Fred Anderson |
307599 |
7000 |
3.000000 |
Keeling LLC |
Wendy Yule |
Fred Anderson |
688981 |
100000 |
5.000000 |
Kiehn-Spinka |
Daniel Hilton |
Debra Henley |
146832 |
65000 |
2.000000 |
Koepp Ltd |
Wendy Yule |
Fred Anderson |
729833 |
35000 |
2.000000 |
Kulas Inc |
Daniel Hilton |
Debra Henley |
218895 |
25000 |
1.500000 |
Purdy-Kunde |
Cedric Moss |
Fred Anderson |
163416 |
30000 |
1.000000 |
Stokes LLC |
Cedric Moss |
Fred Anderson |
239344 |
7500 |
1.000000 |
Trantow-Barrows |
Craig Booker |
Debra Henley |
714466 |
15000 |
1.333333 |
# 透视表-包含关系
pd.pivot_table(df, index=['Manager','Rep'])
|
|
Account |
Price |
Quantity |
---|
Manager |
Rep |
|
|
|
---|
Debra Henley |
Craig Booker |
720237.0 |
20000.000000 |
1.250000 |
Daniel Hilton |
194874.0 |
38333.333333 |
1.666667 |
John Smith |
576220.0 |
20000.000000 |
1.500000 |
Fred Anderson |
Cedric Moss |
196016.5 |
27500.000000 |
1.250000 |
Wendy Yule |
614061.5 |
44250.000000 |
3.000000 |
# 透视表指定 values默认是None
pd.pivot_table(df, index=['Manager','Rep'], values=['Price','Quantity'])
|
|
Price |
Quantity |
---|
Manager |
Rep |
|
|
---|
Debra Henley |
Craig Booker |
20000.000000 |
1.250000 |
Daniel Hilton |
38333.333333 |
1.666667 |
John Smith |
20000.000000 |
1.500000 |
Fred Anderson |
Cedric Moss |
27500.000000 |
1.250000 |
Wendy Yule |
44250.000000 |
3.000000 |
# 透视表-col
pd.pivot_table(df, index=['Manager','Rep'], values=['Price','Quantity'], columns=['Product'])
|
|
Price |
Quantity |
---|
|
Product |
CPU |
Maintenance |
Monitor |
Software |
CPU |
Maintenance |
Monitor |
Software |
---|
Manager |
Rep |
|
|
|
|
|
|
|
|
---|
Debra Henley |
Craig Booker |
32500.0 |
5000.0 |
NaN |
10000.0 |
1.0 |
2.0 |
NaN |
1.0 |
Daniel Hilton |
52500.0 |
NaN |
NaN |
10000.0 |
2.0 |
NaN |
NaN |
1.0 |
John Smith |
35000.0 |
5000.0 |
NaN |
NaN |
1.0 |
2.0 |
NaN |
NaN |
Fred Anderson |
Cedric Moss |
47500.0 |
5000.0 |
NaN |
10000.0 |
1.5 |
1.0 |
NaN |
1.0 |
Wendy Yule |
82500.0 |
7000.0 |
5000.0 |
NaN |
3.5 |
3.0 |
2.0 |
NaN |
# 透视表-填充nan
pd.pivot_table(df, index=['Manager','Rep'], values=['Price','Quantity'], columns=['Product'],fill_value=0)
|
|
Price |
Quantity |
---|
|
Product |
CPU |
Maintenance |
Monitor |
Software |
CPU |
Maintenance |
Monitor |
Software |
---|
Manager |
Rep |
|
|
|
|
|
|
|
|
---|
Debra Henley |
Craig Booker |
32500 |
5000 |
0 |
10000 |
1.0 |
2 |
0 |
1 |
Daniel Hilton |
52500 |
0 |
0 |
10000 |
2.0 |
0 |
0 |
1 |
John Smith |
35000 |
5000 |
0 |
0 |
1.0 |
2 |
0 |
0 |
Fred Anderson |
Cedric Moss |
47500 |
5000 |
0 |
10000 |
1.5 |
1 |
0 |
1 |
Wendy Yule |
82500 |
7000 |
5000 |
0 |
3.5 |
3 |
2 |
0 |
版权声明:本文内容由网络用户投稿,版权归原作者所有,本站不拥有其著作权,亦不承担相应法律责任。如果您发现本站中有涉嫌抄袭或描述失实的内容,请联系我们jiasou666@gmail.com 处理,核实后本网站将在24小时内删除侵权内容。
暂时没有评论,来抢沙发吧~