import numpy as np
import pandas as pd
from pandas import Series, DataFrame
# Read the apply_demo.csv data set
link_csv = '/Users/bennyrhys/Desktop/数据分析可视化-数据集/homework/apply_demo.csv'
# NOTE: head() keeps only the leading rows, so every later step (including the
# final CSV export) works on this truncated frame rather than the full file.
df = pd.read_csv(link_csv).head()
df
|
time |
data |
---|
0 |
1473411962 |
Symbol: APPL Seqno: 0 Price: 1623 |
1 |
1473411962 |
Symbol: APPL Seqno: 0 Price: 1623 |
2 |
1473411963 |
Symbol: APPL Seqno: 0 Price: 1623 |
3 |
1473411963 |
Symbol: APPL Seqno: 0 Price: 1623 |
4 |
1473411963 |
Symbol: APPL Seqno: 1 Price: 1649 |
# Total number of cells in df (rows x columns)
df.size
10
# Build a Series of ten 'a' strings to use as a new column
s1 = Series(['a' for _ in range(10)])
s1
0 a
1 a
2 a
3 a
4 a
5 a
6 a
7 a
8 a
9 a
dtype: object
# Attach s1 as column 'A'. Assignment aligns on the index, so only the
# labels that exist in df receive a value; the extra entries of s1 are ignored.
df['A'] = s1
df.head()
|
time |
data |
A |
---|
0 |
1473411962 |
Symbol: APPL Seqno: 0 Price: 1623 |
a |
1 |
1473411962 |
Symbol: APPL Seqno: 0 Price: 1623 |
a |
2 |
1473411963 |
Symbol: APPL Seqno: 0 Price: 1623 |
a |
3 |
1473411963 |
Symbol: APPL Seqno: 0 Price: 1623 |
a |
4 |
1473411963 |
Symbol: APPL Seqno: 1 Price: 1649 |
a |
# Convert every lower-case value in column A to upper case by applying
# str.upper element-wise (Series.apply(str.upper))
df['A'] = df['A'].apply(str.upper)
df
|
time |
data |
A |
---|
0 |
1473411962 |
Symbol: APPL Seqno: 0 Price: 1623 |
A |
1 |
1473411962 |
Symbol: APPL Seqno: 0 Price: 1623 |
A |
2 |
1473411963 |
Symbol: APPL Seqno: 0 Price: 1623 |
A |
3 |
1473411963 |
Symbol: APPL Seqno: 0 Price: 1623 |
A |
4 |
1473411963 |
Symbol: APPL Seqno: 1 Price: 1649 |
A |
# Inspect one raw value of the 'data' column before splitting it apart
df['data'][0]
' Symbol: APPL Seqno: 0 Price: 1623'
# Trim leading/trailing whitespace with strip(), then split on single spaces.
# NOTE: split(' ') yields empty strings if two spaces appear in a row.
l1 = df['data'][0].strip().split(' ')
l1
['Symbol:', 'APPL', 'Seqno:', '0', 'Price:', '1623']
# The wanted values are the tokens that follow each 'Key:' label,
# i.e. positions 1, 3 and 5 of the split list
l1[1],l1[3],l1[5]
('APPL', '0', '1623')
# Helper that splits one raw record string and returns its field values
def foo(line):
    """Extract the Symbol, Seqno and Price values from one record string.

    Args:
        line: Text shaped like ``'Symbol: APPL Seqno: 0 Price: 1623'``,
            optionally padded with surrounding whitespace.

    Returns:
        A Series holding the three values as strings, labelled 0, 1 and 2.

    Raises:
        IndexError: If the line has fewer than six whitespace-separated
            tokens.
    """
    # split() without an argument drops leading/trailing whitespace and
    # treats any run of spaces or tabs as one separator, so the value
    # positions stay at 1, 3 and 5 even when the fields are unevenly spaced.
    items = line.split()
    return Series([items[1], items[3], items[5]])
# Apply foo to every 'data' string. Because foo returns a Series, the result
# is a new DataFrame with one positional column (0, 1, 2) per extracted value.
df_tmp = df['data'].apply(foo)
df_tmp
|
0 |
1 |
2 |
---|
0 |
APPL |
0 |
1623 |
1 |
APPL |
0 |
1623 |
2 |
APPL |
0 |
1623 |
3 |
APPL |
0 |
1623 |
4 |
APPL |
1 |
1649 |
# Replace the positional column labels of the new frame with field names
df_tmp = df_tmp.rename(columns=dict(enumerate(['Symbol', 'Seqno', 'Price'])))
df_tmp
|
Symbol |
Seqno |
Price |
---|
0 |
APPL |
0 |
1623 |
1 |
APPL |
0 |
1623 |
2 |
APPL |
0 |
1623 |
3 |
APPL |
0 |
1623 |
4 |
APPL |
1 |
1649 |
# Display the original frame again for comparison with the parsed columns
df
|
time |
data |
A |
---|
0 |
1473411962 |
Symbol: APPL Seqno: 0 Price: 1623 |
A |
1 |
1473411962 |
Symbol: APPL Seqno: 0 Price: 1623 |
A |
2 |
1473411963 |
Symbol: APPL Seqno: 0 Price: 1623 |
A |
3 |
1473411963 |
Symbol: APPL Seqno: 0 Price: 1623 |
A |
4 |
1473411963 |
Symbol: APPL Seqno: 1 Price: 1649 |
A |
# Combine the original frame with the newly parsed columns.
# NOTE(review): combine_first is meant for filling missing values from a
# second frame; the recorded output shows it returning the columns in
# alphabetical order with Price/Seqno as floats. If the original column order
# and string values matter, df.join(df_tmp) may be a better fit - confirm.
df_new = df.combine_first(df_tmp)
df_new
|
A |
Price |
Seqno |
Symbol |
data |
time |
---|
0 |
A |
1623.0 |
0.0 |
APPL |
Symbol: APPL Seqno: 0 Price: 1623 |
1473411962 |
1 |
A |
1623.0 |
0.0 |
APPL |
Symbol: APPL Seqno: 0 Price: 1623 |
1473411962 |
2 |
A |
1623.0 |
0.0 |
APPL |
Symbol: APPL Seqno: 0 Price: 1623 |
1473411963 |
3 |
A |
1623.0 |
0.0 |
APPL |
Symbol: APPL Seqno: 0 Price: 1623 |
1473411963 |
4 |
A |
1649.0 |
1.0 |
APPL |
Symbol: APPL Seqno: 1 Price: 1649 |
1473411963 |
# Remove the columns that are no longer needed: the already-parsed raw
# 'data' text and the throw-away demo column 'A'
df_new.drop(['data', 'A'], axis=1, inplace=True)
df_new
|
Price |
Seqno |
Symbol |
time |
---|
0 |
1623.0 |
0.0 |
APPL |
1473411962 |
1 |
1623.0 |
0.0 |
APPL |
1473411962 |
2 |
1623.0 |
0.0 |
APPL |
1473411963 |
3 |
1623.0 |
0.0 |
APPL |
1473411963 |
4 |
1649.0 |
1.0 |
APPL |
1473411963 |
# Save the result to an external CSV file so it can be reused later
df_new.to_csv('/Users/bennyrhys/Desktop/数据分析可视化-数据集/homework/demo_duplicate.csv')
# IPython shell escape (not plain Python): list the target directory
!ls /Users/bennyrhys/Desktop/数据分析可视化-数据集/homework
AMZN.csv apply_demo.csv iris.csv top5.csv
BABA.csv city_weather.csv movie_metadata.csv train.csv
Pokemon.csv demo_duplicate.csv sales-funnel.xlsx usa_flights.csv
暂时没有评论,来抢沙发吧~