【数据分析与可视化】DataFrame的Selecting和indexing
import numpy as np
import pandas as pd
!pwd
/Users/bennyrhys/opt/anaconda3/bin
!ls /Users/bennyrhys/Desktop/数据分析可视化-数据集/homework
AMZN.csv apply_demo.csv iris.csv top5.csv
BABA.csv city_weather.csv movie_metadata.csv train.csv
Pokemon.csv demo_duplicate.csv sales-funnel.xlsx usa_flights.csv
# 读取电影csv文件
imdb = pd.read_csv('/Users/bennyrhys/Desktop/数据分析可视化-数据集/homework/movie_metadata.csv')
imdb
|
color |
director_name |
num_critic_for_reviews |
duration |
director_facebook_likes |
actor_3_facebook_likes |
actor_2_name |
actor_1_facebook_likes |
gross |
genres |
... |
num_user_for_reviews |
language |
country |
content_rating |
budget |
title_year |
actor_2_facebook_likes |
imdb_score |
aspect_ratio |
movie_facebook_likes |
---|
0 |
Color |
James Cameron |
723.0 |
178.0 |
0.0 |
855.0 |
Joel David Moore |
1000.0 |
760505847.0 |
Action|Adventure|Fantasy|Sci-Fi |
... |
3054.0 |
English |
USA |
PG-13 |
237000000.0 |
2009.0 |
936.0 |
7.9 |
1.78 |
33000 |
1 |
Color |
Gore Verbinski |
302.0 |
169.0 |
563.0 |
1000.0 |
Orlando Bloom |
40000.0 |
309404152.0 |
Action|Adventure|Fantasy |
... |
1238.0 |
English |
USA |
PG-13 |
300000000.0 |
2007.0 |
5000.0 |
7.1 |
2.35 |
0 |
2 |
Color |
Sam Mendes |
602.0 |
148.0 |
0.0 |
161.0 |
Rory Kinnear |
11000.0 |
200074175.0 |
Action|Adventure|Thriller |
... |
994.0 |
English |
UK |
PG-13 |
245000000.0 |
2015.0 |
393.0 |
6.8 |
2.35 |
85000 |
3 |
Color |
Christopher Nolan |
813.0 |
164.0 |
22000.0 |
23000.0 |
Christian Bale |
27000.0 |
448130642.0 |
Action|Thriller |
... |
2701.0 |
English |
USA |
PG-13 |
250000000.0 |
2012.0 |
23000.0 |
8.5 |
2.35 |
164000 |
4 |
NaN |
Doug Walker |
NaN |
NaN |
131.0 |
NaN |
Rob Walker |
131.0 |
NaN |
Documentary |
... |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
12.0 |
7.1 |
NaN |
0 |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
5038 |
Color |
Scott Smith |
1.0 |
87.0 |
2.0 |
318.0 |
Daphne Zuniga |
637.0 |
NaN |
Comedy|Drama |
... |
6.0 |
English |
Canada |
NaN |
NaN |
2013.0 |
470.0 |
7.7 |
NaN |
84 |
5039 |
Color |
NaN |
43.0 |
43.0 |
NaN |
319.0 |
Valorie Curry |
841.0 |
NaN |
Crime|Drama|Mystery|Thriller |
... |
359.0 |
English |
USA |
TV-14 |
NaN |
NaN |
593.0 |
7.5 |
16.00 |
32000 |
5040 |
Color |
Benjamin Roberds |
13.0 |
76.0 |
0.0 |
0.0 |
Maxwell Moody |
0.0 |
NaN |
Drama|Horror|Thriller |
... |
3.0 |
English |
USA |
NaN |
1400.0 |
2013.0 |
0.0 |
6.3 |
NaN |
16 |
5041 |
Color |
Daniel Hsia |
14.0 |
100.0 |
0.0 |
489.0 |
Daniel Henney |
946.0 |
10443.0 |
Comedy|Drama|Romance |
... |
9.0 |
English |
USA |
PG-13 |
NaN |
2012.0 |
719.0 |
6.3 |
2.35 |
660 |
5042 |
Color |
Jon Gunn |
43.0 |
90.0 |
16.0 |
16.0 |
Brian Herzlinger |
86.0 |
85222.0 |
Documentary |
... |
84.0 |
English |
USA |
PG |
1100.0 |
2004.0 |
23.0 |
6.6 |
1.85 |
456 |
5043 rows × 28 columns
# 数据量较大,先用shape查看行数和列数,返回(行数, 列数)
imdb.shape
(5043, 28)
# 显示部分(默认返回前五行)
imdb.head()
|
color |
director_name |
num_critic_for_reviews |
duration |
director_facebook_likes |
actor_3_facebook_likes |
actor_2_name |
actor_1_facebook_likes |
gross |
genres |
... |
num_user_for_reviews |
language |
country |
content_rating |
budget |
title_year |
actor_2_facebook_likes |
imdb_score |
aspect_ratio |
movie_facebook_likes |
---|
0 |
Color |
James Cameron |
723.0 |
178.0 |
0.0 |
855.0 |
Joel David Moore |
1000.0 |
760505847.0 |
Action|Adventure|Fantasy|Sci-Fi |
... |
3054.0 |
English |
USA |
PG-13 |
237000000.0 |
2009.0 |
936.0 |
7.9 |
1.78 |
33000 |
1 |
Color |
Gore Verbinski |
302.0 |
169.0 |
563.0 |
1000.0 |
Orlando Bloom |
40000.0 |
309404152.0 |
Action|Adventure|Fantasy |
... |
1238.0 |
English |
USA |
PG-13 |
300000000.0 |
2007.0 |
5000.0 |
7.1 |
2.35 |
0 |
2 |
Color |
Sam Mendes |
602.0 |
148.0 |
0.0 |
161.0 |
Rory Kinnear |
11000.0 |
200074175.0 |
Action|Adventure|Thriller |
... |
994.0 |
English |
UK |
PG-13 |
245000000.0 |
2015.0 |
393.0 |
6.8 |
2.35 |
85000 |
3 |
Color |
Christopher Nolan |
813.0 |
164.0 |
22000.0 |
23000.0 |
Christian Bale |
27000.0 |
448130642.0 |
Action|Thriller |
... |
2701.0 |
English |
USA |
PG-13 |
250000000.0 |
2012.0 |
23000.0 |
8.5 |
2.35 |
164000 |
4 |
NaN |
Doug Walker |
NaN |
NaN |
131.0 |
NaN |
Rob Walker |
131.0 |
NaN |
Documentary |
... |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
12.0 |
7.1 |
NaN |
0 |
5 rows × 28 columns
# 显示部分(默认返回后五行)
imdb.tail()
|
color |
director_name |
num_critic_for_reviews |
duration |
director_facebook_likes |
actor_3_facebook_likes |
actor_2_name |
actor_1_facebook_likes |
gross |
genres |
... |
num_user_for_reviews |
language |
country |
content_rating |
budget |
title_year |
actor_2_facebook_likes |
imdb_score |
aspect_ratio |
movie_facebook_likes |
---|
5038 |
Color |
Scott Smith |
1.0 |
87.0 |
2.0 |
318.0 |
Daphne Zuniga |
637.0 |
NaN |
Comedy|Drama |
... |
6.0 |
English |
Canada |
NaN |
NaN |
2013.0 |
470.0 |
7.7 |
NaN |
84 |
5039 |
Color |
NaN |
43.0 |
43.0 |
NaN |
319.0 |
Valorie Curry |
841.0 |
NaN |
Crime|Drama|Mystery|Thriller |
... |
359.0 |
English |
USA |
TV-14 |
NaN |
NaN |
593.0 |
7.5 |
16.00 |
32000 |
5040 |
Color |
Benjamin Roberds |
13.0 |
76.0 |
0.0 |
0.0 |
Maxwell Moody |
0.0 |
NaN |
Drama|Horror|Thriller |
... |
3.0 |
English |
USA |
NaN |
1400.0 |
2013.0 |
0.0 |
6.3 |
NaN |
16 |
5041 |
Color |
Daniel Hsia |
14.0 |
100.0 |
0.0 |
489.0 |
Daniel Henney |
946.0 |
10443.0 |
Comedy|Drama|Romance |
... |
9.0 |
English |
USA |
PG-13 |
NaN |
2012.0 |
719.0 |
6.3 |
2.35 |
660 |
5042 |
Color |
Jon Gunn |
43.0 |
90.0 |
16.0 |
16.0 |
Brian Herzlinger |
86.0 |
85222.0 |
Documentary |
... |
84.0 |
English |
USA |
PG |
1100.0 |
2004.0 |
23.0 |
6.6 |
1.85 |
456 |
5 rows × 28 columns
# 显示部分(指定返回行数)
imdb.head(10)
|
color |
director_name |
num_critic_for_reviews |
duration |
director_facebook_likes |
actor_3_facebook_likes |
actor_2_name |
actor_1_facebook_likes |
gross |
genres |
... |
num_user_for_reviews |
language |
country |
content_rating |
budget |
title_year |
actor_2_facebook_likes |
imdb_score |
aspect_ratio |
movie_facebook_likes |
---|
0 |
Color |
James Cameron |
723.0 |
178.0 |
0.0 |
855.0 |
Joel David Moore |
1000.0 |
760505847.0 |
Action|Adventure|Fantasy|Sci-Fi |
... |
3054.0 |
English |
USA |
PG-13 |
237000000.0 |
2009.0 |
936.0 |
7.9 |
1.78 |
33000 |
1 |
Color |
Gore Verbinski |
302.0 |
169.0 |
563.0 |
1000.0 |
Orlando Bloom |
40000.0 |
309404152.0 |
Action|Adventure|Fantasy |
... |
1238.0 |
English |
USA |
PG-13 |
300000000.0 |
2007.0 |
5000.0 |
7.1 |
2.35 |
0 |
2 |
Color |
Sam Mendes |
602.0 |
148.0 |
0.0 |
161.0 |
Rory Kinnear |
11000.0 |
200074175.0 |
Action|Adventure|Thriller |
... |
994.0 |
English |
UK |
PG-13 |
245000000.0 |
2015.0 |
393.0 |
6.8 |
2.35 |
85000 |
3 |
Color |
Christopher Nolan |
813.0 |
164.0 |
22000.0 |
23000.0 |
Christian Bale |
27000.0 |
448130642.0 |
Action|Thriller |
... |
2701.0 |
English |
USA |
PG-13 |
250000000.0 |
2012.0 |
23000.0 |
8.5 |
2.35 |
164000 |
4 |
NaN |
Doug Walker |
NaN |
NaN |
131.0 |
NaN |
Rob Walker |
131.0 |
NaN |
Documentary |
... |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
12.0 |
7.1 |
NaN |
0 |
5 |
Color |
Andrew Stanton |
462.0 |
132.0 |
475.0 |
530.0 |
Samantha Morton |
640.0 |
73058679.0 |
Action|Adventure|Sci-Fi |
... |
738.0 |
English |
USA |
PG-13 |
263700000.0 |
2012.0 |
632.0 |
6.6 |
2.35 |
24000 |
6 |
Color |
Sam Raimi |
392.0 |
156.0 |
0.0 |
4000.0 |
James Franco |
24000.0 |
336530303.0 |
Action|Adventure|Romance |
... |
1902.0 |
English |
USA |
PG-13 |
258000000.0 |
2007.0 |
11000.0 |
6.2 |
2.35 |
0 |
7 |
Color |
Nathan Greno |
324.0 |
100.0 |
15.0 |
284.0 |
Donna Murphy |
799.0 |
200807262.0 |
Adventure|Animation|Comedy|Family|Fantasy|Musi... |
... |
387.0 |
English |
USA |
PG |
260000000.0 |
2010.0 |
553.0 |
7.8 |
1.85 |
29000 |
8 |
Color |
Joss Whedon |
635.0 |
141.0 |
0.0 |
19000.0 |
Robert Downey Jr. |
26000.0 |
458991599.0 |
Action|Adventure|Sci-Fi |
... |
1117.0 |
English |
USA |
PG-13 |
250000000.0 |
2015.0 |
21000.0 |
7.5 |
2.35 |
118000 |
9 |
Color |
David Yates |
375.0 |
153.0 |
282.0 |
10000.0 |
Daniel Radcliffe |
25000.0 |
301956980.0 |
Adventure|Family|Fantasy|Mystery |
... |
973.0 |
English |
UK |
PG |
250000000.0 |
2009.0 |
11000.0 |
7.5 |
2.35 |
10000 |
10 rows × 28 columns
# 传入列名列表选取多列,返回的是新的DataFrame(只有传入单个列名,如imdb['color'],才返回Series)
imdb[['color','director_name']]
|
color |
director_name |
---|
0 |
Color |
James Cameron |
1 |
Color |
Gore Verbinski |
2 |
Color |
Sam Mendes |
3 |
Color |
Christopher Nolan |
4 |
NaN |
Doug Walker |
... |
... |
... |
5038 |
Color |
Scott Smith |
5039 |
Color |
NaN |
5040 |
Color |
Benjamin Roberds |
5041 |
Color |
Daniel Hsia |
5042 |
Color |
Jon Gunn |
5043 rows × 2 columns
# head只能返回前n行,无法取中间指定的行范围(例如第10~19行),这种情况需要用iloc切片
row_bf = imdb[['director_name','movie_title','imdb_score']]
row_bf.head()
|
director_name |
movie_title |
imdb_score |
---|
0 |
James Cameron |
Avatar |
7.9 |
1 |
Gore Verbinski |
Pirates of the Caribbean: At World's End |
7.1 |
2 |
Sam Mendes |
Spectre |
6.8 |
3 |
Christopher Nolan |
The Dark Knight Rises |
8.5 |
4 |
Doug Walker |
Star Wars: Episode VII - The Force Awakens ... |
7.1 |
# 用iloc按位置对行和列做切片:取第10~19行(不含20),列取全部
row_bf.iloc[10:20,:]
|
director_name |
movie_title |
imdb_score |
---|
10 |
Zack Snyder |
Batman v Superman: Dawn of Justice |
6.9 |
11 |
Bryan Singer |
Superman Returns |
6.1 |
12 |
Marc Forster |
Quantum of Solace |
6.7 |
13 |
Gore Verbinski |
Pirates of the Caribbean: Dead Man's Chest |
7.3 |
14 |
Gore Verbinski |
The Lone Ranger |
6.5 |
15 |
Zack Snyder |
Man of Steel |
7.2 |
16 |
Andrew Adamson |
The Chronicles of Narnia: Prince Caspian |
6.6 |
17 |
Joss Whedon |
The Avengers |
8.1 |
18 |
Rob Marshall |
Pirates of the Caribbean: On Stranger Tides |
6.7 |
19 |
Barry Sonnenfeld |
Men in Black 3 |
6.8 |
# 限制列切片
tmp_df = row_bf.iloc[10:20,0:2]
tmp_df
|
director_name |
movie_title |
---|
10 |
Zack Snyder |
Batman v Superman: Dawn of Justice |
11 |
Bryan Singer |
Superman Returns |
12 |
Marc Forster |
Quantum of Solace |
13 |
Gore Verbinski |
Pirates of the Caribbean: Dead Man's Chest |
14 |
Gore Verbinski |
The Lone Ranger |
15 |
Zack Snyder |
Man of Steel |
16 |
Andrew Adamson |
The Chronicles of Narnia: Prince Caspian |
17 |
Joss Whedon |
The Avengers |
18 |
Rob Marshall |
Pirates of the Caribbean: On Stranger Tides |
19 |
Barry Sonnenfeld |
Men in Black 3 |
# iloc按当前数据框中的整数位置选取(从0开始,切片不含结束位置),与index的label无关:这里2:4取到的是label为12、13的两行
tmp_df.iloc[2:4,:]
|
director_name |
movie_title |
---|
12 |
Marc Forster |
Quantum of Solace |
13 |
Gore Verbinski |
Pirates of the Caribbean: Dead Man's Chest |
# 想通过index的label去定位就用loc(比iloc少了前面的i);loc按label查找,与行所在位置无关,且label切片包含结束值(15:17取到15、16、17)
tmp_df.loc[15:17,:]
|
director_name |
movie_title |
---|
15 |
Zack Snyder |
Man of Steel |
16 |
Andrew Adamson |
The Chronicles of Narnia: Prince Caspian |
17 |
Joss Whedon |
The Avengers |
# 列也可以用label切片:':'director_name''表示从第一列取到director_name列(包含该列)
tmp_df.loc[15:17,:'director_name']
|
director_name |
---|
15 |
Zack Snyder |
16 |
Andrew Adamson |
17 |
Joss Whedon |
版权声明:本文内容由网络用户投稿,版权归原作者所有,本站不拥有其著作权,亦不承担相应法律责任。如果您发现本站中有涉嫌抄袭或描述失实的内容,请联系我们jiasou666@gmail.com 处理,核实后本网站将在24小时内删除侵权内容。
暂时没有评论,来抢沙发吧~