# import numpy as np
import pandas as pd
# Report which pandas build this notebook ran against. Written as a
# function call (not the Python 2 print statement) so the line parses
# under both Python 2 and Python 3, matching the print(df) call below.
print("Using {} , version {}".format(pd.__name__, pd.__version__))
Using pandas , version 0.23.4
DataFrame
# Constructing a DataFrame without any data gives a frame that has
# neither columns nor index entries; printing it shows that state.
df = pd.DataFrame(data=None)
print(df)
Empty DataFrame
Columns: []
Index: []
# Column-oriented input: every key becomes a column label and every list
# supplies that column's values, one entry per row.
# The mapping is called `people` rather than `dict` so the built-in
# `dict` type stays usable in the rest of the session.
people = {
    'name': ["Tom", "Bob", "Mary", "James"],
    'age': [18, 30, 25, 40],
    'city': ["Beijing", "ShangHai", "GuangZhou", "ShenZhen"],
}
df = pd.DataFrame(people)
df
age | city | name | |
---|---|---|---|
0 | 18 | Beijing | Tom |
1 | 30 | ShangHai | Bob |
2 | 25 | GuangZhou | Mary |
3 | 40 | ShenZhen | James |
# Row-oriented input: each inner list is one row, `cols` labels the
# fields, and a named Index ('person') labels the rows.
cols = ['age', 'city']
data = [
    [18, 'Beijing'],
    [30, 'ShangHai'],
    [25, 'GuangZhou'],
    [40, 'ShenZhen'],
]
index = pd.Index(["Tom", "Bob", "Mary", "James"], name='person')
df = pd.DataFrame(data, index=index, columns=cols)
df
age | city | |
---|---|---|
person | ||
Tom | 18 | Beijing |
Bob | 30 | ShangHai |
Mary | 25 | GuangZhou |
James | 40 | ShenZhen |
2. DataFrame
2.1 Columns
Add column
# Rebuild the sample frame used by the column examples that follow.
# Every key becomes a column label and every list supplies that column's
# values, one entry per row.
# The mapping is called `people` rather than `dict` so the built-in
# `dict` type stays usable in the rest of the session.
people = {
    'name': ["Tom", "Bob", "Mary", "James"],
    'age': [18, 30, 25, 40],
    'city': ["Beijing", "ShangHai", "GuangZhou", "ShenZhen"],
}
df = pd.DataFrame(people)
df
age | city | name | |
---|---|---|---|
0 | 18 | Beijing | Tom |
1 | 30 | ShangHai | Bob |
2 | 25 | GuangZhou | Mary |
3 | 40 | ShenZhen | James |
# Assigning a scalar to a column label that does not exist yet creates
# the column and fills every row with that value.
df['country'] = 'USA'
df
age | city | name | country | |
---|---|---|---|---|
0 | 18 | Beijing | Tom | USA |
1 | 30 | ShangHai | Bob | USA |
2 | 25 | GuangZhou | Mary | USA |
3 | 40 | ShenZhen | James | USA |
# A new column can also be created from an existing one: 'adress' starts
# out with the same values as 'country'. The values are independent
# afterwards (overwriting 'country' later leaves 'adress' unchanged).
df['adress'] = df['country']
df
age | city | name | country | adress | |
---|---|---|---|---|---|
0 | 18 | Beijing | Tom | USA | USA |
1 | 30 | ShangHai | Bob | USA | USA |
2 | 25 | GuangZhou | Mary | USA | USA |
3 | 40 | ShenZhen | James | USA | USA |
Change column values
# Assigning to an existing column label overwrites every value in that
# column; other columns are not affected.
df['country'] = 'China'
df
age | city | name | country | adress | |
---|---|---|---|---|---|
0 | 18 | Beijing | Tom | China | USA |
1 | 30 | ShangHai | Bob | China | USA |
2 | 25 | GuangZhou | Mary | China | USA |
3 | 40 | ShenZhen | James | China | USA |
# Element-wise string concatenation: each row's 'city' and 'country' are
# joined with a ',' and the result replaces the old 'adress' values.
df['adress'] = df['city']+','+ df['country']
df
age | city | name | country | adress | |
---|---|---|---|---|---|
0 | 18 | Beijing | Tom | China | Beijing,China |
1 | 30 | ShangHai | Bob | China | ShangHai,China |
2 | 25 | GuangZhou | Mary | China | GuangZhou,China |
3 | 40 | ShenZhen | James | China | ShenZhen,China |
Delete columns
# Two ways to remove a column:
#   * drop() with axis=1 interprets the label as a column name, and
#     inplace=True modifies df itself instead of returning a new frame;
#   * `del df[label]` removes a single column by its label.
df.drop('country',axis=1, inplace=True)
del df['city']
df
age | name | adress | |
---|---|---|---|
0 | 18 | Tom | Beijing,China |
1 | 30 | Bob | ShangHai,China |
2 | 25 | Mary | GuangZhou,China |
3 | 40 | James | ShenZhen,China |
Select columns
# Indexing with a single column label returns that column as a Series.
df['age']
0 18
1 30
2 25
3 40
Name: age, dtype: int64
# Attribute-style access returns the 'name' column as a Series as well.
# NOTE: per the pandas indexing docs this shortcut only works when the
# column label is a valid Python identifier that does not clash with an
# existing DataFrame attribute or method; df['name'] is the safer form.
df.name
0 Tom
1 Bob
2 Mary
3 James
Name: name, dtype: object
# Indexing with a list of labels returns a DataFrame holding just those
# columns, in the order given.
df[['age','name']]
age | name | |
---|---|---|
0 | 18 | Tom |
1 | 30 | Bob |
2 | 25 | Mary |
3 | 40 | James |
# The column labels themselves are available as an Index object.
df.columns
Index([u'age', u'name', u'adress'], dtype='object')
# Alternative ways to rename the columns, kept here for reference.
# (a) Assign a complete list of new labels:
# df.columns = ['Age','Name','Adress']
# df
# (b) Pass an explicit old-name -> new-name mapping to rename():
# df.rename(index = str, columns = {'age':'Age','name':'Name','adress':'Adress'})
# Used here: apply str.capitalize to every column label, modifying df
# in place.
df.rename(str.capitalize, axis='columns',inplace =True)
df
Age | Name | Adress | |
---|---|---|---|
0 | 18 | Tom | Beijing,China |
1 | 30 | Bob | ShangHai,China |
2 | 25 | Mary | GuangZhou,China |
3 | 40 | James | ShenZhen,China |
Set column values with conditions
# Label every row with an age bucket. All rows start as 'elderly'; rows
# with Age up to and including 18 become 'young', and rows with Age
# above 18 up to and including 30 become 'middle_aged'.
df['Group'] = 'elderly'
df.loc[df['Age'].le(18), 'Group'] = 'young'
df.loc[df['Age'].gt(18) & df['Age'].le(30), 'Group'] = 'middle_aged'
df
Age | Name | Adress | Group | |
---|---|---|---|---|
0 | 18 | Tom | Beijing,China | young |
1 | 30 | Bob | ShangHai,China | middle_aged |
2 | 25 | Mary | GuangZhou,China | middle_aged |
3 | 40 | James | ShenZhen,China | elderly |