import pandas as pd
from pandas import Series, DataFrame
# --- Series basics ---------------------------------------------------------
# A Series built without an explicit index gets a default integer index.
obj = pd.Series([4, 7, -5, 3])
obj
obj.values  # the underlying data
obj.index   # the index labels

# A Series with explicit string labels.
obj2 = pd.Series(
    [4, 5, -5, 4],
    index=['a', 'b', 'c', 'ss'],
)
obj2
obj2['a']        # select a single value by label
obj2[obj2 > 0]   # boolean-mask filtering keeps the index labels
obj2 * 2         # element-wise arithmetic

# NumPy ufuncs applied to a Series also preserve the index.
import numpy as np
np.exp(obj2)
# --- Building a Series from a mapping --------------------------------------
# NOTE: the mapping is named `sdata` rather than `dict` so that the builtin
# `dict` type is not shadowed for the rest of the module.
sdata = {'a': 3, 'b': 4, '()': 4}

# Keys of the mapping become the index labels.
obj3 = pd.Series(sdata)
obj3

# When an explicit index is passed, values are looked up by label; labels
# missing from the mapping (here ')') get NaN.
states = ['a', 'b', ')', '()']
obj4 = pd.Series(sdata, index=states)
obj4

# Detect missing / present values.
pd.isnull(obj4)
pd.notnull(obj4)

obj3
obj4

# Arithmetic between Series aligns on index labels.
obj3 + obj4
# --- Building a DataFrame from a dict of equal-length lists -----------------
# Each key becomes a column; each list supplies that column's values.
data = {
    'state': ['a', 'v', 'r', 'b'],
    'year': [1222, 1666, 1777, 1000],
    'pop': [1, 2.5, 13, 5],
}
frame = pd.DataFrame(data)
frame
# Output:
#     pop state  year
# 0   1.0     a  1222
# 1   2.5     v  1666
# 2  13.0     r  1777
# 3   5.0     b  1000
# Preview the first rows of the frame (5 is the default row count).
frame.head(n=5)
# Output:
#     pop state  year
# 0   1.0     a  1222
# 1   2.5     v  1666
# 2  13.0     r  1777
# 3   5.0     b  1000
# Passing `columns` fixes the column order; a requested column that is not a
# key of `data` ('nothing') is filled with NaN.
pd.DataFrame(
    data,
    columns=['year', 'state', 'pop', 'nothing'],
)
# Output:
#    year state   pop nothing
# 0  1222     a   1.0     NaN
# 1  1666     v   2.5     NaN
# 2  1777     r  13.0     NaN
# 3  1000     b   5.0     NaN
# A column can be read with attribute access when its name is a valid
# identifier and does not collide with an existing DataFrame attribute/method.
frame.year
# `frame.pop` would resolve to the DataFrame.pop *method*, not the 'pop'
# column, so this column must be selected with item access instead.
frame['pop']
frame.values  # all of the frame's data as a 2-D NumPy array
# --- Index objects -----------------------------------------------------------
obj = pd.Series(range(3), index=['1', '2', 'a'])
index = obj.index
index
index[2:]  # slicing an Index returns another Index

# Index objects are immutable: item assignment raises TypeError. The attempt
# is guarded so that the demonstration does not abort the rest of the script.
try:
    index[1] = 'g'
except TypeError as exc:
    print("Index is immutable:", exc)
# Display the DataFrame again (bare expression; shown as notebook cell output).
frame
# Output:
#     pop state  year
# 0   1.0     a  1222
# 1   2.5     v  1666
# 2  13.0     r  1777
# 3   5.0     b  1000
# The column labels of a DataFrame are themselves stored in an Index object.
frame.columns
# Output: Index(['pop', 'state', 'year'], dtype='object')
# Unlike Python's built-in sets, pandas allows an Index to contain duplicate labels:
# An Index whose labels are not unique ('a' appears twice).
dup = pd.Index(
    ['a', 'a', 'c', 'abr'],
)
dup
# Output: Index(['a', 'a', 'c', 'abr'], dtype='object')