[关闭]
@Channelchan 2018-03-22T14:50:34.000000Z 字数 5715 阅读 54892

Pandas计算

pandas


pct_change()

计算百分比变化Percentage Change

  1. import pandas as pd
  2. stock1 = pd.read_excel('sz50.xlsx',sheetname='600036.XSHG', index_col='datetime')
  1. one_day_change = stock1.close.pct_change()
  2. five_day_change = stock1.close.pct_change(5)
  3. ten_day_change = stock1.close.pct_change(10)
  1. import matplotlib.pyplot as plt
  2. plt.figure(figsize=(15,7))
  3. plt.plot(one_day_change.iloc[-50:], label='one')
  4. plt.plot(five_day_change.iloc[-50:], label='five')
  5. plt.plot(ten_day_change.iloc[-50:], label='ten')
  6. plt.legend()
  7. plt.show()

output_4_0.png-60.7kB

cov()

计算协方差Covariance

  1. stock2 = pd.read_excel('sz50.xlsx',sheetname='600050.XSHG', index_col='datetime')
  2. stock3 = pd.read_excel('sz50.xlsx',sheetname='601318.XSHG', index_col='datetime')
  1. five_day_df = pd.concat([stock1.close.pct_change(5),stock2.close.pct_change(5),stock3.close.pct_change(5)],keys=['stock1','stock2','stock3'],axis=1)
  1. print(five_day_df.tail())
                       stock1    stock2    stock3
datetime                                         
2017-11-14 15:00:00  0.036622 -0.014538  0.077249
2017-11-15 15:00:00  0.029140  0.018947  0.071039
2017-11-16 15:00:00  0.038049 -0.029293  0.100700
2017-11-17 15:00:00  0.085355 -0.034102  0.076982
2017-11-20 15:00:00  0.103342 -0.016064  0.079239
  1. print(five_day_df.cov())
          stock1    stock2    stock3
stock1  0.001271  0.000139  0.000806
stock2  0.000139  0.003739  0.000608
stock3  0.000806  0.000608  0.001203

corr()

计算相关系数Correlation

  1. # 相关系数
  2. correlation = five_day_df.corr()
  3. print(correlation)
          stock1    stock2    stock3
stock1  1.000000  0.085415  0.652129
stock2  0.085415  1.000000  0.315773
stock3  0.652129  0.315773  1.000000
  1. # spearman排序相关性
  2. spearman = five_day_df.corr(method='spearman')
  3. print(spearman)
          stock1    stock2    stock3
stock1  1.000000 -0.003432  0.659493
stock2 -0.003432  1.000000  0.165067
stock3  0.659493  0.165067  1.000000

rank()

数据排序

  1. print(five_day_df.tail())
                       stock1    stock2    stock3
datetime                                         
2017-11-14 15:00:00  0.036622 -0.014538  0.077249
2017-11-15 15:00:00  0.029140  0.018947  0.071039
2017-11-16 15:00:00  0.038049 -0.029293  0.100700
2017-11-17 15:00:00  0.085355 -0.034102  0.076982
2017-11-20 15:00:00  0.103342 -0.016064  0.079239
  1. # 正向纵序
  2. print(five_day_df.tail().rank())
                     stock1  stock2  stock3
datetime                                   
2017-11-14 15:00:00     2.0     4.0     3.0
2017-11-15 15:00:00     1.0     5.0     1.0
2017-11-16 15:00:00     3.0     2.0     5.0
2017-11-17 15:00:00     4.0     1.0     2.0
2017-11-20 15:00:00     5.0     3.0     4.0
  1. #横向倒序
  2. print(five_day_df.rank(axis=1, ascending=False).tail())
                     stock1  stock2  stock3
datetime                                   
2017-11-14 15:00:00     2.0     3.0     1.0
2017-11-15 15:00:00     2.0     3.0     1.0
2017-11-16 15:00:00     2.0     3.0     1.0
2017-11-17 15:00:00     1.0     3.0     2.0
2017-11-20 15:00:00     1.0     3.0     2.0

rolling()

滚动计算

  1. rolling = stock1.close.rolling(window=20)
  1. print(rolling.mean().tail())
datetime
2017-11-14 15:00:00    108.1320
2017-11-15 15:00:00    108.2855
2017-11-16 15:00:00    108.4850
2017-11-17 15:00:00    109.0060
2017-11-20 15:00:00    109.8100
Name: close, dtype: float64
  1. plt.figure(figsize=(15,7))
  2. plt.plot(rolling.mean())
  3. plt.plot(stock1.close)
  4. plt.show()

output_20_0.png-30.3kB

apply()

调用自定义函数

  1. import numpy as np
  2. def cal_range(array):
  3. return array.max() - array.min()
  4. print(stock1.rolling(10).apply(cal_range).tail())
                     close   high    low   open      volume
datetime                                                   
2017-11-14 15:00:00   5.63   5.63   5.70   4.83  45715400.0
2017-11-15 15:00:00   5.63   5.63   5.70   5.55  45715400.0
2017-11-16 15:00:00   5.95   5.63   5.70   5.55  45599200.0
2017-11-17 15:00:00  11.06  10.17   8.06   6.83  55576000.0
2017-11-20 15:00:00  13.96  13.92  10.34  10.82  55576000.0

Computing rolling pairwise covariances and correlations

  1. ## 50天滚动的五日收益协方差
  2. cov_50 = five_day_df.rolling(50).cov()
  3. print(cov_50.tail(6))
                              stock1    stock2    stock3
datetime                                                
2017-11-17 15:00:00 stock1  0.000743  0.000248  0.000773
                    stock2  0.000248  0.001945  0.000923
                    stock3  0.000773  0.000923  0.001420
2017-11-20 15:00:00 stock1  0.000931  0.000207  0.000865
                    stock2  0.000207  0.001925  0.000875
                    stock3  0.000865  0.000875  0.001438
  1. ## 50天滚动的五日收益相关性
  2. corr_50 = five_day_df.rolling(50,).corr()
  3. print(corr_50.tail(6))
                              stock1    stock2    stock3
datetime                                                
2017-11-17 15:00:00 stock1  1.000000  0.206307  0.752711
                    stock2  0.206307  1.000000  0.555120
                    stock3  0.752711  0.555120  1.000000
2017-11-20 15:00:00 stock1  1.000000  0.154276  0.747151
                    stock2  0.154276  1.000000  0.525728
                    stock3  0.747151  0.525728  1.000000
  1. plt.figure(figsize=(15,7))
  2. plt.plot(corr_50.unstack()['stock1','stock3'])
  3. plt.show()

output_26_0.png-29.8kB

aggregate

聚合,主要用于处理数据运算(诸如统计平均值,求和等),并返回计算后的运算结果。

  1. # 对多只股票进行求和
  2. sum_agg = five_day_df.rolling(10).aggregate(np.sum)
  3. print(sum_agg.tail())
                       stock1    stock2    stock3
datetime                                         
2017-11-14 15:00:00  0.025171  0.478886  0.434446
2017-11-15 15:00:00  0.050571  0.396198  0.438228
2017-11-16 15:00:00  0.071251  0.293819  0.494201
2017-11-17 15:00:00  0.193616  0.243951  0.563012
2017-11-20 15:00:00  0.339433  0.238697  0.643087
  1. # 对stock1进行多个不同的计算
  2. multi_agg = five_day_df.stock1.rolling(10).agg([np.sum, np.mean, np.std])
  3. print(multi_agg.tail())
                          sum      mean       std
datetime                                         
2017-11-14 15:00:00  0.025171  0.002517  0.026701
2017-11-15 15:00:00  0.050571  0.005057  0.028006
2017-11-16 15:00:00  0.071251  0.007125  0.029727
2017-11-17 15:00:00  0.193616  0.019362  0.034364
2017-11-20 15:00:00  0.339433  0.033943  0.036102
  1. # 针对不要数据用不同算法
  2. multi_algo = five_day_df.rolling(20).agg({'stock1' : np.sum,
  3. 'stock2' : np.std,
  4. 'stock3' : np.mean})
  5. print(multi_algo.tail())
                       stock1    stock2    stock3
datetime                                         
2017-11-14 15:00:00  0.171306  0.049099  0.046782
2017-11-15 15:00:00  0.159352  0.048760  0.047016
2017-11-16 15:00:00  0.169747  0.046769  0.049882
2017-11-17 15:00:00  0.233389  0.046673  0.052029
2017-11-20 15:00:00  0.350535  0.046850  0.053621
  1. # resample成周线后用agg获取所需要的数据
  2. print(stock1.resample('W').agg({'high':'max','low':'min', 'close':'last', 'open':'first'}).tail())
              high     low   close    open
datetime                                  
2017-10-29  112.29  105.38  112.13  106.66
2017-11-05  111.93  106.22  107.98  111.05
2017-11-12  109.49  104.87  108.02  107.34
2017-11-19  117.67  108.46  117.24  108.46
2017-11-26  122.50  116.20  121.82  116.92

expanding

expanding不会返回NaN,功能与rolling相似。

  1. sn = pd.Series([1, 2, np.nan, 3, np.nan, 4])
  1. print(sn.expanding().sum())
0     1.0
1     3.0
2     3.0
3     6.0
4     6.0
5    10.0
dtype: float64
  1. print(sn.cumsum())
0     1.0
1     3.0
2     NaN
3     6.0
4     NaN
5    10.0
dtype: float64
  1. print(sn.cumsum().fillna(method='ffill'))
0     1.0
1     3.0
2     3.0
3     6.0
4     6.0
5    10.0
dtype: float64

Exponentially Weighted Windows

image.png-4kB

  1. def SMA(A,n,m):
  2. # 设置alpha的比例
  3. alpha = m/n
  4. #通过ewm计算递归函数
  5. return A.ewm(alpha=alpha, adjust=False).mean()
  6. print(SMA(stock1.close,7,2).tail())
datetime
2017-11-14 15:00:00    109.525065
2017-11-15 15:00:00    110.017903
2017-11-16 15:00:00    110.621360
2017-11-17 15:00:00    112.512400
2017-11-20 15:00:00    115.171714
Name: close, dtype: float64
添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注