[关闭]
@Channelchan 2017-05-06T09:56:30.000000Z 字数 2501 阅读 17854

Python 金融数据处理

Panel


三维数据处理

Panel

299 (items) x 488 (major_axis) x 5 (minor_axis)

数据格式转换

Dict/Series/DataFrame

  # TA-Lib only operates on np.array inputs, so every price column is
  # converted with ``.values`` and processed one at a time.
  mom20 = pd.DataFrame(
      {name: ta.ROC(item.values, 20) for name, item in prices.iteritems()},
      index=prices.index
  )

多索引

stack()

  # Replace the wide frame with its stacked form (the result of
  # DataFrame.stack(), as named by the section heading above).
  mom20 = mom20.stack()

迭代器

iterrows/iteritems

降维计算ATR

def atr()
def adx()

  def panel_2_frame(panel, function, *args, **kwargs):
      """Reduce a Panel to a DataFrame by applying ``function`` to each item.

      Args:
          panel: a ``pandas.Panel``; each of its items is handed to
              ``function`` in turn.
          function: callable invoked as ``function(frame, *args, **kwargs)``
              for every ``(name, frame)`` pair yielded by ``panel.iteritems()``.
          *args, **kwargs: forwarded unchanged to ``function``.

      Returns:
          A ``pandas.DataFrame`` built from a dict that maps each item name to
          the value returned by ``function`` for that item.

      Raises:
          TypeError: if ``panel`` is not a ``pandas.Panel``.
      """
      # Reject anything that is not a Panel before doing any work.
      if not isinstance(panel, pd.Panel):
          raise TypeError("type of panel should be pandas.Panel")

      per_item = {}
      for label, frame in panel.iteritems():
          per_item[label] = function(frame, *args, **kwargs)
      return pd.DataFrame(per_item)
  def atr(pn, period=10):
      """Apply ``ta.abstract.ATR`` to every item of the panel ``pn``.

      Args:
          pn: panel passed on to ``panel_2_frame``. NOTE: when it contains
              missing values it is forward-filled IN PLACE, so the caller's
              object is modified.
          period: forwarded positionally to ``ta.abstract.ATR`` (default 10).

      Returns:
          The DataFrame produced by ``panel_2_frame``.
      """
      contains_nan = pn.isnull().values.any()
      if contains_nan:
          # Forward-fill gaps before the indicator sees the data.
          pn.fillna(method='ffill', inplace=True)
      indicator = ta.abstract.ATR
      return panel_2_frame(pn, indicator, period)
  def adx(pn, period=10):
      """Apply ``ta.abstract.ADX`` to every item of the panel ``pn``.

      Args:
          pn: panel passed on to ``panel_2_frame``. NOTE: when it contains
              missing values it is forward-filled IN PLACE, so the caller's
              object is modified.
          period: forwarded positionally to ``ta.abstract.ADX`` (default 10).

      Returns:
          The DataFrame produced by ``panel_2_frame``.
      """
      contains_nan = pn.isnull().values.any()
      if contains_nan:
          # Forward-fill gaps before the indicator sees the data.
          pn.fillna(method='ffill', inplace=True)
      indicator = ta.abstract.ADX
      return panel_2_frame(pn, indicator, period)

升维计算MACD

zip

pd.Panel({item: DataFrame})

  # Labels paired, in order, with the arrays returned by ta.MACD.
  columns = ['macd', 'macdsignal', 'macdhist']
  # Show the (label, array) pairs for the first price column.
  print(list(zip(columns, ta.MACD(prices.iloc[:, 0].values))))
  def MACD(series):
      """Wrap the outputs of ``ta.MACD`` for ``series`` in a DataFrame.

      Args:
          series: object exposing ``.values`` (passed to ``ta.MACD``) and
              ``.index`` (reused as the index of the result).

      Returns:
          A ``pandas.DataFrame`` whose columns come from pairing the
          module-level ``columns`` labels with the ``ta.MACD`` outputs.
      """
      outputs = ta.MACD(series.values)
      labelled = dict(zip(columns, outputs))
      return pd.DataFrame(labelled, index=series.index)
  # Raise the dimension: one MACD frame per price column, collected into a
  # Panel keyed by the column name.
  panel = pd.Panel.from_dict(
      {name: MACD(item) for name, item in prices.iteritems()}
  )
  print(panel)

计算理论最大收益

map(function, sequence, *sequence_1)

  # Period-over-period returns of the 'close' slice; the first row is dropped
  # because pct_change has no previous value for it, and remaining gaps are
  # treated as a zero return.
  df_returns = pn.minor_xs('close').pct_change().iloc[1:].fillna(value=0)

  # Compute BOTH row-wise quantiles before attaching either one as a column.
  # Assigning 'q_20' first and then calling quantile(0.8, axis=1) would make
  # the 80% quantile include the 20% quantile as if it were an asset return.
  q_20 = df_returns.quantile(0.2, axis=1)
  q_80 = df_returns.quantile(0.8, axis=1)
  df_returns['q_20'] = q_20
  df_returns['q_80'] = q_80
  def qt20_mean(row):
      """Average the entries of a row that lie strictly below its 'q_20' value.

      Args:
          row: a ``(label, Series)`` pair as yielded by
              ``DataFrame.iterrows()``; only the Series (``row[1]``) is used.
              Its 'q_20' entry is removed from the Series via ``pop``.

      Returns:
          The mean of the remaining entries whose value is less than the
          popped 'q_20' threshold.
      """
      returns = row[1]
      threshold = returns.pop('q_20')
      below = returns.values < threshold
      return returns[below].mean()
  def qt80_mean(row):
      """Average the entries of a row that lie strictly above its 'q_80' value.

      Args:
          row: a ``(label, Series)`` pair as yielded by
              ``DataFrame.iterrows()``; only the Series (``row[1]``) is used.
              Its 'q_80' entry is removed from the Series via ``pop``.

      Returns:
          The mean of the remaining entries whose value is greater than the
          popped 'q_80' threshold.
      """
      returns = row[1]
      threshold = returns.pop('q_80')
      above = returns.values > threshold
      return returns[above].mean()
  # Apply the per-row helpers with map(); each element produced by iterrows()
  # is a (label, row Series) pair.
  # NOTE(review): the rows still contain the helper columns (the other
  # quantile column, and 'qt20_mean' during the second pass) - confirm they
  # cannot satisfy the threshold comparisons.
  df_returns['qt20_mean'] = list(map(qt20_mean, df_returns.iterrows()))
  df_returns['qt80_mean'] = list(map(qt80_mean, df_returns.iterrows()))
  # Spread between the top-group and bottom-group mean for every row.
  print(df_returns.qt80_mean - df_returns.qt20_mean)

条件处理

Factors: RSI(10)<40
Filter: mom2>0

  # ta.ROC with timeperiod=2 for every price column (used as the filter).
  mom2 = pd.DataFrame(
      {name: ta.ROC(item.values, timeperiod=2) for name, item in prices.iteritems()},
      index=prices.index
  )
  # ta.RSI with timeperiod=10 for every price column (used as the factor).
  RSI = pd.DataFrame(
      {name: ta.RSI(item.values, timeperiod=10) for name, item in prices.iteritems()},
      index=prices.index
  )
  # Factor condition: keep only RSI readings below 40.
  RSI = RSI[RSI < 40]
  # Filter condition: of those, keep only cells where mom2 is positive.
  RSI_F = RSI[mom2 > 0]
  # Reshape to long form, one row per stacked entry.
  factor = RSI_F.stack().reset_index()
  factor.columns = ["datetime", "codes", "factor"]
  # Every surviving entry is flagged with the same factor value.
  factor["factor"] = 1
  factor = factor.set_index(["datetime", "codes"])
  print(factor)
  import alphalens
  import matplotlib.pyplot as plt

  # Combine the factor with the price data via alphalens.
  factor_data = alphalens.utils.get_clean_factor_and_forward_returns(factor, prices)
  # Per-quantile mean return and standard error, computed by date and
  # without demeaning.
  mean_return_by_q, std_err_by_q = alphalens.performance.mean_return_by_quantile(
      factor_data,
      by_date=True,
      demeaned=False
  )
  # Uncomment to inspect the table before plotting:
  # print(mean_return_by_q)
  alphalens.plotting.plot_cumulative_returns_by_quantile(mean_return_by_q, 5)
  plt.show()

计算因子最大收益

Groupby

Resample

添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注