[关闭]
@Channelchan 2017-11-25T15:25:53.000000Z 字数 3410 阅读 12670

多因子组合

Step1_获取数据

  # Step 1: fetch the factor panel and the close prices for the 'hs300' pool.
  from datetime import datetime

  import alphalens
  import pandas as pd
  from fxdayu_data import DataAPI

  # Sample window, plus the horizon (`period`) that later steps reuse.
  start = datetime(2017, 1, 1)
  end = datetime(2017, 11, 11)
  period = 5

  codes = DataAPI.info.codes('hs300')
  factors = (
      'PB',
      'LCAP',
      'HBETA',
      'ROE',
      'InvestCashGrowRate',
      'ROC20',
  )

  # Factor values for every code in the pool over the sample window.
  PN = DataAPI.factor(codes, factors, start=start, end=end)

  # Query only the 'close' field, then take its cross-section with minor_xs.
  # NOTE(review): 'D' is presumably the daily frequency - confirm against DataAPI.
  prices = DataAPI.candle(codes, 'D', 'close', start=start, end=end).minor_xs('close')

  print(PN)
<class 'pandas.core.panel.Panel'>
Dimensions: 300 (items) x 207 (major_axis) x 6 (minor_axis)
Items axis: 000001.XSHE to 603993.XSHG
Major_axis axis: 2017-01-03 15:00:00 to 2017-11-10 15:00:00
Minor_axis axis: HBETA to ROE

Step2_数据预处理

横截面标准化/去极值/加干扰

  # Step 2: winsorize, standardize and disturb every factor.
  from fxdayu_alphaman.factor.admin import Admin
  from fxdayu_alphaman.factor.factor import Factor

  # Swap the panel's first and last axes, then keep one frame per item
  # in a plain dict keyed by the item name.
  origin_factors = dict(PN.transpose(2, 1, 0).iteritems())

  # Instantiate the factor toolkit.
  f = Factor()

  # Winsorize each frame first, then standardize the winsorized result.
  PN_handle = pd.Panel({
      name: f.standardize(f.winsorize(frame))
      for name, frame in origin_factors.items()
  })

  # Add disturbance to every processed factor frame.
  PN_disturbed = pd.Panel({
      name: f.get_disturbed_factor(frame)
      for name, frame in PN_handle.iteritems()
  })

Step3_权重优化生成新因子

1_获取IC的DataFrame

2_获取IC权重的DataFrame

3_计算最优权重组合的新因子值的MultiIndex

  # Step 3: derive IC-based weights and combine the factors into a new one.
  f_admin = Admin()

  # Stack each disturbed factor frame once; the same dict feeds both the IC
  # computation and the weighted combination, so it is not rebuilt per call.
  stacked_factors = {name: item.stack() for name, item in PN_disturbed.iteritems()}

  # IC DataFrame for each factor. The horizon comes from `period` so it stays
  # consistent with the `ic_df[period]` lookup below.
  ic_df = f_admin.get_factors_ic_df(
      stacked_factors,
      pool=codes,
      start=start,
      end=end,
      price=prices,
      quantiles=5,
      periods=(period,),
  )

  # IC weight DataFrame, computed with rollback_period=10.
  ic_weight_df = f_admin.get_ic_weight_df(ic_df[period], period, rollback_period=10)

  # Combine the factors with the optimized weights; the combined values are
  # exposed as `multifactor_value` (printed below with a date/asset MultiIndex).
  new_factor = f_admin.ic_cov_weighted_factor(stacked_factors, ic_weight_df=ic_weight_df)
  factor = new_factor.multifactor_value
  print(factor.tail())
                                   factor
date                asset                
2017-11-10 15:00:00 601997.XSHG -0.338474
                    601998.XSHG  0.630329
                    603160.XSHG  2.109389
                    603858.XSHG  0.168304
                    603993.XSHG  1.060125

Step4_用Alphalens观察绩效

1_获取factor_data

2_计算mean_return_by_q, std_err_by_q

3_展示Quantile与IC图表

4_计算多空累积收益

  # Step 4: evaluate the combined factor with alphalens.
  import matplotlib.pyplot as plt

  # Align factor values with forward returns, bucketed into 5 quantiles.
  factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
      factor,
      prices,
      quantiles=5,
  )

  # Per-date mean return and standard error for each quantile.
  mean_return_by_q, std_err_by_q = alphalens.performance.mean_return_by_quantile(
      factor_data,
      by_date=True,
  )

  alphalens.plotting.plot_cumulative_returns_by_quantile(mean_return_by_q, period)
  plt.show()

![png](output_10_1.png)

  # Compute both IC summaries first, then draw the two charts.
  ic = alphalens.performance.factor_information_coefficient(factor_data)
  mean_monthly_ic = alphalens.performance.mean_information_coefficient(
      factor_data,
      by_time='M',
  )

  # IC histogram followed by the monthly mean-IC heatmap.
  alphalens.plotting.plot_ic_hist(ic)
  alphalens.plotting.plot_monthly_ic_heatmap(mean_monthly_ic)
  plt.show()

![png](output_11_0.png)

![png](output_11_1.png)

  # Long-short cumulative returns.
  factor_returns = alphalens.performance.factor_returns(factor_data)

  # NOTE(review): this plots column 10 while `period` is 5 elsewhere in the
  # walkthrough - confirm the 10 is intentional.
  long_short_10 = factor_returns[10]
  alphalens.plotting.plot_cumulative_returns(long_short_10)
  plt.show()

![png](output_12_0.png)

IC滚动选因子再选股

  import numpy as np

  # Factors that take part in the rolling IC selection.
  big_factor = ['LCAP', 'ROE']


  def cal_big(ic_df, threshold=0.03):
      """Flag the IC values of the ``big_factor`` columns that reach a threshold.

      Args:
          ic_df: DataFrame of IC values with a column for every name in
              ``big_factor``.
          threshold: Minimum IC for a cell to count as selected.

      Returns:
          A new DataFrame restricted to the ``big_factor`` columns, holding
          1.0 where the IC is >= ``threshold`` and NaN everywhere else
          (including cells whose IC is NaN). ``ic_df`` is left untouched.
      """
      # Build the result from a boolean mask instead of assigning into a
      # column subset, which avoids pandas' SettingWithCopyWarning.
      passed = ic_df[big_factor] >= threshold
      # astype(float) maps True/False to 1.0/0.0; where() blanks the 0.0 cells.
      return passed.astype(float).where(passed)
  def big_select(rows, n=30):
      """Pick the ``n`` largest codes for every factor flagged in one row.

      Args:
          rows: A ``(label, row)`` pair as produced by ``DataFrame.iterrows()``;
              the row's non-NaN entries name the factors to use.
          n: Number of largest-valued codes kept per factor (default 30).

      Returns:
          A dict mapping each chosen code to 1; empty when the row has no
          non-NaN entry.
      """
      _, flags = rows
      chosen_factors = flags.dropna().index
      if len(chosen_factors) == 0:
          return {}

      # NOTE(review): the query uses length=1 and never the row's own date, so
      # it presumably returns the most recent factor values for every row -
      # confirm this does not introduce look-ahead.
      snapshot = DataAPI.factor(codes, chosen_factors, length=1).iloc[:, -1, :].T

      # A code picked by several factors appears once, valued 1.
      return {
          code: 1
          for _, values in snapshot.iteritems()
          for code in values.nlargest(n).index
      }
  # Run the selection once per row of the flagged IC table (key 5 of ic_df),
  # then label the resulting rows with the factor panel's major axis.
  flagged_ic = cal_big(ic_df[5])
  selections = [big_select(pair) for pair in flagged_ic.iterrows()]
  select_big = pd.DataFrame(selections, index=PN.major_axis[:])

  # Stack the selection table into a single Series.
  big_result = select_big.stack()

使用alphaman计算绩效,再用图片显示

  # The class gets its own alias so that binding the instance to `s_admin`
  # does not overwrite the class it was created from.
  from fxdayu_alphaman.selector.admin import Admin as SelectorAdmin

  s_admin = SelectorAdmin()

  # Performance of the rolling-IC selection, labelled "Roll_IC".
  big_performance = s_admin.calculate_performance(
      "Roll_IC",
      big_result[big_result > 0],  # keep only the selected entries (value > 0)
      start,
      end,
      periods=(5,),
  )
  import alphalens


  def plot_performance(performance):
      """Plot cumulative returns by quantile for a performance result.

      Args:
          performance: Mapping-like object whose ``"mean_return"`` entry is
              passed to alphalens' cumulative-returns-by-quantile plot.

      The plot is drawn with ``period=5`` and shown immediately.
      """
      mean_return = performance["mean_return"]
      alphalens.plotting.plot_cumulative_returns_by_quantile(mean_return, period=5)
      plt.show()


  plot_performance(big_performance)

![png](output_21_1.png)

添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注