[关闭]
@Channelchan 2018-01-05T11:09:27.000000Z 字数 3778 阅读 2259

多因子组合

  1. 读取相应数据
  2. 获取每日IC的相关系数
  3. 剔除相关系数高的因子
  4. 计算最优权重组合因子
  5. 用Alphalens查看绩效

1_读取相应数据

  1. from jaqs.data.dataapi import DataApi
  2. from jaqs.data import DataView
  3. import numpy as np
  4. from datetime import datetime
  5. import pandas as pd
  6. import warnings
  7. import alphalens
  # Suppress every Python warning for the rest of the session.
  8. warnings.filterwarnings("ignore")
  # Folder holding the dataview to load; the path is relative to the current working directory.
  9. dataview_folder = 'JAQS_Data/hs300'
  # Create an empty DataView and populate it from that folder
  # (presumably a dataview saved earlier with JAQS -- confirm).
  10. dv = DataView()
  11. dv.load_dataview(dataview_folder)
D:\Anaconda3\lib\site-packages\statsmodels\compat\pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
  from pandas.core import datetools


Dataview loaded successfully.
  def change_columns_index(signal):
      """Relabel a signal frame's columns and parse its index into datetimes.

      Column labels ending in ``SZ`` get that suffix replaced by ``XSHE`` and
      labels ending in ``SH`` get it replaced by ``XSHG``; all other labels are
      kept unchanged. Index values are converted with ``str()`` and parsed
      using the ``%Y%m%d`` format.

      Parameters
      ----------
      signal : pandas.DataFrame
          Frame whose column labels are strings and whose index values read as
          ``YYYYMMDD`` once converted to ``str``.

      Returns
      -------
      pandas.DataFrame
          A relabelled copy; the frame passed in is not modified.

      Raises
      ------
      ValueError
          If an index value does not match ``%Y%m%d``.
      """
      suffix_map = (('SZ', 'XSHE'), ('SH', 'XSHG'))
      new_names = {}
      for c in signal.columns:
          for old_suffix, new_suffix in suffix_map:
              if c.endswith(old_suffix):
                  # Swap only the trailing suffix; str.replace would also
                  # rewrite any earlier occurrence inside the label.
                  new_names[c] = c[:-len(old_suffix)] + new_suffix
                  break
      # DataFrame.rename is the supported way to relabel columns;
      # rename_axis(mapper, axis=1) no longer alters labels in current pandas.
      signal = signal.rename(columns=new_names)
      # Build a concrete list so pandas is not handed a one-shot iterator.
      signal.index = pd.Index(
          [datetime.strptime(str(x), "%Y%m%d") for x in signal.index]
      )
      return signal
  # Names of the factor fields to pull from the dataview.
  1. all_factors = ['pb', 'roe', 'price_div_dps', 'ps_ttm', 'pe_ttm', 'roa']
  # For each factor: fetch its time-series frame, keep rows from index label 20150105 onward,
  # then relabel columns / parse the index with change_columns_index.
  1. origin_factors = {f: change_columns_index(dv.get_ts(f).loc[20150105:]) for f in all_factors}
  1. from fxdayu_alphaman.factor.factor import Factor
  2. from fxdayu_alphaman.factor.admin import Admin
  # Note: this rebinds the name `f` (used above only as the comprehension variable) to a Factor instance.
  3. f = Factor()
  1. # Cross-sectional standardization / winsorization (winsorize is applied first, then standardize)
  # NOTE(review): pd.Panel was removed in pandas 0.25, so this cell needs an older pandas -- confirm the pinned version.
  2. PN_handle = pd.Panel({name: f.standardize(f.winsorize(frame)) for name, frame in origin_factors.items()})
  3. # Add disturbance to every processed factor frame
  4. PN_disturbed = pd.Panel({name: f.get_disturbed_factor(frame) for name, frame in PN_handle.iteritems()})
  # Price frame from the 'close_adj' field, relabelled the same way as the factors (no start-date slice here).
  1. prices = change_columns_index(dv.get_ts('close_adj'))

2_获取每日IC的相关系数

  def cal_daily_ic(factor_df):
      """Compute the mean information coefficient of one factor, grouped with ``by_time='D'``.

      ``factor_df`` is stacked and passed, together with the module-level
      ``prices`` frame, to alphalens' ``get_clean_factor_and_forward_returns``
      using 5 quantiles. The cleaned data is then handed to
      ``mean_information_coefficient`` and its result is returned unchanged.
      """
      stacked_factor = factor_df.stack()
      clean_data = alphalens.utils.get_clean_factor_and_forward_returns(
          stacked_factor, prices, quantiles=5
      )
      return alphalens.performance.mean_information_coefficient(
          clean_data, by_time='D'
      )
  # Run cal_daily_ic on every item of the disturbed-factor panel, keyed by factor name.
  4. daily_ic = {key: cal_daily_ic(value) for key, value in PN_disturbed.iteritems()}
  # Collect the per-factor IC frames into one Panel (pd.Panel requires pandas < 0.25).
  5. daily_panel = pd.Panel(daily_ic)
  # Take the minor-axis slice labelled 5 (presumably the 5-period forward-return column -- confirm)
  # and compute pairwise correlations between the factors' IC series.
  1. factor_corr = daily_panel.minor_xs(5).corr()
  1. print(factor_corr)
                     pb    pe_ttm  price_div_dps    ps_ttm       roa       roe
pb             1.000000  0.799523       0.673749  0.901894  0.566400  0.181035
pe_ttm         0.799523  1.000000       0.858901  0.700401  0.140374 -0.305356
price_div_dps  0.673749  0.858901       1.000000  0.577473  0.055112 -0.334345
ps_ttm         0.901894  0.700401       0.577473  1.000000  0.529426  0.247359
roa            0.566400  0.140374       0.055112  0.529426  1.000000  0.860634
roe            0.181035 -0.305356      -0.334345  0.247359  0.860634  1.000000

3_剔除相关系数高的因子

  # Mean of each factor's IC series taken from the minor-axis slice labelled 5 (one value per factor).
  1. ic = daily_panel.minor_xs(5).mean()
  def compare(corr, targets):
      """Yield the labels of ``targets.index`` that pass the ``available`` check.

      Labels are visited in reverse index order. Each label is checked only
      against the labels that come after it in that reversed order, and is
      yielded when ``available(corr, label, later_labels)`` is truthy.
      """
      ordered = list(reversed(targets.index))
      for pos, name in enumerate(ordered):
          remaining = ordered[pos + 1:]
          if available(corr, name, remaining):
              yield name
  def available(corr, target, compares, threshold=0.9):
      """Tell whether ``target`` is free of strong correlation with ``compares``.

      Parameters
      ----------
      corr : pandas.DataFrame
          Correlation matrix looked up as ``corr.loc[target, other]``.
      target : label
          Row label of the factor being checked.
      compares : iterable of labels
          Column labels to check ``target`` against.
      threshold : float, optional
          Absolute-correlation cutoff; defaults to 0.9. A value whose absolute
          value is strictly greater than the cutoff counts as too correlated.

      Returns
      -------
      bool
          ``False`` if any checked correlation exceeds the cutoff in absolute
          value, ``True`` otherwise (including when ``compares`` is empty).
      """
      # abs() covers both the > threshold and < -threshold cases in one test.
      return not any(abs(corr.loc[target, c]) > threshold for c in compares)
  # From the two factors with the smallest mean IC, keep those that compare() yields,
  # and gather their IC series (minor-axis slice 5) into a frame.
  1. small_df = pd.DataFrame({i: daily_panel.minor_xs(5)[i] for i in compare(factor_corr, ic.nsmallest(2))})
  # Same selection for the two factors with the largest mean IC.
  1. big_df = pd.DataFrame({i: daily_panel.minor_xs(5)[i] for i in compare(factor_corr, ic.nlargest(2))})
  # Column-wise concatenation of both selections; ic_df is not referenced again in the visible part of this file.
  1. ic_df = pd.concat([big_df,small_df], axis=1)
  1. print(big_df.head())
                 roa       roe
date                          
2015-01-05  0.362524  0.209361
2015-01-06  0.280116  0.170898
2015-01-07  0.341391  0.281306
2015-01-08  0.224819  0.340688
2015-01-09  0.223593  0.281711

4_计算最优权重组合因子

  1. 导入Alphaman
  2. 获取IC权重的DataFrame
  1. from fxdayu_alphaman.factor.admin import Admin
  2. f_admin = Admin()
  # Build the IC weight frame from the selected high-IC factors.
  # NOTE(review): the meaning of the positional 10 and of rollback_period=20 is defined by
  # fxdayu_alphaman's Admin.get_ic_weight_df -- confirm against its documentation.
  3. ic_weight_df = f_admin.get_ic_weight_df(big_df, 10, rollback_period=20)
  # Stack the disturbed frame of each factor that has a column in the weight frame.
  1. new_factors = {name: PN_disturbed[name].stack() for name in ic_weight_df.columns}
  # Combine the stacked factors using the weight frame.
  1. new_factor = f_admin.ic_cov_weighted_factor(new_factors, ic_weight_df=ic_weight_df)
  # The combined factor values are read from the result's multifactor_value attribute.
  1. factor = new_factor.multifactor_value

5_用Alphalens查看绩效

  # Prepare the combined factor for alphalens, using the same prices frame and 5 quantiles as cal_daily_ic.
  1. factor_data = alphalens.utils.get_clean_factor_and_forward_returns(factor, prices, quantiles=5)
  # Mean return and its standard error per quantile, computed with by_date=True.
  2. mean_return_by_q, std_err_by_q = alphalens.performance.mean_return_by_quantile(factor_data, by_date=True)
  1. import matplotlib.pyplot as plt
  # Plot cumulative returns by quantile; the second argument 5 is passed positionally
  # (presumably the return period -- confirm against the alphalens plotting API).
  2. alphalens.plotting.plot_cumulative_returns_by_quantile(mean_return_by_q, 5)
  3. plt.show()

output_29_0.png-74.5kB

  # Note: this rebinds `ic`, which earlier held the per-factor mean IC series.
  1. ic = alphalens.performance.factor_information_coefficient(factor_data)
  # Histogram of the combined factor's IC values.
  2. alphalens.plotting.plot_ic_hist(ic)
  # Mean IC grouped with by_time='M', then shown as a heatmap.
  3. mean_monthly_ic = alphalens.performance.mean_information_coefficient(factor_data, by_time='M')
  4. alphalens.plotting.plot_monthly_ic_heatmap(mean_monthly_ic)
  5. plt.show()

output_30_0.png-39.4kB

output_30_1.png-38.6kB

添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注