多因子组合
- 读取相应数据
- 获取每日IC的相关系数
- 剔除相关系数高的因子
- 计算最优权重组合因子
- 用Alphalens查看绩效
1_读取相应数据
# Standard library
import warnings
from datetime import datetime

# Third-party
from jaqs.data.dataapi import DataApi
from jaqs.data import DataView
import numpy as np
import pandas as pd
import alphalens

# Suppress every warning raised from this point on; warnings emitted while
# the imports above were executing are not affected.
warnings.filterwarnings("ignore")

# Load the dataview stored under the local folder below.
dataview_folder = 'JAQS_Data/hs300'
dv = DataView()
dv.load_dataview(dataview_folder)
D:\Anaconda3\lib\site-packages\statsmodels\compat\pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
from pandas.core import datetools
Dataview loaded successfully.
def change_columns_index(signal):
    """Return a copy of *signal* with converted column codes and a datetime index.

    Column labels ending in ``SZ`` get that suffix replaced by ``XSHE`` and
    labels ending in ``SH`` get it replaced by ``XSHG``; every other label is
    kept as is. Only the trailing suffix is rewritten, so an ``SZ``/``SH``
    that happens to occur earlier in a label is left alone.

    Each index label is converted with ``str()`` and parsed using the
    ``%Y%m%d`` format (e.g. ``20150105`` -> 2015-01-05).

    Parameters
    ----------
    signal : pandas.DataFrame
        Frame whose column labels are strings and whose index labels render
        as ``YYYYMMDD`` through ``str()``.

    Returns
    -------
    pandas.DataFrame
        A new frame; the input frame is not modified.

    Raises
    ------
    ValueError
        If an index label does not match ``%Y%m%d``.
    """
    suffix_map = (('SZ', 'XSHE'), ('SH', 'XSHG'))

    new_names = {}
    for c in signal.columns:
        for old_suffix, new_suffix in suffix_map:
            if c.endswith(old_suffix):
                # Slice off the suffix instead of str.replace(), which would
                # also rewrite matches in the middle of the label.
                new_names[c] = c[:-len(old_suffix)] + new_suffix
                break

    # DataFrame.rename is the supported way to map labels; passing a dict to
    # rename_axis is the deprecated form and is rejected by current pandas.
    signal = signal.rename(columns=new_names)
    signal.index = pd.Index(
        [datetime.strptime(str(x), "%Y%m%d") for x in signal.index]
    )
    return signal
# Names of the dataview fields used as candidate factors.
all_factors = ['pb', 'roe', 'price_div_dps', 'ps_ttm', 'pe_ttm', 'roa']

# One frame per factor, sliced from index label 20150105 onward and passed
# through change_columns_index (converted column codes, datetime index).
origin_factors = dict(
    (name, change_columns_index(dv.get_ts(name).loc[20150105:]))
    for name in all_factors
)
from fxdayu_alphaman.factor.factor import Factor
from fxdayu_alphaman.factor.admin import Admin

f = Factor()

# Cross-sectional standardization / winsorization: each raw factor frame is
# winsorized first and the result is then standardized.
# NOTE(review): pd.Panel was removed in pandas 0.25, so this cell needs an
# older pandas; later cells rely on the Panel API (iteritems, minor_xs).
PN_handle = pd.Panel(dict(
    (name, f.standardize(f.winsorize(frame)))
    for name, frame in origin_factors.items()
))

# Add disturbance to every processed factor frame.
PN_disturbed = pd.Panel(dict(
    (name, f.get_disturbed_factor(frame))
    for name, frame in PN_handle.iteritems()
))

# 'close_adj' field of the dataview, relabelled the same way as the factors.
prices = change_columns_index(dv.get_ts('close_adj'))
2_获取每日IC的相关系数
def cal_daily_ic(factor_df):
    """Return the daily mean information coefficient of one factor.

    The frame is stacked, combined with the module-level ``prices`` through
    alphalens using 5 quantiles, and the resulting factor data is passed to
    ``mean_information_coefficient`` with ``by_time='D'``.
    """
    stacked_factor = factor_df.stack()
    clean_data = alphalens.utils.get_clean_factor_and_forward_returns(
        stacked_factor, prices, quantiles=5
    )
    return alphalens.performance.mean_information_coefficient(clean_data, by_time='D')


# Daily IC table for every disturbed factor, collected into one Panel.
daily_ic = dict(
    (name, cal_daily_ic(frame)) for name, frame in PN_disturbed.iteritems()
)
daily_panel = pd.Panel(daily_ic)

# Pairwise correlation between the factors' IC series at minor-axis label 5
# (presumably the 5-period forward-return horizon -- TODO confirm).
factor_corr = daily_panel.minor_xs(5).corr()
print(factor_corr)
pb pe_ttm price_div_dps ps_ttm roa roe
pb 1.000000 0.799523 0.673749 0.901894 0.566400 0.181035
pe_ttm 0.799523 1.000000 0.858901 0.700401 0.140374 -0.305356
price_div_dps 0.673749 0.858901 1.000000 0.577473 0.055112 -0.334345
ps_ttm 0.901894 0.700401 0.577473 1.000000 0.529426 0.247359
roa 0.566400 0.140374 0.055112 0.529426 1.000000 0.860634
roe 0.181035 -0.305356 -0.334345 0.247359 0.860634 1.000000
3_剔除相关系数高的因子
# Column-wise mean of the IC series at minor-axis label 5, one value per factor
# (label 5 is presumably the 5-period forward-return horizon -- TODO confirm).
ic = daily_panel.minor_xs(5).mean(axis=0)
def compare(corr, targets, threshold=0.9):
    """Yield the names in *targets* that are not too correlated with later ones.

    The labels of ``targets.index`` are walked in reverse order. A name is
    yielded when its correlation with every name that comes after it in that
    reversed order stays within ``[-threshold, threshold]``; the last name of
    the reversed order has nothing left to be compared with and is always
    yielded.

    Parameters
    ----------
    corr : pandas.DataFrame
        Square correlation table addressable as ``corr.loc[row, column]``.
    targets : object with an ``index`` attribute
        Ordered candidates, e.g. the Series returned by ``nlargest`` or
        ``nsmallest``.
    threshold : float, optional
        Absolute correlation above which two names count as redundant.
        Defaults to 0.9, the previously hard-coded value.

    Yields
    ------
    Labels from ``targets.index``, in reversed order.
    """
    index = list(reversed(targets.index))
    for position, name in enumerate(index):
        if available(corr, name, index[position + 1:], threshold):
            yield name


def available(corr, target, compares, threshold=0.9):
    """Return True when *target* is not highly correlated with any of *compares*.

    Parameters
    ----------
    corr : pandas.DataFrame
        Correlation table addressable as ``corr.loc[row, column]``.
    target : label
        Row label of the candidate.
    compares : iterable of labels
        Column labels to check the candidate against; an empty iterable
        yields True.
    threshold : float, optional
        Absolute correlation strictly above which the candidate is rejected.
        Defaults to 0.9.

    Returns
    -------
    bool
    """
    # abs() covers both the > threshold and the < -threshold case; a NaN
    # correlation compares False and therefore never rejects the candidate.
    return not any(abs(corr.loc[target, c]) > threshold for c in compares)
# IC series at minor-axis label 5, one column per factor.
period_ic = daily_panel.minor_xs(5)

# Of the two factors with the lowest mean IC, keep those that pass compare().
small_df = pd.DataFrame(dict(
    (name, period_ic[name]) for name in compare(factor_corr, ic.nsmallest(2))
))

# Same selection for the two factors with the highest mean IC.
big_df = pd.DataFrame(dict(
    (name, period_ic[name]) for name in compare(factor_corr, ic.nlargest(2))
))

# Both selections side by side, aligned on the row index.
ic_df = pd.concat([big_df, small_df], axis=1)
print(big_df.head())
roa roe
date
2015-01-05 0.362524 0.209361
2015-01-06 0.280116 0.170898
2015-01-07 0.341391 0.281306
2015-01-08 0.224819 0.340688
2015-01-09 0.223593 0.281711
4_计算最优权重组合因子
- 导入Alphaman
- 获取IC权重的DataFrame
from fxdayu_alphaman.factor.admin import Admin

f_admin = Admin()

# IC-based weight table built from the selected high-IC factors.
# NOTE(review): the meaning of the positional 10 and of rollback_period=20 is
# defined by fxdayu_alphaman -- confirm against its documentation.
ic_weight_df = f_admin.get_ic_weight_df(big_df, 10, rollback_period=20)

# Stacked (long-format) disturbed values for every factor that has a weight column.
new_factors = dict(
    (name, PN_disturbed[name].stack()) for name in ic_weight_df.columns
)

# Combine the factors with the weights above and keep the combined values.
new_factor = f_admin.ic_cov_weighted_factor(
    new_factors,
    ic_weight_df=ic_weight_df,
)
factor = new_factor.multifactor_value
5_用Alphalens查看绩效
import matplotlib.pyplot as plt

# Align the combined factor with prices and bucket it into 5 quantiles.
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
    factor, prices, quantiles=5
)

# Per-date mean return (and its standard error) for each quantile.
quantile_stats = alphalens.performance.mean_return_by_quantile(
    factor_data, by_date=True
)
mean_return_by_q, std_err_by_q = quantile_stats

# Cumulative return curves by quantile; the second argument is the period.
alphalens.plotting.plot_cumulative_returns_by_quantile(mean_return_by_q, 5)
plt.show()

# NOTE: this rebinds the module-level name `ic`, which earlier held the
# per-factor mean IC series.
ic = alphalens.performance.factor_information_coefficient(factor_data)
mean_monthly_ic = alphalens.performance.mean_information_coefficient(
    factor_data, by_time='M'
)

# IC histogram first, then the monthly IC heatmap, shown together.
alphalens.plotting.plot_ic_hist(ic)
alphalens.plotting.plot_monthly_ic_heatmap(mean_monthly_ic)
plt.show()

