多因子组合
- 读取相应数据
- 获取每日IC的相关系数
- 剔除相关系数高的因子
- 计算最优权重组合因子
- 用Alphalens查看绩效
1_读取相应数据
from jaqs.data.dataapi import DataApi
from jaqs.data import DataView
import numpy as np
from datetime import datetime
import pandas as pd
import warnings
import alphalens
# Suppress all warnings for the rest of the session.
warnings.filterwarnings("ignore")
# Folder containing the previously saved DataView
# (the name suggests the HS300 universe -- TODO confirm).
dataview_folder = 'JAQS_Data/hs300'
dv = DataView()
# Load the stored dataset from that folder into `dv`.
dv.load_dataview(dataview_folder)
D:\Anaconda3\lib\site-packages\statsmodels\compat\pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
from pandas.core import datetools
Dataview loaded successfully.
def change_columns_index(signal):
    """Relabel a frame's columns and index for downstream analysis.

    Column labels ending in 'SZ' get that suffix replaced by 'XSHE', and
    labels ending in 'SH' get it replaced by 'XSHG'; all other labels are
    left untouched. Index labels are parsed as ``%Y%m%d`` dates (e.g.
    20150105) and replaced by the corresponding datetimes.

    Args:
        signal: pandas DataFrame with string column labels and an index whose
            labels render via ``str()`` as ``YYYYMMDD``.

    Returns:
        A new DataFrame with the relabelled columns and a datetime index.
        The input frame is not modified.

    Raises:
        ValueError: if an index label does not match ``%Y%m%d``.
    """
    suffix_map = (('SZ', 'XSHE'), ('SH', 'XSHG'))
    new_names = {}
    for c in signal.columns:
        for old_suffix, new_suffix in suffix_map:
            if c.endswith(old_suffix):
                # Swap only the trailing suffix; str.replace would also
                # rewrite any earlier occurrence inside the label.
                new_names[c] = c[:-len(old_suffix)] + new_suffix
                break
    # rename(columns=...) is the supported way to relabel columns;
    # rename_axis with a mapping was deprecated for this purpose.
    signal = signal.rename(columns=new_names)
    # Build a concrete list so the index does not depend on how pandas
    # treats a lazy map object.
    signal.index = pd.Index(
        [datetime.strptime(str(x), "%Y%m%d") for x in signal.index]
    )
    return signal
# Names of the fields read from the DataView as candidate factors.
all_factors = ['pb', 'roe', 'price_div_dps', 'ps_ttm', 'pe_ttm', 'roa']
# One frame per factor, keeping index labels from 20150105 onward and
# relabelled through change_columns_index.
origin_factors = {f: change_columns_index(dv.get_ts(f).loc[20150105:]) for f in all_factors}
from fxdayu_alphaman.factor.factor import Factor
from fxdayu_alphaman.factor.admin import Admin
f = Factor()
# Cross-sectional processing: winsorize (remove extreme values) first,
# then standardize the winsorized frame.
PN_handle = pd.Panel({name: f.standardize(f.winsorize(frame)) for name, frame in origin_factors.items()})
# Add a disturbance to every processed factor.
PN_disturbed = pd.Panel({name: f.get_disturbed_factor(frame) for name, frame in PN_handle.iteritems()})
# 'close_adj' series with the same relabelling applied; passed to alphalens
# as the price table further down.
prices = change_columns_index(dv.get_ts('close_adj'))
2_获取每日IC的相关系数
def cal_daily_ic(factor_df):
    """Return the alphalens mean information coefficient of one factor.

    The factor frame is stacked into a Series, combined with the
    module-level ``prices`` table by alphalens (using 5 quantiles), and the
    mean IC is then computed with ``by_time='D'``.

    Args:
        factor_df: factor values for one factor (presumably dates on the
            index and assets on the columns -- TODO confirm).

    Returns:
        Whatever ``alphalens.performance.mean_information_coefficient``
        returns for ``by_time='D'``.
    """
    stacked_factor = factor_df.stack()
    clean_data = alphalens.utils.get_clean_factor_and_forward_returns(
        stacked_factor, prices, quantiles=5
    )
    daily_mean_ic = alphalens.performance.mean_information_coefficient(
        clean_data, by_time='D'
    )
    return daily_mean_ic
# IC table for every disturbed factor, keyed by factor name.
daily_ic = {key: cal_daily_ic(value) for key, value in PN_disturbed.iteritems()}
# Combine the per-factor tables into a single Panel keyed by factor name.
daily_panel = pd.Panel(daily_ic)
# Take minor-axis label 5 from every factor's table (presumably the
# 5-period forward-return column -- TODO confirm) and correlate the
# resulting per-factor IC series with each other.
factor_corr = daily_panel.minor_xs(5).corr()
print(factor_corr)
pb pe_ttm price_div_dps ps_ttm roa roe
pb 1.000000 0.799523 0.673749 0.901894 0.566400 0.181035
pe_ttm 0.799523 1.000000 0.858901 0.700401 0.140374 -0.305356
price_div_dps 0.673749 0.858901 1.000000 0.577473 0.055112 -0.334345
ps_ttm 0.901894 0.700401 0.577473 1.000000 0.529426 0.247359
roa 0.566400 0.140374 0.055112 0.529426 1.000000 0.860634
roe 0.181035 -0.305356 -0.334345 0.247359 0.860634 1.000000
3_剔除相关系数高的因子
# Mean of each factor's IC series at minor-axis label 5; the factor
# selection below ranks factors by this value (nsmallest / nlargest).
ic = daily_panel.minor_xs(5).mean()
def compare(corr, targets):
    """Yield the labels of ``targets`` that pass the correlation filter.

    The labels of ``targets.index`` are walked in reverse order. A label is
    yielded when ``available`` accepts it against every label that comes
    after it in that reversed order.

    Args:
        corr: correlation table looked up by ``available``.
        targets: object whose ``.index`` supplies the candidate labels.

    Yields:
        Each accepted label, in reversed-index order.
    """
    ordered = list(reversed(targets.index))
    for next_pos, candidate in enumerate(ordered, start=1):
        # ordered[next_pos:] holds everything after the current candidate.
        if available(corr, candidate, ordered[next_pos:]):
            yield candidate
def available(corr, target, compares, threshold=0.9):
    """Tell whether ``target`` is weakly enough correlated with ``compares``.

    Args:
        corr: table supporting ``corr.loc[row, col]`` lookups of pairwise
            correlations.
        target: label of the candidate being checked.
        compares: iterable of labels to check ``target`` against.
        threshold: absolute correlation above which a pair counts as too
            correlated. Defaults to 0.9, the previously hard-coded cutoff.

    Returns:
        False if the absolute correlation between ``target`` and any label in
        ``compares`` is strictly greater than ``threshold``; True otherwise,
        including when ``compares`` is empty.
    """
    # abs() covers both the > threshold and < -threshold cases at once.
    return not any(abs(corr.loc[target, c]) > threshold for c in compares)
# Slice the Panel once; the original re-extracted this same table for every
# selected factor inside both comprehensions.
ic_at_5 = daily_panel.minor_xs(5)
# IC series of the two lowest-mean-IC factors that pass the `compare` filter.
small_df = pd.DataFrame({i: ic_at_5[i] for i in compare(factor_corr, ic.nsmallest(2))})
# IC series of the two highest-mean-IC factors that pass the `compare` filter.
big_df = pd.DataFrame({i: ic_at_5[i] for i in compare(factor_corr, ic.nlargest(2))})
# Both selections side by side, high-IC columns first.
ic_df = pd.concat([big_df, small_df], axis=1)
print(big_df.head())
roa roe
date
2015-01-05 0.362524 0.209361
2015-01-06 0.280116 0.170898
2015-01-07 0.341391 0.281306
2015-01-08 0.224819 0.340688
2015-01-09 0.223593 0.281711
4_计算最优权重组合因子
- 导入Alphaman
- 获取IC权重的DataFrame
from fxdayu_alphaman.factor.admin import Admin
f_admin = Admin()
# Build the IC-weight table from the IC series of the selected high-IC factors.
# NOTE(review): the meaning of the positional 10 and of rollback_period=20 is
# defined by fxdayu_alphaman -- confirm against its documentation.
ic_weight_df = f_admin.get_ic_weight_df(big_df, 10, rollback_period=20)
# Stacked disturbed values for each factor that has a column in the weight table.
new_factors = {name: PN_disturbed[name].stack() for name in ic_weight_df.columns}
# Combine those factors using the weight table.
new_factor = f_admin.ic_cov_weighted_factor(new_factors, ic_weight_df=ic_weight_df)
# Values of the combined factor, analysed with alphalens below.
factor = new_factor.multifactor_value
5_用Alphalens查看绩效
# Clean factor / forward-return table for the combined factor, using 5 quantiles.
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(factor, prices, quantiles=5)
# Mean return and its standard error per quantile, computed with by_date=True.
mean_return_by_q, std_err_by_q = alphalens.performance.mean_return_by_quantile(factor_data, by_date=True)
import matplotlib.pyplot as plt
# Cumulative return curve per quantile (the second argument, 5, is presumably
# the forward-return period to plot -- TODO confirm).
alphalens.plotting.plot_cumulative_returns_by_quantile(mean_return_by_q, 5)
plt.show()

# NOTE: this rebinds `ic`, which earlier in the file held the per-factor mean
# IC; re-running the factor-selection step after this point would pick up
# this object instead.
ic = alphalens.performance.factor_information_coefficient(factor_data)
alphalens.plotting.plot_ic_hist(ic)
# Mean IC computed with by_time='M', drawn as a heatmap.
mean_monthly_ic = alphalens.performance.mean_information_coefficient(factor_data, by_time='M')
alphalens.plotting.plot_monthly_ic_heatmap(mean_monthly_ic)
plt.show()

