[关闭]
@Channelchan 2018-03-13T06:22:10.000000Z 字数 4331 阅读 26263

对比不同因子IC

  1. 读取本地数据
  2. 数据预处理
  3. 计算月度IC
  4. 对比不同因子的IC值
  5. IC的时间序列
  6. 计算各个因子的IR

1_读取本地数据

  1. from jaqs.data.dataapi import DataApi
  2. from jaqs.data import DataView
  3. import numpy as np
  4. from datetime import datetime
  5. import pandas as pd
  6. import warnings
  7. import alphalens
  # Suppress library warnings for the rest of the session.
  warnings.filterwarnings("ignore")

  # Read the dataview previously saved under the local HS300 folder.
  dv = DataView()
  dataview_folder = 'JAQS_Data/hs300'
  dv.load_dataview(dataview_folder)
D:\Anaconda3\lib\site-packages\statsmodels\compat\pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
  from pandas.core import datetools


Dataview loaded successfully.

2_数据预处理

  def change_columns_index(signal):
      """Relabel a JAQS time-series frame for use with alphalens.

      Column labels ending in ``SZ`` / ``SH`` have that trailing suffix
      replaced by ``XSHE`` / ``XSHG``; any other label is left as it is.
      Index labels are parsed as ``%Y%m%d`` dates.

      Args:
          signal: DataFrame whose column labels are strings and whose index
              labels read as YYYYMMDD once passed through ``str()``.

      Returns:
          A new DataFrame with the renamed columns and a datetime index.
          The frame passed in is not modified.

      Raises:
          ValueError: if an index label does not match ``%Y%m%d``.
      """
      suffix_map = {'SZ': 'XSHE', 'SH': 'XSHG'}

      def convert(symbol):
          for old_suffix, new_suffix in suffix_map.items():
              if symbol.endswith(old_suffix):
                  # Swap only the trailing suffix; str.replace would also
                  # rewrite the same letters anywhere else in the label.
                  return symbol[:-len(old_suffix)] + new_suffix
          return symbol

      # rename(columns=...) is the supported way to relabel columns and
      # returns a copy; rename_axis() with a mapping is deprecated for this.
      signal = signal.rename(columns=convert)
      # Build a concrete list so the result does not depend on how pd.Index
      # treats a lazy map object.
      signal.index = pd.Index(
          [datetime.strptime(str(label), "%Y%m%d") for label in signal.index]
      )
      return signal
  all_factor = ['pb', 'roe', 'roa', 'price_div_dps', 'ps_ttm', 'pe_ttm']

  # One relabelled time-series frame per factor name.
  origin_factors = {
      factor_name: change_columns_index(dv.get_ts(factor_name))
      for factor_name in all_factor
  }

  from jaqs.research.signaldigger import process

  # Winsorize, z-score standardize, then add a disturbance value.
  # NOTE: pd.Panel was removed in pandas 0.25, so this cell needs an older
  # pandas release.
  PN_disturbed = pd.Panel({
      name: process.get_disturbed_factor(
          process.standardize(
              process.winsorize(frame)
          )
      )
      for name, frame in origin_factors.items()
  })

  print(PN_disturbed)
<class 'pandas.core.panel.Panel'>
Dimensions: 6 (items) x 1453 (major_axis) x 539 (minor_axis)
Items axis: pb to roe
Major_axis axis: 2012-01-04 00:00:00 to 2017-12-22 00:00:00
Minor_axis axis: 000001.XSHE to 603993.XSHG
  # Price frame built from the 'close_adj' field (presumably the adjusted
  # close), relabelled the same way as the factor frames.
  close_adj = dv.get_ts('close_adj')
  prices = change_columns_index(close_adj)

3_计算月度IC

alphalens.performance.mean_information_coefficient()

  def cal_monthly_ic(factor_df):
      """Return the mean information coefficient of one factor, grouped by month.

      Args:
          factor_df: wide factor frame; it is stacked into a single Series
              before being handed to alphalens.

      Returns:
          Whatever ``alphalens.performance.mean_information_coefficient``
          returns for ``by_time='M'``.

      Forward returns are computed against the module-level ``prices`` frame,
      and the factor is bucketed into 5 quantiles.
      """
      stacked_factor = factor_df.stack()
      factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
          stacked_factor,
          prices,
          quantiles=5,
      )
      monthly = alphalens.performance.mean_information_coefficient(
          factor_data,
          by_time='M',
      )
      return monthly
  # Monthly IC table for every factor held in the panel.
  monthly_ic = {
      factor_name: cal_monthly_ic(factor_frame)
      for factor_name, factor_frame in PN_disturbed.iteritems()
  }

  # Peek at the last rows of the first factor's table.
  first_table = next(iter(monthly_ic.values()))
  print(first_table.tail())
                  1         5         10
date                                    
2017-08-31 -0.001214  0.062826  0.089109
2017-09-30  0.032383  0.094269  0.137698
2017-10-31  0.041499  0.111624  0.134414
2017-11-30 -0.040708 -0.087116 -0.116298
2017-12-31  0.094886  0.264356  0.234456
  # One row per factor: the column-wise mean of its monthly IC table.
  monthly_ic_mean = pd.DataFrame(
      [ic_table.mean() for ic_table in monthly_ic.values()],
      index=list(monthly_ic),
  )

  # Same layout, holding the column-wise standard deviation.
  monthly_ic_std = pd.DataFrame(
      [ic_table.std() for ic_table in monthly_ic.values()],
      index=list(monthly_ic),
  )

  print(monthly_ic_mean)
  print(monthly_ic_std)
                     1         5         10
pb            -0.013308 -0.010537 -0.014940
pe_ttm        -0.008865 -0.010849 -0.016026
price_div_dps -0.004332 -0.003527 -0.006607
ps_ttm        -0.004566 -0.002501 -0.004181
roa            0.008087  0.018150  0.023119
roe            0.006245  0.013959  0.020110
                     1         5         10
pb             0.055441  0.123580  0.159507
pe_ttm         0.037962  0.083811  0.112212
price_div_dps  0.029059  0.064610  0.086014
ps_ttm         0.044336  0.097744  0.127836
roa            0.034680  0.080584  0.105429
roe            0.030441  0.070708  0.097227

4_对比不同因子的IC值

  import matplotlib.pyplot as plt
  import numpy as np

  fig, ax = plt.subplots(figsize=(15, 7))

  # Number of factor rows; taken from the data so the chart keeps working
  # when the factor list changes length.
  N = len(monthly_ic_mean)
  ind = np.arange(N)  # the y locations for the groups
  width = 0.3  # vertical offset between the bars of one group

  # Factor names label the y axis, one tick per group.
  ind_name = tuple(monthly_ic_mean.index)
  y_pos = np.arange(len(ind))

  # Columns are selected by position: first, second and third column of the
  # mean / std tables.
  one_mean = monthly_ic_mean.iloc[:, 0]
  one_std = monthly_ic_std.iloc[:, 0]
  five_mean = monthly_ic_mean.iloc[:, 1]
  five_std = monthly_ic_std.iloc[:, 1]
  ten_mean = monthly_ic_mean.iloc[:, 2]
  ten_std = monthly_ic_std.iloc[:, 2]

  # Three horizontal bars per factor; xerr draws the std as an error bar.
  ax.barh(ind - width, one_mean, align='edge', height=0.2,
          xerr=one_std, label='one_day', color='r')
  ax.barh(ind - 0.05, five_mean, align='edge', height=0.2,
          xerr=five_std, label='five_day', color='y')
  ax.barh(ind + width, ten_mean, align='center', height=0.2,
          xerr=ten_std, label='ten_day', color='b')

  ax.set_yticks(y_pos)
  ax.set_yticklabels(ind_name)
  ax.invert_yaxis()  # labels read top-to-bottom
  ax.set_xlabel('mean_IC')
  ax.set_title('HS300_IC in Different Period')
  plt.legend()
  plt.show()

output_16_0.png-14kB

5_IC的时间序列

  # Stack the per-factor monthly IC tables into one panel.
  PN_IC = pd.Panel(monthly_ic)

  # Cross-section for minor-axis label 5, plotted as one line per column.
  five_day_ic = PN_IC.minor_xs(5)

  fig = plt.figure(figsize=(15, 7))
  plt.plot(five_day_ic)
  # Dashed horizontal reference line at 0.02 across the full date range.
  plt.hlines(
      0.02,
      five_day_ic.index[0],
      five_day_ic.index[-1],
      linestyles='dashed',
      alpha=0.5,
  )
  plt.legend(five_day_ic.columns)
  plt.title('FiveDay_IC')
  plt.show()

output_19_0.png-133.9kB

6_计算各个因子的IR

  def IR(df):
      """Return each column's mean divided by its standard deviation.

      Args:
          df: DataFrame with one column per series to score.

      Returns:
          Series indexed by the columns of ``df``.
      """
      column_mean = df.mean()
      column_std = df.std()
      return column_mean / column_std
  # Print the IR of every factor for each minor-axis label of the IC panel.
  for label, period in (
      ('OneDay_IR:\n', 1),
      ('FiveDay_IR:\n', 5),
      ('TenDay_IR:\n', 10),
  ):
      print(label, IR(PN_IC.minor_xs(period)))
OneDay_IR:
 pb              -0.240034
pe_ttm          -0.233529
price_div_dps   -0.149082
ps_ttm          -0.102978
roa              0.233188
roe              0.205164
dtype: float64
FiveDay_IR:
 pb              -0.085268
pe_ttm          -0.129441
price_div_dps   -0.054597
ps_ttm          -0.025585
roa              0.225227
roe              0.197414
dtype: float64
TenDay_IR:
 pb              -0.093661
pe_ttm          -0.142818
price_div_dps   -0.076814
ps_ttm          -0.032705
roa              0.219280
roe              0.206836
dtype: float64
添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注