@Channelchan
2018-03-13T06:22:10.000000Z
字数 4331
阅读 26263
from jaqs.data.dataapi import DataApi
from jaqs.data import DataView
import numpy as np
from datetime import datetime
import pandas as pd
import warnings
import alphalens
warnings.filterwarnings("ignore")
dataview_folder = 'JAQS_Data/hs300'
dv = DataView()
dv.load_dataview(dataview_folder)
D:\Anaconda3\lib\site-packages\statsmodels\compat\pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
from pandas.core import datetools
Dataview loaded successfully.
def change_columns_index(signal):
new_names = {}
for c in signal.columns:
if c.endswith('SZ'):
new_names[c] = c.replace('SZ', 'XSHE')
elif c.endswith('SH'):
new_names[c] = c.replace('SH', 'XSHG')
signal = signal.rename_axis(new_names, axis=1)
signal.index = pd.Index(map(lambda x: datetime.strptime(str(x),"%Y%m%d") , signal.index))
return signal
all_factor = ['pb', 'roe', 'roa', 'price_div_dps', 'ps_ttm', 'pe_ttm']
origin_factors = {f: change_columns_index(dv.get_ts(f)) for f in all_factor}
from jaqs.research.signaldigger import process
#去极值,z_score标准化,加干扰值
PN_disturbed = pd.Panel({name: process.get_disturbed_factor(process.standardize(process.winsorize(frame)))\
for name, frame in origin_factors.items()})
print(PN_disturbed)
<class 'pandas.core.panel.Panel'>
Dimensions: 6 (items) x 1453 (major_axis) x 539 (minor_axis)
Items axis: pb to roe
Major_axis axis: 2012-01-04 00:00:00 to 2017-12-22 00:00:00
Minor_axis axis: 000001.XSHE to 603993.XSHG
prices = change_columns_index(dv.get_ts('close_adj'))
alphalens.performance.mean_information_coefficient()
def cal_monthly_ic(factor_df):
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(factor_df.stack(), prices, quantiles=5)
return alphalens.performance.mean_information_coefficient(factor_data, by_time='M')
monthly_ic = {key: cal_monthly_ic(value) for key, value in PN_disturbed.iteritems()}
print(list(monthly_ic.values())[0].tail())
1 5 10
date
2017-08-31 -0.001214 0.062826 0.089109
2017-09-30 0.032383 0.094269 0.137698
2017-10-31 0.041499 0.111624 0.134414
2017-11-30 -0.040708 -0.087116 -0.116298
2017-12-31 0.094886 0.264356 0.234456
monthly_ic_mean = pd.DataFrame(
list(map(lambda frame: frame.mean(), monthly_ic.values())),
monthly_ic.keys()
)
monthly_ic_std = pd.DataFrame(
list(map(lambda frame: frame.std(), monthly_ic.values())),
monthly_ic.keys()
)
print(monthly_ic_mean)
print (monthly_ic_std)
1 5 10
pb -0.013308 -0.010537 -0.014940
pe_ttm -0.008865 -0.010849 -0.016026
price_div_dps -0.004332 -0.003527 -0.006607
ps_ttm -0.004566 -0.002501 -0.004181
roa 0.008087 0.018150 0.023119
roe 0.006245 0.013959 0.020110
1 5 10
pb 0.055441 0.123580 0.159507
pe_ttm 0.037962 0.083811 0.112212
price_div_dps 0.029059 0.064610 0.086014
ps_ttm 0.044336 0.097744 0.127836
roa 0.034680 0.080584 0.105429
roe 0.030441 0.070708 0.097227
import matplotlib.pyplot as plt
import numpy as np
fig, ax = plt.subplots(figsize=(15,7))
N=6
ind = np.arange(N) # the x locations for the groups
width = 0.3 # the width of the bars
# Example data
ind_name = tuple(monthly_ic_mean.index)
y_pos = np.arange(len(ind))
one_mean = monthly_ic_mean.iloc[:,0]
one_std = monthly_ic_std.iloc[:,0]
five_mean = monthly_ic_mean.iloc[:,1]
five_std = monthly_ic_std.iloc[:,1]
ten_mean = monthly_ic_mean.iloc[:,2]
ten_std = monthly_ic_std.iloc[:,2]
ax.barh(ind - width, one_mean, align='edge',height=0.2, xerr=one_std, label='one_day', color='r')
ax.barh(ind - 0.05, five_mean, align='edge',height=0.2,xerr=five_std, label='five_day', color='y')
ax.barh(ind + width, ten_mean, align='center',height=0.2,xerr=ten_std, label='ten_day', color='b')
ax.set_yticks(y_pos)
ax.set_yticklabels(ind_name)
ax.invert_yaxis() # labels read top-to-bottom
ax.set_xlabel('mean_IC')
ax.set_title('HS300_IC in Different Period')
plt.legend()
plt.show()
PN_IC = pd.Panel(monthly_ic)
fig = plt.figure(figsize=(15,7))
plt.plot(PN_IC.minor_xs(5))
plt.hlines(0.02,PN_IC.minor_xs(5).index[0],PN_IC.minor_xs(5).index[-1] , linestyles='dashed', alpha=0.5)
plt.legend(PN_IC.minor_xs(5).columns)
plt.title('FiveDay_IC')
plt.show()
def IR(df):
return df.mean()/df.std()
print ('OneDay_IR:\n', IR(PN_IC.minor_xs(1)))
print ('FiveDay_IR:\n', IR(PN_IC.minor_xs(5)))
print ('TenDay_IR:\n', IR(PN_IC.minor_xs(10)))
OneDay_IR:
pb -0.240034
pe_ttm -0.233529
price_div_dps -0.149082
ps_ttm -0.102978
roa 0.233188
roe 0.205164
dtype: float64
FiveDay_IR:
pb -0.085268
pe_ttm -0.129441
price_div_dps -0.054597
ps_ttm -0.025585
roa 0.225227
roe 0.197414
dtype: float64
TenDay_IR:
pb -0.093661
pe_ttm -0.142818
price_div_dps -0.076814
ps_ttm -0.032705
roa 0.219280
roe 0.206836
dtype: float64