@Channelchan
2018-10-11T09:02:54.000000Z
字数 17780
阅读 75036
# Load a pre-built jaqs_fxdayu DataView from disk and register a 20-day
# momentum factor computed on adjusted close prices.
from jaqs_fxdayu.data import DataView
import warnings
warnings.filterwarnings("ignore")
dataview_folder = './Factor'
dv = DataView()
dv.load_dataview(dataview_folder)
# Return(close_adj, 20): 20-day rate of return. is_quarterly=False -> daily
# frequency; add_data=True stores the series in the dataview as "momentum".
dv.add_formula("momentum", "Return(close_adj, 20)", is_quarterly=False, add_data=True)
Dataview loaded successfully.
symbol | 000001.SZ | 000002.SZ | 000008.SZ | 000009.SZ | 000012.SZ | 000024.SZ | 000027.SZ | 000039.SZ | 000046.SZ | 000059.SZ | ... | 601992.SH | 601997.SH | 601998.SH | 603000.SH | 603160.SH | 603288.SH | 603699.SH | 603858.SH | 603885.SH | 603993.SH |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
trade_date | |||||||||||||||||||||
20140102 | -0.100735 | -0.085812 | -0.057592 | -0.006342 | -0.100442 | -0.051708 | -0.068143 | 0.012426 | -0.074534 | -0.089580 | ... | -0.140442 | NaN | -0.065375 | 0.104574 | NaN | NaN | NaN | NaN | NaN | -0.084892 |
20140103 | -0.111690 | -0.102975 | -0.052910 | -0.040881 | -0.116740 | -0.078923 | -0.082474 | 0.048699 | -0.091097 | -0.111111 | ... | -0.167112 | NaN | -0.075426 | 0.105497 | NaN | NaN | NaN | NaN | NaN | -0.091437 |
20140106 | -0.121896 | -0.137255 | -0.095643 | -0.059129 | -0.165380 | -0.111576 | -0.106164 | 0.011311 | -0.098121 | -0.134470 | ... | -0.214003 | NaN | -0.085575 | 0.132137 | NaN | NaN | NaN | NaN | NaN | -0.123726 |
20140107 | -0.118271 | -0.138051 | -0.109342 | -0.060228 | -0.174342 | -0.122535 | -0.104991 | 0.039841 | -0.095745 | -0.139847 | ... | -0.200000 | NaN | -0.088020 | 0.076545 | NaN | NaN | NaN | NaN | NaN | -0.118594 |
20140108 | -0.115124 | -0.144175 | -0.159346 | -0.063224 | -0.179235 | -0.160665 | -0.093103 | 0.066347 | -0.081023 | -0.156604 | ... | -0.216033 | NaN | -0.085575 | 0.118630 | NaN | NaN | NaN | NaN | NaN | -0.127941 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
917 rows × 472 columns
import numpy as np
def mask_index_member():
    """Return a boolean frame that is True where a stock is NOT an index member.

    Written as ~(membership > 0) rather than (membership <= 0): NaN > 0 is
    False, so stocks with missing membership data are also masked out.
    """
    membership = dv.get_ts('index_member')
    return ~(membership > 0)
def limit_up_down():
    """Return (can_enter, can_exit) boolean frames marking tradable days.

    can_enter: not suspended and not limit-up;
    can_exit:  not suspended and not limit-down.
    """
    # trade_status == 0 marks a suspended stock here (per the original
    # filter's intent) -- confirm against the dataview's encoding.
    suspended = dv.get_ts('trade_status') == 0
    # A daily move beyond +/-9.5% approximates hitting the 10% price limit.
    dv.add_formula('up_limit', '(close - Delay(close, 1)) / Delay(close, 1) > 0.095', is_quarterly=False, add_data=True)
    dv.add_formula('down_limit', '(close - Delay(close, 1)) / Delay(close, 1) < -0.095', is_quarterly=False, add_data=True)
    not_limit_up = dv.get_ts('up_limit') < 1
    not_limit_down = dv.get_ts('down_limit') < 1
    can_enter = np.logical_and(not_limit_up, ~suspended)    # not limit-up, not suspended
    can_exit = np.logical_and(not_limit_down, ~suspended)   # not limit-down, not suspended
    return can_enter, can_exit
# Build the filters once; they are reused in every factor study below.
mask = mask_index_member()
can_enter,can_exit = limit_up_down()
接下来,我们对pb、pe、ps、float_mv、momentum五个因子进行比较、筛选
from jaqs_fxdayu.research.signaldigger import multi_factor
# Compute an IC time series for each candidate factor at several holding periods.
candidate_names = ["pb", "pe", "ps", "float_mv", "momentum"]
factors_dict = {name: dv.get_ts(name) for name in candidate_names}
ic = {}
for holding_period in [5, 15, 30]:
    ic[holding_period] = multi_factor.get_factors_ic_df(
        factors_dict,
        price=dv.get_ts("close_adj"),
        high=dv.get_ts("high_adj"),         # optional
        low=dv.get_ts("low_adj"),           # optional
        n_quantiles=5,                      # number of quantile buckets
        mask=mask,                          # filter: drop non-index members
        can_enter=can_enter,                # entry allowed
        can_exit=can_exit,                  # exit allowed
        period=holding_period,              # holding period in days
        benchmark_price=dv.data_benchmark,  # omit to compute absolute returns
        commission=0.0008,
    )
Nan Data Count (should be zero) : 0; Percentage of effective data: 58%
Nan Data Count (should be zero) : 0; Percentage of effective data: 58%
Nan Data Count (should be zero) : 0; Percentage of effective data: 58%
Nan Data Count (should be zero) : 0; Percentage of effective data: 58%
Nan Data Count (should be zero) : 0; Percentage of effective data: 58%
Nan Data Count (should be zero) : 0; Percentage of effective data: 58%
Nan Data Count (should be zero) : 0; Percentage of effective data: 58%
Nan Data Count (should be zero) : 0; Percentage of effective data: 58%
Nan Data Count (should be zero) : 0; Percentage of effective data: 58%
Nan Data Count (should be zero) : 0; Percentage of effective data: 58%
Nan Data Count (should be zero) : 0; Percentage of effective data: 58%
Nan Data Count (should be zero) : 0; Percentage of effective data: 58%
Nan Data Count (should be zero) : 0; Percentage of effective data: 58%
Nan Data Count (should be zero) : 0; Percentage of effective data: 58%
Nan Data Count (should be zero) : 0; Percentage of effective data: 58%
import pandas as pd
# Summarize each factor's IC series into mean / std / IR tables
# (rows = factors, columns = holding periods).
factor_names = ["pb", "pe", "ps", "float_mv", "momentum"]
periods = [5, 15, 30]
ic_mean_table = pd.DataFrame(data=np.nan, columns=periods, index=factor_names)
ic_std_table = pd.DataFrame(data=np.nan, columns=periods, index=factor_names)
ir_table = pd.DataFrame(data=np.nan, columns=periods, index=factor_names)
for name in factor_names:
    for p in periods:
        series = ic[p][name]
        ic_mean_table.loc[name, p] = series.mean()
        ic_std_table.loc[name, p] = series.std()
        ir_table.loc[name, p] = series.mean() / series.std()
print(ic_mean_table)
print(ic_std_table)
print(ir_table)
5 15 30
pb -0.039948 -0.069184 -0.106428
pe -0.038036 -0.065607 -0.098353
ps -0.032231 -0.057777 -0.087181
float_mv 0.006833 0.021287 0.044382
momentum -0.041551 -0.053251 -0.047145
5 15 30
pb 0.231587 0.259397 0.245520
pe 0.210134 0.220244 0.210795
ps 0.176345 0.193792 0.188749
float_mv 0.222908 0.229546 0.229144
momentum 0.207719 0.215057 0.209887
5 15 30
pb -0.172496 -0.266712 -0.433481
pe -0.181008 -0.297881 -0.466578
ps -0.182774 -0.298140 -0.461889
float_mv 0.030655 0.092735 0.193688
momentum -0.200034 -0.247614 -0.224622
可视化比较
# Horizontal bar chart of mean IC per factor, with IC std as error bars.
%matplotlib inline
ic_mean_table.plot(kind="barh",xerr=ic_std_table,figsize=(15,5))
<matplotlib.axes._subplots.AxesSubplot at 0x7f3dcfae95c0>
# Horizontal bar chart of IC-IR (mean / std) per factor.
%matplotlib inline
ir_table.plot(kind="barh",figsize=(15,5))
<matplotlib.axes._subplots.AxesSubplot at 0x7f3dd0abfd30>
保留momentum、ps、pe、pb 进一步处理并尝试构建组合因子
from jaqs_fxdayu.research.signaldigger import process
# Flip the sign of each retained factor (their ICs were negative) and
# pre-process it: winsorize, then z-score standardize.
factor_dict = dict()
index_member = dv.get_ts("index_member")
for name in ["pb", "pe", "ps", "momentum"]:
    signal = -1 * dv.get_ts(name)  # flip sign so a larger value => higher expected return
    # BUG FIX: the original discarded winsorize()'s return value, so no
    # winsorization actually took place. process.winsorize returns the
    # clipped frame rather than mutating in place (consistent with how
    # standardize is used below) -- capture the result.
    signal = process.winsorize(factor_df=signal, alpha=0.05, index_member=index_member)
    signal = process.standardize(signal, index_member)  # z-score: keeps rank + distribution info
    # Alternative: rank-standardize to [0, 1] (keeps rank info only):
    # signal = process.rank_standardize(signal, index_member)
    # Alternative: industry / market-cap neutralization:
    # signal = process.neutralize(signal,
    #                             group=dv.get_ts("sw1"),          # industry classification
    #                             float_mv=dv.get_ts("float_mv"),  # None -> skip size neutralization
    #                             index_member=index_member)       # restrict to index constituents
    factor_dict[name] = signal
对筛选后的因子进行组合,一般有以下常规处理:
* 因子间存在较强同质性时,先使用施密特正交化方法对因子做正交化处理,用得到的正交化残差作为因子(也可以不使用,正交化会破坏因子的经济学逻辑,并剔除一些信息)
* 因子组合加权,常规的方法有:等权重、以某个时间窗口的滚动平均ic为权重、以某个时间窗口的滚动ic_ir为权重、最大化上个持有期的ic_ir为目标处理权重、最大化上个持有期的ic为目标处理权重
* 注:因为计算IC需要用到下一期股票收益,因此在动态加权方法里,实际上使用的是前一期及更早的IC值(向前推移了holding_period)计算当期的权重
# When the factors are strongly correlated, Gram-Schmidt orthogonalization can
# be applied; the orthogonalized residuals are used as the new factors.
new_factors = multi_factor.orthogonalize(factors_dict=factor_dict,
                              standardize_type="rank",  # input standardization: "rank" or "z_score"
                              winsorization=False,      # whether to winsorize the input factors
                              index_member=index_member)  # restrict processing to index constituents
new_factors
{'momentum': symbol 000001.SZ 000002.SZ 000008.SZ 000009.SZ 000012.SZ 000024.SZ \
trade_date
20140102 0.715719 0.511706 NaN 0.354515 0.290970 0.709030
20140103 0.668896 0.491639 NaN 0.351171 0.264214 0.655518
20140106 0.722408 0.488294 NaN 0.354515 0.230769 0.645485
20140107 0.725753 0.488294 NaN 0.384615 0.190635 0.605351
20140108 0.745819 0.498328 NaN 0.367893 0.200669 0.471572
... ... ... ... ... ... ...
[977 rows x 488 columns],
'pb': symbol 000001.SZ 000002.SZ 000008.SZ 000009.SZ 000012.SZ 000024.SZ \
trade_date
20140102 0.244147 0.247492 NaN 0.719064 0.418060 0.411371
20140103 0.244147 0.220736 NaN 0.698997 0.404682 0.364548
20140106 0.311037 0.204013 NaN 0.688963 0.284281 0.331104
20140107 0.331104 0.204013 NaN 0.698997 0.284281 0.290970
20140108 0.357860 0.210702 NaN 0.688963 0.304348 0.173913
... ... ... ... ... ... ...
[977 rows x 488 columns],
'pe': symbol 000001.SZ 000002.SZ 000008.SZ 000009.SZ 000012.SZ 000024.SZ \
trade_date
20140102 0.441472 0.404682 NaN 0.869565 0.892977 0.347826
20140103 0.321070 0.451505 NaN 0.916388 0.909699 0.461538
20140106 0.327759 0.471572 NaN 0.886288 0.913043 0.421405
20140107 0.301003 0.451505 NaN 0.896321 0.909699 0.454849
20140108 0.301003 0.454849 NaN 0.919732 0.909699 0.508361
... ... ... ... ... ... ...
[977 rows x 488 columns]}
用正交化前的因子,分别进行等权、以某个时间窗口的滚动平均ic为权重、以某个时间窗口的滚动ic_ir为权重、最大化上个持有期的ic_ir为目标处理权重、最大化上个持有期的ic为目标处理权重的加权组合方式,然后测试组合因子表现
# Multi-factor combination: dynamic-weighting configuration.
# rollback_period is the rolling-window length in days, i.e. how many past
# periods are used to estimate the current factor weights; half a year or
# more is recommended for relatively stable results.
props = {
    'price': dv.get_ts("close_adj"),
    'high': dv.get_ts("high_adj"),  # optional
    'low': dv.get_ts("low_adj"),    # optional
    'ret_type': 'return',  # or upside_ret / downside_ret to target potential up/down room
    'benchmark_price': dv.data_benchmark,  # None -> absolute return; set -> excess return
    'period': 30,  # 30-day holding period
    'mask': mask,
    'can_enter': can_enter,
    'can_exit': can_exit,
    'forward': True,
    'commission': 0.0008,
    "covariance_type": "shrink",  # covariance estimator; "simple" also available
    "rollback_period": 120}  # rolling-window length in days
comb_factors = dict()
for method in ["equal_weight","ic_weight","ir_weight","max_IR","max_IC"]:
    comb_factors[method] = multi_factor.combine_factors(factor_dict,
                                                        standardize_type="rank",
                                                        winsorization=False,
                                                        weighted_method=method,
                                                        props=props)
    print(method)
    print(comb_factors[method].dropna(how="all").head())
equal_weight
symbol 000001.SZ 000002.SZ 000008.SZ 000009.SZ ...
trade_date
20140102 0.762542 0.819398 NaN 0.143813 ...
20140103 0.745819 0.822742 NaN 0.187291 ...
20140106 0.712375 0.842809 NaN 0.190635 ...
20140107 0.705686 0.849498 NaN 0.190635 ...
20140108 0.678930 0.842809 NaN 0.204013 ...
[5 rows x 488 columns]
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
ic_weight
symbol 000001.SZ 000002.SZ 000008.SZ 000009.SZ ...
trade_date
20140812 0.775920 0.826087 NaN 0.297659 ...
20140813 0.755853 0.789298 NaN 0.311037 ...
20140814 0.762542 0.799331 NaN 0.307692 ...
20140815 0.762542 0.852843 NaN 0.153846 ...
20140818 0.765886 0.913043 NaN 0.083612 ...
[5 rows x 488 columns]
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
ir_weight
symbol 000001.SZ 000002.SZ 000008.SZ 000009.SZ ...
trade_date
20140812 0.769231 0.859532 NaN 0.311037 ...
20140813 0.732441 0.819398 NaN 0.331104 ...
20140814 0.732441 0.819398 NaN 0.331104 ...
20140815 0.739130 0.872910 NaN 0.170569 ...
20140818 0.735786 0.933110 NaN 0.073579 ...
[5 rows x 488 columns]
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
max_IR
symbol 000001.SZ 000002.SZ 000008.SZ 000009.SZ ...
trade_date
20140813 0.334448 0.468227 NaN 0.678930 ...
20140814 0.374582 0.478261 NaN 0.678930 ...
20140815 0.414716 0.655518 NaN 0.384615 ...
20140818 0.421405 0.739130 NaN 0.163880 ...
20140819 0.505017 0.765886 NaN 0.120401 ...
[5 rows x 488 columns]
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
max_IC
symbol 000001.SZ 000002.SZ 000008.SZ 000009.SZ ...
trade_date
20140221 0.030100 0.324415 NaN 0.903010 ...
20140224 0.020067 0.163880 NaN 0.956522 ...
20140225 0.193980 0.672241 NaN 0.451505 ...
20140226 0.341137 0.903010 NaN 0.120401 ...
20140227 0.471572 0.799331 NaN 0.170569 ...
[5 rows x 488 columns]
# Measure the 30-day IC of each combined factor, then append the single-factor
# ICs (sign-flipped, to match the flipped factors) for comparison; float_mv is
# dropped because it was excluded from the combination.
period = 30
ic_30 = multi_factor.get_factors_ic_df(comb_factors,
                                       price=dv.get_ts("close_adj"),
                                       high=dv.get_ts("high_adj"),  # optional
                                       low=dv.get_ts("low_adj"),    # optional
                                       n_quantiles=5,               # number of quantile buckets
                                       mask=mask,                   # filter condition
                                       can_enter=can_enter,         # entry allowed
                                       can_exit=can_exit,           # exit allowed
                                       period=period,               # holding period
                                       benchmark_price=dv.data_benchmark,  # omit for absolute returns
                                       commission = 0.0008,
                                       )
ic_30 = pd.concat([ic_30,-1*ic[30].drop("float_mv",axis=1)],axis=1)
ic_30.head()
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 48%
Nan Data Count (should be zero) : 0; Percentage of effective data: 48%
Nan Data Count (should be zero) : 0; Percentage of effective data: 48%
Nan Data Count (should be zero) : 0; Percentage of effective data: 56%
equal_weight | ic_weight | ir_weight | max_IR | max_IC | pb | pe | ps | momentum | ||
---|---|---|---|---|---|---|---|---|---|---|
trade_date | ||||||||||
20140102 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
20140103 | -0.046945 | NaN | NaN | NaN | NaN | -0.053375 | -0.018784 | -0.004749 | -0.050374 | |
20140106 | -0.075316 | NaN | NaN | NaN | NaN | -0.085169 | -0.053065 | -0.018863 | -0.065761 | |
20140107 | 0.027397 | NaN | NaN | NaN | NaN | 0.026080 | 0.023327 | 0.056947 | 0.013767 | |
20140108 | 0.131549 | NaN | NaN | NaN | NaN | 0.084499 | 0.081695 | 0.158560 | 0.132101 |
# Per-column IC statistics from 2014-09-01 onward, computed while the index is
# still in integer yyyymmdd form (the .loc slice relies on that ordering).
ic_30_mean = {col: ic_30[col].loc[20140901:].mean() for col in ic_30.columns}
ic_30_std = {col: ic_30[col].loc[20140901:].std() for col in ic_30.columns}
ir_30 = {col: ic_30_mean[col] / ic_30_std[col] for col in ic_30.columns}
# Then convert the integer yyyymmdd index to datetimes for plotting below.
import datetime
parsed_dates = pd.Series(ic_30.index).apply(
    lambda d: datetime.datetime.strptime(str(d), '%Y%m%d'))
ic_30.index = parsed_dates
可视化比较
# Mean IC (with std error bars) across weighting methods and single factors.
pd.Series(ic_30_mean).plot(kind="barh",xerr=pd.Series(ic_30_std),figsize=(15,5))
<matplotlib.axes._subplots.AxesSubplot at 0x7f3dd0055780>
# Spot-check: equal-weight combo vs ic-weight combo vs the strongest single factor.
print(ic_30_mean["equal_weight"])
print(ic_30_mean["ic_weight"])
print(ic_30_mean["pe"])
0.11461587810097988
0.10435470638726971
0.1067541063545408
# IC-IR comparison across methods.
pd.Series(ir_30).plot(kind="barh",figsize=(15,5))
<matplotlib.axes._subplots.AxesSubplot at 0x7f3dcfc3d0f0>
print(ir_30["equal_weight"])
print(ir_30["ic_weight"])
print(ir_30["pe"])
0.5528241142805751
0.48673093039146453
0.4986503963545165
# IC time series: full sample, then restricted to 2017-01-03 onward.
ic_30[["equal_weight","ic_weight","pe"]].plot(kind="line",figsize=(15,5),)
<matplotlib.axes._subplots.AxesSubplot at 0x7f3dcfbb4780>
ic_30.loc[datetime.date(2017,1,3):,][["equal_weight","ic_weight","pe"]].plot(kind="line",figsize=(15,5),)
<matplotlib.axes._subplots.AxesSubplot at 0x7f3dcfd1f5c0>
import matplotlib.pyplot as plt
from jaqs_fxdayu.research.signaldigger.analysis import analysis
from jaqs_fxdayu.research import SignalDigger
# Full signal report for the equal-weight combined factor. A benchmark price
# is supplied here, so the reported returns are relative (excess) returns.
obj = SignalDigger()
obj.process_signal_before_analysis(signal=comb_factors["equal_weight"],
                                   price=dv.get_ts("close_adj"),
                                   high=dv.get_ts("high_adj"),  # optional
                                   low=dv.get_ts("low_adj"),    # optional
                                   n_quantiles=5,               # number of quantile buckets
                                   mask=mask,                   # filter condition
                                   can_enter=can_enter,         # entry allowed
                                   can_exit=can_exit,           # exit allowed
                                   period=30,                   # holding period
                                   benchmark_price=dv.data_benchmark,  # omit for absolute returns
                                   commission = 0.0008,
                                   )
obj.create_full_report()
plt.show()
Nan Data Count (should be zero) : 0; Percentage of effective data: 56%
Value of signals of Different Quantiles Statistics
min max mean std count count %
quantile
1 0.000000 0.538462 0.103221 0.060599 53388 20.145655
2 0.180602 0.628763 0.308244 0.060081 53003 20.000377
3 0.371237 0.695652 0.510054 0.059086 52990 19.995472
4 0.565217 0.849498 0.708706 0.057843 53003 20.000377
5 0.755853 1.000000 0.904544 0.056265 52626 19.858119
Figure saved: E:\2018_Course\HighSchool\Final\5_因子研发工具实操Richard\returns_report.pdf
Information Analysis
ic
IC Mean 0.120
IC Std. 0.205
t-stat(IC) 17.957
p-value(IC) 0.000
IC Skew -0.128
IC Kurtosis -0.719
Ann. IR 0.584
Figure saved: E:\2018_Course\HighSchool\Final\5_因子研发工具实操Richard\information_report.pdf
<matplotlib.figure.Figure at 0x18b506e5048>
# Detailed statistics (ic / returns / potential space) on the processed signal.
print(analysis(obj.signal_data,is_event=False,period=30))
{'ic': return_ic upside_ret_ic downside_ret_ic
IC Mean 1.199666e-01 -0.025340 2.590128e-01
IC Std. 2.054836e-01 0.203069 1.710594e-01
t-stat(IC) 1.795679e+01 -3.838066 4.657144e+01
p-value(IC) 2.933009e-62 0.000132 6.274188e-247
IC Skew -1.281287e-01 0.369542 -4.648359e-01
IC Kurtosis -7.191112e-01 -0.728975 -1.038014e-01
Ann. IR 5.838256e-01 -0.124786 1.514168e+00, 'ret': long_ret long_short_ret top_quantile_ret bottom_quantile_ret \
t-stat 5.009707 11.970392 27.353712 -21.213261
p-value 0.000000 0.000000 0.000000 0.000000
skewness -0.049712 0.305483 2.104621 1.352262
kurtosis 4.585943 1.671780 13.188646 6.368882
Ann. Ret 0.034979 0.085021 0.097492 -0.105558
Ann. Vol 0.075573 0.076875 0.287875 0.404814
Ann. IR 0.462853 1.105960 0.338663 -0.260757
occurance 946.000000 946.000000 52626.000000 53388.000000
tmb_ret all_sample_ret
t-stat 12.331461 -2.447519
p-value 0.000000 0.014390
skewness 0.245940 1.495618
kurtosis 1.438639 9.227692
Ann. Ret 0.203389 -0.004609
Ann. Vol 0.178518 0.341296
Ann. IR 1.139320 -0.013503
occurance 946.000000 265010.000000 , 'space': long_space top_quantile_space bottom_quantile_space \
Up_sp Mean 0.128720 0.126579 0.136456
Up_sp Std 0.085865 0.140843 0.158185
Up_sp IR 1.499101 0.898725 0.862640
Up_sp Pct5 0.041861 0.004368 0.004635
Up_sp Pct25 0.076599 0.036603 0.038489
Up_sp Pct50 0.103032 0.085234 0.090519
Up_sp Pct75 0.143446 0.165011 0.176643
Up_sp Pct95 0.331293 0.391718 0.421635
Up_sp Occur 946.000000 52626.000000 53388.000000
Down_sp Mean -0.137471 -0.108866 -0.191665
Down_sp Std 0.088789 0.202787 0.282507
Down_sp IR -1.548294 -0.536849 -0.678443
Down_sp Pct5 -0.343109 -0.384268 -1.000800
Down_sp Pct25 -0.147208 -0.097760 -0.171967
Down_sp Pct50 -0.109330 -0.046730 -0.086841
Down_sp Pct75 -0.089392 -0.019800 -0.039916
Down_sp Pct95 -0.063714 -0.003965 -0.008188
Down_sp Occur 946.000000 52626.000000 53388.000000
tmb_space all_sample_space
Up_sp Mean 0.320615 0.130071
Up_sp Std 0.162529 0.143170
Up_sp IR 1.972659 0.908508
Up_sp Pct5 0.152553 0.004635
Up_sp Pct25 0.215860 0.038288
Up_sp Pct50 0.269578 0.088612
Up_sp Pct75 0.355337 0.172445
Up_sp Pct95 0.648456 0.395784
Up_sp Occur 946.000000 265010.000000
Down_sp Mean -0.247340 -0.152250
Down_sp Std 0.110376 0.253253
Down_sp IR -2.240885 -0.601178
Down_sp Pct5 -0.477266 -1.000800
Down_sp Pct25 -0.304182 -0.133021
Down_sp Pct50 -0.211650 -0.063759
Down_sp Pct75 -0.167795 -0.027543
Down_sp Pct95 -0.121475 -0.005339
Down_sp Occur 946.000000 265010.000000 }
进一步测试下等权合成因子的绝对收益效果
# Re-run the report WITHOUT a benchmark price: returns are now absolute.
obj.process_signal_before_analysis(signal=comb_factors["equal_weight"],
                                   price=dv.get_ts("close_adj"),
                                   high=dv.get_ts("high_adj"),  # optional
                                   low=dv.get_ts("low_adj"),    # optional
                                   n_quantiles=5,               # number of quantile buckets
                                   mask=mask,                   # filter condition
                                   can_enter=can_enter,         # entry allowed
                                   can_exit=can_exit,           # exit allowed
                                   period=30,                   # holding period
                                   #benchmark_price=dv.data_benchmark,  # omitted -> absolute returns
                                   commission = 0.0008,
                                   )
obj.create_full_report()
plt.show()
Nan Data Count (should be zero) : 0; Percentage of effective data: 56%
Value of signals of Different Quantiles Statistics
min max mean std count count %
quantile
1 0.000000 0.538462 0.103221 0.060599 53388 20.145655
2 0.180602 0.628763 0.308244 0.060081 53003 20.000377
3 0.371237 0.695652 0.510054 0.059086 52990 19.995472
4 0.565217 0.849498 0.708706 0.057843 53003 20.000377
5 0.755853 1.000000 0.904544 0.056265 52626 19.858119
Figure saved: E:\2018_Course\HighSchool\Final\5_因子研发工具实操Richard\returns_report.pdf
Information Analysis
ic
IC Mean 0.120
IC Std. 0.205
t-stat(IC) 17.957
p-value(IC) 0.000
IC Skew -0.128
IC Kurtosis -0.719
Ann. IR 0.584
Figure saved: E:\2018_Course\HighSchool\Final\5_因子研发工具实操Richard\information_report.pdf
<matplotlib.figure.Figure at 0x18b51124cf8>
# Export a 0/1 membership matrix of the top quantile (quantile 5) to Excel.
top_bucket = obj.signal_data[obj.signal_data['quantile']==5]
excel_data = (top_bucket["quantile"]
              .unstack()
              .replace(np.nan, 0)   # outside the top bucket -> 0
              .replace(5, 1))       # member of the top bucket -> 1
print(excel_data.head())
excel_data.to_excel('./equal_weight_quantile_5.xlsx')
symbol 000001.SZ 000002.SZ 000012.SZ 000024.SZ ...
trade_date
20140103 0.0 1.0 0.0 0.0 ...
20140106 0.0 1.0 0.0 0.0 ...
20140107 0.0 1.0 0.0 0.0 ...
20140108 0.0 1.0 0.0 0.0 ...
20140109 0.0 1.0 0.0 0.0 ...
[5 rows x 244 columns]