字数 18111
阅读 74729
from jaqs_fxdayu.data import DataView
import warnings
dataview_folder = '../Factor'
dv = DataView()
Dataview loaded successfully.
import numpy as np
def mask_index_member():
    """Build the signal filter that flags non-index constituents.

    Reads the 'index_member' field from the module-level DataView ``dv``.

    Returns:
        The element-wise negation of ``index_member > 0``: True wherever the
        value is not positive (per the original comment, stocks that are not
        index members and should be filtered out).
    """
    membership = dv.get_ts('index_member')
    is_member = membership > 0
    return ~is_member
def limit_up_down():
    """Build the tradability masks for entering and exiting positions.

    Uses the module-level DataView ``dv``. As a side effect, the fields
    'up_limit' and 'down_limit' are added to ``dv`` via ``add_formula``.

    Returns:
        tuple: ``(can_enter, can_exit)``. ``can_enter`` is True where the
        stock is neither limit-up nor suspended; ``can_exit`` is True where
        it is neither limit-down nor suspended.
    """
    # Per the original comments, trade_status == 0 marks a suspended stock.
    # NOTE(review): confirm the encoding of trade_status against the data source.
    suspended = dv.get_ts('trade_status') == 0
    tradable = ~suspended

    # Limit-up flag: daily close-to-close return above 9.5%.
    dv.add_formula('up_limit', '(close - Delay(close, 1)) / Delay(close, 1) > 0.095', is_quarterly=False, add_data=True)
    # Limit-down flag: daily close-to-close return below -9.5%.
    dv.add_formula('down_limit', '(close - Delay(close, 1)) / Delay(close, 1) < -0.095', is_quarterly=False, add_data=True)

    not_limit_up = dv.get_ts('up_limit') < 1
    not_limit_down = dv.get_ts('down_limit') < 1

    can_enter = np.logical_and(not_limit_up, tradable)
    can_exit = np.logical_and(not_limit_down, tradable)
    return can_enter, can_exit
# Signal filter (True for entries to be filtered out) and the entry/exit
# tradability masks, built once and shared by the analyses below.
mask = mask_index_member()
can_enter,can_exit = limit_up_down()
from jaqs_fxdayu.research import Optimizer
:param dataview: 包含了计算公式所需要的所有数据的jaqs.data.DataView对象
:param formula: str 需要优化的公式:如'(open - Delay(close, LEN1)) / Delay(close, LEN2)'
:param params: dict 需要优化的参数范围:如{"LEN1":range(1,10,1),"LEN2":range(1,10,1)}
:param name: str (N) 信号的名称
:param price: dataFrame (N) 价格与ret不能同时存在
:param ret: dataFrame (N) 收益
:param high: dataFrame (N) 最高价 用于计算上行收益空间
:param low: dataFrame (N) 最低价 用于计算下行收益空间
:param benchmark_price: dataFrame (N) 基准价格 若不为空收益计算模式为相对benchmark的收益
:param period: int (5) 选股持有期
:param n_quantiles: int (5)
:param mask: 过滤条件 dataFrame (N)
:param can_enter: dataFrame (N) 是否能进场
:param can_exit: dataFrame (N) 是否能出场
:param forward: bool(True) 是否forward return
:param commission: float(0.0008) 手续费率
:param is_event: bool(False) 是否是事件(0/1因子)
:param is_quarterly: bool(False) 是否是季度因子
# Adjusted close / high / low series read from the DataView.
price, high, low = (dv.get_ts(field) for field in ('close_adj', 'high_adj', 'low_adj'))
# Benchmark data held by the DataView (an alternative for benchmark_price).
price_bench = dv.data_benchmark
# Optimizer for the formula '- Correlation(vwap_adj, volume, LEN)', where LEN
# is the placeholder to be searched over.
# NOTE(review): no params/name/price/mask arguments are visible in this
# listing although the formula needs a range for LEN - presumably lines were
# lost in extraction; confirm against the original notebook.
optimizer = Optimizer(dataview=dv,
formula='- Correlation(vwap_adj, volume, LEN)',
benchmark_price=None,  # None -> absolute returns; price_bench -> returns relative to the benchmark
commission=0.0008,  # commission rate, default 0.0008
is_quarterly=False)  # whether this is a quarterly factor, default False
ret_best = optimizer.enumerate_optimizer(target_type="top_quantile_ret",#优化目标类型
target="Ann. IR",#优化目标
in_sample_range=[20140101,20160101],#样本内范围 默认为None,在全样本上优化
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
# 样本内最优绩效
divert{'LEN': 12}
long_ret short_ret long_short_ret top_quantile_ret \
t-stat 5.124623 -4.671154 3.002314 33.406886
p-value 0.000000 0.000000 0.002820 0.000000
skewness -0.574749 0.308295 -1.134718 0.672716
kurtosis -0.020546 -0.321323 4.024431 3.701657
Ann. Ret 0.263163 -0.254330 0.026169 0.309079
Ann. Vol 0.399007 0.423049 0.067725 0.534581
Ann. IR 0.659545 -0.601183 0.386401 0.578172
occurance 488.000000 488.000000 488.000000 26932.000000
bottom_quantile_ret tmb_ret all_sample_ret
t-stat 23.920794 4.053846 68.895067
p-value 0.000000 0.000060 0.000000
skewness 0.734033 -0.947489 0.952250
kurtosis 2.511366 3.154532 4.297959
Ann. Ret 0.232214 0.078447 0.299310
Ann. Vol 0.564996 0.150358 0.563333
Ann. IR 0.411001 0.521735 0.531319
occurance 27326.000000 488.000000 135632.000000
return_ic upside_ret_ic downside_ret_ic
IC Mean 4.977689e-02 0.007567 4.067761e-02
IC Std. 1.375455e-01 0.136779 1.309188e-01
t-stat(IC) 7.994500e+00 1.222116 6.863780e+00
p-value(IC) 9.526921e-15 0.222255 2.048308e-11
IC Skew -2.788311e-01 -0.168869 1.339354e-01
IC Kurtosis 3.586011e-01 0.880461 3.290019e-01
Ann. IR 3.618940e-01 0.055323 3.107087e-01
long_space short_space long_short_space top_quantile_space \
Up_sp Mean 0.157002 0.190311 0.347313 0.163059
Up_sp Std 0.091633 0.121522 0.137155 0.157749
Up_sp IR 1.713368 1.566062 2.532260 1.033660
Up_sp Pct5 0.044706 0.089251 0.194148 0.008414
Up_sp Pct25 0.087171 0.117107 0.244053 0.054948
Up_sp Pct50 0.135733 0.144848 0.299405 0.118666
Up_sp Pct75 0.216674 0.199801 0.423588 0.222814
Up_sp Pct95 0.346328 0.504144 0.632715 0.467370
Up_sp Occur 488.000000 488.000000 488.000000 26932.000000
Down_sp Mean -0.205588 -0.168067 -0.373655 -0.197518
Down_sp Std 0.169212 0.102519 0.184256 0.299004
Down_sp IR -1.214970 -1.639367 -2.027911 -0.660587
Down_sp Pct5 -0.600693 -0.370054 -0.733124 -1.000800
Down_sp Pct25 -0.244028 -0.233207 -0.455733 -0.186344
Down_sp Pct50 -0.155209 -0.137987 -0.328554 -0.074158
Down_sp Pct75 -0.096340 -0.092161 -0.240092 -0.029651
Down_sp Pct95 -0.054417 -0.043182 -0.172603 -0.004708
Down_sp Occur 488.000000 488.000000 488.000000 26932.000000
bottom_quantile_space tmb_space all_sample_space
Up_sp Mean 0.166711 0.353840 0.167801
Up_sp Std 0.169874 0.140386 0.172294
Up_sp IR 0.981382 2.520474 0.973924
Up_sp Pct5 0.005588 0.181691 0.007073
Up_sp Pct25 0.050723 0.246997 0.052449
Up_sp Pct50 0.115628 0.315428 0.118884
Up_sp Pct75 0.229163 0.437551 0.227151
Up_sp Pct95 0.498527 0.651758 0.492546
Up_sp Occur 27326.000000 488.000000 135632.000000
Down_sp Mean -0.187780 -0.367400 -0.188830
Down_sp Std 0.273141 0.162273 0.282691
Down_sp IR -0.687484 -2.264084 -0.667972
Down_sp Pct5 -1.000800 -0.702965 -1.000800
Down_sp Pct25 -0.192127 -0.469411 -0.185333
Down_sp Pct50 -0.080023 -0.306924 -0.075690
Down_sp Pct75 -0.033645 -0.250947 -0.030806
Down_sp Pct95 -0.005492 -0.198553 -0.004880
Down_sp Occur 27326.000000 488.000000 135632.000000
from jaqs_fxdayu.research import SignalDigger
from jaqs_fxdayu.research.signaldigger.analysis import analysis
import matplotlib.pyplot as plt
obj = SignalDigger()
def draw_analysis(signal_data, period):
    """Load a signal and its holding period into the shared SignalDigger ``obj``.

    Args:
        signal_data: signal data to analyse; stored on ``obj.signal_data``.
        period: holding period; stored on ``obj.period``.

    NOTE(review): as listed, this function only sets two attributes, yet each
    call to it in this file is followed by report output ("Figure saved: ...").
    A reporting call was presumably lost from this listing - confirm against
    the original notebook before relying on it.
    """
    obj.signal_data, obj.period = signal_data, period
# Full sample: analyse the signal selected by the return-based optimisation.
draw_analysis(optimizer.all_signals[ret_best[0]["signal_name"]], period=30)
Value of signals of Different Quantiles Statistics
min max mean std count count %
1 -18.335262 -0.054921 -0.789213 0.133347 55208 20.142803
2 -0.909730 0.444112 -0.602800 0.151864 54825 20.003065
3 -0.852591 0.664250 -0.435393 0.195649 54792 19.991025
4 -0.773218 0.804137 -0.232928 0.229822 54825 20.003065
5 -0.644345 0.987584 0.151546 0.290481 54433 19.860042
Figure saved: E:\2018_Course\HighSchool\Final\5_因子研发工具实操Richard\returns_report.pdf
Information Analysis
IC Mean 0.068
IC Std. 0.139
t-stat(IC) 15.161
p-value(IC) 0.000
IC Skew 0.001
IC Kurtosis 0.123
Ann. IR 0.493
Figure saved: E:\2018_Course\HighSchool\Final\5_因子研发工具实操Richard\information_report.pdf
<matplotlib.figure.Figure at 0x27d35a74198>
# In-sample window: the 20140101-20160101 range passed as in_sample_range.
draw_analysis(optimizer.all_signals[ret_best[0]["signal_name"]].loc[20140101:20160101], period=30)
Value of signals of Different Quantiles Statistics
min max mean std count count %
1 -1.523660 -0.054921 -0.802340 0.108187 27326 20.147163
2 -0.909730 0.444112 -0.629085 0.159196 27129 20.001917
3 -0.852591 0.664250 -0.468080 0.211268 27116 19.992332
4 -0.773218 0.804137 -0.269078 0.249988 27129 20.001917
5 -0.644345 0.964528 0.119799 0.308186 26932 19.856671
Figure saved: E:\2018_Course\HighSchool\Final\5_因子研发工具实操Richard\returns_report.pdf
Information Analysis
IC Mean 0.050
IC Std. 0.138
t-stat(IC) 7.994
p-value(IC) 0.000
IC Skew -0.279
IC Kurtosis 0.359
Ann. IR 0.362
Figure saved: E:\2018_Course\HighSchool\Final\5_因子研发工具实操Richard\information_report.pdf
<matplotlib.figure.Figure at 0x27d354267f0>
# Out of sample: everything from 20160101 onward, after the in-sample window.
draw_analysis(optimizer.all_signals[ret_best[0]["signal_name"]].loc[20160101:], period=30)
Value of signals of Different Quantiles Statistics
min max mean std count count %
1 -18.335262 -0.240928 -0.776347 0.152981 27882 20.138533
2 -0.908570 0.088163 -0.577054 0.139606 27696 20.004189
3 -0.826511 0.291378 -0.403368 0.173149 27676 19.989744
4 -0.744467 0.490595 -0.197519 0.202012 27696 20.004189
5 -0.615349 0.987584 0.182636 0.268419 27501 19.863345
Figure saved: E:\2018_Course\HighSchool\Final\5_因子研发工具实操Richard\returns_report.pdf
Information Analysis
IC Mean 0.088
IC Std. 0.137
t-stat(IC) 13.743
p-value(IC) 0.000
IC Skew 0.303
IC Kurtosis -0.455
Ann. IR 0.642
Figure saved: E:\2018_Course\HighSchool\Final\5_因子研发工具实操Richard\information_report.pdf
<matplotlib.figure.Figure at 0x27d354ad0f0>
# 以持有期mean_ic为最优化目标
ic_best = optimizer.enumerate_optimizer(target_type="return_ic",#优化目标类型
target = "IC Mean",
in_sample_range=[20140101,20160101],#样本内范围 默认为None,在全样本上优化
divert{'LEN': 12}
return_ic upside_ret_ic downside_ret_ic
IC Mean 4.977689e-02 0.007567 4.067761e-02
IC Std. 1.375455e-01 0.136779 1.309188e-01
t-stat(IC) 7.994500e+00 1.222116 6.863780e+00
p-value(IC) 9.526921e-15 0.222255 2.048308e-11
IC Skew -2.788311e-01 -0.168869 1.339354e-01
IC Kurtosis 3.586011e-01 0.880461 3.290019e-01
Ann. IR 3.618940e-01 0.055323 3.107087e-01
long_ret short_ret long_short_ret top_quantile_ret \
t-stat 5.124623 -4.671154 3.002314 33.406886
p-value 0.000000 0.000000 0.002820 0.000000
skewness -0.574749 0.308295 -1.134718 0.672716
kurtosis -0.020546 -0.321323 4.024431 3.701657
Ann. Ret 0.263163 -0.254330 0.026169 0.309079
Ann. Vol 0.399007 0.423049 0.067725 0.534581
Ann. IR 0.659545 -0.601183 0.386401 0.578172
occurance 488.000000 488.000000 488.000000 26932.000000
bottom_quantile_ret tmb_ret all_sample_ret
t-stat 23.920794 4.053846 68.895067
p-value 0.000000 0.000060 0.000000
skewness 0.734033 -0.947489 0.952250
kurtosis 2.511366 3.154532 4.297959
Ann. Ret 0.232214 0.078447 0.299310
Ann. Vol 0.564996 0.150358 0.563333
Ann. IR 0.411001 0.521735 0.531319
occurance 27326.000000 488.000000 135632.000000
long_space short_space long_short_space top_quantile_space \
Up_sp Mean 0.157002 0.190311 0.347313 0.163059
Up_sp Std 0.091633 0.121522 0.137155 0.157749
Up_sp IR 1.713368 1.566062 2.532260 1.033660
Up_sp Pct5 0.044706 0.089251 0.194148 0.008414
Up_sp Pct25 0.087171 0.117107 0.244053 0.054948
Up_sp Pct50 0.135733 0.144848 0.299405 0.118666
Up_sp Pct75 0.216674 0.199801 0.423588 0.222814
Up_sp Pct95 0.346328 0.504144 0.632715 0.467370
Up_sp Occur 488.000000 488.000000 488.000000 26932.000000
Down_sp Mean -0.205588 -0.168067 -0.373655 -0.197518
Down_sp Std 0.169212 0.102519 0.184256 0.299004
Down_sp IR -1.214970 -1.639367 -2.027911 -0.660587
Down_sp Pct5 -0.600693 -0.370054 -0.733124 -1.000800
Down_sp Pct25 -0.244028 -0.233207 -0.455733 -0.186344
Down_sp Pct50 -0.155209 -0.137987 -0.328554 -0.074158
Down_sp Pct75 -0.096340 -0.092161 -0.240092 -0.029651
Down_sp Pct95 -0.054417 -0.043182 -0.172603 -0.004708
Down_sp Occur 488.000000 488.000000 488.000000 26932.000000
bottom_quantile_space tmb_space all_sample_space
Up_sp Mean 0.166711 0.353840 0.167801
Up_sp Std 0.169874 0.140386 0.172294
Up_sp IR 0.981382 2.520474 0.973924
Up_sp Pct5 0.005588 0.181691 0.007073
Up_sp Pct25 0.050723 0.246997 0.052449
Up_sp Pct50 0.115628 0.315428 0.118884
Up_sp Pct75 0.229163 0.437551 0.227151
Up_sp Pct95 0.498527 0.651758 0.492546
Up_sp Occur 27326.000000 488.000000 135632.000000
Down_sp Mean -0.187780 -0.367400 -0.188830
Down_sp Std 0.273141 0.162273 0.282691
Down_sp IR -0.687484 -2.264084 -0.667972
Down_sp Pct5 -1.000800 -0.702965 -1.000800
Down_sp Pct25 -0.192127 -0.469411 -0.185333
Down_sp Pct50 -0.080023 -0.306924 -0.075690
Down_sp Pct75 -0.033645 -0.250947 -0.030806
Down_sp Pct95 -0.005492 -0.198553 -0.004880
Down_sp Occur 27326.000000 488.000000 135632.000000
# Signal table of the best return-optimised candidate.
best_signal = optimizer.all_signals[ret_best[0]["signal_name"]]
# Keep only the rows assigned to quantile 5.
top_bucket = best_signal[best_signal['quantile'] == 5]
# Pivot to a wide table and turn it into a 0/1 selection matrix:
# missing entries become 0 and the quantile label 5 becomes 1.
excel_data = top_bucket["quantile"].unstack().replace(np.nan, 0).replace(5, 1)
print(excel_data.head())
symbol 000001.SZ 000002.SZ 000008.SZ 000009.SZ ...
20140103 1.0 0.0 0.0 0.0 ...
20140106 0.0 0.0 0.0 0.0 ...
20140107 0.0 1.0 0.0 0.0 ...
20140108 0.0 1.0 0.0 0.0 ...
20140109 0.0 0.0 0.0 0.0 ...
[5 rows x 466 columns]
# Optimizer for a crossover event: the formula holds when
# Ts_Mean(close_adj, SHORT) >= Ts_Mean(close_adj, LONG) now while the opposite
# comparison held under Delay(..., 1). SHORT and LONG are the placeholders to
# be searched over.
event_opt = Optimizer(dataview=dv,
formula="(Ts_Mean(close_adj, SHORT)>=Ts_Mean(close_adj, LONG))&&(Delay(Ts_Mean(close_adj, SHORT)<Ts_Mean(close_adj, LONG), 1))",
benchmark_price=None,  # None -> absolute returns; price_bench -> returns relative to the benchmark
commission=0.0008,  # commission rate, default 0.0008
is_quarterly=False)  # whether this is a quarterly factor, default False
event_best = event_opt.enumerate_optimizer(target_type="long_ret",
target="Ann. IR",
in_sample_range=[20140101,20160101],#样本内范围 默认为None,在全样本上优化
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 56%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
# 事件样本内最优绩效 ps:事件没有ic分析结果
# 可以进一步尝试优化space,辅以更精细的择时捕捉事件收益
cross{'SHORT': 10, 'LONG': 55}
long_ret long_short_ret all_sample_ret
t-stat 12.313107 -2.266010 69.719295
p-value 0.000000 0.024080 0.000000
skewness 1.005597 0.408082 0.930253
kurtosis 4.215330 2.882389 4.189708
Ann. Ret 0.453946 -0.047620 0.311239
Ann. Vol 0.495812 0.135829 0.569307
Ann. IR 0.915561 -0.350585 0.546699
occurance 1460.000000 338.000000 131192.000000
long_space all_sample_space
Up_sp Mean 0.165475 0.169426
Up_sp Std 0.165639 0.173804
Up_sp IR 0.999012 0.974813
Up_sp Pct5 0.004537 0.006992
Up_sp Pct25 0.049488 0.052771
Up_sp Pct50 0.118248 0.120406
Up_sp Pct75 0.234487 0.229496
Up_sp Pct95 0.475575 0.496821
Up_sp Occur 1460.000000 131192.000000
Down_sp Mean -0.145401 -0.191456
Down_sp Std 0.249432 0.285144
Down_sp IR -0.582929 -0.671437
Down_sp Pct5 -1.000800 -1.000800
Down_sp Pct25 -0.123930 -0.190589
Down_sp Pct50 -0.059322 -0.076026
Down_sp Pct75 -0.025526 -0.030601
Down_sp Pct95 -0.005086 -0.004816
Down_sp Occur 1460.000000 131192.000000
# Full sample
draw_analysis(event_opt.all_signals[event_best[0]["signal_name"]], period=30)
Value of signals of Different Quantiles Statistics
min max mean std count count %
1 0.0 1.0 0.01183 0.108122 269647 100.0
Figure saved: E:\2018_Course\HighSchool\Final\5_因子研发工具实操Richard\returns_report.pdf
Information Analysis
IC Mean -0.005
IC Std. 0.066
t-stat(IC) -1.849
p-value(IC) 0.065
IC Skew -0.135
IC Kurtosis 0.011
Ann. IR -0.070
Figure saved: E:\2018_Course\HighSchool\Final\5_因子研发工具实操Richard\information_report.pdf
<matplotlib.figure.Figure at 0x27d375a1240>
# In sample
draw_analysis(event_opt.all_signals[event_best[0]["signal_name"]].loc[20140101:20160101], period=30)
Value of signals of Different Quantiles Statistics
min max mean std count count %
1 0.0 1.0 0.011129 0.104905 131192 100.0
Figure saved: E:\2018_Course\HighSchool\Final\5_因子研发工具实操Richard\returns_report.pdf
Information Analysis
IC Mean -0.004
IC Std. 0.068
t-stat(IC) -1.202
p-value(IC) 0.230
IC Skew -0.118
IC Kurtosis 0.520
Ann. IR -0.065
Figure saved: E:\2018_Course\HighSchool\Final\5_因子研发工具实操Richard\information_report.pdf
<matplotlib.figure.Figure at 0x27d3c9e57f0>
# Out of sample
draw_analysis(event_opt.all_signals[event_best[0]["signal_name"]].loc[20160101:], period=30)
Value of signals of Different Quantiles Statistics
min max mean std count count %
1 0.0 1.0 0.012495 0.111081 138455 100.0
Figure saved: E:\2018_Course\HighSchool\Final\5_因子研发工具实操Richard\returns_report.pdf
Information Analysis
IC Mean -0.005
IC Std. 0.064
t-stat(IC) -1.413
p-value(IC) 0.158
IC Skew -0.154
IC Kurtosis -0.623
Ann. IR -0.074
Figure saved: E:\2018_Course\HighSchool\Final\5_因子研发工具实操Richard\information_report.pdf
<matplotlib.figure.Figure at 0x27d3c9e58d0>
import pandas as pd
import talib as ta
# Optimizer created without a formula: its candidate signals are computed
# outside the formula engine and supplied directly.
slp_opt = Optimizer(dataview=dv,
commission=0.0008,  # commission rate, default 0.0008
is_quarterly=False)  # whether this is a quarterly factor, default False
# Collect one pre-computed signal per regression window (2..8) so the
# optimizer can compare them like formula-generated candidates.
signals_dict = {}
for param in range(2, 9):
    field_name = 'SLOPE_PARAM=' + str(param)
    # Negated TA-Lib linear-regression slope of each column of `price`.
    # .items() is used because DataFrame.iteritems() was removed in pandas 2.0.
    slope_df = pd.DataFrame(
        {name: -ta.LINEARREG_SLOPE(value.values, param) for name, value in price.items()},
        index=price.index,
    )
    # Register the slopes in the DataView under field_name; without this step
    # the get_ts lookup below asks for a field that was never added.
    dv.append_df(slope_df, field_name)
    signals_dict[field_name] = slp_opt.cal_signal(dv.get_ts(field_name))
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
Nan Data Count (should be zero) : 0; Percentage of effective data: 57%
# Hand the pre-computed signals to the optimizer as its candidate set.
slp_opt.all_signals = signals_dict
# 以持有期mean_ic为最优化目标
slp_best = slp_opt.enumerate_optimizer(target_type="return_ic",#优化目标类型
target = "IC Mean",
in_sample_range=[20140101,20160101],#样本内范围 默认为None,在全样本上优化
# 样本内绩效
return_ic upside_ret_ic downside_ret_ic
IC Mean 4.310682e-02 0.013728 5.593689e-02
IC Std. 1.622111e-01 0.161274 1.560077e-01
t-stat(IC) 5.828245e+00 1.866859 7.863662e+00
p-value(IC) 1.028626e-08 0.062531 2.483445e-14
IC Skew 4.934076e-02 0.214033 1.260678e-01
IC Kurtosis 1.926748e-01 0.320293 3.347468e-02
Ann. IR 2.657451e-01 0.085121 3.585521e-01
long_ret short_ret long_short_ret top_quantile_ret \
t-stat 3.819739 -1.736368 4.529189 32.491975
p-value 0.000150 0.083140 0.000010 0.000000
skewness -0.249597 0.317080 0.363507 0.935224
kurtosis -0.148464 -0.301540 1.743548 4.261522
Ann. Ret 0.210201 -0.096996 0.052268 0.314857
Ann. Vol 0.424498 0.430009 0.089020 0.555765
Ann. IR 0.495177 -0.225567 0.587147 0.566529
occurance 481.000000 479.000000 481.000000 26535.000000
bottom_quantile_ret tmb_ret all_sample_ret
t-stat 19.076969 5.481541 69.104855
p-value 0.000000 0.000000 0.000000
skewness 0.835940 0.082211 0.941671
kurtosis 3.753251 4.390647 4.247603
Ann. Ret 0.186429 0.134394 0.303954
Ann. Vol 0.564499 0.189125 0.566070
Ann. IR 0.330256 0.710606 0.536955
occurance 26917.000000 481.000000 133610.000000