@Channelchan
2018-01-12T13:51:44.000000Z
字数 7362
阅读 25280
import numpy as np
from jaqs.data import DataView
from jaqs.research import SignalDigger
import warnings
warnings.filterwarnings("ignore")
# Load the previously saved HS300 DataView from disk.
dataview_folder = 'JAQS_Data/hs300'
dv = DataView()
dv.load_dataview(dataview_folder)

# Pull the three frames that cal_obj later forwards to SignalDigger.
# NOTE(review): presumably index-membership and tradability masks -- confirm
# against the code that built this DataView.
mask, can_enter, can_exit = (
    dv.get_ts(field_name)
    for field_name in ('mask_index_member', 'can_enter', 'can_exit')
)
D:\Anaconda3\lib\site-packages\statsmodels\compat\pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
from pandas.core import datetools
Dataview loaded successfully.
StdDev(x,n)
Delay(x,n)
Delta(x,n)
Return(x,n,log)
Ts_Skewness(x,n)
Product(x,n)
Ts_Mean(x,n)
1. 过去五天的平均成交量相对于五天前的五日平均成交量的变化率
Ts_Mean(volume,5)/Delay(Ts_Mean(volume,5),5)
2. 最低价的累积变化率上升
Product(low/Delay(low,1),5) - 1
3. 价格波动率上升
StdDev(Return(close,1,True), 10)
4. 收益率偏度
Ts_Skewness(Return(low,5),5)
5. 价量协方差
Covariance(low, volume, 10)
# Drop any existing 'Cov' field before add_factor() registers it again below.
# When the field is absent the call only reports that it does not exist.
dv.remove_field('Cov')
Field name [Cov] does not exist.
def add_factor():
    """Register the five formula factors on the module-level DataView ``dv``.

    Each factor is added with ``dv.add_formula(name, formula,
    is_quarterly=False)``. The function takes no arguments, returns nothing,
    and relies on ``dv`` already being loaded.
    """
    factor_formulas = (
        # 5-day mean volume relative to its value 5 days earlier, multiplied
        # by -1*Return(close,5).
        # NOTE(review): the formula listed in the accompanying notes has no
        # `*-1*Return(close,5)` term -- confirm which one is intended.
        ('Volume_Up',
         'Ts_Mean(volume,5)/Delay(Ts_Mean(volume,5),5)*-1*Return(close,5)'),
        # NOTE(review): under usual precedence this is -(Product(...)) - 1,
        # not -(Product(...) - 1); the two differ only by a constant, so the
        # cross-sectional ordering is the same. Parenthesize if the negated
        # cumulative change is what is wanted.
        ('Low_Product', '-1*Product(low/Delay(low,1),5) - 1'),
        # Negated 10-day standard deviation of 1-day close returns.
        # NOTE(review): the notes use Return(close,1,True) -- confirm.
        ('STD', '-1*StdDev(Return(close,1), 10)'),
        # NOTE(review): the notes compute skewness on `low`, this uses
        # `volume` -- confirm which input is intended.
        ('Skewness', 'Ts_Skewness(Return(volume,5),5)'),
        # Negated 10-day covariance of low price and volume.
        ('Cov', '-1*Covariance(low,volume,10)'),
    )
    for field_name, formula in factor_formulas:
        dv.add_formula(field_name, formula, is_quarterly=False)
# Register the formula factors on dv so they can be read back with dv.get_ts.
add_factor()
import matplotlib.pyplot as plt
def cal_obj(signal, name, period, quantile):
    """Run the SignalDigger analysis for one signal and return the digger.

    Relies on the module-level ``dv``, ``mask``, ``can_enter`` and
    ``can_exit`` objects.

    Args:
        signal: Signal values handed to ``process_signal_before_analysis``
            (in this script, a frame obtained from ``dv.get_ts``).
        name: Sub-folder name under ``hs300/`` where the PDF reports go.
        period: Forwarded as ``period``.
            NOTE(review): presumably the holding period in trading days --
            confirm against the JAQS documentation.
        quantile: Forwarded as ``n_quantiles``.

    Returns:
        The ``SignalDigger`` instance after ``create_full_report()`` has run.
    """
    price = dv.get_ts('close_adj')
    price_bench = dv.data_benchmark

    obj = SignalDigger(output_folder="hs300/%s" % name,
                       output_format='pdf')
    obj.process_signal_before_analysis(
        signal,
        price=price,
        n_quantiles=quantile,
        period=period,
        benchmark_price=price_bench,
        can_enter=can_enter,
        can_exit=can_exit,
        mask=mask,
    )
    obj.create_full_report()
    return obj
def plot_pfm(signal, name, period=5, quantile=5):
    """Generate the full report for ``signal`` and display its figures.

    Args:
        signal: Signal values forwarded to ``cal_obj``.
        name: Report sub-folder name forwarded to ``cal_obj``.
        period: Forwarded to ``cal_obj`` (default 5).
        quantile: Forwarded to ``cal_obj`` (default 5).

    Returns:
        None. The figures are shown through ``plt.show()``.
    """
    # The digger returned by cal_obj is not needed here; only the report
    # generation it performs matters.
    cal_obj(signal, name, period, quantile)
    plt.show()
def signal_data(signal, name, period=5, quantile=5):
    """Run the analysis for ``signal`` and return the digger's signal data.

    Args:
        signal: Signal values forwarded to ``cal_obj``.
        name: Report sub-folder name forwarded to ``cal_obj``.
        period: Forwarded to ``cal_obj`` (default 5).
        quantile: Forwarded to ``cal_obj`` (default 5).

    Returns:
        The ``signal_data`` attribute of the ``SignalDigger`` built by
        ``cal_obj``.
    """
    obj = cal_obj(signal, name, period, quantile)
    return obj.signal_data
# Map each short signal key to the factor frame registered by add_factor().
signal_dict = {
    sig_key: dv.get_ts(field_name)
    for sig_key, field_name in (
        ('sig_1', 'Volume_Up'),
        ('sig_2', 'Low_Product'),
        ('sig_3', 'STD'),
        ('sig_4', 'Skewness'),
        ('sig_5', 'Cov'),
    )
}

# Run the full analysis once per signal and keep each digger's signal_data.
data_signal = {
    sig_key: signal_data(factor_frame, sig_key)
    for sig_key, factor_frame in signal_dict.items()
}
Nan Data Count (should be zero) : 0; Percentage of effective data: 53%
Value of signals of Different Quantiles Statistics
min max mean std count \
quantile
1 -4.448244e+16 1.895836e-01 -6.499444e+12 3.282543e+14 82946
2 -3.929699e-01 2.325106e-01 -2.226987e-02 4.620945e-02 82360
3 -2.352434e-01 2.659026e-01 -1.242107e-03 3.827854e-02 82350
4 -1.652057e-01 3.359749e-01 1.508382e-02 3.799451e-02 82360
5 -9.601820e-02 3.760106e+16 3.462099e+12 2.003023e+14 81770
count %
quantile
1 20.142987
2 20.000680
3 19.998252
4 20.000680
5 19.857402
Figure saved: C:\Users\small\OneDrive\notebook\Internet_Course\Selection\JAQS\hs300\sig_1\returns_report.pdf
Information Analysis
ic
IC Mean 0.044
IC Std. 0.174
t-stat(IC) NaN
p-value(IC) NaN
IC Skew NaN
IC Kurtosis NaN
Ann. IR 0.254
Figure saved: C:\Users\small\OneDrive\notebook\Internet_Course\Selection\JAQS\hs300\sig_1\information_report.pdf
Nan Data Count (should be zero) : 0; Percentage of effective data: 53%
Value of signals of Different Quantiles Statistics
min max mean std count count %
quantile
1 -2.668837 -1.794997 -2.060149 0.057623 83290 20.143464
2 -2.259160 -1.770604 -2.015925 0.037586 82694 19.999323
3 -2.182598 -1.746186 -1.998437 0.037362 82687 19.997630
4 -2.134259 -1.699563 -1.982646 0.039398 82694 19.999323
5 -2.076210 -1.265291 -1.947343 0.065847 82119 19.860261
Figure saved: C:\Users\small\OneDrive\notebook\Internet_Course\Selection\JAQS\hs300\sig_2\returns_report.pdf
Information Analysis
ic
IC Mean 0.040
IC Std. 0.178
t-stat(IC) NaN
p-value(IC) NaN
IC Skew NaN
IC Kurtosis NaN
Ann. IR 0.223
Figure saved: C:\Users\small\OneDrive\notebook\Internet_Course\Selection\JAQS\hs300\sig_2\information_report.pdf
Nan Data Count (should be zero) : 0; Percentage of effective data: 53%
Value of signals of Different Quantiles Statistics
min max mean std count count %
quantile
1 -0.230151 -0.013882 -0.038593 0.021056 83288 20.143711
2 -0.091371 -0.010489 -0.025736 0.010736 82691 19.999323
3 -0.085854 -0.008099 -0.021115 0.009997 82683 19.997388
4 -0.078773 -0.005999 -0.017388 0.009108 82691 19.999323
5 -0.068259 -0.000000 -0.012369 0.007607 82116 19.860256
Figure saved: C:\Users\small\OneDrive\notebook\Internet_Course\Selection\JAQS\hs300\sig_3\returns_report.pdf
Information Analysis
ic
IC Mean 0.026
IC Std. 0.198
t-stat(IC) NaN
p-value(IC) NaN
IC Skew NaN
IC Kurtosis NaN
Ann. IR 0.130
Figure saved: C:\Users\small\OneDrive\notebook\Internet_Course\Selection\JAQS\hs300\sig_3\information_report.pdf
Nan Data Count (should be zero) : 0; Percentage of effective data: 52%
Value of signals of Different Quantiles Statistics
min max mean std count \
quantile
1 -1.458161e+10 1.948145e+00 -263277.198399 5.238580e+07 81342
2 -1.081012e+00 2.108829e+00 0.062442 2.855984e-01 80772
3 -5.775263e-01 2.170505e+00 0.558179 2.597112e-01 80764
4 -1.450277e-01 2.203910e+00 1.076562 2.916370e-01 80772
5 3.750683e-01 4.399282e+09 131702.733601 1.792908e+07 80186
count %
quantile
1 20.142335
2 20.001189
3 19.999208
4 20.001189
5 19.856080
Figure saved: C:\Users\small\OneDrive\notebook\Internet_Course\Selection\JAQS\hs300\sig_4\returns_report.pdf
Information Analysis
ic
IC Mean 0.000
IC Std. 0.081
t-stat(IC) NaN
p-value(IC) NaN
IC Skew NaN
IC Kurtosis NaN
Ann. IR 0.005
Figure saved: C:\Users\small\OneDrive\notebook\Internet_Course\Selection\JAQS\hs300\sig_4\information_report.pdf
Nan Data Count (should be zero) : 0; Percentage of effective data: 53%
Value of signals of Different Quantiles Statistics
min max mean std count \
quantile
1 -8.818676e+08 6.625718e+05 -9.376437e+06 2.467965e+07 83288
2 -8.217876e+07 1.246660e+07 -1.482240e+06 3.203704e+06 82691
3 -5.520948e+07 2.433595e+07 -4.138087e+05 1.780147e+06 82685
4 -1.401159e+07 4.165598e+07 2.652617e+05 2.253609e+06 82691
5 -2.725820e+06 1.762297e+09 5.196908e+06 2.913466e+07 82117
count %
quantile
1 20.143565
2 19.999178
3 19.997727
4 19.999178
5 19.860353
Figure saved: C:\Users\small\OneDrive\notebook\Internet_Course\Selection\JAQS\hs300\sig_5\returns_report.pdf
Information Analysis
ic
IC Mean 0.042
IC Std. 0.138
t-stat(IC) NaN
p-value(IC) NaN
IC Skew NaN
IC Kurtosis NaN
Ann. IR 0.307
Figure saved: C:\Users\small\OneDrive\notebook\Internet_Course\Selection\JAQS\hs300\sig_5\information_report.pdf
# Re-run the report for the 'Cov' factor (sig_5) and show its figures.
plot_pfm(signal_dict.get('sig_5'), 'sig_5')
import talib as ta
import pandas as pd
# Build a custom factor outside the formula engine: the negated TA-Lib
# linear-regression slope (time period 3) of each symbol's adjusted close.
close_adj = dv.get_ts('close_adj')
# Drop symbols whose whole price column is NaN before calling TA-Lib.
close = close_adj.dropna(how='all', axis=1)
# DataFrame.iteritems() was removed in pandas 2.0; items() yields the same
# (column name, Series) pairs and is also available on older versions.
slope_df = pd.DataFrame(
    {name: -ta.LINEARREG_SLOPE(value.values, 3) for name, value in close.items()},
    index=close.index,
)
# Drop any existing 'slope' field before it is appended again below.
# When the field is absent the call only reports that it does not exist.
dv.remove_field('slope')
Field name [slope] does not exist.
# Store the hand-built slope frame in the DataView under the name 'slope',
# then read it back and run/show the same report as for the formula factors.
dv.append_df(slope_df,'slope')
plot_pfm(dv.get_ts('slope'), 'slope')
Nan Data Count (should be zero) : 0; Percentage of effective data: 53%
Value of signals of Different Quantiles Statistics
min max mean std count count %
quantile
1 -57.788764 0.924510 -0.807342 1.487646 83172 20.143327
2 -3.229754 1.582843 -0.109449 0.256158 82577 19.999225
3 -1.875339 2.531553 0.012089 0.219972 82570 19.997530
4 -1.387552 4.972311 0.131095 0.322812 82577 19.999225
5 -0.757552 51.460955 0.739474 1.514192 82005 19.860693
Figure saved: C:\Users\small\OneDrive\notebook\Internet_Course\Selection\JAQS\hs300\slope\returns_report.pdf
Information Analysis
ic
IC Mean 0.045
IC Std. 0.162
t-stat(IC) NaN
p-value(IC) NaN
IC Skew NaN
IC Kurtosis NaN
Ann. IR 0.278
Figure saved: C:\Users\small\OneDrive\notebook\Internet_Course\Selection\JAQS\hs300\slope\information_report.pdf
# Persist the DataView, including the fields added above, to a new folder
# (it was loaded from 'JAQS_Data/hs300').
dv.save_dataview('stockdata/hs300')
Store data...
Dataview has been successfully saved to:
C:\Users\small\OneDrive\notebook\Internet_Course\Selection\JAQS\stockdata\hs300
You can load it with load_dataview('C:\Users\small\OneDrive\notebook\Internet_Course\Selection\JAQS\stockdata\hs300')