IC序列阈值选股
- 读取本地数据
- 获取每日IC的DataFrame
- 设置IC阈值
- 选出因子值最大的n只股票
- 位移与转换格式
- 回测绩效
1_读取本地数据
from jaqs.data.dataapi import DataApi
from jaqs.data import DataView
import numpy as np
from datetime import datetime
import pandas as pd
import warnings
import alphalens
warnings.filterwarnings("ignore")
# Folder holding a previously saved JAQS DataView (relative to the working directory).
# NOTE(review): 'hs300' presumably refers to the CSI 300 universe - confirm.
dataview_folder = 'JAQS_Data/hs300'
# Load the saved dataset into `dv`; the rest of the script reads factor and
# price series from it through dv.get_ts(...).
dv = DataView()
dv.load_dataview(dataview_folder)
D:\Anaconda3\lib\site-packages\statsmodels\compat\pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
from pandas.core import datetools
Dataview loaded successfully.
def change_index(signal):
    """Replace YYYYMMDD index labels of ``signal`` with datetime values.

    Each index label is converted to a string and parsed with the
    ``%Y%m%d`` format. The index is replaced in place on the object that was
    passed in, and that same object is returned.
    """
    parsed_dates = [datetime.strptime(str(label), "%Y%m%d") for label in signal.index]
    signal.index = pd.Index(parsed_dates)
    return signal
# Names of the factor fields pulled from the DataView.
big_factor = ['roe','roa']
# One frame per factor, re-indexed by datetime and collected into a Panel keyed
# by factor name.
# NOTE(review): pd.Panel was removed in pandas 0.25; PN.iteritems() and
# PN.major_axis further down in this script depend on it, so replacing it means
# touching those statements as well.
PN = pd.Panel({name: change_index(dv.get_ts(name)) for name in big_factor})
# 'close_adj' series, used as the price input of cal_daily_ic
# (presumably the adjusted close - confirm).
prices = change_index(dv.get_ts('close_adj'))
2_获取每日IC的DataFrame
def cal_daily_ic(factor_df):
    """Run the alphalens information-coefficient pipeline for one factor.

    ``factor_df`` is stacked into a long series and handed, together with the
    module-level ``prices`` frame, to
    ``alphalens.utils.get_clean_factor_and_forward_returns`` (5 quantiles, a
    single forward period of 5). The cleaned data is then passed to
    ``alphalens.performance.mean_information_coefficient`` with
    ``by_time='D'`` and that result is returned unchanged.
    """
    stacked_factor = factor_df.stack()
    clean_data = alphalens.utils.get_clean_factor_and_forward_returns(
        stacked_factor,
        prices,
        quantiles=5,
        periods=(5,),
    )
    ic_by_day = alphalens.performance.mean_information_coefficient(clean_data, by_time='D')
    return ic_by_day
# IC result of cal_daily_ic for every item in the Panel, keyed by factor name.
daily_ic = {key: cal_daily_ic(value) for key, value in PN.iteritems()}
# Keep only the entry labelled 5 from each result, one column per factor.
# NOTE(review): presumably this is the column for the 5-period horizon requested
# via periods=(5,) in cal_daily_ic - confirm against the installed alphalens.
ic_df = pd.DataFrame({factor: daily_ic.get(factor)[5] for factor in big_factor})
print(ic_df.head())
roa roe
date
2012-01-04 -0.022573 0.095570
2012-01-05 -0.012033 -0.010591
2012-01-06 -0.028648 0.082105
2012-01-09 -0.138597 0.084739
2012-01-10 -0.177690 0.068066
# Convert the datetime index labels back to YYYYMMDD integers. big_select
# forwards each label to dv.get_ts(end_date=...).
# NOTE(review): presumably the DataView expects that integer date format - confirm.
# `rename(index=...)` is the label-renaming API; `rename_axis` with a callable
# is the deprecated spelling and in current pandas sets the axis *name* instead.
df_ic = ic_df.rename(index=lambda s: s.year*10000+s.month*100+s.day)
3_设置IC阈值
import numpy as np
def cal_big(big_ic, limit_ic=0.02):
    """Turn IC values into pass/fail flags against a threshold.

    Entries greater than or equal to ``limit_ic`` become 1; every other entry
    (values below the threshold and values that were already NaN) becomes NaN.
    The threshold is ``limit_ic`` itself, not its negative. The input object is
    left unmodified and a new object is returned.
    """
    meets_threshold = big_ic >= limit_ic
    # Blank out everything that does not reach the threshold ...
    kept = big_ic.where(meets_threshold)
    # ... and collapse the surviving values to the constant flag 1.
    return kept.mask(meets_threshold, 1)
4_选出因子值最大的n只股票
def big_select(rows, n=10):
    """Pick the top-``n`` stocks of every factor that is flagged for one date.

    Parameters
    ----------
    rows : tuple
        ``(time, row)`` pair as produced by ``DataFrame.iterrows()``. ``row``
        holds one flag per factor; factors whose flag is NaN are ignored.
    n : int
        Number of stocks with the LARGEST factor value taken per factor.

    Returns
    -------
    dict
        ``{code: 1}`` for every selected stock; an empty dict when no factor
        is flagged on this date.
    """
    time, row = rows
    active_factors = row.dropna().index
    # An Index has no unambiguous truth value, so test its length explicitly.
    if len(active_factors) == 0:
        return {}

    # Last available cross-section of each active factor up to `time`, with
    # missing values dropped; the frame aligns the factors on stock code.
    target = pd.DataFrame({
        factor: dv.get_ts(factor, end_date=time).iloc[-1].dropna()
        for factor in active_factors
    })

    selected = {}
    # `.items()` replaces `.iteritems()`, which was removed in pandas 2.0.
    for _, values in target.items():
        for code in values.nlargest(n).index:
            selected[code] = 1
    return selected
5_位移与转换格式
# Run big_select on every (date, flags) row of the thresholded IC table. Each
# call yields a {code: 1} dict, so the frame gets one column per stock code that
# was selected at least once. The rows are labelled with PN.major_axis[5:], i.e.
# the Panel dates with the first five dropped.
# NOTE(review): presumably this shifts each signal forward by the 5-period IC
# horizon; it relies on the row count matching len(PN.major_axis) - 5 - confirm.
select_big = pd.DataFrame(
    [big_select(dated_flags) for dated_flags in cal_big(df_ic).iterrows()],
    index=PN.major_axis[5:],
)
from datetime import timedelta
def change_columns_time(signal):
    """Rename exchange suffixes of the column codes and move timestamps to 15:00.

    Column labels ending in ``SZ`` get that suffix replaced by ``XSHE`` and
    labels ending in ``SH`` get it replaced by ``XSHG`` (the suffix style used
    by the backtest benchmark code ``000300.XSHG``). Other labels are kept.
    Fifteen hours are added to every index value.

    Parameters
    ----------
    signal : pandas.DataFrame
        Frame with string column labels and a datetime-like index.

    Returns
    -------
    pandas.DataFrame
        A new frame; the frame passed in is not modified.
    """
    suffix_map = {'SZ': 'XSHE', 'SH': 'XSHG'}
    new_names = {}
    for c in signal.columns:
        for old_suffix, new_suffix in suffix_map.items():
            if c.endswith(old_suffix):
                # Swap the trailing suffix only; str.replace would also rewrite
                # any earlier occurrence of the same letters in the label.
                new_names[c] = c[:-len(old_suffix)] + new_suffix
                break
    # `rename` is the label-renaming API; `rename_axis` with a mapping is the
    # deprecated spelling and no longer renames labels in current pandas.
    signal = signal.rename(columns=new_names)
    signal.index = pd.Index([stamp + timedelta(hours=15) for stamp in signal.index])
    return signal
# Signal table in the form the backtest reads: exchange suffixes renamed and
# every row timestamp moved to 15:00. init() stores it on the strategy context.
big_df = change_columns_time(select_big)
6_回测绩效
import numpy as np
import talib as ta
import pandas as pd
import rqalpha
from rqalpha.api import *
# rqalpha strategy callbacks: init, find_pool, handle_bar and buy
def init(context):
    """Strategy set-up: attach the signal table and schedule the pool refresh.

    ``context.stocks`` starts as an empty pool and ``context.codes`` holds the
    module-level ``big_df`` signal table. ``find_pool`` is registered to run
    daily through the rqalpha scheduler.
    """
    context.stocks = []
    context.codes = big_df
    # Daily refresh is used here; the weekly alternative would be
    # scheduler.run_weekly(find_pool, tradingday=1).
    scheduler.run_daily(find_pool)
def find_pool(context, bar_dict):
    """Refresh ``context.stocks`` from the signal row for ``context.now``.

    The row of ``context.codes`` labelled ``context.now`` is looked up and the
    column labels whose value equals 1 become the new pool. When no row exists
    for that timestamp the pool is left as it was.
    """
    try:
        todays_flags = context.codes.loc[context.now]
    except KeyError:
        # No signal row for this timestamp: keep the previous pool.
        return
    context.stocks = todays_flags.index[todays_flags == 1]
def handle_bar(context, bar_dict):
    """Per-bar callback handed to rqalpha; delegates all trading to ``buy``."""
    buy(context=context, bar_dict=bar_dict)
def buy(context, bar_dict):
    """Move the portfolio towards the current pool in ``context.stocks``.

    Positions whose code is not in the pool are closed (target weight 0).
    Pool members that are not held yet are opened with a target weight of
    ``1 / len(pool)``. Pool members that are already held are left untouched.
    Nothing happens when the pool is ``None``.

    Parameters
    ----------
    context : object
        Strategy context exposing ``stocks`` and ``portfolio.positions``.
    bar_dict : object
        Bar data supplied by the framework; not used here.
    """
    pool = context.stocks
    if pool is None:
        return

    # Iterate over a snapshot of the held codes so that order submission cannot
    # disturb the loop if the positions mapping changes underneath it.
    # NOTE(review): whether rqalpha mutates the mapping at this point depends on
    # its matching settings - the snapshot is harmless either way.
    for held in list(context.portfolio.positions):
        if held not in pool:
            order_target_percent(held, 0)

    # Pool members without a position, de-duplicated while keeping pool order.
    to_open = []
    seen = set()
    for code in pool:
        if code in seen or code in context.portfolio.positions:
            continue
        seen.add(code)
        to_open.append(code)

    if not to_open:
        return

    # to_open is non-empty here, so the pool is too and the division is safe.
    weight = 1.0 / len(pool)
    for code in to_open:
        order_target_percent(code, weight)
# Backtest configuration passed to rqalpha.run_func below.
config = {
"base": {
# Backtest window.
"start_date": "2015-09-01",
"end_date": "2017-12-22",
# Starting value of the stock account.
"accounts": {'stock':1000000},
# Benchmark code (same XSHG suffix style that change_columns_time produces).
"benchmark": "000300.XSHG"
},
"extra": {
"log_level": "error",
},
"mod": {
# Analyser mod switched on with plotting enabled.
"sys_analyser": {
"enabled": True,
"plot": True
}
}
}
# Run the strategy defined by init / handle_bar with the configuration above.
rqalpha.run_func(init=init, handle_bar=handle_bar, config=config)
