@Channelchan
2017-11-27T06:06:13.000000Z
字数 4474
阅读 12439
Step_1: 设计编写因子算法的类,逐行计算合成DataFrame,返回MultiIndex
Step_2: 股票池与数据读取
Step_3: 实例化因子的类,并获取因子的数值
Step_4: 可视化与计算IC绩效
Step_5: 优化因子的参数
Step_6: 最优绩效结果可视化与保存Excel
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
from fxdayu_alphaman.factor.factor import Factor
from datetime import datetime
from fxdayu_data import DataAPI
from alphalens import utils,performance,plotting
import matplotlib.pyplot as plt
from fxdayu_alphaman.factor.admin import Admin
import alphalens


class Factor_Volume001(Factor):
    """The "volume001" factor.

    For each stock the factor is the negated result of ``self.correlation``
    applied to the "high" column and ``self.ts_mean(volume, 10)``.
    The per-stock columns are then joined, winsorized, standardized and
    converted to MultiIndex form.
    """

    # Third argument passed to ``self.correlation``.
    # NOTE(review): presumably the correlation window length -- confirm
    # against the Factor base class.
    c = 3

    def calculate_volume001(self, data):
        """Compute the volume001 factor for a single stock.

        ``data`` is indexed positionally: ``data[0]`` becomes the label of
        the output column and ``data[1]`` is the candle table, which must
        provide "high" and "volume" columns.

        Returns a one-column DataFrame indexed like the NaN-free candle
        table, or ``None`` when no rows survive ``dropna()``.
        """
        candles = data[1].dropna()
        if not len(candles):
            return None

        # NOTE(review): ts_mean(volume, 10) looks like a 10-period mean of
        # volume -- confirm against the Factor base class.
        smoothed_volume = self.ts_mean(candles["volume"], 10)

        # Factor value: negated correlation of high vs. smoothed volume.
        values = -self.correlation(candles["high"], smoothed_volume, self.c)
        values.index = candles.index

        frame = pd.DataFrame(values)
        frame.columns = [data[0]]
        return frame

    def factor_calculator(self, pn_data):
        """Build the factor for every item yielded by ``pn_data.iteritems()``.

        Each item is passed to ``calculate_volume001``; the resulting
        columns are concatenated side by side, winsorized, standardized and
        finally converted to MultiIndex form.
        """
        per_stock = [
            self.calculate_volume001(item) for item in pn_data.iteritems()
        ]
        wide = pd.concat(per_stock, axis=1)
        wide = self.winsorize(wide)      # clip extreme values
        wide = self.standardize(wide)    # standardize
        # Convert to MultiIndex format (comparable to calling stack()).
        return self.factor_df_to_factor_mi(wide)
# Time settings: study window and the holding periods of interest.
start = datetime(2015, 1, 1)
end = datetime(2017, 11, 24, 15)
periods = (1, 5, 10)

# Fetch data: codes for 'hs300', then 'D' candles with the
# high/close/volume fields, requested with adjust='after'.
codes = DataAPI.info.codes('hs300')
PN = DataAPI.candle(
    codes,
    'D',
    ('high', 'close', 'volume'),
    start=start,
    end=end,
    adjust='after',
)

# The 'close' cross-section is what later cells pass as prices.
prices = PN.minor_xs('close')
# Instantiate the factor class, compute its values from the candle data,
# and show the last few rows of the result.
volume001 = Factor_Volume001()
factor = volume001.get_factor(PN)
print(factor.tail())
factor
date asset
2017-11-24 15:00:00 601997.XSHG 0.898358
601998.XSHG 1.042316
603160.XSHG -1.045223
603858.XSHG 1.432236
603993.XSHG -1.203755
def plot_performance(factor, prices, quantiles=5, periods=(1, 5, 10)):
    """Plot the IC diagnostics and per-quantile cumulative returns of a factor.

    Args:
        factor: factor values, passed straight to
            ``utils.get_clean_factor_and_forward_returns``.
        prices: price table, passed to the same call.
        quantiles: number of quantile buckets (default 5, the value that
            was previously hard-coded).
        periods: forward-return horizons (default ``(1, 5, 10)``, the value
            that was previously hard-coded). The same horizons drive the
            cumulative-return plots, so the two can no longer drift apart.

    Draws, in order: the IC histogram, the IC time series, the monthly mean
    IC heatmap, and one cumulative-returns-by-quantile chart per horizon,
    then calls ``plt.show()``.
    """
    factor_data = utils.get_clean_factor_and_forward_returns(
        factor, prices, quantiles=quantiles, periods=periods)

    ic = performance.factor_information_coefficient(factor_data)
    plotting.plot_ic_hist(ic)
    plotting.plot_ic_ts(ic)

    mean_ic = performance.mean_information_coefficient(factor_data, by_time="M")
    plotting.plot_monthly_ic_heatmap(mean_ic)

    # Mean holding return per quantile (with the overall mean subtracted).
    mean_return_by_q = performance.mean_return_by_quantile(
        factor_data, by_date=True, demeaned=True)[0]

    # Cumulative holding return per quantile, one chart per horizon.
    for period in periods:
        plotting.plot_cumulative_returns_by_quantile(
            mean_return_by_q, period=period)

    # NOTE(review): the original text lost its line breaks, so whether
    # plt.show() sat inside or after the loop is not recoverable; it is
    # called once after all charts are drawn.
    plt.show()


plot_performance(factor, prices)






# Compute the performance of the un-tuned factor and print its mean IC.
factor_admin = Admin()
original_perf = factor_admin.calculate_performance(
    'Volume001',
    factor,
    start,
    end,
    periods=(1, 5, 10),
    quantiles=5,
    price=prices,
)
print(original_perf.mean_ic)
0
1 0.012450
5 0.016722
10 0.017339
# Parameter optimization: candidate values for the class attribute "c".
para_range_dict = {"c": range(3, 11, 1)}

# 1. Enumerate the parameter space; this yields the factor values for each
#    parameter combination together with the matching parameter dicts.
factor_value_list, para_dict_list = factor_admin.enumerate_parameter(
    'Factor_Volume001',
    para_range_dict,
    codes,
    start,
    end,
    Factor=volume001,
    data=PN,
)

# Label every result with the parameters that produced it.
factor_name_list = [
    "Factor_Volume001+" + str(para_dict) for para_dict in para_dict_list
]
factor_dict = dict(zip(factor_name_list, factor_value_list))

# 2. Evaluate the performance of the factor values under each parameter set.
performance_list = factor_admin.show_factors_performance(
    factor_dict,
    start,
    end,
    periods=(1, 5, 10),
    quantiles=5,
    price=prices,
)

# Rank the results by a performance metric (the search step): here all
# results are sorted by the mean IC of the 10-day holding period, descending.
performance_list = factor_admin.rank_performance(
    performance_list, target_period=10, ascending=False)

# Print the mean IC and the name of the best-ranked factor.
best_performance = performance_list[0]
print (best_performance.mean_ic)
print (best_performance.factor_name)
0
1 0.012539
5 0.019610
10 0.029310
Factor_Volume001+{'c': 7}
# Recompute the factor with c=7 (the top-ranked setting printed by the
# optimization step) and evaluate it with the same settings as the baseline.
factor_opt = factor_admin.instantiate_factor_and_get_factor_value(
    'Factor_Volume001',
    start=start,
    end=end,
    pool=codes,
    data=PN,
    Factor=volume001,
    para_dict={'c': 7},
)

perf = factor_admin.calculate_performance(
    'Volume001',
    factor_opt,
    start,
    end,
    periods=(1, 5, 10),
    quantiles=5,
    price=prices,
)
print(perf.mean_ic)
0
1 0.012539
5 0.019610
10 0.029310
# Draw the same set of performance charts for factor_opt, the factor
# recomputed with para_dict={'c':7}.
plot_performance(factor_opt,prices)






# Bucket the optimized factor into 5 quantiles.
quantile = alphalens.utils.quantize_factor(factor_opt, quantiles=5)

# Keep only quantile 5, unstack to a wide table, then encode membership as
# 1 and everything else as 0.
top_quantile_only = quantile[quantile == 5]
factor_sheet = top_quantile_only.unstack()
factor_sheet = factor_sheet.replace(np.nan, 0)
factor_sheet = factor_sheet.replace(5, 1)

# Save the 0/1 sheet to Excel.
factor_sheet.to_excel('factor_opt.xlsx')