[关闭]
@Channelchan 2017-11-27T14:06:13.000000Z 字数 4474 阅读 12254

如何对因子算法做参数优化?

步骤

Step_1: 设计并编写因子算法的类:逐支股票计算因子值,合成DataFrame,最终返回MultiIndex格式

Step_2: 股票池与数据读取

Step_3: 实例化因子的类,并获取因子的数值

Step_4: 可视化与计算IC绩效

Step_5: 优化因子的参数,并读取最优参数的因子值与绩效

Step_6: 最优绩效结果可视化与保存Excel

Step_1: 设计并编写因子算法的类:逐支股票计算因子值,合成DataFrame,最终返回MultiIndex格式

  1. # -*- coding: utf-8 -*-
  2. import pandas as pd
  3. import numpy as np
  4. from fxdayu_alphaman.factor.factor import Factor
  5. from datetime import datetime
  6. from fxdayu_data import DataAPI
  7. from alphalens import utils,performance,plotting
  8. import matplotlib.pyplot as plt
  9. from fxdayu_alphaman.factor.admin import Admin
  10. import alphalens
  class Factor_Volume001(Factor):
      """Volume001 factor built from each stock's high price and volume.

      Values are computed stock by stock, merged into one wide DataFrame
      (one column per stock), then winsorized, standardized and converted
      to MultiIndex form.  ``ts_mean``, ``correlation``, ``winsorize``,
      ``standardize`` and ``factor_df_to_factor_mi`` are not defined in this
      class; presumably they are inherited from ``Factor``.
      """

      # Third argument handed to ``self.correlation``; this is the parameter
      # the tutorial's optimisation step varies.
      c = 3

      def calculate_volume001(self, data):
          """Compute the volume001 factor for a single stock.

          Args:
              data: An indexable pair as yielded by ``pn_data.iteritems()``;
                  ``data[0]`` becomes the output column label and ``data[1]``
                  must provide ``"high"`` and ``"volume"`` columns.

          Returns:
              A one-column DataFrame indexed like the NaN-free candle data,
              or ``None`` when no rows remain after dropping NaNs.
          """
          bars = data[1].dropna()
          if not len(bars):
              return None

          high_prices = bars["high"]
          # Presumably a 10-bar rolling mean of volume -- the helper is not
          # visible here, confirm against Factor.ts_mean.
          smoothed_volume = self.ts_mean(bars["volume"], 10)

          # Factor value: negated result of correlation(high, smoothed volume, c).
          values = -self.correlation(high_prices, smoothed_volume, self.c)
          values.index = bars.index

          frame = pd.DataFrame(values)
          frame.columns = [data[0]]
          return frame

      def factor_calculator(self, pn_data):
          """Compute the volume001 factor for every stock in ``pn_data``.

          Args:
              pn_data: Object exposing ``iteritems()`` that yields one
                  ``(code, candle frame)`` pair per stock.

          Returns:
              The result of ``factor_df_to_factor_mi`` applied to the
              winsorized and standardized wide factor table.
          """
          per_stock = [
              self.calculate_volume001(item) for item in pn_data.iteritems()
          ]
          wide = pd.concat(per_stock, axis=1)
          wide = self.winsorize(wide)  # clip extreme values
          wide = self.standardize(wide)  # standardize
          # Convert to MultiIndex format (equivalent to the stack() method).
          return self.factor_df_to_factor_mi(wide)

Step_2: 股票池与数据读取

  # Time range of the study.
  start = datetime(2015, 1, 1)
  end = datetime(2017, 11, 24, 15)
  periods = (1, 5, 10)

  # Fetch the data: the 'hs300' code list and its daily candles.
  codes = DataAPI.info.codes('hs300')
  candle_fields = ('high', 'close', 'volume')
  PN = DataAPI.candle(
      codes,
      'D',
      candle_fields,
      start=start,
      end=end,
      adjust='after',
  )
  # Cross-section of the 'close' field, used as the price table later on.
  prices = PN.minor_xs('close')

Step_3: 实例化因子的类,并获取因子的数值

  # Instantiate the factor class and compute factor values from the candle data.
  volume001 = Factor_Volume001()
  factor = volume001.get_factor(PN)

  # Show the last few rows as a quick sanity check.
  latest_rows = factor.tail()
  print(latest_rows)
                                   factor
date                asset                
2017-11-24 15:00:00 601997.XSHG  0.898358
                    601998.XSHG  1.042316
                    603160.XSHG -1.045223
                    603858.XSHG  1.432236
                    603993.XSHG -1.203755

Step_4: 可视化与计算IC绩效

  def plot_performance(factor, prices, periods=(1, 5, 10), quantiles=5):
      """Plot the IC statistics and quantile returns of a factor with alphalens.

      Args:
          factor: Factor values in the form accepted by alphalens'
              ``get_clean_factor_and_forward_returns`` (the tutorial passes a
              frame indexed by date and asset).
          prices: Price table matching ``factor`` (the tutorial passes the
              'close' cross-section of the candle data).
          periods: Forward-return periods.  The same values drive both the
              forward-return computation and the cumulative-return charts, so
              the two can no longer drift apart.
          quantiles: Number of factor quantiles.

      Returns:
          None.  The function only draws figures and calls ``plt.show()``.
      """
      factor_data = utils.get_clean_factor_and_forward_returns(
          factor, prices, quantiles=quantiles, periods=periods
      )

      # Information coefficient: histogram and time series.
      ic = performance.factor_information_coefficient(factor_data)
      plotting.plot_ic_hist(ic)
      plotting.plot_ic_ts(ic)

      # Mean IC aggregated with by_time="M", shown as a monthly heatmap.
      mean_ic = performance.mean_information_coefficient(factor_data, by_time="M")
      plotting.plot_monthly_ic_heatmap(mean_ic)

      # Mean holding return per quantile (the overall mean is subtracted).
      mean_return_by_q = performance.mean_return_by_quantile(
          factor_data, by_date=True, demeaned=True
      )[0]

      # Plot cumulative holding returns per quantile, one chart per period.
      for period in periods:
          plotting.plot_cumulative_returns_by_quantile(mean_return_by_q, period=period)

      # NOTE(review): the original listing lost its indentation; a single
      # show() after the loop is assumed.
      plt.show()
  # Chart the performance of the factor computed in Step_3.
  plot_performance(factor=factor, prices=prices)

output_9_1.png-39kB

output_9_2.png-267.6kB

output_9_3.png-39.5kB

output_9_4.png-82.1kB

output_9_5.png-61.2kB

output_9_6.png-57.5kB

  # Create the Admin helper and evaluate the factor computed in Step_3.
  factor_admin = Admin()
  original_perf = factor_admin.calculate_performance(
      'Volume001',
      factor,
      start,
      end,
      periods=periods,  # reuse the periods defined in Step_2 instead of re-typing them
      quantiles=5,
      price=prices,
  )

  # Mean IC for each period.
  print(original_perf.mean_ic)
           0
1   0.012450
5   0.016722
10  0.017339

Step_5: 优化因子的参数,并读取最优参数的因子值与绩效

  # Parameter optimisation: candidate values for the class attribute ``c``.
  para_range_dict = {"c": range(3, 11)}

  # 1. Enumerate the parameter space.
  factor_value_list, para_dict_list = factor_admin.enumerate_parameter(
      'Factor_Volume001',
      para_range_dict,
      codes,
      start,
      end,
      Factor=volume001,
      data=PN,
  )

  # Label every result with the parameter set that produced it.
  factor_name_list = [
      "Factor_Volume001+" + str(para_dict) for para_dict in para_dict_list
  ]
  factor_dict = dict(zip(factor_name_list, factor_value_list))

  # 2. Evaluate the factor values obtained under each parameter set.
  performance_list = factor_admin.show_factors_performance(
      factor_dict,
      start,
      end,
      periods=periods,  # reuse the periods defined in Step_2
      quantiles=5,
      price=prices,
  )

  # Rank the results by a performance metric (the search step).  This example
  # sorts all results by the mean IC of the 10-day holding period, descending.
  performance_list = factor_admin.rank_performance(
      performance_list,
      target_period=10,
      ascending=False,
  )

  # Print the IC and the name of the best factor.
  print(performance_list[0].mean_ic)
  print(performance_list[0].factor_name)
           0
1   0.012539
5   0.019610
10  0.029310
Factor_Volume001+{'c': 7}
  # Recompute the factor with c = 7, the parameter set printed as best by the
  # ranking step in this article's run.
  best_para_dict = {'c': 7}
  factor_opt = factor_admin.instantiate_factor_and_get_factor_value(
      'Factor_Volume001',
      start=start,
      end=end,
      pool=codes,
      data=PN,
      Factor=volume001,
      para_dict=best_para_dict,
  )
  # Evaluate the factor recomputed with the optimised parameter.
  perf = factor_admin.calculate_performance(
      'Volume001',
      factor_opt,
      start,
      end,
      periods=periods,  # reuse the periods defined in Step_2 instead of re-typing them
      quantiles=5,
      price=prices,
  )

  # Mean IC for each period.
  print(perf.mean_ic)
           0
1   0.012539
5   0.019610
10  0.029310

Step_6: 最优绩效结果可视化与保存Excel

  # Chart the performance of the factor computed with the optimised parameter.
  plot_performance(factor=factor_opt, prices=prices)

output_16_1.png-41kB

output_16_2.png-260.5kB

output_16_3.png-37.4kB

output_16_4.png-78.9kB

output_16_5.png-62.3kB

output_16_6.png-53.8kB

  # Assign each factor value to one of 5 quantiles.
  quantile = alphalens.utils.quantize_factor(factor_opt, quantiles=5)

  # Keep only the entries in quantile 5 and pivot the inner index level
  # into columns.
  top_quantile = quantile[quantile == 5]
  factor_sheet = top_quantile.unstack()

  # Encode membership: missing entries become 0, quantile label 5 becomes 1.
  factor_sheet = factor_sheet.replace(np.nan, 0)
  factor_sheet = factor_sheet.replace(5, 1)

  # Save the sheet to Excel.
  factor_sheet.to_excel('factor_opt.xlsx')
添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注