@Channelchan
2017-11-27T14:06:13.000000Z
字数 4474
阅读 12254
Step_1: 设计编写因子算法的类,逐行计算合成DataFrame,返回MultiIndex
Step_2: 股票池与数据读取
Step_3: 实例化因子的类,并获取因子的数值
Step_4: 可视化与计算IC绩效
Step_5: 优化因子的参数
Step_6: 最优绩效结果可视化与保存Excel
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
from fxdayu_alphaman.factor.factor import Factor
from datetime import datetime
from fxdayu_data import DataAPI
from alphalens import utils,performance,plotting
import matplotlib.pyplot as plt
from fxdayu_alphaman.factor.admin import Admin
import alphalens
class Factor_Volume001(Factor):
    """Volume001 factor: negated rolling correlation of high price vs. averaged volume.

    The per-stock values are computed one stock at a time, joined into a wide
    DataFrame (one column per stock), then winsorized, standardized and
    converted to MultiIndex form by helpers that are not defined in this class
    (presumably inherited from ``Factor``).
    """

    # Third argument handed to ``self.correlation`` (presumably the rolling
    # window length); exposed as a class attribute so it can be overridden.
    c = 3

    def calculate_volume001(self, data):
        """Compute the volume001 factor for a single stock.

        Args:
            data: Indexable pair where ``data[0]`` is the stock identifier and
                ``data[1]`` is that stock's candle table with at least the
                "high" and "volume" columns.

        Returns:
            A one-column DataFrame named after ``data[0]`` and indexed like the
            NaN-free candle table, or ``None`` when no rows remain after
            dropping NaNs.
        """
        code = data[0]
        candles = data[1].dropna()
        if len(candles) == 0:
            # Nothing to compute for this stock.
            return None

        high_price = candles["high"]
        smoothed_volume = self.ts_mean(candles["volume"], 10)

        # Factor value: negated correlation of high price with averaged volume.
        values = -self.correlation(high_price, smoothed_volume, self.c)
        values.index = candles.index

        frame = pd.DataFrame(values)
        frame.columns = [code]
        return frame

    def factor_calculator(self, pn_data):
        """Build the volume001 factor for every stock in ``pn_data``.

        Args:
            pn_data: Container whose ``iteritems()`` yields one
                ``(stock, candle table)`` pair per stock.

        Returns:
            The winsorized and standardized factor in MultiIndex format
            (per the original note, comparable to calling ``stack()``).
        """
        per_stock = [
            self.calculate_volume001(item) for item in pn_data.iteritems()
        ]
        wide = pd.concat(per_stock, axis=1)
        wide = self.winsorize(wide)  # clip extreme values
        wide = self.standardize(wide)  # standardize
        return self.factor_df_to_factor_mi(wide)  # wide table -> MultiIndex
# Time settings: sample window plus the holding periods of interest.
start = datetime(2015, 1, 1)
end = datetime(2017, 11, 24, 15)
periods = (1, 5, 10)

# Fetch data: code list for 'hs300', then the candle fields the factor needs.
codes = DataAPI.info.codes('hs300')
PN = DataAPI.candle(
    codes,
    'D',
    ('high', 'close', 'volume'),
    start=start,
    end=end,
    adjust='after',
)
prices = PN.minor_xs('close')

# Instantiate the factor class and compute its values on the loaded data.
volume001 = Factor_Volume001()
factor = volume001.get_factor(PN)
print(factor.tail())
factor
date asset
2017-11-24 15:00:00 601997.XSHG 0.898358
601998.XSHG 1.042316
603160.XSHG -1.045223
603858.XSHG 1.432236
603993.XSHG -1.203755
def plot_performance(factor, prices, quantiles=5, periods=(1, 5, 10)):
    """Plot information-coefficient (IC) and quantile-return charts for a factor.

    Args:
        factor: Factor values, passed unchanged to
            ``utils.get_clean_factor_and_forward_returns``.
        prices: Price table used to derive forward returns.
        quantiles: Number of factor quantiles to bucket assets into.
        periods: Holding periods to evaluate. The same sequence drives both the
            forward-return computation and the cumulative-return plots, so the
            two can no longer get out of sync.

    Returns:
        None. Charts are displayed through ``plt.show()``.
    """
    factor_data = utils.get_clean_factor_and_forward_returns(
        factor, prices, quantiles=quantiles, periods=periods
    )

    # IC distribution and IC time series.
    ic = performance.factor_information_coefficient(factor_data)
    plotting.plot_ic_hist(ic)
    plotting.plot_ic_ts(ic)

    # Monthly mean IC shown as a heatmap.
    mean_ic = performance.mean_information_coefficient(factor_data, by_time="M")
    plotting.plot_monthly_ic_heatmap(mean_ic)

    # Mean holding return per quantile, demeaned (overall mean subtracted);
    # element [0] of the result is the one that gets plotted.
    mean_return_by_q = performance.mean_return_by_quantile(
        factor_data, by_date=True, demeaned=True
    )[0]

    # Cumulative holding return per quantile, one chart per holding period.
    for period in periods:
        plotting.plot_cumulative_returns_by_quantile(mean_return_by_q, period=period)
    plt.show()
# Visualize the factor computed with the default parameter.
plot_performance(factor, prices)

# Compute the performance summary of the same factor and print its mean IC.
factor_admin = Admin()
original_perf = factor_admin.calculate_performance(
    'Volume001',
    factor,
    start,
    end,
    periods=(1, 5, 10),
    quantiles=5,
    price=prices,
)
print(original_perf.mean_ic)
0
1 0.012450
5 0.016722
10 0.017339
# Parameter optimization: candidate values for the factor's ``c`` attribute.
para_range_dict = {"c": range(3, 11)}

# 1. Enumerate the parameter space, producing one factor value set per combination.
factor_value_list, para_dict_list = factor_admin.enumerate_parameter(
    'Factor_Volume001',
    para_range_dict,
    codes,
    start,
    end,
    Factor=volume001,
    data=PN,
)

# Label every result with the parameter set that produced it.
factor_name_list = [
    "Factor_Volume001+" + str(para_dict) for para_dict in para_dict_list
]
factor_dict = dict(zip(factor_name_list, factor_value_list))

# 2. Evaluate the factor values obtained under each parameter set.
performance_list = factor_admin.show_factors_performance(
    factor_dict,
    start,
    end,
    periods=(1, 5, 10),
    quantiles=5,
    price=prices,
)

# Sort the results by a performance metric (the search step); this example
# orders them by the 10-period mean IC, descending.
performance_list = factor_admin.rank_performance(
    performance_list,
    target_period=10,
    ascending=False,
)

# Print the mean IC and the name of the best-ranked factor.
best_performance = performance_list[0]
print(best_performance.mean_ic)
print(best_performance.factor_name)
0
1 0.012539
5 0.019610
10 0.029310
Factor_Volume001+{'c': 7}
# Recompute the factor with the best parameter found by the search (c = 7).
factor_opt = factor_admin.instantiate_factor_and_get_factor_value(
    'Factor_Volume001',
    start=start,
    end=end,
    pool=codes,
    data=PN,
    Factor=volume001,
    para_dict={'c': 7},
)

# Performance summary of the optimized factor, evaluated like the baseline.
perf = factor_admin.calculate_performance(
    'Volume001',
    factor_opt,
    start,
    end,
    periods=(1, 5, 10),
    quantiles=5,
    price=prices,
)
print(perf.mean_ic)
0
1 0.012539
5 0.019610
10 0.029310
# Visualize the optimized factor.
plot_performance(factor_opt, prices)

# Bucket the optimized factor into 5 quantiles and keep only quantile 5.
quantile = alphalens.utils.quantize_factor(factor_opt, quantiles=5)
top_quantile = quantile[quantile == 5]

# Wide 0/1 sheet: unstack, fill the gaps with 0, and relabel the 5s as 1.
factor_sheet = top_quantile.unstack().replace(np.nan, 0).replace(5, 1)
factor_sheet.to_excel('factor_opt.xlsx')