@Channelchan
2017-12-02T18:17:33.000000Z
字数 6587
阅读 14258
用数学公式找到Alpha机会
WorldQuant通过数据挖掘的方法发掘了101个alpha,据说其中80%的因子至今仍然有效,并运行在他们的投资策略中。《101 Formulaic Alphas》给出了101个真实量化交易alpha的显式公式,这些公式同时也是计算机代码。它们的平均持有期大约在0.6至6.4天之间,两两之间的平均相关性较低,仅为15.9%。这些alpha的回报与波动率强相关,但对换手率没有明显的依赖性,这直接印证了此前基于较间接的经验分析得到的结果。我们还从经验上进一步发现,换手率对alpha之间相关性的解释能力很差。
PDF下载:
Python代码下载:
详细参考原文PDF
# 1. 编制函数需要的算法
import numpy as np
import pandas as pd
import talib as ta
from scipy.stats import rankdata
from fxdayu_data import DataAPI
from datetime import datetime
import alphalens
from fxdayu_alphaman.factor.factor import Factor
import matplotlib.pyplot as plt
# 计算alpha101时会使用的函数
# Rolling sum
def ts_sum(df, window=10):
    """Return the rolling sum of ``df`` over the trailing ``window`` rows.

    :param df: pandas object supporting ``.rolling`` (DataFrame or Series).
    :param window: number of trailing rows in each window (default 10).
    :return: object shaped like ``df``; rows that do not yet have a full
        window are NaN.
    """
    return df.rolling(window).sum()
# Rolling mean
def ts_mean(df, window=10):
    """Return the rolling arithmetic mean of ``df`` over ``window`` rows.

    :param df: pandas object supporting ``.rolling`` (DataFrame or Series).
    :param window: number of trailing rows in each window (default 10).
    :return: object shaped like ``df``; rows that do not yet have a full
        window are NaN.
    """
    return df.rolling(window).mean()
# Rolling standard deviation
def stddev(df, window=10):
    """Return the rolling standard deviation of ``df`` over ``window`` rows.

    Uses pandas' default for ``Rolling.std`` (sample standard deviation,
    ``ddof=1``).

    :param df: pandas object supporting ``.rolling`` (DataFrame or Series).
    :param window: number of trailing rows in each window (default 10).
    :return: object shaped like ``df``; rows that do not yet have a full
        window are NaN.
    """
    return df.rolling(window).std()
# Rolling correlation
def correlation(x, y, window=10):
    """Return the rolling correlation between ``x`` and ``y``.

    Delegates to ``x.rolling(window).corr(y)``; for two DataFrames pandas
    pairs up the columns that share a label.

    :param x: pandas object supporting ``.rolling``.
    :param y: pandas object aligned with ``x``.
    :param window: number of trailing rows in each window (default 10).
    :return: rolling correlation; rows without a full window are NaN.
    """
    return x.rolling(window).corr(y)
# Rolling covariance
def covariance(x, y, window=10):
    """Return the rolling covariance between ``x`` and ``y``.

    Delegates to ``x.rolling(window).cov(y)``; for two DataFrames pandas
    pairs up the columns that share a label.

    :param x: pandas object supporting ``.rolling``.
    :param y: pandas object aligned with ``x``.
    :param window: number of trailing rows in each window (default 10).
    :return: rolling covariance; rows without a full window are NaN.
    """
    return x.rolling(window).cov(y)
def rolling_rank(na):
    """Return the rank of the last element of ``na`` among all its elements.

    Ranks come from ``scipy.stats.rankdata`` (1-based, ties share their
    average rank).

    :param na: one rolling window of values (array-like).
    :return: rank of the most recent observation within the window.
    """
    return rankdata(na)[-1]


# Rolling time-series rank
def ts_rank(df, window=10):
    """Rank each row's value within its own trailing ``window`` rows.

    :param df: pandas object supporting ``.rolling``.
    :param window: number of trailing rows in each window (default 10).
    :return: object shaped like ``df`` holding, per column, the rank of the
        current value inside its window; rows without a full window are NaN.
    """
    return df.rolling(window).apply(rolling_rank)
def rolling_prod(na):
    """Return the product of all elements in one rolling window.

    The previous ``na.prod(na)`` passed the data itself as the ``axis``
    argument and raised; ``np.prod`` over the window is what was intended.

    :param na: one rolling window of values (array-like).
    :return: scalar product of the window's values.
    """
    # np.asarray makes this work whether pandas hands over an ndarray or a
    # Series for the window.
    return np.prod(np.asarray(na))


# Rolling product
def product(df, window=10):
    """Return the rolling product of ``df`` over the trailing ``window`` rows.

    :param df: pandas object supporting ``.rolling``.
    :param window: number of trailing rows in each window (default 10).
    :return: object shaped like ``df``; rows without a full window are NaN.
    """
    return df.rolling(window).apply(rolling_prod)
# Rolling-window minimum
def ts_min(df, window=10):
    """Return the rolling minimum of ``df`` over the trailing ``window`` rows.

    :param df: pandas object supporting ``.rolling``.
    :param window: number of trailing rows in each window (default 10).
    :return: object shaped like ``df``; rows without a full window are NaN.
    """
    return df.rolling(window).min()
# Rolling-window maximum
def ts_max(df, window=10):
    """Return the rolling maximum of ``df`` over the trailing ``window`` rows.

    :param df: pandas object supporting ``.rolling``.
    :param window: number of trailing rows in each window (default 10).
    :return: object shaped like ``df``; rows without a full window are NaN.
    """
    return df.rolling(window).max()
# Difference
def delta(df, period=1):
    """Return ``df`` minus its own value ``period`` rows earlier.

    :param df: pandas object supporting ``.diff``.
    :param period: row offset of the value to subtract (default 1).
    :return: object shaped like ``df``; the first ``period`` rows are NaN.
    """
    return df.diff(period)
# Lag
def delay(df, period=1):
    """Return ``df`` shifted down by ``period`` rows (the lagged values).

    :param df: pandas object supporting ``.shift``.
    :param period: number of rows to lag by (default 1).
    :return: object shaped like ``df``; the first ``period`` rows are NaN.
    """
    return df.shift(period)
# Cross-sectional rank
def rank(df):
    """Return the percentile rank of each value within its own row.

    Ranking runs across columns (``axis=1``) with ``pct=True``, so every
    row is ranked independently and results lie in (0, 1].

    :param df: pandas DataFrame.
    :return: DataFrame shaped like ``df`` holding row-wise percentile ranks.
    """
    return df.rank(axis=1, pct=True)
# Cross-sectional rescaling
def scale(df, k=1):
    """Rescale every row of ``df`` so its absolute values sum to ``k``.

    The normalisation is done per row, i.e. across columns, which is the
    same cross-section that ``rank`` uses (``axis=1``).  The earlier version
    divided by each column's sum over the whole time axis, which mixed
    future rows into every value and did not give rows a gross sum of ``k``.

    :param df: pandas DataFrame.
    :param k: target sum of absolute values per row (default 1).
    :return: DataFrame shaped like ``df``.  A row whose absolute values sum
        to zero yields NaN/inf, as ordinary pandas division does.
    """
    row_gross = np.abs(df).sum(axis=1)
    # axis=0 aligns the per-row totals with the row index.
    return df.mul(k).div(row_gross, axis=0)
# Position of the rolling maximum
def ts_argmax(df, window=10):
    """Return the 1-based position of the maximum inside each rolling window.

    Position 1 is the oldest row of the window and ``window`` the newest;
    ties resolve to the first occurrence, as ``np.argmax`` does.

    :param df: pandas object supporting ``.rolling``.
    :param window: number of trailing rows in each window (default 10).
    :return: object shaped like ``df``; rows without a full window are NaN.
    """
    def _argmax_position(values):
        # Coerce to ndarray so the result is a position no matter whether
        # pandas passes the window as an ndarray or as a Series.
        return np.argmax(np.asarray(values))

    return df.rolling(window).apply(_argmax_position) + 1
# Position of the rolling minimum
def ts_argmin(df, window=10):
    """Return the 1-based position of the minimum inside each rolling window.

    Position 1 is the oldest row of the window and ``window`` the newest;
    ties resolve to the first occurrence, as ``np.argmin`` does.

    :param df: pandas object supporting ``.rolling``.
    :param window: number of trailing rows in each window (default 10).
    :return: object shaped like ``df``; rows without a full window are NaN.
    """
    def _argmin_position(values):
        # Coerce to ndarray so the result is a position no matter whether
        # pandas passes the window as an ndarray or as a Series.
        return np.argmin(np.asarray(values))

    return df.rolling(window).apply(_argmin_position) + 1
# 2. 定义计算alpha值的类
class alphas(object):
    """Alpha001-Alpha010 of the "101 Formulaic Alphas", computed from OHLCV data.

    Every ``alphaNNN`` method returns a DataFrame with the same layout as
    the price frames built in ``__init__``.  Cross-sectional operators
    (``rank``) work across columns and time-series operators (``ts_*``,
    ``delta``, ``delay``, ``correlation``) work down rows, so the frames are
    presumably indexed by date with one column per instrument - confirm
    against the data source.
    """

    def __init__(self, pn_data):
        """Extract the per-field frames and derived series used by the alphas.

        :param pn_data: pandas.Panel whose minor axis contains the fields
            'open', 'high', 'low', 'close' and 'volume'.
        """
        # Forward-fill gaps on a new object instead of in place, so the
        # caller's panel is left untouched.
        if pn_data.isnull().values.any():
            pn_data = pn_data.fillna(method='ffill')
        self.open = pd.DataFrame(pn_data.minor_xs('open'), dtype=np.float64)
        self.high = pd.DataFrame(pn_data.minor_xs('high'), dtype=np.float64)
        self.low = pd.DataFrame(pn_data.minor_xs('low'), dtype=np.float64)
        self.close = pd.DataFrame(pn_data.minor_xs('close'), dtype=np.float64)
        self.volume = pd.DataFrame(pn_data.minor_xs('volume'), dtype=np.float64)
        # Simple close-to-close returns.
        self.returns = pd.DataFrame(self.close.pct_change())
        # Average volume over the trailing 10 rows.
        self.adv = ts_mean(self.volume, 10)
        # VWAP approximation: 10-row volume-weighted average of the close.
        self.vwap = ts_sum(self.close * self.volume, 10) / ts_sum(self.volume, 10)

    # 3. Factor functions

    # alpha001:(rank(Ts_ArgMax(SignedPower(((returns < 0) ? stddev(returns, 20) : close), 2.), 5)) -0.5)
    def alpha001(self):
        """Alpha#001; returns the centred cross-sectional rank (rank - 0.5)."""
        # Work on a copy: assigning through an alias of self.close would
        # overwrite the stored close prices and corrupt every later alpha.
        inner = self.close.copy()
        inner[self.returns < 0] = stddev(self.returns, 20)
        # SignedPower(x, 2) is written as a plain square: a standard
        # deviation is non-negative and close prices are assumed positive.
        alpha = rank(ts_argmax(inner ** 2, 5)) - 0.5
        return alpha

    # alpha002:(-1 * correlation(rank(delta(log(volume), 2)), rank(((close - open) / open)), 6))
    def alpha002(self):
        """Alpha#002; infinite values are replaced with NaN."""
        volume_change_rank = rank(delta(np.log(self.volume), 2))
        intraday_return_rank = rank((self.close - self.open) / self.open)
        alpha = -1 * correlation(volume_change_rank, intraday_return_rank, 6)
        return alpha.replace([-np.inf, np.inf], np.nan)

    # alpha003:(-1 * correlation(rank(open), rank(volume), 10))
    def alpha003(self):
        """Alpha#003; infinite values are replaced with NaN."""
        alpha = -1 * correlation(rank(self.open), rank(self.volume), 10)
        return alpha.replace([-np.inf, np.inf], np.nan)

    # alpha004: (-1 * Ts_Rank(rank(low), 9))
    def alpha004(self):
        """Alpha#004."""
        alpha = -1 * ts_rank(rank(self.low), 9)
        return alpha

    # alpha005:(rank((open - (sum(vwap, 10) / 10))) * (-1 * abs(rank((close - vwap)))))
    def alpha005(self):
        """Alpha#005."""
        open_vs_avg_vwap = rank(self.open - (ts_sum(self.vwap, 10) / 10))
        close_vs_vwap = rank(self.close - self.vwap)
        alpha = open_vs_avg_vwap * (-1 * np.abs(close_vs_vwap))
        return alpha

    # alpha006: (-1 * correlation(open, volume, 10))
    def alpha006(self):
        """Alpha#006."""
        alpha = -1 * correlation(self.open, self.volume, 10)
        return alpha

    # alpha007: ((adv20 < volume) ? ((-1 * ts_rank(abs(delta(close, 7)), 60)) * sign(delta(close, 7))) : (-1* 1))
    def alpha007(self):
        """Alpha#007."""
        adv20 = ts_mean(self.volume, 20)
        close_change = delta(self.close, 7)
        alpha = -1 * ts_rank(abs(close_change), 60) * np.sign(close_change)
        # Else-branch of the formula: volume not above its 20-row average.
        alpha[adv20 >= self.volume] = -1
        return alpha

    # alpha008: (-1 * rank(((sum(open, 5) * sum(returns, 5)) - delay((sum(open, 5) * sum(returns, 5)),10))))
    def alpha008(self):
        """Alpha#008."""
        momentum = ts_sum(self.open, 5) * ts_sum(self.returns, 5)
        alpha = -1 * rank(momentum - delay(momentum, 10))
        return alpha

    # alpha009:((0 < ts_min(delta(close, 1), 5)) ? delta(close, 1) : ((ts_max(delta(close, 1), 5) < 0) ?delta(close, 1) : (-1 * delta(close, 1))))
    def alpha009(self):
        """Alpha#009."""
        delta_close = delta(self.close, 1)
        all_up = ts_min(delta_close, 5) > 0
        all_down = ts_max(delta_close, 5) < 0
        # Start from the reversal branch, then restore the raw change where
        # the last 5 changes all moved in the same direction.
        alpha = -1 * delta_close
        alpha[all_up | all_down] = delta_close
        return alpha

    # alpha010: rank(((0 < ts_min(delta(close, 1), 4)) ? delta(close, 1) : ((ts_max(delta(close, 1), 4) < 0)? delta(close, 1) : (-1 * delta(close, 1)))))
    def alpha010(self):
        """Alpha#010; the cross-sectional rank of the conditional signal."""
        delta_close = delta(self.close, 1)
        all_up = ts_min(delta_close, 4) > 0
        all_down = ts_max(delta_close, 4) < 0
        alpha = -1 * delta_close
        alpha[all_up | all_down] = delta_close
        # The formula wraps the whole expression in rank(); this was missing.
        return rank(alpha)
# 4. 传入股票池数据
if __name__ == '__main__':
    # Daily candles for the 'hs300' code list over the sample period.
    start = datetime(2017, 1, 1)
    end = datetime(2017, 11, 11)
    codes = DataAPI.info.codes('hs300')
    pn = DataAPI.candle(codes, 'D', start=start, end=end)
    prices = pn.minor_xs('close')

    # Compute the ten factors.
    alpha = alphas(pn)
    factors = {'one': alpha.alpha001(),
               'two': alpha.alpha002(),
               'three': alpha.alpha003(),
               'four': alpha.alpha004(),
               'five': alpha.alpha005(),
               'six': alpha.alpha006(),
               'seven': alpha.alpha007(),
               'eight': alpha.alpha008(),
               'nine': alpha.alpha009(),
               'ten': alpha.alpha010()}

    # Pass every factor frame through Factor.get_disturbed_factor before
    # handing it to alphalens.
    f = Factor()
    factors_disturbed = {name: f.get_disturbed_factor(frame) for name, frame in factors.items()}

    def cal_monthly_ic(factor):
        """Return the mean information coefficient of ``factor`` grouped by month."""
        factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
            factor.stack(), prices, quantiles=5, periods=(1, 5, 10, 20, 60))
        return alphalens.performance.mean_information_coefficient(factor_data, by_time='M')

    monthly_ic = {key: cal_monthly_ic(value) for key, value in factors_disturbed.items()}
    # One row per factor, one column per forward-return period.
    monthly_ic_mean = pd.DataFrame(
        [frame.mean() for frame in monthly_ic.values()],
        list(monthly_ic.keys())
    )
    print(monthly_ic_mean)
1 5 10 20 60
one -0.011799 -0.007922 -0.005979 0.012213 0.025780
two 0.013996 0.004542 -0.008711 -0.016913 -0.023616
three 0.006315 0.026195 0.045510 0.029002 0.053809
four 0.025770 0.033320 0.017888 0.002327 0.007212
five 0.006262 -0.006906 -0.032523 -0.019436 -0.013545
six 0.010031 0.056760 0.075251 0.041658 0.080865
seven -0.006032 0.015775 0.002359 0.006403 0.011402
eight 0.009432 0.024283 0.018409 -0.014471 -0.017339
nine 0.013133 0.013432 0.012541 0.001124 0.005285
ten 0.008071 0.010746 0.014631 0.003472 0.006771
import seaborn as sns

# Heatmap of the mean monthly IC per factor and period, rounded to 2 decimals.
plt.figure(figsize=(10, 10))
sns.heatmap(
    monthly_ic_mean.round(2),
    annot=True,
    square=True,
)
plt.show()

# Cumulative returns by quantile for factor 'six'.
factor = factors_disturbed.get('six').stack()
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
    factor,
    prices,
    quantiles=5,
)
mean_return_by_q, std_err_by_q = alphalens.performance.mean_return_by_quantile(
    factor_data,
    by_date=True,
)
alphalens.plotting.plot_cumulative_returns_by_quantile(mean_return_by_q, 10)
plt.show()
下载Alpha101完整代码研究,并设计有效的Alpha因子,导入Alphalens计算绩效。