[关闭]
@Channelchan 2017-12-02T18:17:33.000000Z 字数 6587 阅读 14258

什么是Alpha101?

用数学公式找到Alpha机会

目录

  1. Alpha101是什么?
  2. Alpha101主要元素有什么?
  3. Alpha001-010怎么写?
  4. 如何用TA_Lib设计新的Alpha因子?

Alpha101是什么?

WorldQuant根据数据挖掘的方法发掘了101个alpha,据说其中 80% 的因子至今仍然有效,并运行在他们的投资策略中。Alpha101给出了101个真实量化交易Alpha的显式公式,这些公式同时也是可以直接使用的计算机代码。这些Alpha的平均持有期大约在0.6至6.4天之间,两两之间的平均相关性较低,为15.9%。它们的回报与波动率强相关,但对换手率没有明显的依赖性,这直接证实了此前通过间接经验分析得到的结论。作者还从经验上进一步发现,换手率对alpha之间相关性的解释能力很差。

PDF下载:

Python代码下载:

Alpha101主要元素有什么?

1. 因子组成元素

2. 函数与运算符

详细参考原文PDF

Alpha001-010怎么写?

  1. 编制函数需要的算法
  2. 定义计算Alpha的类
  3. 编制因子的函数
  4. 传入股票池数据
  1. # 1. 编制函数需要的算法
  2. import numpy as np
  3. import pandas as pd
  4. import talib as ta
  5. from scipy.stats import rankdata
  6. from fxdayu_data import DataAPI
  7. from datetime import datetime
  8. import alphalens
  9. from fxdayu_alphaman.factor.factor import Factor
  10. import matplotlib.pyplot as plt
  11. # 计算alpha101时会使用的函数
  12. # 移动求和
  13. def ts_sum(df,window=10):
  14. return df.rolling(window).sum()
  15. #移动平均
  16. def ts_mean(df,window=10):
  17. return df.rolling(window).mean()
  18. #移动标准差
  19. def stddev(df,window=10):
  20. return df.rolling(window).std()
  21. #移动相关系数
  22. def correlation(x,y,window=10):
  23. return x.rolling(window).corr(y)
  24. #移动协方差
  25. def covariance(x,y,window=10):
  26. return x.rolling(window).cov(y)
  27. def rolling_rank(na):
  28. return rankdata(na)[-1]
  29. #移动排序
  30. def ts_rank(df, window=10):
  31. return df.rolling(window).apply(rolling_rank)
  32. def rolling_prod(na):
  33. return na.prod(na)
  34. #移动乘积
  35. def product(df,window=10):
  36. return df.rolling(window).apply(rolling_prod)
  37. # 移动窗口最小值
  38. def ts_min(df,window=10):
  39. return df.rolling(window).min()
  40. # 移动窗口最大值
  41. def ts_max(df,window=10):
  42. return df.rolling(window).max()
  43. # 差值
  44. def delta(df,period=1):
  45. return df.diff(period)
  46. # 位移
  47. def delay(df,period=1):
  48. return df.shift(period)
  49. # 横向排序
  50. def rank(df):
  51. return df.rank(axis=1, pct=True)
  52. # 数值规模
  53. def scale(df,k=1):
  54. return df.mul(k).div(np.abs(df).sum())
  55. # 最大值的坐标
  56. def ts_argmax(df,window=10):
  57. return df.rolling(window).apply(np.argmax)+1
  58. # 最小值的坐标
  59. def ts_argmin(df,window=10):
  60. return df.rolling(window).apply(np.argmin)+1
  1. # 2. 定义计算alpha值的类
  2. class alphas(object):
  3. def __init__(self, pn_data):
  4. """
  5. :传入参数 pn_data: pandas.Panel
  6. """
  7. # 获取历史数据
  8. if pn_data.isnull().values.any():
  9. pn_data.fillna(method='ffill',inplace=True)
  10. self.open = pd.DataFrame(pn_data.minor_xs('open'), dtype=np.float64)
  11. self.high = pd.DataFrame(pn_data.minor_xs('high'), dtype=np.float64)
  12. self.low = pd.DataFrame(pn_data.minor_xs('low'), dtype=np.float64)
  13. self.close = pd.DataFrame(pn_data.minor_xs('close'), dtype=np.float64)
  14. self.volume = pd.DataFrame(pn_data.minor_xs('volume'), dtype=np.float64)
  15. self.returns = pd.DataFrame(self.close.pct_change())
  16. self.adv = ts_mean(self.volume, 10)
  17. self.vwap = ts_sum(self.close*self.volume, 10)/ts_sum(self.volume, 10)
  18. # 3. 编制因子的函数
  19. # alpha001:(rank(Ts_ArgMax(SignedPower(((returns < 0) ? stddev(returns, 20) : close), 2.), 5)) -0.5)
  20. def alpha001(self):
  21. inner = self.close
  22. inner[self.returns < 0] = stddev(self.returns, 20)
  23. alpha = rank(ts_argmax(inner ** 2, 5))
  24. return alpha
  25. # alpha002:(-1 * correlation(rank(delta(log(volume), 2)), rank(((close - open) / open)), 6))
  26. def alpha002(self):
  27. alpha = -1 * correlation(rank(delta(np.log(self.volume), 2)), rank((self.close - self.open) / self.open), 6)
  28. return alpha.replace([-np.inf, np.inf], np.nan)
  29. # alpha003:(-1 * correlation(rank(open), rank(volume), 10))
  30. def alpha003(self):
  31. alpha = -1 * correlation(rank(self.open), rank(self.volume), 10)
  32. return alpha.replace([-np.inf, np.inf], np.nan)
  33. # alpha004: (-1 * Ts_Rank(rank(low), 9))
  34. def alpha004(self):
  35. alpha = -1 * ts_rank(rank(self.low), 9)
  36. return alpha
  37. # alpha005:(rank((open - (sum(vwap, 10) / 10))) * (-1 * abs(rank((close - vwap)))))
  38. def alpha005(self):
  39. alpha = (rank((self.open - (ts_sum(self.vwap, 10) / 10))) * (-1 * np.abs(rank((self.close - self.vwap)))))
  40. return alpha
  41. # alpha006: (-1 * correlation(open, volume, 10))
  42. def alpha006(self):
  43. alpha = -1 * correlation(self.open, self.volume, 10)
  44. return alpha
  45. # alpha007: ((adv20 < volume) ? ((-1 * ts_rank(abs(delta(close, 7)), 60)) * sign(delta(close, 7))) : (-1* 1))
  46. def alpha007(self):
  47. adv20 = ts_mean(self.volume, 20)
  48. alpha = -1 * ts_rank(abs(delta(self.close, 7)), 60) * np.sign(delta(self.close, 7))
  49. alpha[adv20 >= self.volume] = -1
  50. return alpha
  51. # alpha008: (-1 * rank(((sum(open, 5) * sum(returns, 5)) - delay((sum(open, 5) * sum(returns, 5)),10))))
  52. def alpha008(self):
  53. alpha = -1 * (rank(((ts_sum(self.open, 5) * ts_sum(self.returns, 5)) -
  54. delay((ts_sum(self.open, 5) * ts_sum(self.returns, 5)), 10))))
  55. return alpha
  56. # alpha009:((0 < ts_min(delta(close, 1), 5)) ? delta(close, 1) : ((ts_max(delta(close, 1), 5) < 0) ?delta(close, 1) : (-1 * delta(close, 1))))
  57. def alpha009(self):
  58. delta_close = delta(self.close, 1)
  59. cond_1 = ts_min(delta_close, 5) > 0
  60. cond_2 = ts_max(delta_close, 5) < 0
  61. alpha = -1 * delta_close
  62. alpha[cond_1 | cond_2] = delta_close
  63. return alpha
  64. # alpha010: rank(((0 < ts_min(delta(close, 1), 4)) ? delta(close, 1) : ((ts_max(delta(close, 1), 4) < 0)? delta(close, 1) : (-1 * delta(close, 1)))))
  65. def alpha010(self):
  66. delta_close = delta(self.close, 1)
  67. cond_1 = ts_min(delta_close, 4) > 0
  68. cond_2 = ts_max(delta_close, 4) < 0
  69. alpha = -1 * delta_close
  70. alpha[cond_1 | cond_2] = delta_close
  71. return alpha
  1. # 4. 传入股票池数据
  2. if __name__ == '__main__':
  3. start = datetime(2017,1,1)
  4. end = datetime(2017,11,11)
  5. codes = DataAPI.info.codes('hs300')
  6. pn = DataAPI.candle(codes,'D',start=start, end=end)
  7. prices = pn.minor_xs('close')
  1. alpha = alphas(pn)
  2. factors = {'one': alpha.alpha001(),
  3. 'two': alpha.alpha002(),
  4. 'three': alpha.alpha003(),
  5. 'four': alpha.alpha004(),
  6. 'five': alpha.alpha005(),
  7. 'six': alpha.alpha006(),
  8. 'seven': alpha.alpha007(),
  9. 'eight': alpha.alpha008(),
  10. 'nine': alpha.alpha009(),
  11. 'ten': alpha.alpha010()}
  12. f = Factor()
  13. factors_disturbed = {name: f.get_disturbed_factor(frame) for name, frame in factors.items()}
  14. def cal_monthly_ic(factor):
  15. factor_data = alphalens.utils.get_clean_factor_and_forward_returns(factor.stack(), prices, quantiles=5,periods=(1,5,10,20,60))
  16. return alphalens.performance.mean_information_coefficient(factor_data, by_time='M')
  17. monthly_ic = {key: cal_monthly_ic(value) for key, value in factors_disturbed.items()}
  1. monthly_ic_mean = pd.DataFrame(
  2. list(map(lambda frame: frame.mean(), monthly_ic.values())),
  3. monthly_ic.keys()
  4. )
  5. print (monthly_ic_mean)
             1         5         10        20        60
one   -0.011799 -0.007922 -0.005979  0.012213  0.025780
two    0.013996  0.004542 -0.008711 -0.016913 -0.023616
three  0.006315  0.026195  0.045510  0.029002  0.053809
four   0.025770  0.033320  0.017888  0.002327  0.007212
five   0.006262 -0.006906 -0.032523 -0.019436 -0.013545
six    0.010031  0.056760  0.075251  0.041658  0.080865
seven -0.006032  0.015775  0.002359  0.006403  0.011402
eight  0.009432  0.024283  0.018409 -0.014471 -0.017339
nine   0.013133  0.013432  0.012541  0.001124  0.005285
ten    0.008071  0.010746  0.014631  0.003472  0.006771
  1. import seaborn as sns
  2. plt.figure(figsize=(10, 10))
  3. sns.heatmap(round(monthly_ic_mean,2),annot=True, square=True, )
  4. plt.show()

output_10_0.png-21.4kB

  1. factor = factors_disturbed.get('six').stack()
  1. factor_data = alphalens.utils.get_clean_factor_and_forward_returns(factor, prices, quantiles=5)
  2. mean_return_by_q, std_err_by_q = alphalens.performance.mean_return_by_quantile(factor_data, by_date=True)
  1. alphalens.plotting.plot_cumulative_returns_by_quantile(mean_return_by_q, 10)
  2. plt.show()

output_13_1.png-47.4kB

作业

下载Alpha101完整代码研究,并设计有效的Alpha因子,导入Alphalens计算绩效。

添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注