@Channelchan 2018-01-30T18:56:13.000000Z 字数 7851 阅读 24064


  1. 读取本地数据
  2. 数据预处理
  3. 多因子组合方法
  4. 查看因子组合绩效
  5. 交集与并集组合的方法


  1. from jaqs.data.dataapi import DataApi
  2. from jaqs.data import DataView
  3. import numpy as np
  4. from datetime import datetime
  5. import pandas as pd
  6. import warnings
  7. import alphalens
  8. warnings.filterwarnings("ignore")
  9. dataview_folder = 'JAQS_Data/hs300'
  10. dv = DataView()
  11. dv.load_dataview(dataview_folder)
Dataview loaded successfully.
  1. def mask_index_member():
  2. df_index_member = dv.get_ts('index_member')
  3. mask_index_member = df_index_member ==0 #定义信号过滤条件-非指数成分
  4. return mask_index_member
  5. def limit_up_down():
  6. # 定义可买卖条件——未停牌、未涨跌停
  7. trade_status = dv.get_ts('trade_status')
  8. mask_sus = trade_status == u'停牌'
  9. # 涨停
  10. dv.add_formula('up_limit', '(close - Delay(close, 1)) / Delay(close, 1) > 0.095', is_quarterly=False)
  11. # 跌停
  12. dv.add_formula('down_limit', '(close - Delay(close, 1)) / Delay(close, 1) < -0.095', is_quarterly=False)
  13. can_enter = np.logical_and(dv.get_ts('up_limit') < 1, ~mask_sus) # 未涨停未停牌
  14. can_exit = np.logical_and(dv.get_ts('down_limit') < 1, ~mask_sus) # 未跌停未停牌
  15. return can_enter,can_exit
  1. mask = mask_index_member()
  2. can_enter,can_exit = limit_up_down()
  3. price = dv.get_ts('close_adj')
  4. price_bench = dv.data_benchmark
  1. all_factors = ['pb', 'roe', 'price_div_dps', 'ps_ttm', 'pe_ttm', 'roa']


  1. 去极值,标准化,加干扰项
  2. 修改名称索引
  3. 计算IC的相关系数,并剔除高相关性的因子
  1. origin_factors = {f: dv.get_ts(f) for f in all_factors}

1. 去极值,标准化,加干扰项

  1. from jaqs.research.signaldigger import process
  2. #去极值,z_score标准化,加干扰值
  3. factor_dict = {name: process.get_disturbed_factor(process.standardize(process.winsorize(frame)))\
  4. for name, frame in origin_factors.items()}

2. 修改名称索引

  1. def change_columns_index(signal):
  2. new_names = {}
  3. for c in signal.columns:
  4. if c.endswith('SZ'):
  5. new_names[c] = c.replace('SZ', 'XSHE')
  6. elif c.endswith('SH'):
  7. new_names[c] = c.replace('SH', 'XSHG')
  8. signal = signal.rename_axis(new_names, axis=1)
  9. signal.index = pd.Index(map(lambda x: datetime.strptime(str(x),"%Y%m%d") , signal.index))
  10. return signal
  1. price_time = change_columns_index(price)

3. 计算IC的相关系数,并剔除高相关性的因子

  1. def cal_daily_ic(factor_df):
  2. factor_data = alphalens.utils.get_clean_factor_and_forward_returns(factor_df.stack(), price_time, quantiles=5)
  3. return alphalens.performance.mean_information_coefficient(factor_data, by_time='D')
  4. daily_ic = {key: cal_daily_ic(change_columns_index(value)) for key, value in factor_dict.items()}
  5. daily_panel = pd.Panel(daily_ic)
  1. factor_corr = daily_panel.minor_xs(5).corr()
  1. print(factor_corr)
                     pb    pe_ttm  price_div_dps    ps_ttm       roa       roe
pb             1.000000  0.858428       0.764783  0.913597  0.689696  0.229572
pe_ttm         0.858428  1.000000       0.823935  0.807155  0.376659 -0.169861
price_div_dps  0.764783  0.823935       1.000000  0.699770  0.320101 -0.109434
ps_ttm         0.913597  0.807155       0.699770  1.000000  0.615621  0.244025
roa            0.689696  0.376659       0.320101  0.615621  1.000000  0.789137
roe            0.229572 -0.169861      -0.109434  0.244025  0.789137  1.000000
  1. ic = daily_panel.minor_xs(5).mean()
  1. def compare(corr, targets):
  2. index = list(reversed(targets.index))
  3. length = len(index)
  4. for i in range(length):
  5. name = index[i]
  6. if available(corr, name, index[i+1:]):
  7. yield name
  1. def available(corr, target, compares):
  2. for c in compares:
  3. if corr.loc[target, c] > 0.9 or (corr.loc[target, c] < -0.9 ):
  4. return False
  5. return True
  1. for i in compare(factor_corr, ic.nlargest(2)):
  2. print(i)
  1. big_dict = {i: factor_dict[i] for i in compare(factor_corr, ic.nlargest(2))}


  1. combine_factors的equal_weight(等权重加权)
  2. combine_factors的max_IR_props(动态加权_最大化IR)

1. combine_factors的equal_weight(等权重加权)

  1. from jaqs.research import multi_factor
  2. Equal_Portfolio = multi_factor.combine_factors(big_dict,
  3. standardize_type="z_score",
  4. winsorization=False,
  5. weighted_method="equal_weight",
  6. max_IR_props=None)
  1. print(Equal_Portfolio.tail(1))
symbol      000001.SZ  000002.SZ  000008.SZ  000009.SZ  000012.SZ  000021.SZ  \
20171222    -0.346562  -0.171129  -0.823929  -0.656348   0.081882  -0.025089   

symbol      000024.SZ  000027.SZ  000031.SZ  000039.SZ    ...      601998.SH  \
trade_date                                                ...                  
20171222    -0.411117   -0.70641   -0.22589  -0.646009    ...       -0.36197   

symbol      603000.SH  603160.SH  603288.SH  603699.SH  603799.SH  603833.SH  \
20171222    -1.052117   3.039816   2.988756  -0.184505   1.917662   2.297176   

symbol      603858.SH  603885.SH  603993.SH  
20171222      0.39488   0.967938  -0.389438  

[1 rows x 539 columns]

2. combine_factors的max_IR_props(动态加权_最大化IR)

  1. price_bench = dv.data_benchmark
  2. max_IR_props = {
  3. 'price': price,
  4. 'benchmark_price': price_bench,# 为空计算的是绝对收益 不为空计算相对收益
  5. 'period': 5,
  6. 'mask': mask,
  7. 'can_enter': can_enter,
  8. 'can_exit': can_exit,
  9. 'forward': True,
  10. 'commission': 0.0008,
  11. "covariance_type": "simple", #协方差矩阵估算方法 还可以为"shrink"
  12. "rollback_period": 30} #用多少期的ic做权重计算
  1. Factor_Portfolio = multi_factor.combine_factors(big_dict,
  2. standardize_type="rank",
  3. winsorization=False,
  4. weighted_method="max_IR",
  5. max_IR_props=max_IR_props)
Nan Data Count (should be zero) : 0;  Percentage of effective data: 53%
Nan Data Count (should be zero) : 0;  Percentage of effective data: 53%
  1. print(Factor_Portfolio.tail(1))
symbol      000001.SZ  000002.SZ  000008.SZ  000009.SZ  000012.SZ  000021.SZ  \
20171222     0.664193   0.671614   0.120594   0.263451     0.6141   0.593692   

symbol      000024.SZ  000027.SZ  000031.SZ  000039.SZ    ...      601998.SH  \
trade_date                                                ...                  
20171222     0.497217   0.243043   0.617811   0.304267    ...       0.649351   

symbol      603000.SH  603160.SH  603288.SH  603699.SH  603799.SH  603833.SH  \
20171222     0.059369    0.96846    0.96475   0.441558   0.959184   0.942486   

symbol      603858.SH  603885.SH  603993.SH  
20171222      0.61039   0.892393   0.419295  

[1 rows x 539 columns]


  1. JAQS绩效
  2. alphalens绩效

1. JAQS绩效

  1. import matplotlib.pyplot as plt
  2. from jaqs.research import SignalDigger
  3. def cal_obj(signal, name, period, quantile):
  4. price = dv.get_ts('close_adj')
  5. obj = SignalDigger(output_folder="hs300/%s" % name,
  6. output_format='pdf')
  7. obj.process_signal_before_analysis(signal,
  8. price=price,
  9. n_quantiles=quantile, period=period,
  10. # benchmark_price=price_bench,
  11. can_enter = can_enter,
  12. can_exit = can_exit,
  13. mask=mask
  14. )
  15. obj.create_full_report()
  16. return obj
  17. def plot_pfm(signal, name, period=5, quantile=5):
  18. obj = cal_obj(signal, name, period, quantile)
  19. obj.fig_objs
  20. plt.show()
  21. def signal_data(signal, name, period=5, quantile=5):
  22. obj = cal_obj(signal, name, period, quantile)
  23. return obj.signal_data
  1. plot_pfm(Factor_Portfolio, 'roa_roe', 5, 5)
Nan Data Count (should be zero) : 0;  Percentage of effective data: 51%

Value of signals of Different Quantiles Statistics
               min       max      mean       std  count    count %
1         0.001855  0.818182  0.124605  0.074315  81165  20.142948
2         0.128015  0.935065  0.332423  0.073199  80584  19.998759
3         0.317254  0.942486  0.522951  0.069826  80584  19.998759
4         0.484230  0.953618  0.706823  0.065806  80584  19.998759
5         0.664193  1.000000  0.881344  0.057896  80028  19.860775
Information Analysis
IC Mean      0.010
IC Std.      0.182
t-stat(IC)   2.008
p-value(IC)  0.045
IC Skew     -0.048
IC Kurtosis -0.267
Ann. IR      0.053
<matplotlib.figure.Figure at 0x24c7449e828>



2. Alphalens绩效

  1. factor = change_columns_index(Factor_Portfolio.loc['2015-09-01':]).stack()
  1. factor_data = alphalens.utils.get_clean_factor_and_forward_returns(factor, price_time, quantiles=5)
  2. mean_return_by_q, std_err_by_q = alphalens.performance.mean_return_by_quantile(factor_data, by_date=True)
  1. import matplotlib.pyplot as plt
  2. alphalens.plotting.plot_cumulative_returns_by_quantile(mean_return_by_q, 5)
  3. plt.show()


  1. ic = alphalens.performance.factor_information_coefficient(factor_data)
  2. alphalens.plotting.plot_ic_hist(ic)
  3. mean_monthly_ic = alphalens.performance.mean_information_coefficient(factor_data, by_time='M')
  4. alphalens.plotting.plot_monthly_ic_heatmap(mean_monthly_ic)
  5. plt.show()




  1. roe_df = dv.get_ts('roe')
  2. pb_df = dv.get_ts('pb')
  1. def largest(row, n=30):
  2. return pd.Series(1, row.nlargest(n).index)
  3. def smallest(row, n=30):
  4. return pd.Series(1, row.nlargest(n).index)
  1. from functools import partial
  2. big_roe = roe_df.agg(partial(largest, n=10), axis=1)
  3. small_pb = pb_df.agg(partial(smallest, n=10), axis=1)


  1. Intersection_data = big_roe+small_pb
  1. Intersection = Intersection_data[Intersection_data==2].replace(2,1)


  1. Union_data = big_roe.replace(np.nan,0)+small_pb.replace(np.nan,0)
  1. Union = Union_data.replace(0, np.nan)
  2. Union[Union>0] = 1