[关闭]
@Channelchan 2018-01-09T21:16:05.000000Z 字数 6251 阅读 25571

IC板块分类

  1. 读取本地因子数据
  2. 计算factor_data
  3. 计算板块IC
  4. 筛选与处理板块数据

1_读取本地因子数据

读取本地数据并转换格式
1. mask
2. group
3. factor
4. price

  1. from jaqs.data.dataapi import DataApi
  2. from jaqs.data import DataView
  3. from jaqs.research import SignalDigger
  4. import numpy as np
  5. from datetime import datetime
  6. import pandas as pd
  7. from datetime import timedelta
  8. import warnings
  9. warnings.filterwarnings("ignore")
  10. dataview_folder = 'JAQS_Data/hs300'
  11. dv = DataView()
  12. dv.load_dataview(dataview_folder)
D:\Anaconda3\lib\site-packages\statsmodels\compat\pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
  from pandas.core import datetools


Dataview loaded successfully.
  1. def change_columns_index(signal):
  2. new_names = {}
  3. for c in signal.columns:
  4. if c.endswith('SZ'):
  5. new_names[c] = c.replace('SZ', 'XSHE')
  6. elif c.endswith('SH'):
  7. new_names[c] = c.replace('SH', 'XSHG')
  8. signal = signal.rename_axis(new_names, axis=1)
  9. signal.index = pd.Index(map(lambda x: datetime.strptime(str(x),"%Y%m%d") , signal.index))
  10. return signal
  1. mask = dv.get_ts('mask_fundamental')
  1. group = change_columns_index(dv.get_ts('group'))
  1. PB_Data = change_columns_index(dv.get_ts('pb').shift(1, axis=0)[mask==0])
  1. prices = change_columns_index(dv.get_ts('close_adj'))

2_计算factor_data

alphalens.utils.get_clean_factor_and_forward_returns()

  1. import alphalens as al
  2. def get_factor_data(factor, prices, group):
  3. factor_data = al.utils.get_clean_factor_and_forward_returns(factor.stack(),
  4. prices,
  5. groupby=group.stack(),
  6. by_group=False,
  7. quantiles=5,
  8. bins=None,
  9. periods=(1, 5, 10),
  10. filter_zscore=20,
  11. groupby_labels=None)
  12. return factor_data
  1. factor_data = get_factor_data(PB_Data, prices, group)
  1. print(factor_data.head())
                               1         5        10  factor  \
date       asset                                               
2012-01-05 000001.XSHE -0.002599  0.025341  0.025341  1.1142   
           000002.XSHE -0.012414  0.037241  0.074483  1.7186   
           000008.XSHE  0.006299  0.111811  0.034646  6.6091   
           000009.XSHE  0.059748  0.322851  0.363732  4.1941   
           000012.XSHE  0.008537  0.190244  0.234146  2.6377   

                                            group  factor_quantile  
date       asset                                                    
2012-01-05 000001.XSHE                    Finance                1  
           000002.XSHE                Real_Estate                2  
           000008.XSHE  Accommodation_Restaurants                5  
           000009.XSHE        Synthesise_Industry                4  
           000012.XSHE              Manufacturing                3  

3_计算板块IC

  1. 全量数据板块IC
  2. 过去两年板块IC(仅使用回测开始前 2015-01-06 至 2016-12-30 的样本,避免未来数据)

alphalens.performance.mean_information_coefficient()

  1. import matplotlib.pyplot as plt
  2. def get_ic_performance(factor_data):
  3. ic_by_sector = al.performance.mean_information_coefficient(factor_data, by_group=True)
  4. al.plotting.plot_ic_by_group(ic_by_sector)
  5. plt.show()
  6. return ic_by_sector
  1. factor_ic = get_ic_performance(factor_data)

output_14_0.png-46.2kB

  1. factor_ic_sample = get_ic_performance(factor_data.loc['20150106':'20161230'])

output_15_0.png-45.1kB

  1. print(factor_ic_sample)
                                      1         5         10
group                                                       
Accommodation_Restaurants      -0.020833 -0.030928 -0.216495
Agriculture                    -0.026206 -0.036600 -0.035583
Construction                   -0.017382 -0.047633 -0.064339
Culture_Sports_Entertainment   -0.057878 -0.080522 -0.101539
Energy                         -0.034706 -0.058490 -0.075038
Finance                        -0.013298 -0.044118 -0.051847
Health_And_Social_Work          0.035052  0.028807  0.067762
Information_Technology         -0.033438 -0.044260 -0.049984
Leasing_and_Commerical_Service -0.020507 -0.026818 -0.033660
Manufacturing                  -0.019122 -0.023002 -0.029964
Mining                         -0.048074 -0.063702 -0.075116
Public_Facilities_Management   -0.021703  0.049277  0.014612
Real_Estate                    -0.037063 -0.050743 -0.057497
Synthesise_Industry            -0.018480 -0.080082 -0.118070
Transportation                 -0.035793 -0.043791 -0.066313
Wholesale                      -0.033879 -0.016690 -0.026236
  1. print(factor_ic_sample[10].index[factor_ic_sample[10]<-0.1])
Index(['Accommodation_Restaurants', 'Culture_Sports_Entertainment',
       'Synthesise_Industry'],
      dtype='object', name='group')

4_筛选与处理板块数据

目的: 根据过去的IC选取板块,再从选中的板块中根据因子值排序选取股票,最后用因子值计算权重。

  1. class get_stock_df():
  2. def __init__(self, factor_data, factor_ic,period=10, n=10, ic=0.02):
  3. '''
  4. period: 持有周期
  5. n: 选取股票数量
  6. ic: ic的阈值
  7. '''
  8. self.factor_data = factor_data
  9. self.factor = factor_data.factor.unstack()
  10. self.factor_ic = factor_ic
  11. self.n = n
  12. self.ic= ic
  13. self.period = period
  14. def get_largest(self, backtest):
  15. largest_list = []
  16. for time_index, value in backtest.iterrows():
  17. largest_list.append(value.nlargest(self.n).to_dict())
  18. df = pd.DataFrame(largest_list, index = backtest.index)
  19. df.index = list(map(lambda idx: idx+timedelta(hours=15), df.index))
  20. return df
  21. def get_smallest(self, backtest):
  22. smallest_list = []
  23. for time_index, value in backtest.iterrows():
  24. smallest_list.append(value.nsmallest(self.n).to_dict())
  25. df = pd.DataFrame(smallest_list, index=backtest.index)
  26. df.index = list(map(lambda idx: idx+timedelta(hours=15), df.index))
  27. return df
  28. def get_group(self, sequence, ic_targets):
  29. for value in sequence:
  30. if value in ic_targets:
  31. yield True
  32. else:
  33. yield False
  34. def get_largest_stocks(self):
  35. high_ic = self.factor_ic[self.period][self.factor_ic[self.period]>self.ic].index
  36. group = list(self.get_group(self.factor_data.group.values, high_ic))
  37. backtest = self.factor_data.factor[group].unstack()
  38. largest = self.get_largest(backtest)
  39. weight_list = []
  40. for time_index, weight in largest.iterrows():
  41. weight[weight<0]=0
  42. weiht_result = (weight/weight.sum())
  43. weight_list.append(weiht_result.to_dict())
  44. stock_df = pd.DataFrame(weight_list, index=largest.index)
  45. return stock_df
  46. def get_smallest_stocks(self):
  47. low_ic = self.factor_ic[self.period][self.factor_ic[self.period]<-self.ic].index
  48. group = list(self.get_group(self.factor_data.group.values, low_ic))
  49. backtest = self.factor_data.factor[group].unstack()
  50. smallest = self.get_smallest(backtest)
  51. weight_list = []
  52. for time_index, weight in smallest.iterrows():
  53. weight[weight<0]=0
  54. weiht_result = (weight/weight.sum())
  55. weight_list.append(weiht_result.to_dict())
  56. stock_df = pd.DataFrame(weight_list, index=smallest.index)
  57. return stock_df
  1. small_stock = get_stock_df(factor_data, factor_ic_sample, period=10, n=5, ic=0.05).get_smallest_stocks()
  1. stock_df = small_stock.loc['20170104':]
  1. import numpy as np
  2. import talib as ta
  3. import pandas as pd
  4. import rqalpha
  5. from rqalpha.api import *
  6. #读取文件位置
  7. def init(context):
  8. context.codes = stock_df
  9. context.hs300 = '000300.XSHG'
  10. context.stocks = {}
  11. # scheduler.run_daily(find_pool)
  12. scheduler.run_weekly(find_pool, tradingday=1)
  13. def find_pool(context, bar_dict):
  14. codes = context.codes.loc[context.now].dropna()
  15. if codes is not None:
  16. context.stocks = codes
  17. else:
  18. context.stocks = {}
  19. def handle_bar(context, bar_dict):
  20. buy(context, bar_dict)
  21. def buy(context, bar_dict):
  22. pool = context.stocks
  23. if pool is not None:
  24. for stocks in context.portfolio.positions:
  25. if stocks not in pool:
  26. order_target_percent(stocks, 0)
  27. for codes, target in pool.items():
  28. try:
  29. order_target_percent(codes, target)
  30. except ValueError as ve:
  31. continue
  32. config = {
  33. "base": {
  34. "start_date": "2017-01-04",
  35. "end_date": "2017-12-22",
  36. "accounts": {'stock':1000000},
  37. "benchmark": "000300.XSHG"
  38. },
  39. "extra": {
  40. "log_level": "error",
  41. },
  42. "mod": {
  43. "sys_analyser": {
  44. "enabled": True,
  45. "plot": True
  46. }
  47. }
  48. }
  49. rqalpha.run_func(init=init, handle_bar=handle_bar, config=config)

output_23_1.png-68.6kB

添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注