# region imports
from AlgorithmImports import *
# endregion
import json
import math
import random
from datetime import datetime

import numpy as np
import pandas as pd
import cvxopt as opt
from cvxopt import blas, solvers
import lightgbm as lgb
from hmmlearn.hmm import GaussianHMM
from scipy.stats import linregress
from sklearn.linear_model import ElasticNet, Lasso
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt  # used only by the commented-out diagnostic plots
import seaborn as sns  # used only by the commented-out diagnostic plots

from config import general_setting
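# `general_setting` is defined in config.py. A minimal example of the keys this
# algorithm reads in initialize() (the values below are illustrative assumptions,
# not the production configuration):
#
# general_setting = {
#     "If_manual_choose_replication": False,
#     "replicate_method": "lasso",          # 'lasso' | 'elasticNet' | 'groupSelection'
#     "opt_method": "markowitz",            # 'markowitz' | 'StockRanker' | 'Momentum'
#     "rebalance_interval": "month",        # 'month' | 'week'
#     "Manual_Hedge": False,
#     "If_Hedge": False,
#     "hedge_size": 0.5,
#     "pct_num_group": 0.1,
#     "lasso_alpha": 0.01,
#     "if_manual_set_number_lasso": False,
#     "num_lasso": 30,
#     "elasticNet_alpha": 0.01,
#     "elasticNet_l1_ratio": 0.5,
#     "if_manual_set_number_elasticNet": False,
#     "num_elasticNet": 50,
#     "regime_interval": "month",
#     "regime_method": "GMM",               # 'SMA150' | 'SMA200' | 'GMM' | 'VIX' | 'vote'
#     "regime_lookback_window": 500,
#     "if_preselect": True,
#     "weight_threshold": 0.004,
#     "mom_window": 20,
# }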
class EnhancedIndexing(QCAlgorithm):
def initialize(self):
self.set_start_date(2024, 6, 1)
# self.set_end_date(2016, 1, 10)
self.set_cash(100000)
self.spy = self.add_equity('SPY', resolution = Resolution.MINUTE)
self.spy_sma150 = self.sma("SPY", 150)
self.spy_sma200 = self.sma("SPY", 200)
option = self.AddOption('SPY',Resolution.MINUTE)
self.option_symbol = option.Symbol
option.SetFilter(lambda universe: universe.include_weeklys())
self.contract = None
self.set_benchmark("SPY")
self.contract_size = None
self.strike = None
self.has_contract = False
self.curr_spy = None
# self.set_warm_up(100)
# Access to General Setting
self.if_manual_rep = general_setting["If_manual_choose_replication"]
self.replicate_method = general_setting['replicate_method']
self.opt_method = general_setting["opt_method"]
self.rebalance_interval = general_setting["rebalance_interval"]
self.manual_Hedge = general_setting["Manual_Hedge"]
if self.manual_Hedge:
self.if_hedge = general_setting["If_Hedge"]
self.hedge_size = general_setting['hedge_size']
self.pct_num_group = general_setting["pct_num_group"]
self.lasso_alpha = general_setting['lasso_alpha']
self.if_manual_set_number_lasso = general_setting["if_manual_set_number_lasso"]
self.num_lasso = general_setting["num_lasso"]
self.elasticNet_alpha = general_setting['elasticNet_alpha']
self.elasticNet_l1_ratio = general_setting['elasticNet_l1_ratio']
self.if_manual_set_number_elasticNet = general_setting["if_manual_set_number_elasticNet"]
self.num_elasticNet = general_setting["num_elasticNet"]
self.regime_interval = general_setting["regime_interval"]
self.regime_method = general_setting["regime_method"]
self.regime_lookback_window = general_setting["regime_lookback_window"]
self.if_preselect = general_setting['if_preselect']
self.weight_threshold = general_setting['weight_threshold']
self.mom_window = general_setting['mom_window']
# Add universe
_universe = self.add_universe(self.universe.etf("SPY"))
history = self.History(_universe, 360, Resolution.Daily)
history = history.droplevel('symbol', axis=0)
self.universe_dict = {}
self.ticker_list = []
        # Only the first constituent snapshot in the window is used to seed the universe
        for date, fundamentals in history.items():
            ls = []
            for fundamental in fundamentals:
                symbol = fundamental.symbol
                weight = fundamental.weight
                if self.if_preselect and weight <= self.weight_threshold:
                    continue
                self.ticker_list.append(symbol.value)
                self.universe_dict[symbol.value] = self.add_equity(symbol.value, resolution=Resolution.DAILY)
                ls.append({'ticker': symbol.value, 'symbol': symbol, 'weight': weight})
            break
self.init_df = pd.DataFrame(ls)
self.init_df['weight_adj'] = self.init_df['weight']/self.init_df['weight'].sum()
self.weight_universe = self.init_df.set_index('ticker')['weight_adj'].to_dict()
self.rebalance_signal = False
self.selected_stocks = []
self.portfolio_weights = {}
self.latent_states_sequence = None
self.best_status_sequence = None
# Add Options
# option = self.AddOption('SPX',Resolution.Daily)
# self.option_symbol = option.Symbol
# option.SetFilter(timedelta(0), timedelta(150))
        # Schedule an initial weight optimization at algorithm start-up
self.Schedule.On(
self.DateRules.today,
self.TimeRules.now,
self.rebalance_weights,
)
# Every Month rebalance
if self.rebalance_interval == 'month':
self.Schedule.On(
self.DateRules.month_start(self.spy.symbol, 0), # Month Start or Month end or add days offset (daysOffset = 0), or dynamically adjust based on weekdays, earnings, FOMC ...
self.TimeRules.after_market_open(self.spy.symbol, 10), # Before market or after market
self.rebalance_weights,
)
# Every week rebalance
if self.rebalance_interval == 'week':
self.Schedule.On(
self.DateRules.week_start(self.spy.symbol, 0),
# self.date_rules.every(DayOfWeek.THURSDAY),
self.TimeRules.after_market_open(self.spy.symbol, 10),# Before market or after market
self.rebalance_weights,
)
if not self.manual_Hedge:
self.Schedule.On(
self.DateRules.today,
self.TimeRules.now,
self.regime_recognization,
)
# Every Month recognize regimes
if self.rebalance_interval == 'month':
self.Schedule.On(
self.DateRules.month_start(self.spy.symbol, 0), # Month Start or Month end or add days offset (daysOffset = 0), or dynamically adjust based on weekdays, earnings, FOMC ...
self.TimeRules.after_market_open(self.spy.symbol, 5), # Before market or after market
self.regime_recognization,
)
# Every week recognize regime
if self.rebalance_interval == 'week':
self.Schedule.On(
self.DateRules.week_start(self.spy.symbol, 0),
# self.date_rules.every(DayOfWeek.THURSDAY),
self.TimeRules.after_market_open(self.spy.symbol, 5),# Before market or after market
self.regime_recognization,
)
# Manually set event date function
# date_rule = FuncDateRule(
# name="10th_day_of_the_month",
# get_dates_function=lambda start, end: [
# datetime(year, month, 10)
# for year in range(start.year, end.year) for month in range(1,12)
# ]
# )
# Every 2 week rebalance
# if self.rebalance_interval == 'biweek':
# self.Schedule.On(
# self.DateRules.week_start(self.spy.symbol,daysOffset= 0), # Month Start or Month end or add days offset, or dynamically adjust based on weekdays, earnings, FOMC ...
# self.TimeRules.after_market_open(self.spy.symbol, 10),
# self.rebalance_weights,
# )
# Cancel schedule
# self.schedule.remove(scheduled_event)
def regime_recognization(self):
random.seed(10)
'''
3 status:
Bullish, Bearish, flat
method:
Machine learning: classification
VIX value
Other indicators
'''
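        # Dispatch on the configured regime_method; every branch sets self.if_hedge.
        # 'SMA200', 'SMA150' and 'GMM' are implemented; 'VIX' and 'vote' are stubs.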
        if self.regime_method == 'SMA200':
            # Hedge whenever SPY trades below its 200-day SMA
            self.if_hedge = self.spy_sma200.current.value > self.spy.close
            return
        if self.regime_method == 'SMA150':
            # Hedge whenever SPY trades below its 150-day SMA
            self.if_hedge = self.spy_sma150.current.value > self.spy.close
            return
if self.regime_method == 'GMM':
            data_use = self.history(self.spy.symbol, timedelta(self.regime_lookback_window), Resolution.DAILY).droplevel('symbol', axis=0)
            data_use = data_use.reset_index()
            close = data_use['close']
            high = data_use['high'][5:]
            low = data_use['low'][5:]
            # HMM observation features, aligned so every series has len(close) - 5 rows
            logreturn = (np.log(np.array(close[1:])) - np.log(np.array(close[:-1])))[4:]  # 1-day log return
            logreturn5 = np.log(np.array(close[5:])) - np.log(np.array(close[:-5]))       # 5-day log return
            diffreturn = np.log(np.array(high)) - np.log(np.array(low))                   # intraday log high-low range
            closeidx = close[5:]
# data_1 = data_use.copy()
# data_1['log_close'] = np.log(data_1['close'])
# data_1['log_ret'] = data_1['log_close'].diff()
# data_1['log_high'] = np.log(data_1['high'])
# data_1['log_low'] = np.log(data_1['low'])
# data_1['log_ret5'] = data_1['log_close'].diff(5)
# data_1['diff_ret'] = data_1['log_high'] - data_1['log_low']
# data_1['closeidx'] = data_1['close']
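            # A 4-state Gaussian HMM is fit on [1-day log return, log high-low range,
            # 5-day log return]; the state with the best historical cumulative return is
            # treated as risk-on, and the portfolio hedges whenever the latest state differs.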
X = np.column_stack([logreturn,diffreturn,logreturn5])
hmm=GaussianHMM(n_components = 4, covariance_type ='diag', n_iter = 6000, random_state = 4).fit(X)
            try:
                latent_states_sequence = hmm.predict(X)
            except Exception:
                # Decoding failed: fail safe into the hedged state
                self.if_hedge = True
                return
            data_use_2 = data_use.drop_duplicates(subset=['time'], keep='last')
            datelist = pd.to_datetime(data_use_2.time[5:])
            data = pd.DataFrame({'datelist': datelist, 'logreturn': logreturn, 'state': latent_states_sequence}).set_index('datelist')
            # Find the latent state with the best in-sample cumulative return
            best_idx = 0
            best_cum = -np.inf
            for i in range(hmm.n_components):
                state = (latent_states_sequence == i)
                idx = np.append(0, state[:-1])  # trade the day after the state is observed
                data['state %d_return' % i] = data.logreturn.multiply(idx, axis=0)
                cum = np.exp(data['state %d_return' % i].cumsum()).iloc[-1]
                if cum > best_cum:
                    best_idx = i
                    best_cum = cum
# buy = (latent_states_sequence ==0)
# buy = np.append(0,buy[:-1])
# sell = (latent_states_sequence == 1)
# sell = np.append(0,sell[:-1])
# data['backtest_return'] = data.logreturn.multiply(buy,axis=0)-data.logreturn.multiply(sell,axis=0)
# plt.figure(figsize = (15,8))
# plt.plot_date(datelist,np.exp(data['backtest_return'].cumsum()),'-',label='backtest result')
# plt.legend()
# plt.grid(1)
            self.latent_states_sequence = latent_states_sequence[-1]
            self.best_status_sequence = best_idx
            # Hedge unless the most recent state is the best-performing one
            self.if_hedge = latent_states_sequence[-1] != best_idx
            return
        if self.regime_method == 'VIX':
            '''
            VIX vs EMA(100): hedge when VIX trades above its 100-day EMA.
            Not implemented yet; a sketch follows below.
            '''
            pass
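            # A minimal sketch of the VIX rule, assuming the following were added in
            # initialize() (neither subscription exists in this algorithm yet):
            #     self.vix = self.add_index("VIX", Resolution.DAILY)
            #     self.vix_ema100 = self.ema(self.vix.symbol, 100)
            # Then this branch could read:
            #     if self.vix_ema100.is_ready:
            #         self.if_hedge = self.securities[self.vix.symbol].price > self.vix_ema100.current.value
            #     return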
        if self.regime_method == 'vote':
            # Not implemented: intended to combine the individual signals above by majority vote
            return
def get_historical_data(self):
def slope(data):
return linregress(np.arange(data.shape[0]),data).slope
        ticker_list = self.ticker_list
        daily_dfs = []
        daily_dfs2 = []
        for i in ticker_list:
            try:
                daily_data = self.history(self.securities[i].symbol, timedelta(500), Resolution.DAILY).droplevel('symbol', axis=0)[['close']]
                daily_data1 = daily_data.rename(columns={'close': i})
                daily_data2 = daily_data.copy()
                daily_data2['ticker'] = i
                daily_data2['slope_20'] = daily_data2['close'].rolling(20, min_periods=20).apply(slope, raw=True)
                # NOTE: this is the inverted trailing 5-day return, P_{t-5}/P_t - 1;
                # a forward-return label would be close.shift(-5)/close - 1
                daily_data2['ret'] = daily_data2['close'].shift(5)/daily_data2['close'] - 1
                daily_dfs.append(daily_data1)
                daily_dfs2.append(daily_data2)
            except Exception:
                continue
daily_ = pd.concat(daily_dfs, axis = 1, sort = True, join = 'outer')
daily_1 = daily_.dropna(axis=1).reset_index()
spy_data = self.history([self.spy.symbol], timedelta(500), Resolution.DAILY).droplevel('symbol', axis=0)[['close']].rename(columns = {'close':'spy'})
daily_2 = pd.merge(daily_1, spy_data.reset_index(), on = ['time'], how = 'left')
daily2_ = pd.concat(daily_dfs2, axis = 0, sort = True, join = 'outer')
daily2_ = daily2_.dropna()
if len(self.selected_stocks) > 0:
daily2_ = daily2_[daily2_['ticker'].isin(self.selected_stocks)]
latest_date = daily2_.index[-1]
test_df = daily2_[daily2_.index > pd.to_datetime((self.Time.date()-timedelta(30))) ]
daily2_ = daily2_[daily2_.index <= pd.to_datetime((self.Time.date()-timedelta(30))) ]
daily2_ = daily2_.set_index([daily2_.index, 'ticker']).sort_index()
daily2_['label'] = daily2_.groupby(['time'])['ret'].transform(lambda x:pd.qcut(x,[0,0.2,0.4,0.6,0.8,1], labels=[1,2,3,4,5]))
test_df = test_df.set_index([test_df.index, 'ticker']).sort_index()
test_df['label'] = test_df.groupby(['time'])['ret'].transform(lambda x:pd.qcut(x,[0,0.2,0.4,0.6,0.8,1], labels=[1,2,3,4,5]))
test_df = test_df.loc[(latest_date,slice(None)),:]
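        # daily_1:  wide close prices (time + one column per ticker)
        # daily_2:  daily_1 plus SPY closes, used by the replication regressions
        # daily2_:  long (time, ticker) panel with slope_20 / ret / quintile label, ending 30 days ago (training)
        # test_df:  the same panel restricted to the most recent date (scoring)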
return daily_1, daily_2, daily2_, test_df
def get_fundamental_data(self):
fund_ = []
a_time = self.Time - timedelta(7)
b_time = self.Time
for i in self.ticker_list:
single_fundamental_history = self.history[Fundamental](i, a_time, b_time)
for fundamental in single_fundamental_history:
if fundamental.has_fundamental_data:
symbol = fundamental.symbol
industry1 = fundamental.asset_classification.MorningstarSectorCode
market_cap = fundamental.market_cap
# self.debug(f"symbol is {symbol}, sector is {industry1}")
row = {'ticker': i , 'industry1': industry1, 'market_cap': market_cap}
fund_.append(row)
else:
continue
        fund_df = pd.DataFrame(fund_)
        fund_df = fund_df.drop_duplicates(subset='ticker', keep='last')
        MORNINGSTAR_SECTOR_CODES = {
            # there is no code 0 ('Other') in this data set
            101: 'Basic Materials',
            102: 'Consumer Cyclical',
            103: 'Financial Services',
            104: 'Real Estate',
            205: 'Consumer Defensive',
            206: 'Healthcare',
            207: 'Utilities',
            308: 'Communication Services',
            309: 'Energy',
            310: 'Industrials',
            311: 'Technology',
        }
        # Keep only rows whose sector code has a known mapping
        fund_df = fund_df[fund_df['industry1'].isin(MORNINGSTAR_SECTOR_CODES)]
        fund_df['sector'] = fund_df['industry1'].apply(lambda x: MORNINGSTAR_SECTOR_CODES[x])
        return fund_df
def replicate_portfolio(self, method = 'lasso'):
random.seed(10)
daily_1, daily_2, _, _ = self.get_historical_data()
fund_df = self.get_fundamental_data()
        train_df = daily_1.drop(columns=['time'])
train_features = train_df.copy()
train_index = daily_2[['spy']]
# self.debug(f"train feature length is {train_features} and train_index length is {train_features}")
# test_features = test_df.copy()
# test_index = spy_data.reset_index()[['spy']][3020:]
assets_names = train_df.columns
if method == 'lasso':
lasso_reg = Lasso(self.lasso_alpha)
lasso_reg.fit(train_features, train_index)
# Get the model parameters
intercept_lasso = lasso_reg.intercept_
coefficients_lasso = lasso_reg.coef_
# print("Lasso Intercept: \n", intercept_lasso)
# print("Lasso Coefficients: \n", coefficients_lasso)
# Evaluate the model
y_pred_lasso = lasso_reg.predict(train_features)
# Calculate the mean squared error of the predictions
mse_lasso = mean_squared_error(train_index, y_pred_lasso)
df_Lasso = pd.DataFrame(coefficients_lasso)
df_Lasso.columns = ['weight']
df_Lasso['symbol'] = assets_names
df_Lasso['abs_weight'] = abs(df_Lasso['weight'])
df_Lasso_select = df_Lasso.sort_values(by=['abs_weight'], ascending= False)
df_Lasso_select = df_Lasso_select.head(30)
train_lasso_feature = train_features[df_Lasso_select.symbol]
train_lasso_feature_T = train_lasso_feature.T.reset_index()
train_lasso_feature_T = train_lasso_feature_T.rename(columns = {'index':'symbol'})
d = train_lasso_feature_T.drop(columns=['symbol'])
# Calculate the replication index
weights = np.array([list(df_Lasso_select['weight'])])
data = np.array(d)
result = np.dot(weights, data)
result = result[0]
# result = weights@data
# Calculate the TrackingError
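            # Tracking error: TE = sqrt( sum_t (r_SPY,t - r_rep,t)^2 / (T - 1) ),
            # i.e. the dispersion of daily return differences between SPY and the replica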
df_lasso_TrackingError = pd.DataFrame(train_index)
df_lasso_TrackingError['RepIdx'] = result
df_lasso_TrackingError['SPY_ret'] = df_lasso_TrackingError['spy'].pct_change()
df_lasso_TrackingError['RepIdx_ret'] = df_lasso_TrackingError['RepIdx'].pct_change()
df_lasso_TrackingError = df_lasso_TrackingError.dropna()
df_lasso_TrackingError['TD'] = df_lasso_TrackingError['SPY_ret'] - df_lasso_TrackingError['RepIdx_ret']
df_lasso_TrackingError['TD_squared'] = df_lasso_TrackingError['TD']**2
TE = np.sqrt(sum(df_lasso_TrackingError['TD_squared'])/(len(df_lasso_TrackingError)-1))
num_trial = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
min_te_num = 10
min_te = 10000
TE_ = []
for i in num_trial:
df_Lasso = pd.DataFrame(coefficients_lasso)
df_Lasso.columns = ['weight']
df_Lasso['symbol'] = assets_names
df_Lasso['abs_weight'] = abs(df_Lasso['weight'])
df_Lasso_select = df_Lasso.sort_values(by=['abs_weight'], ascending= False)
df_Lasso_select = df_Lasso_select.head(i)
train_lasso_feature = train_features[df_Lasso_select.symbol]
train_lasso_feature_T = train_lasso_feature.T.reset_index()
train_lasso_feature_T = train_lasso_feature_T.rename(columns = {'index':'symbol'})
d = train_lasso_feature_T.drop(columns=['symbol'])
weights = np.array([list(df_Lasso_select['weight'])])
data = np.array(d)
result = np.dot(weights, data)
result = result[0]
df_lasso_TrackingError = pd.DataFrame(train_index)
df_lasso_TrackingError['RepIdx'] = result
df_lasso_TrackingError['SPY_ret'] = df_lasso_TrackingError['spy'].pct_change()
df_lasso_TrackingError['RepIdx_ret'] = df_lasso_TrackingError['RepIdx'].pct_change()
df_lasso_TrackingError = df_lasso_TrackingError.dropna()
df_lasso_TrackingError['TD'] = df_lasso_TrackingError['SPY_ret'] - df_lasso_TrackingError['RepIdx_ret']
df_lasso_TrackingError['TD_squared'] = df_lasso_TrackingError['TD']**2
TE = np.sqrt(sum(df_lasso_TrackingError['TD_squared'])/(len(df_lasso_TrackingError)-1))
if TE < min_te:
min_te = TE
min_te_num = i
TE_.append(TE)
if self.if_manual_set_number_lasso:
return list(df_Lasso_select.head(self.num_lasso)['symbol'])
else:
return list(df_Lasso_select.head(min_te_num)['symbol'])
if method == 'elasticNet':
elastic_reg = ElasticNet(
alpha=self.elasticNet_alpha, l1_ratio=self.elasticNet_l1_ratio
)
elastic_reg.fit(train_features, train_index)
# Get the model parameters
intercept_elastic = elastic_reg.intercept_
coefficients_elastic = elastic_reg.coef_
# print("Elastic Net Intercept: \n", intercept_elastic)
# print("Elastic Net Coefficients: \n", coefficients_elastic)
# Evaluate the model
y_pred_elastic = elastic_reg.predict(train_features)
# Calculate the mean squared error of the predictions
mse_elastic = mean_squared_error(train_index, y_pred_elastic)
df_Elastic = pd.DataFrame(coefficients_elastic)
df_Elastic.columns = ['weight']
df_Elastic['symbol'] = assets_names
df_Elastic['abs_weight'] = abs(df_Elastic['weight'])
df_Elastic_select = df_Elastic.sort_values(by=['abs_weight'], ascending= False)
df_Elastic_select = df_Elastic_select.head(50)
train_Elastic_feature = train_features[df_Elastic_select.symbol]
train_Elastic_feature_T = train_Elastic_feature.T.reset_index()
train_Elastic_feature_T = train_Elastic_feature_T.rename(columns = {'index':'symbol'})
d = train_Elastic_feature_T.drop(columns=['symbol'])
# Calculate the replication index
weights = np.array([list(df_Elastic_select['weight'])])
data = np.array(d)
result = np.dot(weights, data)
result = result[0]
# result = weights@data
# Calculate the TrackingError
df_Elastic_TrackingError = pd.DataFrame(train_index)
df_Elastic_TrackingError['RepIdx'] = result
df_Elastic_TrackingError['SPY_ret'] = df_Elastic_TrackingError['spy'].pct_change()
df_Elastic_TrackingError['RepIdx_ret'] = df_Elastic_TrackingError['RepIdx'].pct_change()
df_Elastic_TrackingError = df_Elastic_TrackingError.dropna()
df_Elastic_TrackingError['TD'] = df_Elastic_TrackingError['SPY_ret'] - df_Elastic_TrackingError['RepIdx_ret']
df_Elastic_TrackingError['TD_squared'] = df_Elastic_TrackingError['TD']**2
TE = np.sqrt(sum(df_Elastic_TrackingError['TD_squared'])/(len(df_Elastic_TrackingError)-1))
num_trial = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
min_te_num = 10
min_te = 10000
TE_ = []
for i in num_trial:
df_Elastic = pd.DataFrame(coefficients_elastic)
df_Elastic.columns = ['weight']
df_Elastic['symbol'] = assets_names
df_Elastic['abs_weight'] = abs(df_Elastic['weight'])
df_Elastic_select = df_Elastic.sort_values(by=['abs_weight'], ascending= False)
df_Elastic_select = df_Elastic_select.head(i)
train_Elastic_feature = train_features[df_Elastic_select.symbol]
train_Elastic_feature_T = train_Elastic_feature.T.reset_index()
train_Elastic_feature_T = train_Elastic_feature_T.rename(columns = {'index':'symbol'})
d = train_Elastic_feature_T.drop(columns=['symbol'])
weights = np.array([list(df_Elastic_select['weight'])])
data = np.array(d)
result = np.dot(weights, data)
result = result[0]
df_Elastic_TrackingError = pd.DataFrame(train_index)
df_Elastic_TrackingError['RepIdx'] = result
df_Elastic_TrackingError['SPY_ret'] = df_Elastic_TrackingError['spy'].pct_change()
df_Elastic_TrackingError['RepIdx_ret'] = df_Elastic_TrackingError['RepIdx'].pct_change()
df_Elastic_TrackingError = df_Elastic_TrackingError.dropna()
df_Elastic_TrackingError['TD'] = df_Elastic_TrackingError['SPY_ret'] - df_Elastic_TrackingError['RepIdx_ret']
df_Elastic_TrackingError['TD_squared'] = df_Elastic_TrackingError['TD']**2
TE = np.sqrt(sum(df_Elastic_TrackingError['TD_squared'])/(len(df_Elastic_TrackingError)-1))
if TE < min_te:
min_te = TE
min_te_num = i
TE_.append(TE)
if self.if_manual_set_number_elasticNet:
return list(df_Elastic_select.head(self.num_elasticNet)['symbol'])
return list(df_Elastic_select.head(min_te_num)['symbol'])
if method == 'groupSelection':
# Calculate every stock's tracking error
ret_daily = daily_2.set_index(['time']).pct_change()
tracking_error_dict = {}
ticker_list_new = daily_2.columns[1:-1]
for i in ticker_list_new:
tmp_df = ret_daily[[i,'spy']].dropna()
tmp_df['TD'] = tmp_df['spy'] - tmp_df[i]
tmp_df['TD_squared'] = tmp_df['TD']**2
TE = np.sqrt(sum(tmp_df['TD_squared'])/(len(tmp_df)-1))
tracking_error_dict[i] = TE
fund_df_new = fund_df[fund_df['ticker'].isin(ticker_list_new)]
fund_df_new = fund_df_new[['ticker','market_cap','sector']]
fund_df_new['tracking_error'] = fund_df_new['ticker'].apply(lambda x:tracking_error_dict[x])
fund_df_group_industry = fund_df_new.groupby(['sector'])
industry_dict = {}
select_stock = []
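            # For each sector, bucket constituents into market-cap terciles and keep
            # the lowest-tracking-error slice of each (sector, size) bucket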
for industry_sec, industry_sec_df in fund_df_group_industry:
# try:
industry_sec_df = industry_sec_df.sort_values(by = ['market_cap'])
industry_sec_df = industry_sec_df[industry_sec_df['market_cap'] > 0 ]
industry_sec_df['market_size'] = industry_sec_df['market_cap'].transform(lambda x:pd.qcut(x,[0,0.33,0.67,1],labels=['small','medium','large']))
                # groupby with a list key yields tuple keys in pandas >= 2.0
                sector_key = industry_sec[0] if isinstance(industry_sec, tuple) else industry_sec
                industry_dict[sector_key] = industry_sec_df
                for _, size_df in industry_sec_df.groupby(['market_size']):
                    num = int(self.pct_num_group * len(size_df)) + 1  # configured fraction per bucket
                    size_df = size_df.sort_values(by=['tracking_error'], ascending=True)
                    select_stock = select_stock + list(size_df['ticker'])[:num]
# except:
# continue
return select_stock
        # if method == 'passive_Ownership':
        # if method == 'compare':
        return []  # unknown replication method: select nothing
def optimal_portfolio(self, method='markowitz'):
random.seed(10)
        if method == 'markowitz':
            # Obtain returns
            daily_1 = self.get_historical_data()[0]
            opt_df1 = daily_1[['time'] + self.selected_stocks]
            returns = opt_df1.set_index(['time']).pct_change().dropna()
            # cvxopt expects assets in rows: transpose to shape (n_assets, n_obs)
            returns = np.asmatrix(returns.T)
            n = returns.shape[0]  # number of assets
            N = 3
            mus = [10 ** (5.0 * t / N - 1.0) for t in range(N)]  # sweep of risk-aversion parameters
            # Convert to cvxopt matrices
            S = opt.matrix(np.cov(returns))              # asset covariance matrix
            pbar = opt.matrix(np.mean(returns, axis=1))  # mean returns
            # Constraints: G x <= h enforces x >= -1/500 (a small short tolerance);
            # A x = b enforces full investment (weights sum to 1)
            G = -opt.matrix(np.eye(n))
            h = opt.matrix(1/500, (n, 1))
            A = opt.matrix(1.0, (1, n))
            b = opt.matrix(1.0)
            # Trace the efficient frontier: for each mu, solve min_x (mu/2)·x'Sx - pbar'x
            portfolios = [solvers.qp(mu * S, -pbar, G, h, A, b)['x']
                          for mu in mus]
            # Returns and risks along the frontier
            returns = [blas.dot(pbar, x) for x in portfolios]
            risks = [np.sqrt(blas.dot(x, S * x)) for x in portfolios]
            # Fit risk as a quadratic in return and solve once more at the implied optimum
            m1 = np.polyfit(returns, risks, 2)
            x1 = np.sqrt(m1[2] / m1[0])
            try:
                wt = solvers.qp(opt.matrix(x1 * S), -pbar, G, h, A, b)['x']
                return np.asarray(wt)
            except Exception:
                self.rebalance_signal = False
                return 0
if method == 'StockRanker':
def prepare_groups(df):
df_copy = df.copy(deep=True)
df_copy['day'] = df_copy.index
group = df_copy.groupby('day')['day'].count()
return group
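            # lambdarank needs query-group sizes: one group per trading day, so the
            # ranker learns the cross-sectional (within-day) ordering of stocks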
            def train(x_train, y_train, q_train, model_save_path):
                '''
                Train the LambdaRank model and save it to model_save_path
                '''
                train_data = lgb.Dataset(x_train, label=y_train, group=q_train)
                params = {
                    'task': 'train',
                    'boosting_type': 'gbrt',    # gradient-boosted trees ('gbrt' is an alias of 'gbdt')
                    'objective': 'lambdarank',  # learning-to-rank objective
                    'metric': 'ndcg',           # evaluation metric
                    'max_position': 10,         # NDCG truncation position
                    'metric_freq': 1,           # report the metric every iteration
                    'train_metric': True,       # also evaluate on the training set
                    'ndcg_at': [10],
                    'max_bin': 255,             # max histogram bins per feature (255 fits in uint8)
                    'num_iterations': 500,
                    'learning_rate': 0.01,
                    'num_leaves': 31,
                    # 'max_depth': 6,
                    'tree_learner': 'serial',   # single-machine tree learner
                    'min_data_in_leaf': 30,     # minimum samples per leaf
                    'verbose': 2                # verbose training output
                }
gbm = lgb.train(params, train_data, valid_sets=[train_data])
# import pickle
# serializedObject = pickle.dumps(gbm)
# modelKey = "MyModel"
# qb.ObjectStore.SaveBytes(modelKey, serializedObject)
# serializedObject = bytes(qb.ObjectStore.ReadBytes(modelKey))
gbm.save_model(model_save_path)
# qb.ObjectStore.Save(model_key, clf)
            def predict(x_test, model_input_path):
                '''
                Score x_test and return row indices sorted by descending predicted relevance
                '''
                gbm = lgb.Booster(model_file=model_input_path)  # load the saved model
                ypred = gbm.predict(x_test)
                predicted_sorted_indexes = np.argsort(ypred)[::-1]  # highest score first
                return predicted_sorted_indexes
_, _, daily2_, test_df = self.get_historical_data()
x_train = daily2_[['slope_20']]
y_train = daily2_['label'].astype(int)
q_train = prepare_groups(x_train)
x_test = test_df[['slope_20']]
y_test = test_df['label'].astype(int)
q_test = prepare_groups(x_test)
train(x_train, y_train, q_train, 'stock_ranker.pkl')
t_results = predict(x_test, 'stock_ranker.pkl')
# self.debug(test_df.index)
new_seq = [i[1] for i in test_df.index]
rank = [new_seq[x] for x in t_results ]
self.selected_stocks = rank
stock_weights = np.array(
[1 / math.log(i + 2) for i in range(0, len(rank))]
)
stock_weights = stock_weights / stock_weights.sum()
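            # Rank-decayed weights: w_i ∝ 1 / ln(i + 2), so the top-ranked stock gets
            # the largest weight and the tail decays slowly; normalized to sum to 1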
return stock_weights
        if method == 'Momentum':
            daily_1, _, _, _ = self.get_historical_data()
            pct_daily = daily_1.set_index(['time']).pct_change()
            winrate_daily = pct_daily.dropna().copy()
            winrate_daily = winrate_daily.apply(lambda x: np.where(x > 0, 1, 0))
            winrate_daily = winrate_daily.tail(self.mom_window)
            # Win rate = fraction of up days over the momentum window
            winrate_dict = {}
            for i in pct_daily.columns:
                winrate_dict[i] = winrate_daily[i].sum() / self.mom_window
winrate_df = pd.DataFrame.from_dict(winrate_dict, orient='index',columns=['winrate'])
winrate_df = winrate_df.reset_index().rename(columns= {'index':'ticker'})
            stocks = self.selected_stocks
            winrate_df = winrate_df[winrate_df['ticker'].isin(stocks)]
            winrate_df = winrate_df.sort_values(by=['winrate'], ascending=False).reset_index(drop=True)
weight_df = pd.DataFrame.from_dict(self.weight_universe,orient='index',columns=['weight'] ).reset_index().rename(columns= {'index':'ticker'})
winrate_df = winrate_df.merge(weight_df, how = 'left', on = ['ticker'])
winrate_df['weight_adj'] = winrate_df['weight']/winrate_df['weight'].sum()
            # Momentum tilt: add a small bump that decreases with win-rate rank
            winrate_df['weight_new'] = winrate_df['weight_adj'] + 0.001 * (len(winrate_df) - winrate_df.index)
winrate_df['weight_new_adj'] = winrate_df['weight_new']/winrate_df['weight_new'].sum()
self.selected_stocks = list(winrate_df['ticker'])
return list(winrate_df['weight_new_adj'])
def rebalance_weights(self):
self.selected_stocks = self.replicate_portfolio(method = self.replicate_method) # Obtain stocks list
self.rebalance_signal = True
weights = self.optimal_portfolio(method = self.opt_method)
        if self.rebalance_signal:
            if self.opt_method == 'markowitz':
                for stock, weight in zip(self.selected_stocks, weights):
                    self.portfolio_weights[stock] = weight[0]  # wt is an (n, 1) column vector
            if self.opt_method == 'StockRanker' or self.opt_method == 'Momentum':
                for stock, weight in zip(self.selected_stocks, weights):
                    self.portfolio_weights[stock] = weight
return
    def submit_orders(self):
        price = self.curr_spy
        if self.if_hedge:
            for i in self.selected_stocks:
                self.set_holdings(i, self.portfolio_weights[i])
            # self.set_holdings('SPY', -1)
            if self.contract is None or price is None:
                # No option chain (or SPY price) has been seen yet; skip the hedge leg
                self.has_contract = False
                return
            self.debug(price)
            self.contract_size = int(self.portfolio.total_portfolio_value * self.hedge_size / (price * 100))
            self.debug(f"contract size is {self.contract_size}")
            self.market_order(self.contract.Symbol, self.contract_size)
            self.has_contract = True
        else:
            self.contract = None
            self.contract_size = None
            self.strike = None
            for i in self.selected_stocks:
                self.set_holdings(i, self.portfolio_weights[i])
            self.has_contract = False
    def on_data(self, slice: Slice):
        bar = slice.bars.get("SPY")
        if bar:
            self.curr_spy = bar.close
        chain = slice.option_chains.get(self.option_symbol)
        if chain:
            # Hedge with OTM puts: strikes below the underlying's last price
            contracts = [i for i in chain if i.Right == OptionRight.PUT and i.Strike < i.UnderlyingLastPrice]
            strikes = sorted(set(i.Strike for i in contracts), reverse=True)
            if len(strikes) >= 2:
                strike = strikes[1]  # second-highest OTM strike
                self.strike = strike
                contracts = [contract for contract in contracts if contract.Strike == strike]
                expiry = sorted(set(contract.expiry for contract in contracts))[0]  # nearest expiry
                self.contract = [contract for contract in contracts if contract.expiry == expiry][0]
# if not self.portfolio.invested:
# self.market_order(self.contract.Symbol, 1)
        if self.rebalance_signal:
            self.debug('rebalance')
            # if self.has_contract and slice[self.spy.symbol].close < self.strike:
            #     self.exercise_option(self.contract, self.contract_size)
            self.liquidate()
            self.has_contract = False
            self.debug(f"selected stocks: {len(self.selected_stocks)}")
            self.debug(f"portfolio weights: {len(self.portfolio_weights)}")
            self.submit_orders()
            self.rebalance_signal = False
            # self.debug(f"latent_states_sequence is {self.latent_states_sequence}")
            # self.debug(f"best state is {self.best_status_sequence}")
            return
# elif not self.portfolio.invested:
# self.regime_recognization()
# self.rebalance_weights()
# self.liquidate()
# self.submit_orders()
# self.rebalance_signal = False
# return
'''
stop loss and take profit (if necessary)
'''
return
def OnOrderEvent(self, orderEvent):
if orderEvent.Status != OrderStatus.Filled:
return
# Webhook Notification
symbol = orderEvent.symbol
price = orderEvent.FillPrice
quantity = orderEvent.quantity
self.Log(f"SP500 Enhanced-Indexing Paper order update] \nSymbol: {symbol} \nPrice: {price} \nQuantity: {quantity}")
a = { "text": f"[SP500 Enhanced-Indexing Paper order update] \nSymbol: {symbol} \nPrice: {price} \nQuantity: {quantity}" }
payload = json.dumps(a)
self.notify.web("https://hooks.slack.com/services/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", payload)