# | Overall Statistics |
#
# Total Trades 2894 | Average Win 0.25% | Average Loss -0.22% | Compounding Annual Return 4.890% | Drawdown 5.200% | Expectancy 0.127 | Net Profit 46.508% | Sharpe Ratio 1.029 | Loss Rate 47% | Win Rate 53% | Profit-Loss Ratio 1.14 | Alpha 0.043 | Beta 0.076 | Annual Standard Deviation 0.047 | Annual Variance 0.002 | Information Ratio -0.132 | Tracking Error 0.172 | Treynor Ratio 0.641 | Total Fees $12233.15
import math
import bisect
import operator
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import itertools
from sklearn.mixture import BayesianGaussianMixture as BGM
import random
import talib as tb
import numpy as np
import pandas as pd
import sklearn as sn
from sklearn.neighbors import KNeighborsRegressor as KNR
from sklearn.tree import DecisionTreeRegressor as DTR
from sklearn.metrics import mean_absolute_error as mae
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.ensemble import RandomForestRegressor as RFR
from clr import AddReference
AddReference("System")
AddReference("QuantConnect.Algorithm")
AddReference("QuantConnect.Common")
from System import *
from QuantConnect import *
from QuantConnect.Algorithm import *
from sklearn.model_selection import cross_validate as CV
# Fix the global RNG seeds so that indicator-period draws, random feature
# subsets and KNN neighbour counts are reproducible across backtests.
seed = 1
random.seed(seed)
np.random.seed(seed)
class Data_preparator():
    """Builds talib-indicator feature matrices over several timeframes and
    forward-looking SMA-return labels from rolling OHLCV history windows.

    The host algorithm assigns ``hist_open``/``hist_close``/``hist_high``/
    ``hist_low``/``hist_vol`` (iterables of floats, most-recent-first
    RollingWindows) before calling ``get_data_for_train``/``get_data_for_eval``.
    """
    def __init__(self, y_periods=None, max_timeframe=2, max_for_period=15):
        self.eval_lookback = 500      # bars used when building evaluation features
        self.train_lookback = 6000    # bars used for training
        self.warmup_count = self.eval_lookback + self.train_lookback
        # Avoid the shared-mutable-default-argument pitfall.
        self.y_periods = [10, 20, 30, 60] if y_periods is None else y_periods
        self.max_timeframe = max_timeframe    # exclusive bound on timeframe multiplier
        self.max_for_period = max_for_period  # exclusive bound on indicator period scaler
    def get_labels(self, df_xy):
        """Append one forward-return label column per horizon in self.y_periods.

        y<p> at bar t is (SMA(p) at t+p - close at t) / close at t.  The last
        p rows stay 0 and the earliest rows are NaN (SMA warm-up); the caller
        drops NaN rows afterwards.
        """
        self.outputs = []
        for y_period in self.y_periods:
            name = 'y' + str(y_period)
            self.outputs.append(name)
            sma = tb.SMA(self.close, timeperiod=y_period)
            col = np.zeros(len(self.close))
            col[:-y_period] = (sma[y_period:] - self.close[:-y_period]) / self.close[:-y_period]
            # Single whole-column assignment: the original chained
            # df[col][:-p] = ... wrote through a temporary on an int column
            # and could be silently discarded (SettingWithCopy).
            df_xy[name] = col
        return df_xy
    def market_data_interface(self, how='train'):
        """Snapshot rolling-window history into numpy arrays on self.

        how='train' keeps the full warm-up window, how='eval' only the
        evaluation lookback.  Any other value is rejected explicitly (the
        original fell through with `length` undefined, raising NameError).
        """
        if how == 'train':
            length = self.warmup_count
        elif how == 'eval':
            length = self.eval_lookback
        else:
            raise ValueError("how must be 'train' or 'eval', got %r" % (how,))
        self.opn = np.array(list(self.hist_open), float)[-length:]
        self.close = np.array(list(self.hist_close), float)[-length:]
        self.high = np.array(list(self.hist_high), float)[-length:]
        self.low = np.array(list(self.hist_low), float)[-length:]
        self.vol = np.array(list(self.hist_vol), float)[-length:]
    def get_data_for_train(self):
        """Return (x, y) training matrices; re-randomizes indicator periods."""
        # Fresh random indicator-period multipliers for this training cycle.
        self.rand_vec = np.random.randint(high=15, low=2, size=10)
        self.market_data_interface()
        df_xy = pd.DataFrame(index=np.arange(self.warmup_count))
        df_xy = self.timeframe_aggregator(df_x=df_xy)
        df_xy = self.get_labels(df_xy)
        df_xy = df_xy.dropna(axis=0, how='any')
        self.df_xy = df_xy
        # .values replaces DataFrame.as_matrix(), which was removed from pandas.
        x = self.df_xy.drop(self.outputs, axis=1).values
        y = self.df_xy[self.outputs].values
        return x, y
    def get_data_for_eval(self):
        """Return the most recent feature row, shaped (1, n_features)."""
        self.market_data_interface(how='eval')
        df_xy = self.timeframe_aggregator()
        x = df_xy.values[-1, :]
        return x.reshape(1, -1)
    def timeframe_aggregator(self, df_x=None):
        """Compute indicator features for each odd multiplier < max_timeframe."""
        if df_x is None:
            # Evaluation path: size the frame to the evaluation window.
            df_x = pd.DataFrame(index=np.arange(self.eval_lookback))
        for mult in range(1, self.max_timeframe, 2):
            self.compute_indics(df_x=df_x, idd='_' + str(mult), multiplier=mult)
        return self.df_x
    def reset_indicators(self):
        """Rebuild features/labels from the current history windows."""
        self.get_data_for_train()
    def compute_indics(self, df_x=None, idd=None, multiplier=1):
        """Add a battery of talib indicator columns (suffix `idd`) to df_x."""
        if multiplier == 1:
            opn, high, low, close, vol = self.opn, self.high, self.low, self.close, self.vol
        else:
            # NOTE: get_bigger_tf returns (open, close, high, low, volume).
            opn, close, high, low, vol = self.get_bigger_tf(multiplier)
        if df_x is None:
            df_x = pd.DataFrame(index=np.arange(self.eval_lookback))
        for i in range(1, self.max_for_period):
            # Column names carry fixed base periods, but the actual talib
            # periods are randomized per training cycle via rand_vec.
            df_x['CCI_'+str(14*i)+idd] = tb.CCI(high, low, close, timeperiod=self.rand_vec[0]*i)
            df_x['AROON_'+str(10*i)+idd] = tb.AROONOSC(high, low, timeperiod=self.rand_vec[1]*i)
            df_x['sma_slow_'+str(i*25)+idd] = tb.SMA(close, timeperiod=self.rand_vec[2]*i)/close
            df_x['sma'+str(i)+idd] = tb.ROCP(close, timeperiod=self.rand_vec[3]*i)
            df_x['atr'+str(i)+idd] = tb.WILLR(high, low, close, timeperiod=self.rand_vec[4]*i)
            df_x['natr'+str(i)+idd] = tb.NATR(high, low, close, timeperiod=self.rand_vec[5]*i)
            df_x['cci'+str(i)+idd] = tb.DX(high, low, close, timeperiod=i*self.rand_vec[6])
            df_x['mfi'+str(i)+idd] = tb.MFI(high, low, close, vol, timeperiod=self.rand_vec[7]*i)
        # Fixed-period volatility / volume-flow / cycle features.
        df_x['atr_fast'+idd] = tb.ATR(high, low, close, timeperiod=30)
        df_x['atr_slow'+idd] = tb.ATR(high, low, close, timeperiod=160)
        df_x['ad'+idd] = tb.AD(high, low, close, vol)
        df_x['obv'+idd] = tb.OBV(close, vol)
        df_x['hil1'+idd] = tb.HT_DCPERIOD(close)
        df_x['hil2'+idd] = tb.HT_DCPHASE(close)
        self.df_x = df_x
    def get_bigger_tf(self, multiplier):
        """Aggregate 1x bars into `multiplier`-bar pseudo-bars, broadcast back
        to the original length.  Returns (open, close, high, low, volume)."""
        opn, high, low, close, vol = self.opn, self.high, self.low, self.close, self.vol
        # Seed each output from its own series.  The original seeded every
        # array from `opn`, leaving open prices in the un-aggregated head of
        # the high/low/close/volume outputs (copy-paste bug).
        new_opn = np.array(opn)
        new_high = np.array(high)
        new_close = np.array(close)
        new_low = np.array(low)
        new_vol = np.array(vol)
        for i in range(multiplier, opn.shape[0]-multiplier+1, multiplier):
            new_opn[i:i+multiplier] = opn[i-multiplier]
            new_high[i:i+multiplier] = np.max(high[i-multiplier:i+1])
            new_low[i:i+multiplier] = np.min(low[i-multiplier:i+1])
            new_close[i:i+multiplier] = close[i]
            new_vol[i:i+multiplier] = np.sum(vol[i-multiplier:i+1])
        return new_opn, new_close, new_high, new_low, new_vol
class Interactor():
    """Searches random feature-index subsets and scores each with a
    cross-validated KNN regressor; returns the best-scoring subsets."""
    def __init__(self, num_of_subsets=2000, num_of_best=5, max_num_of_feat=6,
                 y_period_coeffs=None):
        self.num_of_subsets = num_of_subsets    # random subsets to evaluate
        self.num_of_best = num_of_best          # top subsets to return
        self.max_num_of_feat = max_num_of_feat  # exclusive bound on subset size
        self.eval_lookback = 1000
        self.train_lookback = 6000
        self.warmup_count = self.eval_lookback + self.train_lookback
        # Avoid the shared-mutable-default-argument pitfall.
        self.y_period_coeffs = [0.1, 0.1, 0.1, 0.1] if y_period_coeffs is None else y_period_coeffs
    def get_pred_minus_test(self, X, y):
        """Cross-validated mean absolute error of a default KNN on (X, y)."""
        esti = KNR()
        if y.ndim < 2:
            y = y.reshape(-1, 1)
        if X.ndim < 2:
            X = X.reshape(-1, 1)
        # cross_validate clones and fits the estimator itself, so the
        # original's explicit esti.fit(X, y) beforehand was wasted work.
        cv = CV(estimator=esti, n_jobs=-1, X=X, y=y, scoring='neg_mean_absolute_error')
        res = -np.average(cv['test_score'])
        return res
    def get_score_diff(self, x, y):
        """Return (score_diff, score): coefficient-weighted averages over the
        label horizons.  `score` is the inverse CV error; `score_diff` is a
        placeholder constant 1 kept for interface compatibility."""
        score_diff_final = []
        score_final = []
        for y_period in range(y.shape[1]):
            subset_pred = self.get_pred_minus_test(x, y[:, y_period])
            score_diff = 1
            score = 1.0 / subset_pred
            score_final.append(score * self.y_period_coeffs[y_period])
            score_diff_final.append(score_diff * self.y_period_coeffs[y_period])
        score_diff_final = np.sum(score_diff_final) / float(len(score_diff_final))
        score_final = np.sum(score_final) / float(len(score_final))
        return score_diff_final, score_final
    def split(self, x, y):
        """Chronological split: last 2000 rows become the test set."""
        split = 2000
        X_train, X_test, y_train, y_test = x[:-split, :], x[-split:, :], y[:-split], y[-split:]
        return X_train, X_test, y_train, y_test
    def scale(self, X_train, X_test, y_train, y_test):
        """Standardize features/targets with statistics fitted on the train split."""
        self.scaler_x = StandardScaler()
        self.scaler_y = StandardScaler()
        self.scaler_x.fit(X_train)
        self.scaler_y.fit(y_train)
        X_train, X_test = self.scaler_x.transform(X_train), self.scaler_x.transform(X_test)
        y_train, y_test = self.scaler_y.transform(y_train), self.scaler_y.transform(y_test)
        return X_train, X_test, y_train, y_test
    def generate_subsets(self, x, y):
        """Score num_of_subsets distinct, duplicate-free random index subsets."""
        num_of_inds = x.shape[1]
        # NOTE(review): the scaled splits are computed but scoring below uses
        # the raw x/y — kept as-is to preserve behavior; confirm intent.
        X_train, X_test, y_train, y_test = self.split(x, y)
        X_train, X_test, y_train, y_test = self.scale(X_train, X_test, y_train, y_test)
        df_res = pd.DataFrame(index=np.arange(self.num_of_subsets),
                              columns=['score_diff', 'subs_score', 'indexes'])
        seen = set()  # O(1) membership; the original list scan was O(n) per draw
        for i in range(self.num_of_subsets):
            temp_subset_size = np.random.randint(low=2, high=self.max_num_of_feat)
            rand_subs_inds = np.random.randint(low=0, high=num_of_inds, size=temp_subset_size)
            # Re-draw until the subset has no repeated indices and is new.
            while len(rand_subs_inds) > len(set(rand_subs_inds)) or str(np.sort(rand_subs_inds)) in seen:
                rand_subs_inds = np.random.randint(low=0, high=num_of_inds, size=temp_subset_size)
            seen.add(str(np.sort(rand_subs_inds)))
            # .at[row, col] writes into the frame itself.  The original
            # `df_res.loc[i].score_diff = ...` assigned onto a temporary row
            # copy, so the scores could be silently lost.
            score_diff, subs_score = self.get_score_diff(x[:, rand_subs_inds], y)
            df_res.at[i, 'score_diff'] = score_diff
            df_res.at[i, 'subs_score'] = subs_score
            df_res.at[i, 'indexes'] = list(rand_subs_inds)
        return df_res
    def get_top_subsets(self, x, y):
        """Return the index lists of the num_of_best highest-scoring subsets."""
        df = self.generate_subsets(x, y)
        ranked = df.sort_values(by='subs_score', ascending=False)
        final = ranked.iloc[:self.num_of_best].indexes.values
        self.tempy = ranked.iloc[0].subs_score  # best score, kept for debugging
        return final
class Estimator():
    """Thin wrapper around a KNN regressor that only ever sees a fixed
    subset of the feature columns (given as index list `indx`)."""
    def __init__(self, indx):
        # Feature-column indices this estimator is restricted to.
        self.inds = list(np.array(indx))
        self.create()
    def create(self):
        """Instantiate the underlying model with a random neighbour count."""
        n_neighbors = np.random.randint(low=2, high=15)
        self.classy = KNR(n_neighbors=n_neighbors, weights='distance')
    def fit(self, tr_x, tr_y, te_x=None, te_y=None):
        """Fit on the training rows, restricted to the feature subset.
        te_x/te_y are accepted for interface compatibility and unused."""
        subset = tr_x[:, self.inds]
        self.classy.fit(subset, tr_y)
    def predict(self, x):
        """Predict from the same feature subset used for fitting."""
        subset = x[:, self.inds]
        return self.classy.predict(subset)
class Predictor():
    """Ensemble of subset-restricted KNN estimators plus a 'manager' RFC that
    picks which estimator to use per sample, and a magnitude veto that
    suppresses weak single-sample predictions."""
    def __init__(self, y_period_ind=0, filter_coef=1.):
        self.y_period_ind = y_period_ind  # which label-horizon column to predict
        self.error_frac = 0.25
        # Veto threshold: if avg historical |prediction| exceeds the current
        # ensemble average times filter_coef, do not trade.
        self.filter_coef = filter_coef
    def split(self, x, y):
        """Chronological split.  NOTE(review): 'train' is the most recent 3000
        rows and 'test' the older remainder — the inverted naming appears
        intentional (train on recent data); confirm before changing."""
        X_test, X_train, y_test, y_train = x[:-3000, :], x[-3000:, :], y[:-3000], y[-3000:]
        return X_train, X_test, y_train, y_test
    def train(self, x, y, subsets):
        """Fit one Estimator per feature subset, then the manager and filter."""
        self.scaler_x = StandardScaler()
        self.x = self.scaler_x.fit_transform(x)
        if y.ndim == 1:
            y = y.reshape(-1, 1)
        self.y = y[:, self.y_period_ind].reshape(-1, 1)
        self.estimators = []
        self.X_train, self.X_test, self.y_train, self.y_test = self.split(self.x, self.y)
        for subset in subsets:
            est = Estimator(subset)
            est.fit(self.X_train, self.y_train)
            self.estimators.append(est)
        self.train_manager_estimator()
    def train_filter(self):
        """Fit an RFC that labels test samples where the ensemble's relative
        error is below ~2/3 of the average error as 'tradeable' (1)."""
        errors = np.zeros((self.X_test.shape[0], len(self.estimators)), float)
        preds = np.zeros((self.X_test.shape[0], len(self.estimators)), float)
        for it, est in enumerate(self.estimators):
            y_pred = est.predict(self.X_test)
            preds[:, it] = y_pred.reshape(-1,)
            # Relative error; explodes when y_test ~ 0, hence the clamp below.
            errors[:, it] = abs(self.y_test.reshape(-1,)-y_pred.reshape(-1,))/abs(self.y_test.reshape(-1,))
        errors[errors > 5] = 1.0
        agg_errs = np.average(errors, axis=1)
        avg = np.sum(errors.flatten()) / float(len(errors.flatten())) / 1.5
        labels_temp = np.array(agg_errs)
        labels = np.array(labels_temp)
        labels[labels_temp > avg] = 0
        labels[labels_temp <= avg] = 1
        self.labels = np.array(np.count_nonzero(labels))  # diagnostic count only
        self.filter = RFC()
        self.filter.fit(self.X_test, labels)
        self.avg_pred = np.average(abs(preds.flatten()))
    def train_manager_estimator(self):
        """Fit an RFC mapping each sample to its lowest-error estimator index."""
        predic_errors = np.zeros((self.x.shape[0], len(self.estimators)), float)
        for it, est in enumerate(self.estimators):
            y_pred = est.predict(self.x).reshape(-1, 1)
            difference = abs(y_pred - self.y.reshape(-1, 1))
            predic_errors[:, it] = difference.reshape(-1,)
        best_esti = np.argmin(predic_errors, axis=1)
        self.manager = RFC()
        self.manager.fit(self.x, best_esti)
        self.train_filter()
    def ensamble_predict(self, x):
        """Predict for a single sample (with the weak-signal veto) or a batch.

        Single-row input: returns 0 when the veto triggers, otherwise the
        manager-chosen estimator's prediction.  Batch input: returns an
        (n, 1) array with no veto applied.
        """
        if x.shape[0] == 1:
            x = self.scaler_x.transform(x.reshape(1, -1))
            estimator_num = self.manager.predict(x[-1, :].reshape(1, -1))[0]
            preds = []
            for est in self.estimators:
                preds.append(est.predict(x))
            # Veto: skip trading when the current ensemble signal is weaker
            # than the historical average magnitude scaled by filter_coef.
            if self.avg_pred > np.average(preds) * self.filter_coef:
                return 0
            prediction = self.estimators[estimator_num].predict(x.reshape(1, -1))
            return prediction
        else:
            x = self.scaler_x.transform(x)
            prediction = []
            for i in range(x.shape[0]):
                estimator_num = self.manager.predict(x[i, :].reshape(1, -1))[0]
                prediction.extend(self.estimators[estimator_num].predict(x[i, :].reshape(1, -1)))
            # The original had a second, unreachable `return prediction`
            # after this statement; it has been removed.
            return np.array(prediction).reshape(-1, 1)
# class Y_preparator():
# def __init__(self,x):
# self.periods = [30]
# self.x = x # assuming that x[:,0] = close
# def get_atr_result(self,period):
# y = np.zeros(self.x[:,0])
# atr = tb.ATR(self.x[:,0],period)
# time_coef = 1.0/np.log(np.arange(2+self.periods[0])
# for i in range(period,self.x.shape[0]-self.periods[0],1):
# y[i-self.periods[0]] = ((x[i-self.periods[0],0]-np.max(x[i-self.periods[0]:i+1,0]*time_coef)) + (x[i-self.periods[0],0]-np.min(x[i-self.periods[0]:i+1,0]*time_coef)))/atr[i]
# y = y[y!=0]
# atr = atr[atr is not np.nan]
# return np.sum(y)*np.average(atr)
# def optimize_atr_period(self):
# periods = []
# results = []
# for atr_period in range(10,100,5):
# reults.append(self.get_atr_result(atr_period))
# best = periods[np.argmax(results)]
class Position():
    """Tracks a pyramiding position with volatility-scaled add/exit levels."""
    def __init__(self):
        self.max_position = 4.0   # maximum number of pyramid steps
        self.num_of_stds = 24.0   # volatility multiples for the add/exit distance
        self.price = 0.0          # reference price (entry / last add)
        self.position = 0.0       # signed step count; 0 = flat
        self.volatility = 0.0     # current ATR-style volatility estimate
        self.current_price = 0.0  # latest market price
        self.pl_fac = 1.0         # shrinks the exit distance relative to the add distance
    def manage(self):
        """Return 1.0 to add a unit, -1.0 to close the position, 0.0 otherwise.

        Adds when the price has moved num_of_stds*volatility in the position's
        favor since the reference price (ratcheting the reference); signals a
        close when it has moved the same distance (divided by pl_fac) against
        it.  Does nothing when flat.
        """
        if self.position != 0.0:
            # Loop-invariant threshold; the original also computed an unused
            # `sign` here, which has been removed.
            step = self.num_of_stds * self.volatility
            # Favorable move for a long: ratchet the reference and pyramid up.
            if self.price + step < self.current_price and self.position > 0.0:
                self.price = self.current_price
                if abs(self.position) < self.max_position:
                    self.position += 1.0
                    return 1.0
                else:
                    return 0.0
            # Favorable move for a short: ratchet the reference and pyramid up.
            if self.price - step > self.current_price and self.position < 0.0:
                self.price = self.current_price
                if abs(self.position) < self.max_position:
                    self.position -= 1.0
                    return 1.0
                else:
                    return 0.0
            # Adverse move beyond the (pl_fac-scaled) stop distance: close.
            if self.price - step / self.pl_fac > self.current_price and self.position > 0:
                return -1.0
            if self.price + step / self.pl_fac < self.current_price and self.position < 0:
                return -1.0
        return 0.0
class BasicTemplateAlgorithm(QCAlgorithm):
    """Minute-bar XOM strategy: retrains a subset-KNN ensemble monthly and
    trades the sign of its prediction with a volatility-managed pyramiding
    position, liquidating before every market close."""
    def Initialize(self):
        """Configure data pipeline, brokerage, consolidator, ATRs and schedules."""
        self.position1 = Position()
        self.data_prep = Data_preparator(y_periods=[10], max_timeframe=3, max_for_period=10)
        self.inter_detect = Interactor(num_of_subsets=200, num_of_best=5, max_num_of_feat=7)
        self.predictor = Predictor(y_period_ind=0, filter_coef=0.75)
        self.cash = 100000
        self.SetStartDate(2010, 1, 1)   # Set Start Date
        self.SetEndDate(2018, 1, 1)     # Set End Date
        self.SetCash(self.cash)         # Set Strategy Cash
        self.multiplier = 100.0
        self.init_coef = 0.6  # initial trade size - fraction of the deposit
        self.add_coef = 0.2   # position management trade size - fraction of the deposit
        # Find more symbols here: http://quantconnect.com/data
        self.symbol = "XOM"
        self.granularity = Resolution.Minute
        # Short windows (_p) feed evaluation features; long windows feed retraining.
        self.HighBar_p = RollingWindow[float](self.data_prep.eval_lookback)
        self.LowBar_p = RollingWindow[float](self.data_prep.eval_lookback)
        self.OpenBar_p = RollingWindow[float](self.data_prep.eval_lookback)
        self.CloseBar_p = RollingWindow[float](self.data_prep.eval_lookback)
        self.VolBar_p = RollingWindow[float](self.data_prep.eval_lookback)
        self.HighBar = RollingWindow[float](self.data_prep.warmup_count)
        self.LowBar = RollingWindow[float](self.data_prep.warmup_count)
        self.OpenBar = RollingWindow[float](self.data_prep.warmup_count)
        self.CloseBar = RollingWindow[float](self.data_prep.warmup_count)
        self.VolBar = RollingWindow[float](self.data_prep.warmup_count)
        self.ttrig = 0  # set to 1 by the monthly schedule to trigger retraining
        self.SetBrokerageModel(BrokerageName.InteractiveBrokersBrokerage, AccountType.Margin)
        mm = self.AddEquity(self.symbol, self.granularity)
        mm.MarginModel = PatternDayTradingMarginModel()
        self.SetBenchmark(self.symbol)
        self.SetWarmUp(self.data_prep.warmup_count)
        # Work on 5-bar consolidated bars rather than raw minute bars.
        self.consolidator = TradeBarConsolidator(5)
        self.consolidator.DataConsolidated += self.OnDataConsolidated
        self.SubscriptionManager.AddConsolidator(self.symbol, self.consolidator)
        sPlot = Chart('Strategy Equity')
        self.atr_slow = self.ATR(self.symbol, 160, MovingAverageType.Simple, Resolution.Minute)
        self.atr_fast = self.ATR(self.symbol, 30, MovingAverageType.Simple, Resolution.Minute)
        self.previous = None
        # Retrain at each month's first open; flatten 1 minute before every close.
        self.Schedule.On(self.DateRules.MonthStart(self.symbol), self.TimeRules.AfterMarketOpen(self.symbol), \
            Action(self.reset_train))
        self.Schedule.On(self.DateRules.EveryDay(self.symbol), self.TimeRules.BeforeMarketClose(self.symbol,1), \
            Action(self.liqui))
    def liqui(self):
        """Flatten the book and reset the tracked position state."""
        self.Liquidate(self.symbol)
        self.position1.position = 0.0
    def OnDataConsolidated(self, sender, TradeBar):
        """Per consolidated bar: record OHLCV, retrain when flagged, otherwise
        predict and manage/open the position."""
        try:
            self.HighBar_p.Add(float(TradeBar.High))
            self.LowBar_p.Add(float(TradeBar.Low))
            self.OpenBar_p.Add(float(TradeBar.Open))
            self.CloseBar_p.Add(float(TradeBar.Close))
            self.VolBar_p.Add(float(TradeBar.Volume))
            self.HighBar.Add(float(TradeBar.High))
            self.LowBar.Add(float(TradeBar.Low))
            self.OpenBar.Add(float(TradeBar.Open))
            self.CloseBar.Add(float(TradeBar.Close))
            self.VolBar.Add(float(TradeBar.Volume))
        except Exception:
            # Narrowed from a bare except (which also swallowed system exits);
            # still deliberately best-effort: a bad bar should not halt the algo.
            self.Debug('Failed to retrieve quotes')
        if not self.VolBar.IsReady:
            return
        stock_coef = self.cash / float(self.Securities[self.symbol].Price)
        # Evaluated once a month - generates features and estimators.
        if self.ttrig == 1:
            self.data_prep.hist_open = self.OpenBar
            self.data_prep.hist_close = self.CloseBar
            self.data_prep.hist_high = self.HighBar
            self.data_prep.hist_low = self.LowBar
            self.data_prep.hist_vol = self.VolBar
            x, y = self.data_prep.get_data_for_train()
            subsets = self.inter_detect.get_top_subsets(x, y)
            self.predictor.train(x, y, subsets)
            self.ttrig = 0
            return
        # Regular bar: evaluate on the short windows.
        self.data_prep.hist_open = self.OpenBar_p
        self.data_prep.hist_close = self.CloseBar_p
        self.data_prep.hist_high = self.HighBar_p
        self.data_prep.hist_low = self.LowBar_p
        self.data_prep.hist_vol = self.VolBar_p
        x = self.data_prep.get_data_for_eval()
        prediction = self.predictor.ensamble_predict(x.reshape(1, -1))
        holdings = float(self.Portfolio[self.symbol].Quantity)
        self.position1.volatility = float(self.atr_fast.Current.Value)
        self.position1.current_price = float(self.Securities[self.symbol].Price)
        # Position management for an existing position.
        if self.position1.position != 0.0 and holdings != 0.0:
            self.position1.volatility = float(self.atr_fast.Current.Value)
            action = self.position1.manage()
            if self.position1.position > 0:
                direc = OrderDirection.Buy
            else:
                direc = OrderDirection.Sell
            bp = float(self.Portfolio.GetMarginRemaining(self.symbol))
            if self.position1.position < 0:
                ordr = -1
            else:
                ordr = 1
            # Pyramid only when margin covers 1.5x the add size.
            if action == 1.0 and abs(bp) > abs(stock_coef * self.add_coef) * 1.5 * self.position1.current_price:
                self.MarketOrder(self.symbol, ordr * stock_coef * self.add_coef)
                self.position1.price = float(self.Securities[self.symbol].Price)
            if action == -1.0:
                self.position1.position = 0.0
                self.Liquidate(self.symbol)
        else:
            return
        bp = float(self.Portfolio.GetMarginRemaining(self.symbol))
        # Open a new position when flat and the signal survives the veto.
        if self.position1.position == 0.0 and holdings == 0.0:
            if prediction > 0.0 and abs(bp) > stock_coef * self.init_coef * self.position1.current_price:
                self.position1.position = 1.0
                self.MarketOrder(self.symbol, stock_coef * self.init_coef)
                self.position1.price = float(self.Securities[self.symbol].Price)
                self.position1.current_price = float(self.Securities[self.symbol].Price)
                self.position1.volatility = float(self.atr_fast.Current.Value)
            if prediction < 0.0 and abs(bp) > stock_coef * self.init_coef * self.position1.current_price:
                self.position1.position = -1.0
                self.MarketOrder(self.symbol, -stock_coef * self.init_coef)
                self.position1.volatility = float(self.atr_fast.Current.Value)
                self.position1.price = float(self.Securities[self.symbol].Price)
                self.position1.current_price = float(self.Securities[self.symbol].Price)
        # Re-sync tracked state if broker holdings and internal state diverge.
        if float(self.Portfolio[self.symbol].Quantity) != 0 and self.position1.position == 0:
            self.position1.position = abs(float(self.Portfolio[self.symbol].Quantity))/float(self.Portfolio[self.symbol].Quantity)
            self.Debug('ACHTUNG!!!!')
        self.previous = self.Time
    def reset_train(self):
        """Scheduled monthly: flag the next consolidated bar to retrain."""
        self.ttrig = 1