| Overall Statistics | |
| --- | --- |
| Total Trades | 1636 |
| Average Win | 0.53% |
| Average Loss | -0.34% |
| Compounding Annual Return | 72.110% |
| Drawdown | 15.300% |
| Expectancy | 0.326 |
| Net Profit | 138.407% |
| Sharpe Ratio | 2.254 |
| Probabilistic Sharpe Ratio | 94.717% |
| Loss Rate | 48% |
| Win Rate | 52% |
| Profit-Loss Ratio | 1.57 |
| Alpha | 0.489 |
| Beta | 0.022 |
| Annual Standard Deviation | 0.217 |
| Annual Variance | 0.047 |
| Information Ratio | 1.834 |
| Tracking Error | 0.274 |
| Treynor Ratio | 21.878 |
| Total Fees | $24274.43 |
| Estimated Strategy Capacity | $4400000.00 |
| Lowest Capacity Asset | RBLX XMP3AJ4KU3C5 |
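As a quick sanity check on the reported statistics: expectancy is approximately win rate × profit-loss ratio − loss rate = 0.52 × 1.57 − 0.48 ≈ 0.34, in line with the reported 0.326 (the small difference comes from rounding of the inputs).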
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.42a: Improved positions calculation for DAS Integration
v0.41a: Added use_kelly parameter and 9am training for easier live trading
v0.40a: Improvements to DAS Integration and logging
v0.39a: Minor improvements and changes to DAS Integration
v0.38a: DAS Trader integration
v0.37a: Long Short with probability threshold
v0.36a: Long Short version (market exposure parameter)
v0.35a: CV and training improvements
v0.34a: Storing features instead of model
v0.33a: Improved email notification
v0.32a: Improved logging
v0.31a: Minor improvements and library implementation
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.42a
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import numpy as np
import sklearn
import pandas as pd
pd.set_option('mode.use_inf_as_na', True)
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
import utils as utl
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum dollar volume in the previous trading day
self.capital = self.GetParameter("capital", 800000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Calendar days of history kept for model training
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.min_proba = self.GetParameter("min_proba", 0.1) # Minimum differential probability
self.use_kelly = self.GetParameter("use_kelly", 1) # Whether to use the kelly criterion for sizing
self.SetStartDate(2021, 6, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.kelly = 0
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.last_training = datetime(2000, 1, 1, 0, 0, 0)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(every_day, at(9, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
self.Schedule.On(every_day, at(16, 0), self.save_data)
self.x, self.y = None, None
if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved
self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data")))
self.print(f"Loaded data {self.x.shape} {self.y.shape}")
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if (self.Time - self.last_training).days < 7 \
or self.x is None or self.y is None: return
self.clean_data()
days = self.x.index.get_level_values("time")
if len(days.unique()) <= 21: return
y_bin = self.y.apply(lambda x: +1 if x >= 0 else -1) # +1/-1 labels so the Kelly score can be computed for both long and short sides
weight = self.y # TODO: How to use log with -100% returns?
model_temp = sklearn.base.clone(self.model)
x_train, x_test, y_train, y_test, w_train, w_test = train_test_split(
self.x, y_bin, weight, train_size=0.5, shuffle=False)
model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by returns
y_pred = model_temp.predict(x_test)
score = utl.kelly_score(y_test, y_pred, sample_weight=abs(w_test)) \
if self.use_kelly else 1 # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(weight)))
self.kelly = np.nan_to_num(score).clip(0, 1)
self.Debug(f"{self.Time} Training - Pts.: {y_bin.value_counts()}\n"
f"Kelly: {self.kelly:.1%}\n")
self.Plot("ML", "Score", self.kelly)
def enter_trades(self):
self.calc_features()
x_pred = self.x.query("time == @self.Time.date()")
if self.kelly <= 0 or len(x_pred) == 0: return
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict_proba(x_pred)[:,1],
index=x_pred.index)
self.print(f"Predictions: {y_pred.to_string()}")
for email in ["fbaldisserri@gmail.com", "hbrewer27@gmail.com"]:
self.Notify.Email(email, "Big Bertha Predictions", y_pred.to_string())
proba_desc = y_pred.sort_values(ascending=False)
proba_asc = y_pred.sort_values(ascending=True)
pairs = dict(proba=proba_desc.values-proba_asc.values,
long_sym=proba_desc.index.get_level_values("symbol"),
short_sym=proba_asc.index.get_level_values("symbol"))
pairs = pd.DataFrame.from_dict(pairs).query("proba >= @self.min_proba")
pairs.eval("pos = proba * @self.kelly", inplace=True)
if pairs["pos"].sum() >= 1: pairs["pos"] /= pairs["pos"].sum()
self.print(f"Positions {pairs}")
for _, row in pairs.iterrows():
self.SetHoldings(row["long_sym"], row["pos"]/2)
self.SetHoldings(row["short_sym"], -row["pos"]/2)
if self.LiveMode:
order_value = self.Portfolio.TotalPortfolioValue*row["pos"]/2
long_qty = int(order_value/self.Securities[row["long_sym"]].Price)
utl.das_send_order(self, row["long_sym"].split()[0], quantity=long_qty)
short_qty = -int(order_value/self.Securities[row["short_sym"]].Price)
utl.das_send_order(self, row["short_sym"].split()[0], quantity=short_qty)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
if self.LiveMode: utl.das_liquidate(self)
self.calc_targets()
def calc_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr:02d}:{entry_mn:02d}")
new_x = bertha_bar.add_prefix("bb_")
new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_x["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_x["gap"] = bertha_bar["open"] / yesterday_close - 1
self.x = pd.concat([new_x.dropna(), self.x])
self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}")
def calc_targets(self):
last_x = self.x.query("time == @self.Time.date()")
if len(last_x) == 0: return
tickers = list(last_x.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
if len(minute_bars) == 0: return
trading_bar = minute_bars.droplevel("time").join(last_x)
new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return
self.y = pd.concat([new_y, self.y])
self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}")
def clean_data(self):
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # Keep only the last self.lookback calendar days
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
def save_data(self):
mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True)
self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb")
self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y)))
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
"""
Big Bertha Strategy with Machine Learning
@version: 0.1
@creation date: 05/07/2022
First prototype
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last",
"high": "max", "low": "min",
"volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.SetStartDate(2015, 1, 1)
self.SetCash(100000)
self.UniverseSettings.ExtendedMarketHours = True
self.model = GradientBoostingClassifier(warm_start=True,
n_iter_no_change=3)
self.test_acc = 0
self.train_days = timedelta(91) # Training on the last quarter
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.max_symbols = literal_eval(self.GetParameter("max_symbols"))
self.benchmark = self.GetParameter("benchmark")
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Daily)
self.SetBenchmark(self.benchmark)
self.Train(self.DateRules.MonthStart(),
self.TimeRules.At(0, 0),
self.train_model)
self.Schedule.On(self.DateRules.EveryDay(self.benchmark),
self.TimeRules.At(9, 35),
self.trade)
self.Schedule.On(self.DateRules.EveryDay(self.benchmark),
self.TimeRules.At(15, 55),
self.stop_trading)
def coarse_filter(self, coarse):
symbols = [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
return np.random.choice(symbols, size=self.max_symbols,
replace=False).tolist()
def train_model(self):
x, y = self.get_data(self.Time - self.train_days, self.Time)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5,
shuffle=True)
self.model.fit(x_train, (y_train > 0).astype(float),
sample_weight=abs(y_train)) # TODO: Use log returns for training weights
self.test_acc = self.model.score(x_test, (y_test > 0).astype(float),
sample_weight=abs(y_test))
self.Debug(f"Training Points: {len(x_train)} Test Accuracy: {self.test_acc:.1%}")
self.Plot("ML", "Test Score", self.test_acc)
def trade(self):
if self.test_acc > 0.5:
x_pred = self.get_data(self.Time-timedelta(10), self.Time,
with_target=False)
y_proba = pd.Series(self.model.predict_proba(x_pred)[:,1],
index=x_pred.index).groupby("symbol").last()
positions = y_proba.apply(lambda x: x if x > 0.5 else 0) # TODO: Implement shorting
if positions.sum() > 1: positions /= positions.sum() # Max portfolio size 100%
self.Debug(f"Trading\n{y_proba}\nPos: {positions}")
self.Plot("ML", "Prediction", y_proba.mean())
for symbol in y_proba.index:
self.SetHoldings(symbol, positions[symbol])
def stop_trading(self):
self.Debug("End of day")
self.Transactions.CancelOpenOrders()
self.Liquidate()
def get_data(self, start, end, with_target=True):
tickers = [t for t in list(self.ActiveSecurities.Keys)
if str(t) not in self.benchmark]
minute_bars = self.History(tickers, start, end, Resolution.Minute)
day_bars = idx.filter_bars(minute_bars, "09:30", "16:30")
day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS)
pm_bars = idx.filter_bars(minute_bars, "00:00", "09:30")
pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS)
min5_bars = idx.filter_bars(minute_bars, "09:30", "09:35")
min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS)
features = pd.DataFrame()
features["big_bertha"] = min5_bar.eval("(high-low)/open")
features["range"] = min5_bar.eval("(low+close)/(high-low)")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
features["gap"] = day_bar["open"]/yesterday_close
features["pm_volume_usd"] = pm_bar.eval("close*volume")
features.dropna(inplace=True)
if with_target:
trade_day_bars = idx.filter_bars(minute_bars, "09:30", "15:55")
trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS)
target = trade_day_bar.eval("close/open-1").dropna()
index = target.index.intersection(features.index)
return features.loc[index], target.loc[index]
else:
return features"""
Big Bertha Strategy with Machine Learning
@version: 0.2
@creation date: 05/07/2022
First prototype
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last",
"high": "max", "low": "min",
"volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.SetStartDate(2015, 1, 1)
self.UniverseSettings.ExtendedMarketHours = True
self.model = GradientBoostingClassifier(warm_start=True,
n_iter_no_change=1)
self.test_acc = 0
self.train_days = timedelta(30) # Training on the last month
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.max_symbols = literal_eval(self.GetParameter("max_symbols"))
self.benchmark = self.GetParameter("benchmark")
self.capital = literal_eval(self.GetParameter("capital"))
self.SetSecurityInitializer(lambda x: x.SetMarketPrice(self.GetLastKnownPrice(x)))
self.SetCash(self.capital)
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Daily)
self.SetBenchmark(self.benchmark)
self.Train(self.DateRules.MonthStart(),
self.TimeRules.At(0, 0),
self.train_model)
self.Schedule.On(self.DateRules.EveryDay(self.benchmark),
self.TimeRules.At(9, 35),
self.trade)
self.Schedule.On(self.DateRules.EveryDay(self.benchmark),
self.TimeRules.At(15, 55),
self.stop_trading)
def coarse_filter(self, coarse):
symbols = [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
return np.random.choice(symbols, size=self.max_symbols,
replace=False).tolist()
def train_model(self):
x, y = self.get_data(self.Time - self.train_days, self.Time)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5,
shuffle=True)
self.model.fit(x_train, (y_train > 0).astype(float),
sample_weight=abs(y_train))
self.test_acc = self.model.score(x_test, (y_test > 0).astype(float),
sample_weight=abs(y_test))
self.Debug(f"Training Points: {len(x_train)} Test Accuracy: {self.test_acc:.1%}")
self.Plot("ML", "Test Score", self.test_acc)
def trade(self):
if self.test_acc > 0.5:
x_pred = self.get_data(self.Time-timedelta(5), self.Time,
with_target=False)
y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1],
index=x_pred.index).groupby("symbol").last()
positions = y_proba.apply(lambda x: -1 if x < 0.25 else +1 if x > 0.75 else 0) # Short below 25%, long above 75%; near 50% means no position
if positions.abs().sum() > 1: positions /= positions.abs().sum() # Max portfolio size 100%
self.Debug(f"Trading\n{y_proba}\nPos: {positions}")
self.Plot("ML", "Prediction", y_proba.mean())
for symbol in positions.index:
self.SetHoldings(symbol, positions[symbol])
def stop_trading(self):
self.Debug("End of day")
self.Transactions.CancelOpenOrders()
self.Liquidate()
def get_data(self, start, end, with_target=True):
tickers = [t for t in list(self.ActiveSecurities.Keys)
if str(t) not in self.benchmark]
minute_bars = self.History(tickers, start, end, Resolution.Minute)
day_bars = idx.filter_bars(minute_bars, "09:30", "16:30")
day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS)
pm_bars = idx.filter_bars(minute_bars, "00:00", "09:30")
pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS)
min5_bars = idx.filter_bars(day_bars, "09:30", "09:35")
min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS)
features = pd.DataFrame()
features["big_bertha"] = min5_bar.eval("(high-low)/open")
features["close_range"] = min5_bar.eval("(close-low)/(high-low)")
features["open_range"] = min5_bar.eval("(open-low)/(high-low)")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
features["gap"] = day_bar["open"]/yesterday_close
features["pm_volume_usd"] = pm_bar.eval("close*volume")
features.dropna(inplace=True)
if with_target:
trade_day_bars = idx.filter_bars(day_bars, "09:35", "15:55")
trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS)
target = trade_day_bar.eval("close/open-1").apply(np.log1p)
index = target.dropna().index.intersection(features.index)
return features.loc[index], target.loc[index]
else:
return features"""
Big Bertha Strategy with Machine Learning
@version: 0.3
@creation date: 05/07/2022
First prototype
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last",
"high": "max", "low": "min",
"volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.SetStartDate(2015, 1, 1)
self.UniverseSettings.ExtendedMarketHours = True
self.model = GradientBoostingClassifier(warm_start=True,
n_iter_no_change=3)
self.test_acc = 0
self.train_days = timedelta(30) # Training on the last month
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.max_symbols = literal_eval(self.GetParameter("max_symbols"))
self.min_gap = literal_eval(self.GetParameter("min_gap"))
self.benchmark = self.GetParameter("benchmark")
self.capital = literal_eval(self.GetParameter("capital"))
self.SetSecurityInitializer(lambda x: x.SetMarketPrice(self.GetLastKnownPrice(x)))
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Daily)
self.SetBenchmark(self.benchmark)
self.Train(self.DateRules.MonthStart(),
self.TimeRules.At(0, 0),
self.train_model)
self.Schedule.On(self.DateRules.EveryDay(self.benchmark),
self.TimeRules.At(9, 35),
self.trade)
self.Schedule.On(self.DateRules.EveryDay(self.benchmark),
self.TimeRules.At(15, 55),
self.stop_trading)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
x, y = self.get_data(self.Time - self.train_days, self.Time,
min_gap=self.min_gap)
fit_params = dict(sample_weight=abs(y))
cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float),
scoring="accuracy", fit_params=fit_params)
self.test_acc = np.mean(cv_scores)
self.model.fit(x, (y > 0).astype(float), **fit_params)
self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.test_acc:.1%}")
self.Plot("ML", "Test Accuracy", self.test_acc)
def trade(self):
if self.test_acc <= 0.5: return
x_pred = self.get_data(self.Time-timedelta(5), self.Time,
with_target=False, min_gap=self.min_gap)
if x_pred is None: return
today = self.Time.replace(hour=0, minute=0, second=0)
x_pred.query("time == @today", inplace=True)
if len(x_pred) == 0: return
y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1],
index=x_pred.index)
positions = y_proba.apply(lambda x: -1 if x < 0.4 else +1 if x > 0.6 else 0) # Short below 40%, long above 60%; near 50% means no position
if positions.abs().sum() > 1: positions /= positions.abs().sum() # Max portfolio size 100%
self.Debug(f"Trading\n{y_proba}\nPos: {positions}")
self.Plot("ML", "Prediction", y_proba.mean())
for symbol in positions.index:
self.SetHoldings(symbol, positions[symbol])
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def get_data(self, start, end, with_target=True, min_gap=0):
tickers = [t for t in list(self.ActiveSecurities.Keys)
if str(t) not in self.benchmark]
daily_bars = self.History(tickers, start, end, Resolution.Daily)
features = pd.DataFrame()
features["gap"] = idx.gap(daily_bars).dropna()
features.query("abs(gap) >= @min_gap", inplace=True)
if len(features) == 0:
return (None, None) if with_target else None
minute_bars = pd.concat([self.History([s], d, d + timedelta(1),
Resolution.Minute)
for s, d in features.index])
pm_bars = idx.filter_bars(minute_bars, "00:00", "09:30")
pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS)
min5_bars = idx.filter_bars(minute_bars, "09:30", "09:35")
min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS)
features["big_bertha"] = min5_bar.eval("(high-low)/open")
features["close_range"] = min5_bar.eval("(close-low)/(high-low)")
features["open_range"] = min5_bar.eval("(open-low)/(high-low)")
features["pm_volume_usd"] = pm_bar.eval("close*volume")
features.dropna(inplace=True)
if not with_target:
return features
trade_day_bars = idx.filter_bars(minute_bars, "09:35", "15:55")
trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS)
target = trade_day_bar.eval("close/open-1").apply(np.log1p)
index = target.dropna().index.intersection(features.index)
return features.loc[index], target.loc[index]"""
Big Bertha Strategy with Machine Learning
@version: 0.4
@creation date: 05/07/2022
First prototype
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last",
"high": "max", "low": "min",
"volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.max_symbols = literal_eval(self.GetParameter("max_symbols"))
self.capital = literal_eval(self.GetParameter("capital"))
self.SetStartDate(2020, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.test_acc = 0
self.train_days = timedelta(30) # Training on the last month
self.model = GradientBoostingClassifier(warm_start=True,
n_iter_no_change=3)
self.Train(self.DateRules.MonthStart(),
self.TimeRules.At(0, 0),
self.train_model)
self.Schedule.On(self.DateRules.EveryDay(),
self.TimeRules.At(9, 35),
self.trade)
self.Schedule.On(self.DateRules.EveryDay(),
self.TimeRules.At(15, 55),
self.stop_trading)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData]
def train_model(self):
x, y = self.get_data(self.Time - self.train_days, self.Time)
fit_params = dict(sample_weight=abs(y))
cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float),
scoring="accuracy", fit_params=fit_params)
self.test_acc = np.mean(cv_scores)
self.model.fit(x, (y > 0).astype(float), **fit_params)
self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.test_acc:.1%}")
self.Plot("ML", "Test Accuracy", self.test_acc)
def trade(self):
model_confidence = max(self.test_acc-0.5, 0)*2 # 100% if accuracy 100%, 0% if below 50%
x_pred = self.get_data(self.Time-timedelta(5), self.Time,
with_target=False)
y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1],
index=x_pred.index).groupby("symbol").last()
positions = (y_proba - 0.5) * 2 * model_confidence # TODO: Fix risk management
if positions.abs().sum() > 1: positions /= positions.abs().sum() # Max portfolio size 100%
self.Plot("ML", "Prediction", y_proba.mean())
for symbol, position in positions[abs(positions) > 0.01].items():
self.Debug(f"{self.Time} - Trading {symbol} at {position:.1%}")
self.SetHoldings(symbol, position)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def get_data(self, start, end, with_target=True): # TODO: Add daily dataset update
tickers = np.random.choice(list(self.ActiveSecurities.Keys),
size=self.max_symbols,
replace=False).tolist()
minute_bars = self.History(tickers, start, end, Resolution.Minute)
day_bars = idx.filter_bars(minute_bars, "09:30", "16:30")
day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS)
pm_bars = idx.filter_bars(minute_bars, "00:00", "09:30")
pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS)
min5_bars = idx.filter_bars(day_bars, "09:30", "09:35")
min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS)
features = pd.DataFrame()
features["big_bertha"] = min5_bar.eval("(high-low)/open")
features["close_range"] = min5_bar.eval("(close-low)/(high-low)")
features["open_range"] = min5_bar.eval("(open-low)/(high-low)")
features["pm_volume_usd"] = pm_bar.eval("close*volume")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
features["gap"] = day_bar["open"] / yesterday_close
features = features.dropna().query("abs(gap) >= 0.02")
if with_target:
trade_day_bars = idx.filter_bars(day_bars, "09:35", "15:55")
trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS)
target = trade_day_bar.eval("close/open-1").apply(np.log1p)
index = target.dropna().index.intersection(features.index)
return features.loc[index], target.loc[index]
else:
return features"""
Big Bertha Strategy with Machine Learning
@version: 0.6
@creation date: 05/07/2022
First prototype
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last",
"high": "max", "low": "min",
"volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.max_symbols = literal_eval(self.GetParameter("max_symbols"))
self.capital = literal_eval(self.GetParameter("capital"))
self.SetStartDate(2020, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.benchmark = "SPY"
self.AddEquity(self.benchmark, Resolution.Daily)
self.SetBenchmark(self.benchmark)
self.history = pd.DataFrame()
self.test_acc = 0
self.train_days = timedelta(30) # Training on the last month
self.model = GradientBoostingClassifier(warm_start=True,
n_iter_no_change=3)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.MonthStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.update_history)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
self.Schedule.On(every_day, at(16, 0), self.update_history)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData]
def train_model(self):
if len(self.history) > 10000:
x, y = self.get_data(self.Time - self.train_days, self.Time)
fit_params = dict(sample_weight=abs(y))
cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float),
scoring="accuracy", fit_params=fit_params)
self.test_acc = np.mean(cv_scores)
self.model.fit(x, (y > 0).astype(float), **fit_params)
self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.test_acc:.1%}")
self.Plot("ML", "Test Accuracy", self.test_acc)
def trade(self):
if len(self.history) > 10000:
model_confidence = max(self.test_acc-0.5, 0)*2 # 100% if accuracy 100%, 0% if below 50%
x_pred = self.get_data(self.Time-timedelta(5), self.Time,
with_target=False)
y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1],
index=x_pred.index).groupby("symbol").last()
positions = (y_proba - 0.5) * 2 * model_confidence # TODO: Fix risk management
if positions.abs().sum() > 1: positions /= positions.abs().sum() # Max portfolio size 100%
self.Plot("ML", "Prediction", y_proba.mean())
for symbol, position in positions[abs(positions) > 0.01].items():
self.Debug(f"{self.Time} - Trading {symbol} at {position:.1%}")
self.SetHoldings(symbol, position)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def update_history(self):
tickers = list(self.ActiveSecurities.Keys)
new_data = self.History(tickers, 5, Resolution.Minute)
self.history = pd.concat([self.history, new_data])
def get_data(self, start, end, with_target=True): # TODO: Add daily dataset update
time_index = self.history.index.get_level_values("time")
time_filter = (time_index >= start) & (time_index <= end)
minute_bars = self.history.loc[time_filter]
day_bars = idx.filter_bars(minute_bars, "09:30", "16:30")
day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS)
pm_bars = idx.filter_bars(minute_bars, "00:00", "09:30")
pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS)
min5_bars = idx.filter_bars(day_bars, "09:30", "09:35")
min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS)
features = pd.DataFrame()
features["big_bertha"] = min5_bar.eval("(high-low)/open")
features["close_range"] = min5_bar.eval("(close-low)/(high-low)")
features["open_range"] = min5_bar.eval("(open-low)/(high-low)")
features["pm_volume_usd"] = pm_bar.eval("close*volume")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
features["gap"] = day_bar["open"] / yesterday_close
features = features.dropna().query("abs(gap) >= 0.02")
if with_target:
trade_day_bars = idx.filter_bars(day_bars, "09:35", "15:55")
trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS)
target = trade_day_bar.eval("close/open-1").apply(np.log1p)
index = target.dropna().index.intersection(features.index)
return features.loc[index], target.loc[index]
else:
return features"""
Big Bertha Strategy with Machine Learning
@version: 0.6
@creation date: 05/07/2022
First prototype
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last",
"high": "max", "low": "min",
"volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.max_symbols = literal_eval(self.GetParameter("max_symbols"))
self.capital = literal_eval(self.GetParameter("capital"))
self.SetStartDate(2020, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.test_acc = 0
self.train_days = timedelta(30) # Training on the last month
self.model = GradientBoostingClassifier(warm_start=True,
n_iter_no_change=3)
self.Train(self.DateRules.MonthStart(),
self.TimeRules.At(0, 0),
self.train_model)
self.Schedule.On(self.DateRules.EveryDay(),
self.TimeRules.At(9, 35),
self.trade)
self.Schedule.On(self.DateRules.EveryDay(),
self.TimeRules.At(15, 55),
self.stop_trading)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData]
def train_model(self):
x, y = self.get_data(self.Time - self.train_days, self.Time)
fit_params = dict(sample_weight=abs(y))
cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float),
scoring="accuracy", fit_params=fit_params)
self.test_acc = np.mean(cv_scores)
self.model.fit(x, (y > 0).astype(float), **fit_params)
self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.test_acc:.1%}")
self.Plot("ML", "Test Accuracy", self.test_acc)
def trade(self):
model_confidence = max(self.test_acc-0.5, 0)*2 # 100% if accuracy 100%, 0% if below 50%
x_pred = self.get_data(self.Time-timedelta(5), self.Time,
with_target=False)
y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1],
index=x_pred.index).groupby("symbol").last()
positions = (y_proba - 0.5) * 2 * model_confidence # TODO: Fix risk management
if positions.abs().sum() > 1: positions /= positions.abs().sum() # Max portfolio size 100%
self.Plot("ML", "Prediction", y_proba.mean())
for symbol, position in positions[abs(positions) > 0.01].items():
self.Debug(f"{self.Time} - Trading {symbol} at {position:.1%}")
self.SetHoldings(symbol, position)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def get_data(self, start, end, with_target=True):
tickers = np.random.choice(list(self.ActiveSecurities.Keys),
size=self.max_symbols,
replace=False).tolist()
minute_bars = self.History(tickers, start, end, Resolution.Minute)
day_bars = idx.filter_bars(minute_bars, "09:30", "16:30")
day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS)
pm_bars = idx.filter_bars(minute_bars, "00:00", "09:30")
pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS)
min5_bars = idx.filter_bars(day_bars, "09:30", "09:35")
min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS)
features = pd.DataFrame()
features["big_bertha"] = min5_bar.eval("(high-low)/open")
features["close_range"] = min5_bar.eval("(close-low)/(high-low)")
features["open_range"] = min5_bar.eval("(open-low)/(high-low)")
features["pm_volume_usd"] = pm_bar.eval("close*volume")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
features["gap"] = day_bar["open"] / yesterday_close
features = features.dropna().query("abs(gap) >= 0.02")
if with_target:
trade_day_bars = idx.filter_bars(day_bars, "09:35", "15:55")
trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS)
target = trade_day_bar.eval("close/open-1").apply(np.log1p)
index = target.dropna().index.intersection(features.index)
return features.loc[index], target.loc[index]
else:
return features"""
Big Bertha Strategy with Machine Learning
- Implementing offline data storage to avoid symbols limitation
@version: 0.7
@creation date: 05/07/2022
First prototype
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last",
"high": "max", "low": "min",
"volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.capital = literal_eval(self.GetParameter("capital"))
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2020, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.test_acc = 0
self.features, self.targets = None, None
self.train_days = 63 # Training on the last quarter
self.model = GradientBoostingClassifier(warm_start=True,
n_iter_no_change=3)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.MonthStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.update_data)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.get_dataset_days() < 21: return
self.Debug(f"{self.Time} Training")
x, y = self.get_data(self.train_days)
fit_params = dict(sample_weight=abs(y))
cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float),
scoring="accuracy", fit_params=fit_params)
self.test_acc = np.mean(cv_scores)
self.model.fit(x, (y > 0).astype(float), **fit_params)
self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.test_acc:.1%}")
self.Plot("ML", "Test Accuracy", self.test_acc)
def trade(self):
model_confidence = max(self.test_acc-0.5, 0)*2 # 100% if accuracy 100%, 0% if below 50%
if model_confidence <= 0: return
self.Debug(f"{self.Time} Trading")
x_pred = self.get_data(1, with_target=False)
x_pred = x_pred.query("time == @self.Time.date()")
y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1],
index=x_pred.index).groupby("symbol").last()
positions = (y_proba - 0.5) * 2 * model_confidence # TODO: Fix risk management
if positions.abs().sum() > 1: positions /= positions.abs().sum() # Max portfolio size 100%
self.Plot("ML", "Prediction", y_proba.mean())
for symbol, position in positions[abs(positions) > 0.01].items():
self.Debug(f"{self.Time} - Trading {symbol} at {position:.1%}")
self.SetHoldings(symbol, position)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def update_data(self):
trade_days = self.TradingCalendar.GetTradingDays(self.Time - timedelta(7),
self.Time - timedelta(1))
last_day = list(filter(lambda p: p.BusinessDay and not p.PublicHoliday,
trade_days))[-1].Date
start = last_day.replace(hour=9, minute=30, second=0)
end = self.Time.replace(hour=9, minute=35, second=0)
tickers = [ticker for ticker in list(self.ActiveSecurities.Keys)
if str(ticker) not in self.benchmark]
minute_bars = self.History(tickers, start, end, Resolution.Minute)
features = self.calculate_features(minute_bars).dropna()
self.features = pd.concat([self.features, features], copy=False)
targets = self.calculate_targets(minute_bars).dropna()
self.targets = pd.concat([self.targets, targets], copy=False)
memory = self.features.memory_usage(deep=True).sum()
memory += self.targets.memory_usage(deep=True)
self.Debug(f"{self.Time} Data updated: {len(tickers)} tickers {memory/10**6:.1f} MB")
def calculate_features(self, minute_bars):
day_bars = idx.filter_bars(minute_bars, "09:31", "16:30")
day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS)
pm_bars = idx.filter_bars(minute_bars, "00:01", "09:30")
pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS)
min5_bars = idx.filter_bars(day_bars, "09:31", "09:35")
min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS)
features = pd.DataFrame()
features["big_bertha"] = min5_bar.eval("(high-low)/open")
features["close_range"] = min5_bar.eval("(close-low)/(high-low)")
features["open_range"] = min5_bar.eval("(open-low)/(high-low)")
features["pm_volume_usd"] = pm_bar.eval("close*volume")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
features["gap"] = day_bar["open"] / yesterday_close
return features
def calculate_targets(self, minute_bars):
trade_day_bars = idx.filter_bars(minute_bars, "09:36", "15:55")
trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS)
return trade_day_bar.eval("close/open-1").apply(np.log1p)
def get_data(self, n_points, with_target=True):
if with_target:
common_index = self.targets.index.intersection(self.features.index)
y = self.targets.loc[common_index].groupby("symbol").tail(n_points)
x = self.features.loc[y.index]
return x, y
else:
return self.features.groupby("symbol").tail(n_points)
def get_dataset_days(self):
return len(self.features.index.get_level_values("time").unique()) \
if self.features is not None else 0
"""
Big Bertha Strategy with Machine Learning
- Implementing offline data storage to avoid symbols limitation
@version: 0.8
@creation date: 05/07/2022
First prototype
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last",
"high": "max", "low": "min",
"volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.capital = literal_eval(self.GetParameter("capital"))
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2020, 9, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.test_acc = 0
self.features, self.targets = None, None
self.train_days = 63 # Training on the last quarter
self.model = GradientBoostingClassifier(warm_start=True,
n_iter_no_change=3)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.MonthStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.update_data)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.get_dataset_days() < 21: return # At least a month of data to train the model
self.Debug(f"{self.Time} Training")
x, y = self.get_train_data()
fit_params = dict(sample_weight=abs(y))
cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float),
scoring="accuracy", fit_params=fit_params)
self.test_acc = np.mean(cv_scores)
self.model.fit(x, (y > 0).astype(float), **fit_params)
self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.test_acc:.1%}")
self.Plot("ML", "Test Accuracy", self.test_acc)
def trade(self):
model_confidence = max(self.test_acc-0.5, 0)*2 # 100% if accuracy 100%, 0% if below 50%
if model_confidence <= 0: return
self.Debug(f"{self.Time} Trading")
x_pred = self.get_pred_data()
y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1],
index=x_pred.index).groupby("symbol").last()
trades = y_proba[(y_proba < 0.3) | (y_proba > 0.7)]
positions = (trades - 0.5) * 2 * model_confidence # TODO: fix risk management; max portfolio size 100% including shorts
for symbol, position in positions.items():
self.Debug(f"{self.Time} - Trading {symbol} at {position:.1%}")
self.SetHoldings(symbol, position)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def update_data(self):
trade_days = self.TradingCalendar.GetTradingDays(self.Time - timedelta(7),
self.Time - timedelta(1))
last_day = list(filter(lambda p: p.BusinessDay and not p.PublicHoliday,
trade_days))[-1].Date
start = last_day.replace(hour=9, minute=30, second=0)
end = self.Time.replace(hour=9, minute=35, second=0)
tickers = [ticker for ticker in list(self.ActiveSecurities.Keys)
if str(ticker) not in self.benchmark]
minute_bars = self.History(tickers, start, end, Resolution.Minute)
features = self.calculate_features(minute_bars).dropna()
self.features = pd.concat([self.features, features]).drop_duplicates()
targets = self.calculate_targets(minute_bars).dropna()
self.targets = pd.concat([self.targets, targets]).drop_duplicates()
memory = self.features.memory_usage(deep=True).sum()
memory += self.targets.memory_usage(deep=True)
self.Debug(f"{self.Time} Data updated: {len(tickers)} tickers {memory/10**6:.1f} MB")
def calculate_features(self, minute_bars):
day_bars = idx.filter_bars(minute_bars, "09:31", "16:30")
day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS)
pm_bars = idx.filter_bars(minute_bars, "00:01", "09:30")
pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS)
min5_bars = idx.filter_bars(day_bars, "09:31", "09:35")
min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS)
features = pd.DataFrame()
features["big_bertha"] = min5_bar.eval("(high-low)/open")
features["close_range"] = min5_bar.eval("(close-low)/(high-low)")
features["open_range"] = min5_bar.eval("(open-low)/(high-low)")
features["pm_volume_usd"] = pm_bar.eval("close*volume")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
features["gap"] = day_bar["open"] / yesterday_close
return features
def calculate_targets(self, minute_bars):
trade_day_bars = idx.filter_bars(minute_bars, "09:36", "15:55")
trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS)
return trade_day_bar.eval("close/open-1").apply(np.log1p)
def get_train_data(self):
common_index = self.targets.index.intersection(self.features.index)
y = self.targets.loc[common_index].groupby("symbol").tail(self.train_days)
x = self.features.loc[y.index]
return x, y
def get_pred_data(self):
return self.features.query("time == @self.Time.date()")
def get_dataset_days(self):
return len(self.features.index.get_level_values("time").unique()) \
if self.features is not None else 0
"""
Big Bertha Strategy with Machine Learning
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
- Risk management with stop loss
@version: 0.9
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last",
"high": "max", "low": "min",
"volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.capital = literal_eval(self.GetParameter("capital"))
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2020, 10, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.test_acc = 0
self.features, self.targets = None, None
self.train_days = 252 # Training on the last year of data
self.model = GradientBoostingClassifier(warm_start=True,
n_iter_no_change=3)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.MonthStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.update_data)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.get_dataset_days() < 21: return # At least a month of data to train the model
self.Debug(f"{self.Time} Training")
x, y = self.get_train_data()
fit_params = dict(sample_weight=abs(y))
cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float),
scoring="accuracy", fit_params=fit_params)
self.test_acc = np.mean(cv_scores)
self.model.fit(x, (y > 0).astype(float), **fit_params)
self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.test_acc:.1%}")
self.Plot("ML", "Test Accuracy", self.test_acc)
def trade(self):
model_confidence = max(self.test_acc-0.5, 0)*2 # 100% if accuracy 100%, 0% if below 50%
if model_confidence <= 0: return
self.Debug(f"{self.Time} Trading")
x_pred = self.get_pred_data()
y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1],
index=x_pred.index).groupby("symbol").last()
trades = y_proba[(y_proba < 0.25) | (y_proba > 0.75)]
positions = (trades - 0.5) * 2 * model_confidence # TODO: fix risk management; max portfolio size 100% including shorts
for symbol, position in positions.items():
self.Debug(f"{self.Time} - Trading {symbol} at {position:.1%}")
qty = self.CalculateOrderQuantity(symbol, position)
limit_price = self.Securities[symbol].Price
self.LimitOrder(symbol, qty, limit_price)
stop_price = 0 if qty > 0 else 2 * limit_price # Far-from-market stop: placeholder only, not expected to trigger
self.StopMarketOrder(symbol, -qty, stop_price)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def update_data(self):
trade_days = self.TradingCalendar.GetTradingDays(self.Time - timedelta(7),
self.Time - timedelta(1))
last_day = list(filter(lambda p: p.BusinessDay and not p.PublicHoliday,
trade_days))[-1].Date
start = last_day.replace(hour=9, minute=30, second=0)
end = self.Time.replace(hour=9, minute=35, second=0)
tickers = [ticker for ticker in list(self.ActiveSecurities.Keys)
if str(ticker) not in self.benchmark]
minute_bars = self.History(tickers, start, end, Resolution.Minute)
features = self.calculate_features(minute_bars).dropna()
self.features = pd.concat([self.features, features]).drop_duplicates()
targets = self.calculate_targets(minute_bars).dropna()
self.targets = pd.concat([self.targets, targets]).drop_duplicates()
memory = self.features.memory_usage(deep=True).sum()
memory += self.targets.memory_usage(deep=True)
self.Debug(f"{self.Time} Data updated: {len(tickers)} tickers {memory/10**6:.1f} MB")
def calculate_features(self, minute_bars):
day_bars = idx.filter_bars(minute_bars, "09:31", "16:00")
day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS)
pm_bars = idx.filter_bars(minute_bars, "00:01", "09:30")
pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS)
min5_bars = idx.filter_bars(day_bars, "09:31", "09:35")
min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS)
features = pd.DataFrame()
features["big_bertha"] = min5_bar.eval("(high-low)/open")
features["close_range"] = min5_bar.eval("(close-low)/(high-low)")
features["open_range"] = min5_bar.eval("(open-low)/(high-low)")
features["pm_volume_usd"] = pm_bar.eval("close*volume")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
features["gap"] = day_bar["open"] / yesterday_close
return features
def calculate_targets(self, minute_bars):
trade_day_bars = idx.filter_bars(minute_bars, "09:36", "15:55")
trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS)
return trade_day_bar.eval("close/open-1").apply(np.log1p)
def get_train_data(self):
common_index = self.targets.index.intersection(self.features.index)
y = self.targets.loc[common_index].groupby("symbol").tail(self.train_days)
x = self.features.loc[y.index]
return x, y
def get_pred_data(self):
return self.features.query("time == @self.Time.date()")
def get_dataset_days(self):
return len(self.features.index.get_level_values("time").unique()) \
if self.features is not None else 0
"""
Big Bertha Strategy with Machine Learning
Done
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
Todo
- Risk management with stop loss
@version: 0.9
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last",
"high": "max", "low": "min",
"volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.capital = literal_eval(self.GetParameter("capital"))
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2020, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.confidence = 0
self.features, self.targets = None, None
self.train_days = 252 # Training on the last year of data
self.model = GradientBoostingClassifier(warm_start=True,
n_iter_no_change=3)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.MonthStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.update_data)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None: return
self.Debug(f"{self.Time} Training")
x, y = self.get_train_data()
fit_params = dict(sample_weight=abs(y))
cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float),
scoring="accuracy", fit_params=fit_params)
self.confidence = max(np.mean(cv_scores) - 0.5, 0) * 2 # 100% if accuracy 100%, 0% if below 50%
self.model.fit(x, (y > 0).astype(float), **fit_params)
self.Debug(f"{self.Time} Points:{len(x)} Confidence:{self.confidence:.1%}")
self.Plot("ML", "Confidence", self.confidence)
def trade(self):
if self.confidence <= 0: return
self.Debug(f"{self.Time} Trading")
x_pred = self.get_pred_data()
y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1],
index=x_pred.index).groupby("symbol").last()
trades = y_proba[(y_proba < 0.25) | (y_proba > 0.75)]
positions = (trades - 0.5) * 2 * self.confidence # TODO: fix risk management; max portfolio size 100% including shorts
for symbol, position in positions.items():
self.Debug(f"{self.Time} - Trading {symbol} at {position:.1%}")
self.SetHoldings(symbol, position)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def update_data(self):
trade_days = self.TradingCalendar.GetTradingDays(self.Time - timedelta(7),
self.Time - timedelta(1))
last_day = list(filter(lambda p: p.BusinessDay and not p.PublicHoliday,
trade_days))[-1].Date
start = last_day.replace(hour=9, minute=30, second=0)
end = self.Time.replace(hour=9, minute=35, second=0)
tickers = [ticker for ticker in list(self.ActiveSecurities.Keys)
if str(ticker) not in self.benchmark]
minute_bars = self.History(tickers, start, end, Resolution.Minute)
features = self.calculate_features(minute_bars).dropna()
self.features = pd.concat([self.features, features]).drop_duplicates()
targets = self.calculate_targets(minute_bars).dropna()
self.targets = pd.concat([self.targets, targets]).drop_duplicates()
memory = self.features.memory_usage(deep=True).sum()
memory += self.targets.memory_usage(deep=True)
self.Debug(f"{self.Time} Data updated: {len(tickers)} tickers {memory/10**6:.1f} MB")
def calculate_features(self, minute_bars):
day_bars = idx.filter_bars(minute_bars, "09:31", "16:00")
day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS)
pm_bars = idx.filter_bars(minute_bars, "00:01", "09:30")
pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS)
min5_bars = idx.filter_bars(day_bars, "09:31", "09:35")
min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS)
features = pd.DataFrame()
features["big_bertha"] = min5_bar.eval("(high-low)/open")
features["close_range"] = min5_bar.eval("(close-low)/(high-low)")
features["open_range"] = min5_bar.eval("(open-low)/(high-low)")
features["pm_volume_usd"] = pm_bar.eval("close*volume")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
features["gap"] = day_bar["open"] / yesterday_close
return features
def calculate_targets(self, minute_bars):
trade_day_bars = idx.filter_bars(minute_bars, "09:36", "15:55")
trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS)
return trade_day_bar.eval("close/open-1").apply(np.log1p)
def get_train_data(self):
common_index = self.targets.index.intersection(self.features.index)
y = self.targets.loc[common_index].groupby("symbol").tail(self.train_days)
x = self.features.loc[y.index]
return x, y
def get_pred_data(self):
return self.features.query("time == @self.Time.date()")
def get_dataset_days(self):
return len(self.features.index.get_level_values("time").unique()) \
if self.features is not None else 0
"""
Big Bertha Strategy with Machine Learning
Done
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
Todo
- Risk management with stop loss
@version: 0.10
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last",
"high": "max", "low": "min",
"volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.capital = literal_eval(self.GetParameter("capital"))
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2020, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.confidence = 0
self.features, self.targets = None, None
self.train_days = 252 # Training on the last year of data
self.model = GradientBoostingClassifier(warm_start=True,
n_iter_no_change=3)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.MonthStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.update_data)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None: return
self.Debug(f"{self.Time} Training")
x, y = self.get_train_data()
fit_params = dict(sample_weight=abs(y))
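# weight each training sample by the magnitude of its log-return target, so larger moves count more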
cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float),
scoring="accuracy", fit_params=fit_params)
self.confidence = max(np.mean(cv_scores) - 0.5, 0) * 2 # 100% if accuracy 100%, 0% if below 50%
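# e.g. mean CV accuracy 0.60 -> confidence (0.60 - 0.5) * 2 = 20%; accuracy <= 0.50 -> 0%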
self.model.fit(x, (y > 0).astype(float), **fit_params)
self.Debug(f"{self.Time} Points:{len(x)} Confidence:{self.confidence:.1%}")
self.Plot("ML", "Confidence", self.confidence)
def trade(self):
if self.confidence <= 0: return
self.Debug(f"{self.Time} Trading")
x_pred = self.get_pred_data()
y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1],
index=x_pred.index).groupby("symbol").last()
self.Debug(f"Predictions: {len(trades)} - Proba {min(trades):.0%}-{max(trades):.0%}")
trades = y_proba[(y_proba < 0.25) | (y_proba > 0.75)]
positions = (trades - 0.5) * 2 * self.confidence # TODO: Fix risk management: cap gross portfolio exposure at 100%, shorts included
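# e.g. proba 0.8 with confidence 0.4 -> (0.8 - 0.5) * 2 * 0.4 = 24% long; proba 0.2 -> 24% short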
for symbol, position in positions.items():
self.Debug(f"{self.Time} - Trading {symbol} at {position:.1%}")
self.SetHoldings(symbol, position)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def update_data(self):
trade_days = self.TradingCalendar.GetTradingDays(self.Time - timedelta(7),
self.Time - timedelta(1))
last_day = list(filter(lambda p: p.BusinessDay and not p.PublicHoliday,
trade_days))[-1].Date
start = last_day.replace(hour=9, minute=30, second=0)
end = self.Time.replace(hour=9, minute=35, second=0)
tickers = [ticker for ticker in list(self.ActiveSecurities.Keys)
if str(ticker) != self.benchmark]
minute_bars = self.History(tickers, start, end, Resolution.Minute)
features = self.calculate_features(minute_bars).dropna()
self.features = pd.concat([self.features, features]).drop_duplicates()
targets = self.calculate_targets(minute_bars).dropna()
self.targets = pd.concat([self.targets, targets]).drop_duplicates()
self.Debug(f"{self.Time} Data updated: {len(self.features)} datapoints")
def calculate_features(self, minute_bars):
day_bars = idx.filter_bars(minute_bars, "09:31", "16:00")
day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS)
pm_bars = idx.filter_bars(minute_bars, "00:01", "09:30")
pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS)
min5_bars = idx.filter_bars(day_bars, "09:31", "09:35")
min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS)
features = pd.DataFrame()
features["big_bertha_size"] = min5_bar.eval("(high-low)/open")
features["big_bertha_volume"] = min5_bar["volume"]
features["big_bertha_open"] = min5_bar["open"]
features["close_range"] = min5_bar.eval("(close-low)/(high-low)")
features["open_range"] = min5_bar.eval("(open-low)/(high-low)")
features["pm_volume_usd"] = pm_bar.eval("close*volume")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
features["gap"] = day_bar["open"] / yesterday_close
return features
def calculate_targets(self, minute_bars):
trade_day_bars = idx.filter_bars(minute_bars, "09:36", "15:55")
trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS)
return trade_day_bar.eval("close/open-1").apply(np.log1p)
def get_train_data(self):
common_index = self.targets.index.intersection(self.features.index)
y = self.targets.loc[common_index].groupby("symbol").tail(self.train_days)
x = self.features.loc[y.index]
return x, y
def get_pred_data(self):
return self.features.query("time == @self.Time.date()")
def get_dataset_days(self):
return len(self.features.index.get_level_values("time").unique()) \
if self.features is not None else 0
"""
Big Bertha Strategy with Machine Learning
Done
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
Todo
- Risk management with stop loss
@version: 0.11
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.capital = literal_eval(self.GetParameter("capital"))
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2021, 1, 1)
self.SetEndDate(2022, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.accuracy = None
self.features, self.targets = None, None
self.model = GradientBoostingClassifier(n_iter_no_change=3) # TODO: Evaluate Grid search for different parameters
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.store_data)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None or self.targets is None: return
self.Debug(f"{self.Time} Training")
x, y = self.get_train_data()
fit_params = dict(sample_weight=abs(y))
cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float),
cv=10, fit_params=fit_params)
self.accuracy = np.mean(cv_scores)
self.model.fit(x, (y > 0).astype(float), **fit_params)
self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.accuracy:.1%}")
self.Plot("ML", "Accuracy", self.accuracy)
def trade(self):
if self.accuracy is None: return
self.Debug(f"{self.Time} Trading")
x_pred = self.get_pred_data()
y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1],
index=x_pred.index).groupby("symbol").last()
self.Debug(f"Predictions: {len(y_proba)} - Proba {min(y_proba):.0%}-{max(y_proba):.0%}")
positions = (y_proba[(y_proba <= 0.4)|(y_proba >= 0.6)] - 0.5) * self.accuracy # Model and trade confidence
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
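# e.g. gross exposure 1.6 -> every position is scaled by 1/1.6, capping total exposure at 100%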
[self.SetHoldings(symbol, pos) for symbol, pos in positions.items()]
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def store_data(self):
trade_days = self.TradingCalendar.GetTradingDays(self.Time - timedelta(7),
self.Time - timedelta(1))
last_day = list(filter(lambda p: p.BusinessDay and not p.PublicHoliday,
trade_days))[-1].Date
start = last_day.replace(hour=9, minute=30, second=0)
end = self.Time.replace(hour=9, minute=35, second=0)
tickers = list(filter(lambda x: str(x) != self.benchmark,
self.ActiveSecurities.Keys))
minute_bars = self.History(tickers, start, end, Resolution.Minute)
self.add_features(minute_bars)
self.add_targets(minute_bars)
self.Debug(f"{self.Time} Data updated: {len(self.features)} datapoints")
def add_features(self, minute_bars):
day_bar = self.agg_bars(minute_bars, "09:31", "16:00")
pm_bar = self.agg_bars(minute_bars, "00:01", "09:30")
min5_bar = self.agg_bars(minute_bars, "09:31", "09:35")
features = min5_bar.add_prefix("bb_")
features["bb_size"] = min5_bar.eval("(high-low)/open")
features["bb_close_range"] = min5_bar.eval("(close-low)/(high-low)")
features["bb_open_range"] = min5_bar.eval("(open-low)/(high-low)")
features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
features["gap"] = day_bar["open"] / yesterday_close-1
features.dropna(inplace=True)
if self.features is not None:
new_idx = features.index.difference(self.features.index) # Removing potential duplicates
self.features = pd.concat([self.features, features.loc[new_idx]])
else:
self.features = features
def add_targets(self, minute_bars):
trading_bar = self.agg_bars(minute_bars, "09:36", "15:55")
targets = trading_bar.eval("close/open-1").dropna()
if self.targets is not None:
new_idx = targets.index.difference(self.targets.index) # Removing potential duplicates
self.targets = pd.concat([self.targets, targets.loc[new_idx]])
else:
self.targets = targets
def get_train_data(self):
train_idx = self.targets.index.intersection(self.features.index)
return self.features.loc[train_idx], self.targets.loc[train_idx]
def get_pred_data(self):
return self.features.query("time == @self.Time.date()")
def agg_bars(self, minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
"""
Big Bertha Strategy with Machine Learning
Done
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
Todo
- Risk management with stop loss
@version: 0.12
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
RETR_PCT = 0.7
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.capital = literal_eval(self.GetParameter("capital"))
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2021, 1, 1)
self.SetEndDate(2022, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.accuracy = 0
self.features, self.targets = None, None
self.model = GradientBoostingClassifier(n_iter_no_change=3)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.store_data)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None or self.targets is None: return
self.Debug(f"{self.Time} Training")
x, y = self.get_train_data()
cv_scores = cross_val_score(self.model, X=x, y=y, cv=10)
self.accuracy = np.mean(cv_scores)
self.model.fit(x, y)
self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.accuracy:.1%}")
self.Plot("ML", "Accuracy", self.accuracy)
def trade(self):
if self.accuracy <= 0.5: return
self.Debug(f"{self.Time} Trading")
x_pred = self.get_pred_data()
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_).groupby("symbol").last()
actions = y_proba.idxmax(axis=1)
positions = actions.apply(lambda x: 0.01 if x=="long" else -0.01 if x=="short" else 0)
self.Debug(f"Predictions: {len(y_proba)} - Proba {y_proba}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
feats = x_pred.loc[symbol].iloc[0] # TODO: Refactor
window = (feats.bb_high - feats.bb_low) * RETR_PCT
stop_loss = feats.bb_high - window if pos > 0 \
else feats.bb_low + window # TODO: Refactor
self.StopMarketOrder(symbol, -qty, stop_loss)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def store_data(self):
trade_days = self.TradingCalendar.GetTradingDays(self.Time - timedelta(7),
self.Time - timedelta(1))
last_day = list(filter(lambda p: p.BusinessDay and not p.PublicHoliday,
trade_days))[-1].Date
start = last_day.replace(hour=9, minute=30, second=0)
end = self.Time.replace(hour=9, minute=35, second=0)
tickers = list(filter(lambda x: str(x) != self.benchmark,
self.ActiveSecurities.Keys))
minute_bars = self.History(tickers, start, end, Resolution.Minute)
new_features = self.calc_features(minute_bars).dropna()
if self.features is not None:
new_idx = new_features.index.difference(self.features.index) # Removing potential duplicates
self.features = pd.concat([self.features, new_features.loc[new_idx]])
else:
self.features = new_features
new_targets = self.calc_targets(minute_bars).dropna()
if self.targets is not None:
new_idx = new_targets.index.difference(self.targets.index) # Removing potential duplicates
self.targets = pd.concat([self.targets, new_targets.loc[new_idx]])
else:
self.targets = new_targets
self.Debug(f"{self.Time} Data updated: {len(self.features)} datapoints")
def calc_features(self, minute_bars):
day_bar = agg_bars(minute_bars, "09:31", "16:00")
pm_bar = agg_bars(minute_bars, "00:01", "09:30")
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
features = min5_bar.add_prefix("bb_")
features["bb_size"] = min5_bar.eval("(high-low)/open")
features["bb_close_range"] = min5_bar.eval("(close-low)/(high-low)")
features["bb_open_range"] = min5_bar.eval("(open-low)/(high-low)")
features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
features["gap"] = day_bar["open"] / yesterday_close-1
return features
def calc_targets(self, minute_bars):
trading_bar = agg_bars(minute_bars, "09:36", "15:55")
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
trading_bar = trading_bar.join(min5_bar.add_prefix("bb_"))
return trading_bar.apply(calc_exit_price, axis=1)
def get_train_data(self):
train_idx = self.targets.index.intersection(self.features.index)
return self.features.loc[train_idx], self.targets.loc[train_idx]
def get_pred_data(self):
return self.features.query("time == @self.Time.date()")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
def calc_exit_price(row, retr_pct=RETR_PCT):
window = (row.bb_high-row.bb_low)*retr_pct
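# the stop sits retr_pct (70%) of the opening bar's range below its high for longs
# (above its low for shorts); the label is awarded only if price never touches it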
if row.close > row.open: # long trade
stop_loss = row.bb_high - window
target = "long" if row.low > stop_loss else "pass" # 1 if profitable long and not touching the SL
else: # short trade
stop_loss = row.bb_low + window
target = "short" if row.high < stop_loss else "pass" # -1 if profitable short and not touching the SL
return target"""
Big Bertha Strategy with Machine Learning
Done
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
Todo
- Risk management with stop loss
@version: 0.13
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
RETR_PCT = 0.7
EXT_PCT = 0.39
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.capital = literal_eval(self.GetParameter("capital"))
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2021, 1, 1)
self.SetEndDate(2022, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.accuracy = None
self.features, self.targets = None, None
self.model = GradientBoostingClassifier(n_iter_no_change=3)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.store_data)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None or self.targets is None: return
self.Debug(f"{self.Time} Training")
x, y = self.get_train_data()
cv_scores = cross_val_score(self.model, X=x, y=y, cv=10)
self.accuracy = np.mean(cv_scores)
self.model.fit(x, y)
self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.accuracy:.1%}")
self.Plot("ML", "Accuracy", self.accuracy)
def trade(self):
if self.accuracy is None: return
self.Debug(f"{self.Time} Trading")
x_pred = self.get_pred_data()
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_).groupby("symbol").last()
actions = y_proba.idxmax(axis=1)
positions = actions.apply(lambda x: 0.01 if x=="long" else -0.01 if x=="short" else 0)
self.Debug(f"Predictions: {len(y_proba)} - Proba {y_proba}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
feats = x_pred.loc[symbol].iloc[0] # TODO: Refactor
window = (feats.bb_high - feats.bb_low)
stop_loss = feats.bb_high - window * RETR_PCT if pos > 0 \
else feats.bb_low + window * RETR_PCT # TODO: Refactor
take_profit = feats.bb_high + window * EXT_PCT if pos > 0 \
else feats.bb_low - window * EXT_PCT
self.StopMarketOrder(symbol, -qty, stop_loss) # TODO: Need to cancel TP when SL fills and vice versa
self.LimitOrder(symbol, -qty, take_profit) # TODO: Need to cancel SL when TP fills and vice versa
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def store_data(self):
trade_days = self.TradingCalendar.GetTradingDays(self.Time - timedelta(7),
self.Time - timedelta(1))
last_day = list(filter(lambda p: p.BusinessDay and not p.PublicHoliday,
trade_days))[-1].Date
start = last_day.replace(hour=9, minute=30, second=0)
end = self.Time.replace(hour=9, minute=35, second=0)
tickers = list(filter(lambda x: str(x) != self.benchmark,
self.ActiveSecurities.Keys))
minute_bars = self.History(tickers, start, end, Resolution.Minute)
new_features = self.calc_features(minute_bars).dropna()
if self.features is not None:
new_idx = new_features.index.difference(self.features.index) # Removing potential duplicates
self.features = pd.concat([self.features, new_features.loc[new_idx]])
else:
self.features = new_features
new_targets = self.calc_targets(minute_bars).dropna()
if self.targets is not None:
new_idx = new_targets.index.difference(self.targets.index) # Removing potential duplicates
self.targets = pd.concat([self.targets, new_targets.loc[new_idx]])
else:
self.targets = new_targets
self.Debug(f"{self.Time} Data updated: {len(self.features)} datapoints")
def calc_features(self, minute_bars):
day_bar = agg_bars(minute_bars, "09:31", "16:00")
pm_bar = agg_bars(minute_bars, "00:01", "09:30")
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
features = min5_bar.add_prefix("bb_")
features["bb_size"] = min5_bar.eval("(high-low)/open")
features["bb_close_range"] = min5_bar.eval("(close-low)/(high-low)")
features["bb_open_range"] = min5_bar.eval("(open-low)/(high-low)")
features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
features["gap"] = day_bar["open"] / yesterday_close-1
return features
def calc_targets(self, minute_bars):
trading_bar = agg_bars(minute_bars, "09:36", "15:55")
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
trading_bar = trading_bar.join(min5_bar.add_prefix("bb_"))
return trading_bar.apply(calc_exit_target, axis=1)
def get_train_data(self):
train_idx = self.targets.index.intersection(self.features.index)
return self.features.loc[train_idx], self.targets.loc[train_idx]
def get_pred_data(self):
return self.features.query("time == @self.Time.date()")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
def calc_exit_target(price_bar, retr_pct=RETR_PCT, ext_pct=EXT_PCT):
window = (price_bar.bb_high - price_bar.bb_low)
if price_bar.close > price_bar.open: # long trade
# take_profit = data_bar.bb_high + window * ext_pct TODO: Not used yet, would need to compare TP/SL timing
stop_loss = price_bar.bb_high - window * retr_pct
profitable_long = (price_bar.low > stop_loss) \
and (price_bar.close > price_bar.open)
target = "long" if profitable_long else "pass" # 1 if profitable long and not touching the SL
else: # short trade
# take_profit = data_bar.bb_low - window * ext_pct TODO: Not used yet, would need to compare TP/SL timing
stop_loss = price_bar.bb_low + window * retr_pct
profitable_short = (price_bar.high < stop_loss) \
and (price_bar.close < price_bar.open)
target = "short" if profitable_short else "pass" # -1 if profitable short and not touching the SL
return target"""
Big Bertha Strategy with Machine Learning
Done
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
- Triple barrier target with TP and SL
@version: 0.14
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
RETRACEMENT_SL = 0.7
EXTENSION_TP = 0.39
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.capital = literal_eval(self.GetParameter("capital"))
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2021, 1, 1)
self.SetEndDate(2022, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.accuracy = None
self.features, self.targets = pd.DataFrame(), pd.DataFrame()
self.model = GradientBoostingClassifier(n_iter_no_change=3)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.store_data)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if len(self.features) == 0: return # No training data available
self.Debug(f"{self.Time} Training")
x, y = self.get_train_data()
cv_scores = cross_val_score(self.model, X=x, y=y, cv=10,
scoring="balanced_accuracy")
self.accuracy = np.mean(cv_scores)
self.model.fit(x, y)
self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.accuracy:.1%}")
self.Plot("ML", "Accuracy", self.accuracy)
def trade(self):
if self.accuracy is None: return
self.Debug(f"{self.Time} Trading")
x_pred = self.get_pred_data().droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
positions_func = lambda x: x[1] if x[1] > 0.5 \
else -x[-1] if x[-1] > 0.5 else 0
positions = y_proba.apply(positions_func, axis=1) * self.accuracy
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.Debug(f"Predictions: {len(y_proba)} - Proba {y_proba}")
for symbol, pos in positions.items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low)
stop_loss = features.bb_high - window * RETRACEMENT_SL if pos > 0 \
else features.bb_low + window * RETRACEMENT_SL # TODO: Refactor
#take_profit = features.bb_high + window * EXTENSION_TP if pos > 0 \
# else features.bb_low - window * EXTENSION_TP
self.StopMarketOrder(symbol, -qty, stop_loss) # TODO: Need to cancel TP when SL and viceversa
#self.LimitOrder(symbol, -qty, take_profit) # TODO: Need to cancel TP when SL and viceversa
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def store_data(self):
trade_days = self.TradingCalendar.GetTradingDays(self.Time - timedelta(7),
self.Time - timedelta(1))
last_day = list(filter(lambda p: p.BusinessDay and not p.PublicHoliday,
trade_days))[-1].Date
start = last_day.replace(hour=9, minute=30, second=0)
end = self.Time.replace(hour=9, minute=35, second=0)
tickers = list(filter(lambda x: str(x) != self.benchmark,
self.ActiveSecurities.Keys))
minute_bars = self.History(tickers, start, end, Resolution.Minute)
new_features = self.calc_features(minute_bars).dropna()
new_idx = new_features.index.difference(self.features.index) # Removing potential duplicates
self.features = pd.concat([self.features, new_features.loc[new_idx]])
new_targets = self.calc_targets(minute_bars).dropna()
new_idx = new_targets.index.difference(self.targets.index) # Removing potential duplicates
self.targets = pd.concat([self.targets, new_targets.loc[new_idx]])
self.Debug(f"{self.Time} Data updated: {len(self.features)} datapoints")
def calc_features(self, minute_bars):
day_bar = agg_bars(minute_bars, "09:31", "16:00")
pm_bar = agg_bars(minute_bars, "00:01", "09:30")
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
features = min5_bar.add_prefix("bb_")
features["bb_size"] = min5_bar.eval("(high-low)/open")
features["bb_close_range"] = min5_bar.eval("(close-low)/(high-low)")
features["bb_open_range"] = min5_bar.eval("(open-low)/(high-low)")
features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
features["gap"] = day_bar["open"] / yesterday_close-1
return features
def calc_targets(self, minute_bars):
trading_bar = agg_bars(minute_bars, "09:36", "15:55")
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
trading_bar = trading_bar.join(min5_bar.add_prefix("bb_"))
return trading_bar.apply(calc_exit_target, axis=1)
def get_train_data(self):
train_idx = self.targets.index.intersection(self.features.index)
return self.features.loc[train_idx], self.targets.loc[train_idx]
def get_pred_data(self):
return self.features.query("time == @self.Time.date()")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
def calc_exit_target(price_bar, retr_pct=RETRACEMENT_SL, ext_pct=EXTENSION_TP):
window = (price_bar.bb_high - price_bar.bb_low)
if price_bar.close > price_bar.open: # long trade
# take_profit = data_bar.bb_high + window * ext_pct TODO: Not used yet, would need to compare TP/SL timing
stop_loss = price_bar.bb_high - window * retr_pct
target = 1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else: # short trade
# take_profit = data_bar.bb_low - window * ext_pct TODO: Not used yet, would need to compare TP/SL timing
stop_loss = price_bar.bb_low + window * retr_pct
target = -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
return target
"""
Big Bertha Strategy with Machine Learning
Done
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
- Double barrier target with SL
- Implemented Pipeline with clustering/dimensionality reduction
TODO:
- Implement Triple Barrier with TP
@version: 0.15
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.cluster import KMeans
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
RETRACEMENT_SL = 0.7
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.capital = literal_eval(self.GetParameter("capital"))
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2021, 1, 1)
self.SetEndDate(2022, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.score = 0
self.features, self.targets = pd.DataFrame(), pd.Series()
self.pipe = Pipeline([#("scaling", MinMaxScaler()),
#("clustering", KMeans(n_clusters=2)),
("model", GradientBoostingClassifier(n_iter_no_change=3,
n_estimators=100))])
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.MonthStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.store_features)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
self.Schedule.On(every_day, at(15, 55), self.store_targets)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if len(self.features) == 0: return
self.clean_data()
training_days = self.features.index.get_level_values("time").unique()
if len(training_days) < 20: return # Minimum of month of training
cv_scores = cross_val_score(self.pipe, X=self.features, y=self.targets,
cv=10, scoring="balanced_accuracy")
self.score = np.mean(cv_scores)
self.pipe.fit(self.features, self.targets)
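# mean/std of the fold scores: a Sharpe-like stability measure of the CV score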
self.print(f"CV Sharpe {self.score / np.std(cv_scores):.1f}")
self.print(f"Training: {self.targets.value_counts()} Score:{self.score:.1%}")
self.Plot("ML", "Score", self.score)
def trade(self):
if self.score <= 1/3: return
x_pred = self.features.query("time == @self.Time.date()").droplevel("time")
y_pred = pd.Series(self.pipe.predict(x_pred), index=x_pred.index)
positions_map = {"long": 0.1, "short": -0.1, "pass": 0}
positions = y_pred.apply(lambda x: positions_map[x])
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low) * RETRACEMENT_SL
stop_loss = features.bb_high - window if pos > 0 \
else features.bb_low + window # TODO: Refactor
self.StopMarketOrder(symbol, -qty, stop_loss)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def store_features(self):
last_day = self.get_last_day(self.Time)
start = last_day.replace(hour=9, minute=30, second=0)
tickers = self.get_active_tickers()
minute_bars = self.History(tickers, start, self.Time, Resolution.Minute)
day_bar = agg_bars(minute_bars, "09:31", "16:00")
pm_bar = agg_bars(minute_bars, "00:01", "09:30")
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
new_features = min5_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
new_features["gap"] = day_bar["open"] / yesterday_close - 1
self.features = pd.concat([self.features, new_features.dropna()])
def store_targets(self):
last_features = self.features.groupby("symbol").last()
tickers = list(last_features.index)
start = self.Time.replace(hour=9, minute=31, second=0)
end = self.Time.replace(hour=15, minute=54, second=0)
minute_bars = self.History(tickers, start, end, Resolution.Minute)
trading_bar = agg_bars(minute_bars, "09:36", "15:54")
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
trading_bar = trading_bar.join(min5_bar.add_prefix("bb_"))
new_targets = trading_bar.apply(calc_exit_target, axis=1)
self.targets = pd.concat([self.targets, new_targets.dropna()])
def clean_data(self):
self.features = self.features[~self.features.index.duplicated(keep='first')]
self.targets = self.targets[~self.targets.index.duplicated(keep='first')]
common_idx = self.targets.index.intersection(self.features.index)
self.features = self.features.loc[common_idx]
self.targets = self.targets.loc[common_idx]
def get_active_tickers(self):
return list(filter(lambda x: str(x) != self.benchmark,
self.ActiveSecurities.Keys))
def get_last_day(self, date):
start, end = date - timedelta(7), date - timedelta(1)
calendar_days = self.TradingCalendar.GetTradingDays(start, end)
return list(filter(lambda p: p.BusinessDay and not p.PublicHoliday,
calendar_days))[-1].Date
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
def calc_exit_target(price_bar, retr_pct=RETRACEMENT_SL):
window = (price_bar.bb_high - price_bar.bb_low)
if price_bar.close > price_bar.open: # long trade
stop_loss = price_bar.bb_high - window * retr_pct
target = "long" if price_bar.low > stop_loss else "pass" # 1 if profitable long and not touching the SL
else: # short trade
stop_loss = price_bar.bb_low + window * retr_pct
target = "short" if price_bar.high < stop_loss else "pass" # -1 if profitable short and not touching the SL
return target
"""
Big Bertha Strategy with Machine Learning
Done
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
- Double barrier target with SL
TODO: Implement Triple Barrier with TP
@version: 0.16
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
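# Editor's note: `timeseriescv` is a local module not shown in this listing. TimeSeriesSplitGroups
# is assumed to behave like sklearn's TimeSeriesSplit applied to the `groups` labels
# (trading days): all rows of a day stay in one fold, and test days always come after
# the training days, so the CV has no look-ahead.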
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
RETRACEMENT_SL = 1.0
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.capital = literal_eval(self.GetParameter("capital"))
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2021, 1, 1)
self.SetEndDate(2022, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.score = 0
self.cv = TimeSeriesSplitGroups(n_splits=10)
self.features, self.targets = None, None
self.model = GradientBoostingClassifier(n_iter_no_change=3)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.store_features)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
self.Schedule.On(every_day, at(15, 55), self.store_targets)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None: return
training_days = self.features.index.get_level_values("time").unique()
if len(training_days) < 20: return
time_groups = self.targets.index.get_level_values("time")
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=time_groups,
scoring="balanced_accuracy")
self.score = np.nanmean(cv_scores)
self.model.fit(self.features, self.targets)
self.print(f"Training score:{self.score:.1%}")
self.Plot("ML", "Score", self.score)
def trade(self):
kelly_size = (3 * self.score - 1) / 2 # calculating the edge like binary Kelly
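# random guessing on 3 classes scores 1/3, so (3 * score - 1) / 2 rescales [1/3, 1] to [0, 1];
# e.g. score 0.5 -> kelly_size 0.25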
if kelly_size <= 0: return
x_pred = self.features.groupby("symbol").last()
y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index)
positions = y_pred * kelly_size * 0.5 # Using 50% Kelly
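# fractional Kelly heuristic: betting half the Kelly fraction keeps roughly 3/4 of the
# growth rate at about half the volatility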
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low) * RETRACEMENT_SL
stop_loss = features.bb_high - window if pos > 0 \
else features.bb_low + window # TODO: Refactor
self.StopMarketOrder(symbol, -qty, stop_loss)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0)
tickers = list(self.ActiveSecurities.Keys)
minute_bars = self.History(tickers, start, self.Time, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
new_features = min5_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = min5_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
tickers = list(last_features.index.get_level_values("symbol"))
start = self.Time.replace(hour=9, minute=31, second=0)
end = self.Time.replace(hour=15, minute=55, second=0)
minute_bars = self.History(tickers, start, end, Resolution.Minute)
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
trading_bar = agg_bars(minute_bars, "09:36", "15:55")
trading_bar = trading_bar.join(min5_bar.add_prefix("bb_"))
new_targets = trading_bar.apply(calc_exit_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
def calc_exit_target(price_bar, retr_pct=RETRACEMENT_SL):
window = (price_bar.bb_high - price_bar.bb_low)
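# worked example with retr_pct=1.0: bb_high=10, bb_low=9 -> long stop at 9;
# the day labels +1 only if its low stays above 9 (no full retracement of the bar)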
if price_bar.close > price_bar.open: # long trade
stop_loss = price_bar.bb_high - window * retr_pct
target = +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else: # short trade
stop_loss = price_bar.bb_low + window * retr_pct
target = -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
return target
"""
Big Bertha Strategy with Machine Learning
Done
- Custom precision scoring
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
- Double barrier target with SL
TODO: Implement Triple Barrier with TP
@version: 0.17
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.metrics import make_scorer
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
def multi_precision(y_true, y_pred):
non_zero_pred = y_pred != 0
matches = y_true[non_zero_pred] == y_pred[non_zero_pred]
return np.mean(matches) if len(matches) > 0 else 0
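# precision over the non-"pass" predictions only, e.g.
# y_true=[1, -1, 0, 1], y_pred=[1, 0, 0, -1] -> scored pairs (1,1), (1,-1) -> 0.5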
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.retracement_sl = literal_eval(self.GetParameter("retracement_sl"))
self.capital = literal_eval(self.GetParameter("capital"))
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2021, 1, 1)
self.SetEndDate(2022, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.score = 0
self.scoring = make_scorer(multi_precision)
self.cv = TimeSeriesSplitGroups(n_splits=10)
self.features, self.targets = None, None
self.model = GradientBoostingClassifier(n_iter_no_change=3)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.store_features)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
self.Schedule.On(every_day, at(15, 55), self.store_targets)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None: return
training_days = self.features.index.get_level_values("time").unique()
if len(training_days) < 20: return
time_groups = self.targets.index.get_level_values("time")
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=time_groups,
scoring=self.scoring)
self.score = np.mean(cv_scores)
self.model.fit(self.features, self.targets)
self.print(f"Training: {self.targets.value_counts()} Score:{self.score:.1%}")
self.Plot("ML", "Score", self.score)
def trade(self):
if self.score == 0: return
x_pred = self.features.groupby("symbol").last()
y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index)
positions = y_pred * self.score * 0.1
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low) * self.retracement_sl
stop_loss = features.bb_high - window if pos > 0 \
else features.bb_low + window # TODO: Refactor
self.StopMarketOrder(symbol, -qty, stop_loss)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0)
tickers = list(self.ActiveSecurities.Keys)
minute_bars = self.History(tickers, start, self.Time, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
new_features = min5_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = min5_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
tickers = list(last_features.index.get_level_values("symbol"))
start = self.Time.replace(hour=9, minute=31, second=0)
end = self.Time.replace(hour=15, minute=55, second=0)
minute_bars = self.History(tickers, start, end, Resolution.Minute)
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
trading_bar = agg_bars(minute_bars, "09:36", "15:55")
trading_bar = trading_bar.join(min5_bar.add_prefix("bb_"))
new_targets = trading_bar.apply(self.calc_exit_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
def calc_exit_target(self, price_bar):
window = (price_bar.bb_high - price_bar.bb_low)
if price_bar.close > price_bar.open: # long trade
stop_loss = price_bar.bb_high - window * self.retracement_sl
target = +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else: # short trade
stop_loss = price_bar.bb_low + window * self.retracement_sl
target = -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
return target
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
"""
Big Bertha Strategy with Machine Learning
Done
- Custom precision scoring
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
- Double barrier target with SL
@version: 0.18
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.retracement_sl = literal_eval(self.GetParameter("retracement_sl"))
self.kelly_frac = literal_eval(self.GetParameter("kelly_frac"))
self.capital = literal_eval(self.GetParameter("capital"))
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2021, 1, 1)
self.SetEndDate(2022, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.score = 0
self.cv = TimeSeriesSplitGroups(n_splits=10)
self.features, self.targets = None, None
self.model = GradientBoostingClassifier(n_iter_no_change=3)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.store_features)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
self.Schedule.On(every_day, at(15, 55), self.store_targets)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None: return
training_days = self.features.index.get_level_values("time").unique()
if len(training_days) < 20: return
time_groups = self.targets.index.get_level_values("time")
fit_params = dict(sample_weight=abs(np.log1p(self.targets)))
y_binary = (self.targets > 0).astype(float)
cv_scores = cross_val_score(self.model, X=self.features, y=y_binary,
cv=self.cv, groups=time_groups,
fit_params=fit_params)
self.score = np.mean(cv_scores)
self.model.fit(self.features, y_binary, **fit_params)
self.print(f"Training: {y_binary.value_counts()} Score:{self.score:.1%}")
self.Plot("ML", "Score", self.score)
def trade(self):
edge = self.score - (1 - self.score)
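# binary Kelly edge with even odds: p - (1 - p) = 2p - 1; e.g. score 0.55 -> edge 0.10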
if edge <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index)
positions = y_pred * edge * self.kelly_frac
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions.items():
self.SetHoldings(symbol, pos)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0)
tickers = list(self.ActiveSecurities.Keys)
minute_bars = self.History(tickers, start, self.Time, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
new_features = min5_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = min5_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
tickers = list(last_features.index.get_level_values("symbol"))
start = self.Time.replace(hour=9, minute=36, second=0)
end = self.Time.replace(hour=15, minute=55, second=0)
minute_bars = self.History(tickers, start, end, Resolution.Minute)
trading_bar = agg_bars(minute_bars, "09:36", "15:55")
new_targets = trading_bar.eval("close/open - 1")
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
"""
Big Bertha Strategy with Machine Learning
Done
- Custom precision scoring
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
- Double barrier target with SL
@version: 0.18
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.retracement_sl = literal_eval(self.GetParameter("retracement_sl"))
self.kelly_frac = literal_eval(self.GetParameter("kelly_frac"))
self.capital = literal_eval(self.GetParameter("capital"))
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2021, 1, 1)
self.SetEndDate(2022, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.score = 0
self.cv = TimeSeriesSplitGroups(n_splits=10)
self.features, self.targets = None, None
self.model = GradientBoostingClassifier(n_iter_no_change=3)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.store_features)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
self.Schedule.On(every_day, at(15, 55), self.store_targets)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None: return
training_days = self.features.index.get_level_values("time").unique()
if len(training_days) < 20: return
time_groups = self.targets.index.get_level_values("time")
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=time_groups,
scoring="balanced_accuracy")
self.score = np.mean(cv_scores)
self.model.fit(self.features, self.targets)
self.print(f"Training: {self.targets.value_counts()} Score:{self.score:.1%}")
self.Plot("ML", "Score", self.score)
def trade(self):
edge = (3 * self.score - 1) / 2
if edge <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index)
positions = y_pred * edge * self.kelly_frac
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low) * self.retracement_sl
stop_loss = features.bb_high - window if pos > 0 \
else features.bb_low + window # TODO: Refactor
self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0)
tickers = list(self.ActiveSecurities.Keys)
minute_bars = self.History(tickers, start, self.Time, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
new_features = min5_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = min5_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
tickers = list(last_features.index.get_level_values("symbol"))
start = self.Time.replace(hour=9, minute=31, second=0)
end = self.Time.replace(hour=15, minute=55, second=0)
minute_bars = self.History(tickers, start, end, Resolution.Minute)
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
trading_bar = agg_bars(minute_bars, "09:36", "15:55")
trading_bar = trading_bar.join(min5_bar.add_prefix("bb_"))
new_targets = trading_bar.apply(self.calc_exit_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
def calc_exit_target(self, price_bar):
window = (price_bar.bb_high - price_bar.bb_low)
if price_bar.close > price_bar.open: # long trade
stop_loss = price_bar.bb_high - window * self.retracement_sl
target = +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else: # short trade
stop_loss = price_bar.bb_low + window * self.retracement_sl
target = -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
return target
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
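# `indicators.filter_bars` is referenced above but its source is not part of
# this listing. A minimal sketch of what it presumably does, inferred only
# from the call sites (an assumption, not the author's implementation): keep
# the minute bars whose intraday timestamp falls within [start_time, end_time].
import pandas as pd

def filter_bars(minute_bars, start_time, end_time):
    times = minute_bars.index.get_level_values("time")  # bar timestamps from the MultiIndex
    mask = (times.time >= pd.Timestamp(start_time).time()) & \
           (times.time <= pd.Timestamp(end_time).time())
    return minute_bars[mask]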
"""
Big Bertha Strategy with Machine Learning
Done
- Custom precision scoring
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
- Double barrier target with SL
@version: 0.19
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.retracement_sl = literal_eval(self.GetParameter("retracement_sl"))
self.kelly_frac = literal_eval(self.GetParameter("kelly_frac"))
self.capital = literal_eval(self.GetParameter("capital"))
self.use_sl = literal_eval(self.GetParameter("use_sl"))
self.target_gain = literal_eval(self.GetParameter("target_gain"))
self.strategy = self.GetParameter("strategy")
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2021, 1, 1)
self.SetEndDate(2022, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.edge = 0
self.cv = TimeSeriesSplitGroups(n_splits=10)
self.features, self.targets = None, None
self.model = GradientBoostingClassifier(n_iter_no_change=10)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.store_features)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
self.Schedule.On(every_day, at(15, 55), self.store_targets)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None: return
training_days = self.features.index.get_level_values("time").unique()
if len(training_days) <= 10: return
time_groups = self.targets.index.get_level_values("time")
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=time_groups,
scoring="balanced_accuracy")
self.model.fit(self.features, self.targets)
score = np.mean(np.nan_to_num(cv_scores, nan=0)) # Treat failed CV folds as zero-skill
n_classes = len(self.model.classes_)
self.edge = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes
self.print(f"Training: {self.targets.value_counts()} Edge:{self.edge:.1%}")
self.Plot("ML", "Edge", self.edge)
def trade(self):
if self.edge <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index)
positions = y_pred * (self.edge * self.kelly_frac).clip(0, 1)
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
if self.use_sl:
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low) * self.retracement_sl
stop_loss = features.bb_high - window if pos > 0 \
else features.bb_low + window # TODO: Refactor
self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0)
tickers = list(self.ActiveSecurities.Keys)
minute_bars = self.History(tickers, start, self.Time, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
new_features = min5_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = min5_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
tickers = list(last_features.index.get_level_values("symbol"))
start = self.Time.replace(hour=9, minute=31, second=0)
end = self.Time.replace(hour=15, minute=55, second=0)
minute_bars = self.History(tickers, start, end, Resolution.Minute)
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
trading_bar = agg_bars(minute_bars, "09:36", "15:55")
trading_bar = trading_bar.join(min5_bar.add_prefix("bb_"))
new_targets = trading_bar.apply(self.calc_exit_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
def calc_exit_target(self, price_bar):
window = (price_bar.bb_high - price_bar.bb_low)
if price_bar.close >= price_bar.open * (1+self.target_gain) \
and "long" in self.strategy: # long trade
if self.use_sl:
stop_loss = price_bar.bb_high - window * self.retracement_sl
return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else:
return +1
elif price_bar.close <= price_bar.open * (1 - self.target_gain) \
and "short" in self.strategy: # short trade
if self.use_sl:
stop_loss = price_bar.bb_low + window * self.retracement_sl
return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
else:
return -1
else:
return 0
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
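# Worked example of the edge formula in train_model (my illustration, not part
# of the original code). With n_classes = 3 the Kelly-style edge
# (n*score - 1)/(n - 1) maps balanced accuracy onto [0, 1]:
#   score = 1/3 (chance level) -> edge = 0.0, no trades are taken
#   score = 0.40               -> edge = 0.10
#   score = 0.50               -> edge = 0.25
#   score = 1.00               -> edge = 1.00 (full Kelly fraction)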
"""
Big Bertha Strategy with Machine Learning
Done
- Custom precision scoring
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
- Double barrier target with SL
@version: 0.20
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.retracement_sl = literal_eval(self.GetParameter("retracement_sl"))
self.target_gain = literal_eval(self.GetParameter("target_gain"))
self.kelly_frac = literal_eval(self.GetParameter("kelly_frac"))
self.capital = literal_eval(self.GetParameter("capital"))
self.use_sl = literal_eval(self.GetParameter("use_sl"))
self.strategy = self.GetParameter("strategy")
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2021, 1, 1)
self.SetEndDate(2022, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.edge = 0
self.cv = TimeSeriesSplitGroups(n_splits=10)
self.features, self.targets = None, None
self.model = GradientBoostingClassifier(n_iter_no_change=10)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.store_features)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
self.Schedule.On(every_day, at(15, 55), self.store_targets)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None: return
training_days = self.features.index.get_level_values("time").unique()
if len(training_days) <= 10: return
time_groups = self.targets.index.get_level_values("time")
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=time_groups,
scoring="balanced_accuracy")
self.model.fit(self.features, self.targets)
score = np.mean(np.nan_to_num(cv_scores, nan=0)) # Treat failed CV folds as zero-skill
n_classes = len(self.model.classes_)
self.edge = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes
self.print(f"Training: {self.targets.value_counts()} Edge:{self.edge:.1%}")
self.Plot("ML", "Edge", self.edge)
def trade(self):
if self.edge <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index)
positions = y_pred * (self.edge * self.kelly_frac).clip(0, 1)
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
if self.use_sl:
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low) * self.retracement_sl
stop_loss = features.bb_high - window if pos > 0 \
else features.bb_low + window # TODO: Simplify SL?
self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0)
tickers = list(self.ActiveSecurities.Keys)
minute_bars = self.History(tickers, start, self.Time, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
new_features = min5_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = min5_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
def store_targets(self): # TODO: Run it only before training
last_features = self.features.query("time == @self.Time.date()")
tickers = list(last_features.index.get_level_values("symbol"))
start = self.Time.replace(hour=9, minute=36, second=0)
end = self.Time.replace(hour=15, minute=55, second=0)
minute_bars = self.History(tickers, start, end, Resolution.Minute)
trading_bar = agg_bars(minute_bars, "09:36", "15:55")
trading_bar = trading_bar.join(last_features)
new_targets = trading_bar.apply(self.calc_exit_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
def calc_exit_target(self, price_bar): # TODO: Simplify SL?
window = (price_bar.bb_high - price_bar.bb_low)
if price_bar.close >= price_bar.open * (1+self.target_gain) \
and "long" in self.strategy: # long trade
if self.use_sl:
stop_loss = price_bar.bb_high - window * self.retracement_sl
return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else:
return +1
elif price_bar.close <= price_bar.open * (1 - self.target_gain) \
and "short" in self.strategy: # short trade
if self.use_sl:
stop_loss = price_bar.bb_low + window * self.retracement_sl
return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
else:
return -1
else:
return 0
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
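# Sizing illustration for the normalization in trade() (hypothetical numbers,
# not the author's code). Each {-1, 0, +1} prediction is scaled by
# edge * kelly_frac; the division only kicks in when gross exposure would
# exceed 100%, capping the book at no leverage.
import pandas as pd

_signals = pd.Series({"AAA": 1, "BBB": -1, "CCC": 1})  # assumed predictions
_positions = _signals * 0.2 * 0.25                     # edge=0.2, kelly_frac=0.25 -> 5% per name
if sum(abs(_positions)) > 1:                           # gross here is 0.15, so no rescaling
    _positions /= sum(abs(_positions))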
"""
Big Bertha Strategy with Machine Learning
Done
- Custom precision scoring
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
- Double barrier target with SL
@version: 0.21
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 0)
self.target_gain = self.GetParameter("target_gain", 0.05)
self.kelly_frac = self.GetParameter("kelly_frac", 0.25)
self.capital = self.GetParameter("capital", 80000)
self.use_sl = self.GetParameter("use_sl", 0)
self.retracement_sl = self.GetParameter("retracement_sl", 1)
self.strategy = self.GetParameter("strategy", "long_short")
self.benchmark = self.GetParameter("benchmark", "SPY")
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 6, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.edge = 0
self.cv = TimeSeriesSplitGroups(n_splits=10)
self.features, self.targets = None, None
self.model = GradientBoostingClassifier(n_iter_no_change=10)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.store_features)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
self.Schedule.On(every_day, at(15, 55), self.store_targets)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None: return
common_idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets
self.features = self.features.loc[common_idx]
self.targets = self.targets.loc[common_idx]
training_days = self.features.index.get_level_values("time")
if len(training_days.unique()) <= 10: return # Require more than 10 days of training data
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=training_days,
scoring="balanced_accuracy")
self.model.fit(self.features, self.targets)
score = np.mean(cv_scores)
n_classes = len(self.model.classes_)
self.edge = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes
self.print(f"Training: {self.targets.value_counts()} Edge:{self.edge:.1%}")
self.Plot("ML", "Edge", self.edge)
def trade(self):
if self.edge <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index)
positions = y_pred * self.edge * self.kelly_frac
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
if self.use_sl:
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low) * self.retracement_sl
stop_loss = features.bb_high - window if pos > 0 \
else features.bb_low + window # TODO: Simplify SL?
self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0)
tickers = list(self.ActiveSecurities.Keys)
minute_bars = self.History(tickers, start, self.Time, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
bertha_bar = agg_bars(minute_bars, "09:31", "09:35")
new_features = bertha_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = bertha_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
tickers = list(last_features.index.get_level_values("symbol"))
start = self.Time.replace(hour=9, minute=36, second=0)
end = self.Time.replace(hour=15, minute=55, second=0)
minute_bars = self.History(tickers, start, end, Resolution.Minute)
trading_bar = agg_bars(minute_bars, "09:36", "15:55")
trading_bar = trading_bar.join(last_features)
new_targets = trading_bar.apply(self.calc_exit_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
def calc_exit_target(self, price_bar):
window = (price_bar.bb_high - price_bar.bb_low)
if price_bar.close >= price_bar.open * (1 + self.target_gain) \
and "long" in self.strategy: # long trade
if self.use_sl:
stop_loss = price_bar.bb_high - window * self.retracement_sl
return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else:
return +1
elif price_bar.close <= price_bar.open * (1 - self.target_gain) \
and "short" in self.strategy: # short trade
if self.use_sl:
stop_loss = price_bar.bb_low + window * self.retracement_sl
return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
else:
return -1
else:
return 0
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
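# Alignment illustration for train_model (assumed data, not real output).
# Features are stored at 9:35 and targets at 15:55, so on any day the two
# frames can disagree (halts, missing history). Intersecting the
# (symbol, time) MultiIndexes keeps only rows present in both:
#   features index: [(AAA, 06-01), (BBB, 06-01), (AAA, 06-02)]
#   targets  index: [(AAA, 06-01), (AAA, 06-02)]
#   intersection  : [(AAA, 06-01), (AAA, 06-02)]  -> the BBB row is dropped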
"""
Big Bertha Strategy with Machine Learning
Done
- Custom precision scoring
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
- Double barrier target with SL
@version: 0.22
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 0)
self.target_gain = self.GetParameter("target_gain", 0.05)
self.kelly_frac = self.GetParameter("kelly_frac", 0.25)
self.capital = self.GetParameter("capital", 80000)
self.use_sl = self.GetParameter("use_sl", 0)
self.retracement_sl = self.GetParameter("retracement_sl", 1)
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY")
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 6, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.edge = 0
self.cv = TimeSeriesSplitGroups(n_splits=10)
self.features, self.targets = None, None
self.model = GradientBoostingClassifier(n_iter_no_change=10)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None: return
common_idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets
self.features = self.features.loc[common_idx]
self.targets = self.targets.loc[common_idx]
training_days = self.features.index.get_level_values("time")
if len(training_days.unique()) <= 10: return # Require more than 10 days of training data
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=training_days,
scoring="balanced_accuracy")
self.model.fit(self.features, self.targets)
score = np.mean(cv_scores)
n_classes = len(self.model.classes_)
self.edge = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes
self.print(f"Training: {self.targets.value_counts()} Edge:{self.edge:.1%}")
self.Plot("ML", "Edge", self.edge)
def enter_trades(self):
self.store_features()
if self.edge <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index)
positions = y_pred * self.edge * self.kelly_frac
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
if self.use_sl:
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low) * self.retracement_sl
stop_loss = features.bb_high - window if pos > 0 \
else features.bb_low + window
self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0)
tickers = list(self.ActiveSecurities.Keys)
minute_bars = self.History(tickers, start, self.Time, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = self.Time.hour, self.Time.minute
bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_features = bertha_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = bertha_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
tickers = list(last_features.index.get_level_values("symbol"))
now = self.Time
minute_bars = self.History(tickers, now - timedelta(minutes=1), now,
Resolution.Minute)
try:
trading_bar = minute_bars.droplevel("time").join(last_features)
new_targets = trading_bar.apply(self.calc_exit_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
except (KeyError, ValueError) as e:
self.print(e)
return
def calc_exit_target(self, price_bar):
entry_price, exit_price = price_bar.bb_close, price_bar.close
window = (price_bar.bb_high - price_bar.bb_low)
if exit_price >= entry_price * (1 + self.target_gain) and self.strategy >= 0: # long trade
if self.use_sl:
stop_loss = price_bar.bb_high - window * self.retracement_sl
return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else:
return +1
elif exit_price <= entry_price * (1 - self.target_gain) and self.strategy <= 0: # short trade
if self.use_sl:
stop_loss = price_bar.bb_low + window * self.retracement_sl
return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
else:
return -1
else:
return 0
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
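# Labeling illustration for calc_exit_target (hypothetical numbers). With
# target_gain = 0.05, entry at the bertha-bar close (bb_close) and exit at the
# 15:55 close:
#   entry 10.00, exit 10.60 -> +6.0% >= +5% and strategy >= 0 -> label +1
#   entry 10.00, exit  9.40 -> -6.0% <= -5% and strategy <= 0 -> label -1
#   entry 10.00, exit 10.20 -> +2.0%, inside the band         -> label  0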
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.23: Lookback parameter
@version: 0.23
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 0) # Minimum trading volume in previous trading day
self.target_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter in the trade
self.kelly_frac = self.GetParameter("kelly_frac", 0.25) # Kelly ratio to use for the position sizing
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.use_sl = self.GetParameter("use_sl", 0) # Use or not the Stop Loss (0/1)
self.retracement_sl = self.GetParameter("retracement_sl", 1) # Retracement percentage to use for the Stop Loss
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 6, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.edge = 0
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.features, self.targets = None, None
self.model = GradientBoostingClassifier(n_iter_no_change=10)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None: return
common_idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets
common_idx = common_idx[common_idx.get_level_values("time") > self.Time - timedelta(self.lookback)]
self.features = self.features.loc[common_idx]
self.targets = self.targets.loc[common_idx]
training_days = self.features.index.get_level_values("time")
if len(training_days.unique()) <= self.cv_splits: return # Require more training days than CV splits
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=training_days,
scoring="balanced_accuracy")
self.model.fit(self.features, self.targets)
score = np.mean(cv_scores)
n_classes = len(self.model.classes_)
self.edge = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes
self.print(f"Training: {self.targets.value_counts()} Edge:{self.edge:.1%}")
self.Plot("ML", "Edge", self.edge)
def enter_trades(self):
self.store_features()
if self.edge <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index)
positions = y_pred * self.edge * self.kelly_frac
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
if self.use_sl:
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low) * self.retracement_sl
stop_loss = features.bb_high - window if pos > 0 \
else features.bb_low + window
self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0)
tickers = list(self.ActiveSecurities.Keys)
minute_bars = self.History(tickers, start, self.Time, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = self.Time.hour, self.Time.minute
bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_features = bertha_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = bertha_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
tickers = list(last_features.index.get_level_values("symbol"))
now = self.Time
minute_bars = self.History(tickers, now - timedelta(minutes=1), now,
Resolution.Minute)
try:
trading_bar = minute_bars.droplevel("time").join(last_features)
new_targets = trading_bar.apply(self.calc_exit_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
except (KeyError, ValueError) as e:
self.print(e)
return
def calc_exit_target(self, price_bar):
entry_price, exit_price = price_bar.bb_close, price_bar.close
window = (price_bar.bb_high - price_bar.bb_low)
if exit_price >= entry_price * (1 + self.target_gain) and self.strategy >= 0: # long trade
if self.use_sl:
stop_loss = price_bar.bb_high - window * self.retracement_sl
return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else:
return +1
elif exit_price <= entry_price * (1 - self.target_gain) and self.strategy <= 0: # short trade
if self.use_sl:
stop_loss = price_bar.bb_low + window * self.retracement_sl
return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
else:
return -1
else:
return 0
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
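# Lookback illustration for train_model (my example). timedelta(self.lookback)
# is a calendar-day window, so with lookback = 365 and self.Time = 2022-06-01
# only rows stamped after 2021-06-01 survive the index filter; the model is
# retrained weekly on this rolling window rather than on the full history.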
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.24
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 0) # Minimum trading volume in previous trading day
self.target_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter in the trade
self.kelly_frac = self.GetParameter("kelly_frac", 0.25) # Kelly ratio to use for the position sizing
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.use_sl = self.GetParameter("use_sl", 0) # Use or not the Stop Loss (0/1)
self.retracement_sl = self.GetParameter("retracement_sl", 1) # Retracement percentage to use for the Stop Loss
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation
self.store_model = self.GetParameter("store_model", None) # Model name if needs to be stored
self.SetStartDate(2021, 6, 1)
#self.SetEndDate(2022, 6, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
#self.ObjectStore.Delete(self.store_model) # Deleting existing data
if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model):
self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model)))
else:
self.model = GradientBoostingClassifier(n_iter_no_change=10)
self.model.kelly = 0 # Read by enter_trades before the first training
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.features, self.targets = None, None
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None or self.targets is None: return
common_idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets
common_idx = common_idx[common_idx.get_level_values("time") > self.Time - timedelta(self.lookback)]
self.features = self.features.loc[common_idx]
self.targets = self.targets.loc[common_idx]
training_days = self.features.index.get_level_values("time")
if len(training_days.unique()) <= 21: return # Require more than one month of training data
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=training_days,
scoring="balanced_accuracy")
self.model.fit(self.features, self.targets)
if self.store_model is not None:
self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model))
score = np.mean(cv_scores)
n_classes = len(self.model.classes_)
self.model.kelly = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes
self.print(f"Training: {self.targets.value_counts()} Edge:{self.model.kelly:.1%}")
self.Plot("ML", "Edge", self.model.kelly)
def enter_trades(self):
self.store_features()
if self.model.kelly <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index)
positions = y_pred * self.model.kelly * self.kelly_frac
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
if self.use_sl:
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low) * self.retracement_sl
stop_loss = features.bb_high - window if pos > 0 \
else features.bb_low + window
self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0)
tickers = list(self.ActiveSecurities.Keys)
minute_bars = self.History(tickers, start, self.Time, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = self.Time.hour, self.Time.minute
bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_features = bertha_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = bertha_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
tickers = list(last_features.index.get_level_values("symbol"))
now = self.Time
minute_bars = self.History(tickers, now - timedelta(minutes=1), now,
Resolution.Minute)
try:
trading_bar = minute_bars.droplevel("time").join(last_features)
new_targets = trading_bar.apply(self.calc_exit_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
except (KeyError, ValueError) as e:
self.print(e)
return
def calc_exit_target(self, price_bar):
entry_price, exit_price = price_bar.bb_close, price_bar.close
window = (price_bar.bb_high - price_bar.bb_low)
if exit_price >= entry_price * (1 + self.target_gain) and self.strategy >= 0: # long trade
if self.use_sl:
stop_loss = price_bar.bb_high - window * self.retracement_sl
return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else:
return +1
elif exit_price <= entry_price * (1 - self.target_gain) and self.strategy <= 0: # short trade
if self.use_sl:
stop_loss = price_bar.bb_low + window * self.retracement_sl
return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
else:
return -1
else:
return 0
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
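# Persistence note for the ObjectStore logic above (restating what the code
# does, with an assumed key name): the fitted classifier is pickled together
# with the ad-hoc `kelly` attribute attached to it, so a warm-started run can
# trade immediately after loading:
#
#     self.ObjectStore.SaveBytes("bb_model", pickle.dumps(self.model))
#     model = pickle.loads(bytes(self.ObjectStore.ReadBytes("bb_model")))
#     model.kelly  # restored along with the estimator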
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.25
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 0) # Minimum trading volume in previous trading day
self.target_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.use_sl = self.GetParameter("use_sl", 0) # Use or not the Stop Loss (0/1)
self.retracement_sl = self.GetParameter("retracement_sl", 1) # Retracement percentage to use for the Stop Loss
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation
self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 9, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
# self.ObjectStore.Delete(self.store_model) # Deleting existing data
if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model):
self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model)))
else:
self.model = GradientBoostingClassifier(n_iter_no_change=10)
self.model.kelly = 0 # Read by enter_trades before the first training
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.features, self.targets = None, None
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None or self.targets is None: return
common_idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets
common_idx = common_idx[common_idx.get_level_values("time") > self.Time - timedelta(self.lookback)]
self.features = self.features.loc[common_idx]
self.targets = self.targets.loc[common_idx]
training_days = common_idx.get_level_values("time")
if len(training_days.unique()) <= 21: return # Require more than one month of training data
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=training_days,
scoring="balanced_accuracy")
self.model.fit(self.features, self.targets)
if self.store_model is not None:
self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model))
score = np.mean(cv_scores)
n_classes = len(self.model.classes_)
self.model.kelly = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes
self.print(f"Training: {self.targets.value_counts()} Edge:{self.model.kelly:.1%}")
self.Plot("ML", "Edge", self.model.kelly)
def enter_trades(self):
self.store_features()
if self.model.kelly <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2 # Selecting only prob > 50% and scaling to 100%
positions = y_proba.idxmax(axis=1) * sizes * self.model.kelly # Sizing based on Kelly and individual probability
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_proba.idxmax(axis=1).value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
if self.use_sl:
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low) * self.retracement_sl
stop_loss = features.bb_high - window if pos > 0 \
else features.bb_low + window
self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0)
tickers = list(self.ActiveSecurities.Keys)
minute_bars = self.History(tickers, start, self.Time, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = self.Time.hour, self.Time.minute
bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_features = bertha_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = bertha_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
self.Log(last_features)
tickers = list(last_features.index.get_level_values("symbol"))
now = self.Time
minute_bars = self.History(tickers, now - timedelta(minutes=1), now,
Resolution.Minute)
self.Log(minute_bars)
try:
trading_bar = minute_bars.droplevel("time").join(last_features)
new_targets = trading_bar.apply(self.calc_exit_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
except (KeyError, ValueError) as e:
self.print(e)
return
def calc_exit_target(self, price_bar):
entry_price, exit_price = price_bar.bb_close, price_bar.close
window = (price_bar.bb_high - price_bar.bb_low)
if exit_price >= entry_price * (1 + self.target_gain) and self.strategy >= 0: # long trade
if self.use_sl:
stop_loss = price_bar.bb_high - window * self.retracement_sl
return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else:
return +1
elif exit_price <= entry_price * (1 - self.target_gain) and self.strategy <= 0: # short trade
if self.use_sl:
stop_loss = price_bar.bb_low + window * self.retracement_sl
return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
else:
return -1
else:
return 0
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
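# Sizing illustration for enter_trades (my numbers). The per-trade size maps
# the winning-class probability from (0.5, 1.0] onto (0, 1]:
#   p = 0.50 -> size 0.0 (no position)
#   p = 0.55 -> size 0.1
#   p = 0.75 -> size 0.5
#   p = 1.00 -> size 1.0
# v0.28 below generalizes the 0.5 floor to a min_proba parameter, with
# scaling = 1/(1 - min_proba).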
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.26: Adding both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.26
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.target_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation
self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored
self.sl_retr = self.GetParameter("retracement_sl", 0) # Retracement percentage to use for the Stop Loss, disabled if 0
self.tp_ext = self.GetParameter("extension_tp", 0) # Extension percentage to use for the Take Profit, disabled if 0
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 9, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
#self.ObjectStore.Delete(self.store_model) # Deleting existing data
if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model):
self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model)))
else:
self.model = GradientBoostingClassifier(n_iter_no_change=10)
self.model.kelly = 0 # Read by enter_trades before the first training
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.features, self.targets = None, None
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None or self.targets is None: return
common_idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets
common_idx = common_idx[common_idx.get_level_values("time") > self.Time - timedelta(self.lookback)]
self.features = self.features.loc[common_idx]
self.targets = self.targets.loc[common_idx]
training_days = common_idx.get_level_values("time")
if len(training_days.unique()) <= 21: return # Require more than one month of training data
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=training_days,
scoring="balanced_accuracy")
self.model.fit(self.features, self.targets)
if self.store_model is not None:
self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model))
score = np.mean(cv_scores)
n_classes = len(self.model.classes_)
self.model.kelly = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes
self.print(f"Training: {self.targets.value_counts()} Edge:{self.model.kelly:.1%}")
self.Plot("ML", "Edge", self.model.kelly)
def enter_trades(self):
self.store_features()
if self.model.kelly <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
y_pred = y_proba.idxmax(axis=1)
sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2 # Selecting only prob > 50% and scaling to 100%
positions = y_pred * sizes * self.model.kelly # Sizing based on Kelly and individual probability
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low)
if self.sl_retr > 0:
stop_loss = features.bb_high - window * self.sl_retr if pos > 0 \
else features.bb_low + window * self.sl_retr
self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss)
if self.tp_ext > 0:
take_profit = features.bb_low + window * self.tp_ext if pos > 0 \
else features.bb_high - window * self.tp_ext
self.LimitOrder(symbol, -qty, take_profit)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_features = bertha_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = bertha_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
self.Log(last_features)
tickers = list(last_features.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
self.Log(f"Target time: {last_minute}")
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
self.Log(minute_bars)
try:
trading_bar = minute_bars.droplevel("time").join(last_features)
new_targets = trading_bar.apply(self.calc_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
except (KeyError, ValueError) as e:
self.print(e)
return
def calc_target(self, price_bar):
entry_price, exit_price = price_bar.bb_close, price_bar.close
window = (price_bar.bb_high - price_bar.bb_low)
if exit_price >= entry_price * (1 + self.target_gain) and self.strategy >= 0: # long trade
if self.sl_retr > 0:
stop_loss = price_bar.bb_high - window * self.sl_retr
return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else:
return +1
elif exit_price <= entry_price * (1 - self.target_gain) and self.strategy <= 0: # short trade
if self.sl_retr > 0:
stop_loss = price_bar.bb_low + window * self.sl_retr
return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
else:
return -1
else:
return 0
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.27
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
def catch_errors(func):
def wrap(self, *args, **kwargs):
try:
result = func(self, *args, **kwargs)
return result
except (KeyError, ValueError) as e:
self.print(e)
return
return wrap
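# Usage sketch for the decorator above (an assumption: it appears intended to
# replace the repeated try/except blocks in store_targets of earlier versions):
#
#     @catch_errors
#     def store_targets(self):
#         ...  # any KeyError/ValueError is logged via self.print and swallowed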
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.target_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation
self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored
self.min_proba = self.GetParameter("min_proba", 0.5) # Threshold probability to trigger a long/short signal
self.sl_retr = self.GetParameter("retracement_sl", 0) # Retracement percentage to use for the Stop Loss, disabled if 0
self.tp_ext = self.GetParameter("extension_tp", 0) # Extension percentage to use for the Take Profit, disabled if 0
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 9, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
#self.ObjectStore.Delete(self.store_model) # Deleting existing data
if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model):
self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model)))
else:
self.model = GradientBoostingClassifier(n_iter_no_change=10)
self.model.kelly = 0 # Default edge before the first training
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.features, self.targets = None, None
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None or self.targets is None: return
idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)]
self.features = self.features.loc[idx]
self.targets = self.targets.loc[idx]
training_days = idx.get_level_values("time")
if len(training_days.unique()) <= 21: return # Require more than one month of training data
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=training_days,
scoring="balanced_accuracy")
self.model.fit(self.features, self.targets)
if self.store_model is not None:
self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model))
score = np.mean(cv_scores)
n_classes = len(self.model.classes_)
self.model.kelly = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes (worked example below)
self.print(f"Training: {self.targets.value_counts()} Edge:{self.model.kelly:.1%}")
self.Plot("ML", "Edge", self.model.kelly)
def enter_trades(self):
self.store_features()
if self.model.kelly <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
y_pred = y_proba.idxmax(axis=1)
scaling = 1 / (1 - self.min_proba)
sizes = (y_proba.max(axis=1) - self.min_proba).clip(0, 1) * scaling # Selecting only prob > min_proba and scaling
positions = y_pred * sizes * self.model.kelly # Sizing based on Kelly and individual probability
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low)
if self.sl_retr > 0:
stop_loss = features.bb_high - window * self.sl_retr if pos > 0 \
else features.bb_low + window * self.sl_retr
self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss)
if self.tp_ext > 0:
take_profit = features.bb_low + window * self.tp_ext if pos > 0 \
else features.bb_high - window * self.tp_ext
self.LimitOrder(symbol, -qty, take_profit)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
@catch_errors
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_features = bertha_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = bertha_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
@catch_errors
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
self.Log(last_features)
tickers = list(last_features.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
self.Log(f"Target time: {last_minute}")
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
self.Log(minute_bars)
trading_bar = minute_bars.droplevel("time").join(last_features)
new_targets = trading_bar.apply(self.calc_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
def calc_target(self, price_bar):
entry_price, exit_price = price_bar.bb_close, price_bar.close
window = (price_bar.bb_high - price_bar.bb_low)
if exit_price >= entry_price * (1 + self.target_gain) and self.strategy >= 0: # long trade
if self.sl_retr > 0:
stop_loss = price_bar.bb_high - window * self.sl_retr
return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else:
return +1
elif exit_price <= entry_price * (1 - self.target_gain) and self.strategy <= 0: # short trade
if self.sl_retr > 0:
stop_loss = price_bar.bb_low + window * self.sl_retr
return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
else:
return -1
else:
return 0
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
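
# Worked example (sketch, not used by the algorithm): the Kelly edge formula
# in train_model rescales balanced accuracy so that random guessing scores 0
# and a perfect classifier scores 1. With n classes, chance level is 1/n.
if __name__ == "__main__":
    def edge(score, n_classes):
        return (n_classes * score - 1) / (n_classes - 1)

    print(edge(1 / 3, 3))  # 0.0 -> chance-level accuracy gives no edge
    print(edge(0.40, 3))   # 0.1 -> 40% balanced accuracy gives a 10% edge
    print(edge(1.00, 3))   # 1.0 -> perfect classification
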
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)

"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.28
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.dummy import DummyClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.cluster import DBSCAN, OPTICS, KMeans
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.gaussian_process import GaussianProcessClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
def catch_errors(func):
def wrap(self, *args, **kwargs):
try:
result = func(self, *args, **kwargs)
return result
except (KeyError, ValueError) as e:
self.print(e)
return
return wrap
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.target_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation
self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored
self.min_proba = self.GetParameter("min_proba", 0.5) # Threshold probability to trigger a long/short signal
self.sl_retr = self.GetParameter("retracement_sl", 0.0) # Retracement percentage to use for the Stop Loss, disabled if 0
self.tp_ext = self.GetParameter("extension_tp", 0.0) # Extension percentage to use for the Take Profit, disabled if 0
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 9, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
#self.ObjectStore.Delete(self.store_model) # Deleting existing data
if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model):
self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model)))
else:
pipe = Pipeline([("scaling", None),
("clustering", None),
("model", LogisticRegression())])
params = dict(scaling=[None,
# MinMaxScaler(),
StandardScaler()],
clustering=[None,
# KMeans(),
# OPTICS(),
DBSCAN()],
model=[DummyClassifier(),
# LogisticRegression(),
# KNeighborsClassifier(),
# GaussianProcessClassifier(),
# GaussianNB(),
GradientBoostingClassifier(n_iter_no_change=3)])
self.model = GridSearchCV(pipe, param_grid=params)
self.model.kelly = 0 # Default edge before the first training
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.features, self.targets = None, None
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None or self.targets is None: return
idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)]
self.features = self.features.loc[idx]
self.targets = self.targets.loc[idx]
training_days = idx.get_level_values("time")
if len(training_days.unique()) <= 21: return # Require more than one month of training data
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=training_days,
scoring="balanced_accuracy")
self.model.fit(self.features, self.targets)
if self.store_model is not None:
self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model))
score = np.mean(cv_scores)
n_classes = len(self.model.classes_)
self.model.kelly = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes
self.print(f"Training: {self.targets.value_counts()} Edge:{self.model.kelly:.1%}")
self.print(f"Best model: {self.model.best_estimator_}")
self.Log(pd.DataFrame(self.model.cv_results_).to_string())
self.Plot("ML", "Edge", self.model.kelly)
def enter_trades(self):
self.store_features()
if self.model.kelly <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
y_pred = y_proba.idxmax(axis=1)
scaling = 1 / (1 - self.min_proba)
sizes = (y_proba.max(axis=1) - self.min_proba).clip(0, 1) * scaling # Selecting only prob > min_proba and scaling
positions = y_pred * sizes * self.model.kelly # Sizing based on Kelly and individual probability (see sizing sketch below)
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low)
if self.sl_retr > 0:
stop_loss = features.bb_high - window * self.sl_retr if pos > 0 \
else features.bb_low + window * self.sl_retr
self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss)
if self.tp_ext > 0:
take_profit = features.bb_high + window * self.tp_ext if pos > 0 \
else features.bb_low - window * self.tp_ext
self.LimitOrder(symbol, -qty, take_profit)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
@catch_errors
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_features = bertha_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = bertha_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
@catch_errors
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
self.Log(last_features)
tickers = list(last_features.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
self.Log(f"Target time: {last_minute}")
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
self.Log(minute_bars)
trading_bar = minute_bars.droplevel("time").join(last_features)
new_targets = trading_bar.apply(self.calc_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
def calc_target(self, price_bar):
entry_price, exit_price = price_bar.bb_close, price_bar.close
window = (price_bar.bb_high - price_bar.bb_low)
if exit_price >= entry_price * (1 + self.target_gain) and self.strategy >= 0: # long trade
if self.sl_retr > 0:
stop_loss = price_bar.bb_high - window * self.sl_retr
return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else:
return +1
elif exit_price <= entry_price * (1 - self.target_gain) and self.strategy <= 0: # short trade
if self.sl_retr > 0:
stop_loss = price_bar.bb_low + window * self.sl_retr
return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
else:
return -1
else:
return 0
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
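
# Sizing sketch (illustrative numbers only): probabilities at or below
# min_proba get zero size, the rest are rescaled to [0, 1], and if the summed
# absolute positions exceed 1 they are normalized so no leverage is used.
if __name__ == "__main__":
    import pandas as pd

    min_proba, kelly = 0.5, 0.8
    y_proba = pd.Series({"AAA": 0.75, "BBB": 0.55, "CCC": 0.45})  # hypothetical
    y_pred = pd.Series({"AAA": 1, "BBB": -1, "CCC": 1})    # predicted class
    sizes = (y_proba - min_proba).clip(0, 1) / (1 - min_proba)
    positions = y_pred * sizes * kelly
    if positions.abs().sum() > 1:
        positions /= positions.abs().sum()
    print(positions.round(2).to_dict())  # {'AAA': 0.4, 'BBB': -0.08, 'CCC': 0.0}
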
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)

"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.29
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.dummy import DummyClassifier
from sklearn.cluster import DBSCAN
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, cross_val_score
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
def catch_errors(func):
def wrap(self, *args, **kwargs):
try:
result = func(self, *args, **kwargs)
return result
except (KeyError, ValueError) as e:
self.print(e)
return
return wrap
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.target_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation
self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored
self.min_proba = self.GetParameter("min_proba", 0.5) # Threshold probability to trigger a long/short signal
self.sl_retr = self.GetParameter("retracement_sl", 0.0) # Retracement percentage to use for the Stop Loss, disabled if 0
self.tp_ext = self.GetParameter("extension_tp", 0.0) # Extension percentage to use for the Take Profit, disabled if 0
self.entry_mns = self.GetParameter("entry_mn", 5) # Entry time (minutes after 9.30)
self.exit_mns = self.GetParameter("exit_mn", 385) # Exit time (minutes after 9.30)
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 9, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
#self.ObjectStore.Delete(self.store_model) # Deleting existing data
if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model):
self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model)))
else:
pipe = Pipeline([("scaling", None),
("clustering", None),
("model", LogisticRegression())])
params = dict(scaling=[None, StandardScaler()],
clustering=[None, DBSCAN()],
model=[DummyClassifier(),
GradientBoostingClassifier(n_iter_no_change=3)])
self.model = GridSearchCV(pipe, param_grid=params)
self.model.kelly = 0 # Default edge before the first training
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.features, self.targets = None, None
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
entry_hr, entry_mn = 9 + self.entry_mns // 60, 30 + self.entry_mns % 60 # minute term can exceed 59, see scheduling sketch below
exit_hr, exit_mn = 9 + self.exit_mns // 60, 30 + self.exit_mns % 60
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(entry_hr, entry_mn), self.enter_trades)
self.Schedule.On(every_day, at(exit_hr, exit_mn), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None or self.targets is None: return
idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)]
self.features = self.features.loc[idx]
self.targets = self.targets.loc[idx]
training_days = idx.get_level_values("time")
if len(training_days.unique()) <= 21: return # Require more than one month of training data
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=training_days,
scoring="balanced_accuracy")
self.model.fit(self.features, self.targets)
if self.store_model is not None:
self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model))
score = np.mean(cv_scores)
n_classes = len(self.model.classes_)
self.model.kelly = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes
self.print(f"Training: {self.targets.value_counts()} Edge:{self.model.kelly:.1%}")
self.print(f"Best model: {self.model.best_estimator_}")
self.Log(pd.DataFrame(self.model.cv_results_).to_string())
self.Plot("ML", "Edge", self.model.kelly)
def enter_trades(self):
self.store_features()
if self.model.kelly <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
y_pred = y_proba.idxmax(axis=1)
scaling = 1 / (1 - self.min_proba)
sizes = (y_proba.max(axis=1) - self.min_proba).clip(0, 1) * scaling # Selecting only prob > min_proba and scaling
positions = y_pred * sizes * self.model.kelly # Sizing based on Kelly and individual probability
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty) # TODO: Change to Limit Order?
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low)
if self.sl_retr > 0:
stop_loss = features.bb_high - window * self.sl_retr if pos > 0 \
else features.bb_low + window * self.sl_retr
self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss)
if self.tp_ext > 0:
take_profit = features.bb_high + window * self.tp_ext if pos > 0 \
else features.bb_low - window * self.tp_ext
self.LimitOrder(symbol, -qty, take_profit)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
@catch_errors
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_features = bertha_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = bertha_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
@catch_errors
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
self.Log(last_features)
tickers = list(last_features.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
self.Log(f"Target time: {last_minute}")
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
self.Log(minute_bars)
trading_bar = minute_bars.droplevel("time").join(last_features)
new_targets = trading_bar.apply(self.calc_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
def calc_target(self, price_bar):
entry_price, exit_price = price_bar.bb_close, price_bar.close
window = (price_bar.bb_high - price_bar.bb_low)
if exit_price >= entry_price * (1 + self.target_gain) and self.strategy >= 0: # long trade
if self.sl_retr > 0:
stop_loss = price_bar.bb_high - window * self.sl_retr
return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else:
return +1
elif exit_price <= entry_price * (1 - self.target_gain) and self.strategy <= 0: # short trade
if self.sl_retr > 0:
stop_loss = price_bar.bb_low + window * self.sl_retr
return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
else:
return -1
else:
return 0
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
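
# Scheduling sketch (an assumption, not verified against Lean internals): the
# hour/minute split in Initialize can produce minutes >= 60 (entry_mn=45 gives
# 9:75), which TimeRules.At presumably normalizes via .NET TimeSpan to 10:15.
# A divmod over total minutes sidesteps any reliance on that behaviour.
if __name__ == "__main__":
    def clock(minutes_after_open):
        hr, mn = divmod(9 * 60 + 30 + minutes_after_open, 60)
        return hr, mn

    print(clock(5))    # (9, 35)  -> default entry time
    print(clock(45))   # (10, 15) -> instead of the raw (9, 75)
    print(clock(385))  # (15, 55) -> default exit time
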
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)

"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.29a
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
def catch_errors(func):
def wrap(self, *args, **kwargs):
try:
result = func(self, *args, **kwargs)
return result
except (KeyError, ValueError) as e:
self.print(e)
return
return wrap
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.target_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation
self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored
self.min_proba = self.GetParameter("min_proba", 0.5) # Threshold probability to trigger a long/short signal
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 9, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
#self.ObjectStore.Delete(self.store_model) # Deleting existing data
if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model):
self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model)))
else:
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.model.edge = 0
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.features, self.targets = None, None
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None or self.targets is None: return
idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)]
self.features = self.features.loc[idx]
self.targets = self.targets.loc[idx]
days = idx.get_level_values("time")
if len(days.unique()) <= 21: return # Require more than one month of training data
params = dict(sample_weight=abs(self.targets)) # Weighting each sample by its importance (see labeling sketch below)
targets_bin = self.targets.apply(lambda x: +1 if x > self.target_gain else
-1 if x < -self.target_gain else 0)
cv_scores = cross_val_score(self.model, X=self.features, y=targets_bin,
cv=self.cv, groups=days, fit_params=params,
scoring="balanced_accuracy")
self.model.fit(self.features, targets_bin, **params)
if self.store_model is not None:
self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model))
score = np.mean(cv_scores)
n_classes = len(self.model.classes_)
self.model.kelly = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes
self.print(f"Training: {targets_bin.value_counts()} Edge:{self.model.kelly:.1%}")
self.Plot("ML", "Edge", self.model.kelly)
def enter_trades(self):
self.store_features()
if self.model.kelly <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
y_pred = y_proba.idxmax(axis=1)
scaling = 1 / (1 - self.min_proba)
sizes = (y_proba.max(axis=1) - self.min_proba).clip(0, 1) * scaling # Selecting only prob > min_proba and scaling
positions = y_pred * sizes * self.model.kelly # Sizing based on Kelly and individual probability
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty) # TODO: Change to Limit Order?
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
@catch_errors
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_features = bertha_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = bertha_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
@catch_errors
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
self.Log(last_features)
tickers = list(last_features.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
self.Log(f"Target time: {last_minute}")
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
self.Log(minute_bars)
trading_bar = minute_bars.droplevel("time").join(last_features)
new_targets = trading_bar.eval("close / bb_close - 1") # Calculate the trading return
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
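
# Labeling sketch (illustrative): raw trade returns are binned into {-1, 0, +1}
# classes with target_gain as the threshold, while the absolute return is kept
# as a sample weight so that larger moves count more during training.
if __name__ == "__main__":
    import pandas as pd

    target_gain = 0.05
    y = pd.Series([0.08, -0.06, 0.02, -0.01])  # close / bb_close - 1 per trade
    y_bin = y.apply(lambda r: +1 if r > target_gain else
                    -1 if r < -target_gain else 0)
    weights = y.abs()                          # passed as sample_weight to fit
    print(list(y_bin))    # [1, -1, 0, 0]
    print(list(weights))  # [0.08, 0.06, 0.02, 0.01]
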
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.30a
@creation date: 05/07/2022
"""
import numpy as np
from AlgorithmImports import *
import pickle
import sklearn
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
def catch_errors(func):
def wrap(self, *args, **kwargs):
try:
result = func(self, *args, **kwargs)
return result
except (KeyError, ValueError) as e:
self.print(e)
return
return wrap
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.tgt_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation
self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 9, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
#self.ObjectStore.Delete(self.store_model) # Deleting existing data
if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model):
self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model)))
else:
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.model.kelly = 0
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.x, self.y = None, None
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.x is None or self.y is None: return
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)]
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
days = idx.get_level_values("time")
if len(days.unique()) <= 21: return # Require more than one month of training data
y_bin = self.y.apply(lambda x: +1 if x >= self.tgt_gain and self.strategy >= 0 else
-1 if x <= -self.tgt_gain and self.strategy <= 0 else 0)
# Replace cross_val_score with custom loop to use sample weighted metrics
scores = []
for train_idx, test_idx in self.cv.split(self.x, groups=days):
model_temp = sklearn.base.clone(self.model)
x_train, x_test = self.x.iloc[train_idx], self.x.iloc[test_idx]
y_train, y_test = y_bin.iloc[train_idx], y_bin.iloc[test_idx]
w_train, w_test = self.y.iloc[train_idx], self.y.iloc[test_idx]
model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by log returns
y_pred = model_temp.predict(x_test)
scores += [kelly_pos(y_test, y_pred, sample_weight=abs(w_test))] # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(self.y)))
self.model.kelly = 0 if np.isnan(np.nanmean(scores)) else np.nanmean(scores)
if self.store_model is not None:
self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model))
self.print(f"Training: {y_bin.value_counts()} Edge:{self.model.kelly:.1%}")
self.Plot("ML", "Edge", self.model.kelly)
def enter_trades(self):
self.store_features()
if self.model.kelly <= 0: return
x_pred = self.x.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
y_pred = y_proba.idxmax(axis=1)
sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2 # Selecting only prob > 0.5 and scaling to [0, 1]
positions = y_pred * sizes * self.model.kelly.clip(0, 1) # Sizing based on Kelly and individual probability
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty) # TODO: Change to Limit Order?
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
@catch_errors
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_features = bertha_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = bertha_bar["open"] / yesterday_close - 1
self.x = pd.concat([new_features.dropna(), self.x])
self.print(f"Stored new features, total: {len(self.x)}")
@catch_errors
def store_targets(self):
last_x = self.x.query("time == @self.Time.date()")
self.Log(last_x)
tickers = list(last_x.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
self.Log(f"Target time: {last_minute}")
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
self.Log(minute_bars)
trading_bar = minute_bars.droplevel("time").join(last_x)
new_y = trading_bar.eval("close / bb_close - 1") # Calculate the trading return
self.y = pd.concat([new_y.dropna(), self.y])
self.print(f"Stored new targets, total: {len(self.y)}")
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
self.Log(f"{self.Time} {msg}")
def kelly_pos(y_true, y_pred, sample_weight=None): # TODO: differentiate between losses on 0 and on 1/-1
trades = y_pred!=0
wins = y_true[trades]==y_pred[trades]
win_rate = wins.mean()
loss_rate = 1-win_rate
avg_win = sample_weight[trades][wins].mean()
avg_loss = sample_weight[trades][~wins].mean()
return win_rate/avg_loss - loss_rate/avg_win
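
# Usage sketch for kelly_pos with made-up numbers: two winning long trades and
# one loser. With win rate p = 2/3, average win b = 3% and average loss a = 2%,
# the Kelly fraction f* = p/a - q/b comes out far above 1, which is why the
# strategy clips the Kelly value into [0, 1] before using it for sizing.
if __name__ == "__main__":
    import pandas as pd

    y_true = pd.Series([+1, +1, -1])      # realized direction of each trade
    y_pred = pd.Series([+1, +1, +1])      # predicted direction (all traded)
    rets = pd.Series([0.03, 0.03, 0.02])  # absolute returns per trade
    print(kelly_pos(y_true, y_pred, sample_weight=rets))
    # (2/3)/0.02 - (1/3)/0.03 ~ 22.2
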
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.31a: Minor improvements and library implementation
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.31a
@creation date: 05/07/2022
"""
import numpy as np
from AlgorithmImports import *
import pickle
import sklearn
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
import utils as utl
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.tgt_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation
self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 9, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
# self.ObjectStore.Delete(self.store_model) # Deleting existing data
if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model):
self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model)))
else:
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.model.kelly = 0
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.x, self.y = None, None
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.x is None or self.y is None: return
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
days = idx.get_level_values("time")
if len(days.unique()) <= self.cv_splits: return # Days required for CV
y_bin = self.y.apply(lambda x: +1 if x >= self.tgt_gain and self.strategy >= 0 else
-1 if x <= -self.tgt_gain and self.strategy <= 0 else 0)
# Replace cross_val_score with custom loop to use sample weighted metrics
scores = []
for train_idx, test_idx in self.cv.split(self.x, groups=days):
model_tmp = sklearn.base.clone(self.model)
x_train, x_test = self.x.iloc[train_idx], self.x.iloc[test_idx]
y_train, y_test = y_bin.iloc[train_idx], y_bin.iloc[test_idx]
w_train, w_test = self.y.iloc[train_idx], self.y.iloc[test_idx]
model_tmp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by log returns
y_pred = model_tmp.predict(x_test)
scores += [
utl.kelly_score(y_test, y_pred, returns=abs(w_test))] # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(self.y)))
self.model.kelly = np.clip(np.nanmean(scores), 0, 1)
if self.store_model is not None:
self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model))
self.print(f"Training: {y_bin.value_counts()} Kelly:{self.model.kelly:.1%}")
self.Plot("ML", "Kelly", self.model.kelly)
def enter_trades(self):
self.store_features()
if self.model.kelly <= 0: return
x_pred = self.x.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
y_pred = y_proba.idxmax(axis=1)
sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2 # Selecting only prob > 0.5 and scaling to [0, 1]
positions = y_pred * sizes * self.model.kelly # Sizing based on Kelly and individual probability
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty) # TODO: Change to Limit Order?
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
@utl.catch_errors
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_features = bertha_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = bertha_bar["open"] / yesterday_close - 1
self.x = pd.concat([new_features.dropna(), self.x])
self.print(f"Stored new features, total: {len(self.x)}")
@utl.catch_errors
def store_targets(self):
last_x = self.x.query("time == @self.Time.date()")
self.Log(last_x)
tickers = list(last_x.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
self.Log(f"Target time: {last_minute}")
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
self.Log(minute_bars)
trading_bar = minute_bars.droplevel("time").join(last_x)
new_y = trading_bar.eval("close / bb_close - 1") # Calculate the trading return
self.y = pd.concat([new_y.dropna(), self.y])
self.print(f"Stored new targets, total: {len(self.y)}")
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
self.Log(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
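
# Aggregation sketch (assumes agg_bars behaves like a between-time filter
# followed by this groupby): minute bars in a (symbol, time) MultiIndex
# collapse into one daily OHLCV bar per symbol via GROUPER-style grouping
# and AGG_OPS-style aggregation.
if __name__ == "__main__":
    import pandas as pd

    times = pd.to_datetime(["2022-06-01 09:31", "2022-06-01 09:32"])
    bars = pd.DataFrame({"open": [10.0, 10.2], "high": [10.3, 10.4],
                         "low": [9.9, 10.1], "close": [10.2, 10.3],
                         "volume": [1000, 1500]},
                        index=pd.MultiIndex.from_product(
                            [["XYZ"], times], names=["symbol", "time"]))
    grouper = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
    agg_ops = {"open": "first", "close": "last", "high": "max",
               "low": "min", "volume": "sum"}
    print(bars.groupby(grouper).agg(agg_ops))
    # one row for (XYZ, 2022-06-01): open 10.0, close 10.3, high 10.4,
    # low 9.9, volume 2500
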
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.32a: Improved logging
v0.31a: Minor improvements and library implementation
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.32a
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import sklearn
import pandas as pd
pd.set_option('mode.use_inf_as_na', True)
from sklearn.ensemble import GradientBoostingClassifier
import utils as utl
from timeseriescv import TimeSeriesSplitGroups
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.tgt_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation
self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 11, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
# self.ObjectStore.Delete(self.store_model) # Deleting existing data
if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model):
self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model)))
else:
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.model.kelly = 0
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.x, self.y = None, None
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.x is None or self.y is None: return
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
days = idx.get_level_values("time")
if len(days.unique()) <= self.cv_splits: return # Days required for CV
y_bin = self.y.apply(lambda x: +1 if x >= self.tgt_gain and self.strategy >= 0 else
-1 if x <= -self.tgt_gain and self.strategy <= 0 else 0)
# Replace cross_val_score with custom loop to use sample weighted metrics
scores = []
for train_idx, test_idx in self.cv.split(self.x, groups=days):
model_tmp = sklearn.base.clone(self.model)
x_train, x_test = self.x.iloc[train_idx], self.x.iloc[test_idx]
y_train, y_test = y_bin.iloc[train_idx], y_bin.iloc[test_idx]
w_train, w_test = self.y.iloc[train_idx], self.y.iloc[test_idx]
model_tmp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by log returns
y_pred = model_tmp.predict(x_test)
scores += [utl.kelly_score(y_test, y_pred, returns=abs(w_test))] # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(self.y)))
self.model.kelly = np.clip(np.nanmean(scores), 0, 1)
if self.store_model is not None:
self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model))
self.print(f"Training: {y_bin.value_counts()} Kelly:{self.model.kelly:.1%}")
self.Plot("ML", "Kelly", self.model.kelly)
def enter_trades(self):
self.store_features()
if self.model.kelly <= 0: return
x_pred = self.x.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
self.print(f"Predictions: {y_proba}")
self.Notify.Email("hb_beawai@googlegroups.com", "Big Bertha Predictions", y_proba)
y_pred = y_proba.idxmax(axis=1)
sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2 # Selecting only prob > 0.5 and scaling to [0, 1]
positions = y_pred * sizes * self.model.kelly # Sizing based on Kelly and individual probability
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty) # TODO: Change to Limit Order?
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
@utl.catch_errors
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_x = bertha_bar.add_prefix("bb_") # Big Bertha OHLCV features (see feature sketch below)
new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_x["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_x["gap"] = bertha_bar["open"] / yesterday_close - 1
self.x = pd.concat([new_x.dropna(), self.x])
self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}")
@utl.catch_errors
def store_targets(self):
last_x = self.x.query("time == @self.Time.date()")
tickers = list(last_x.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
trading_bar = minute_bars.droplevel("time").join(last_x)
new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return
self.y = pd.concat([new_y, self.y])
self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}")
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
self.Log(f"{self.Time} {msg}")
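
# Feature sketch (illustrative): the bb_* columns from the opening "Big
# Bertha" bar become scale-free features with DataFrame.eval, plus the
# overnight gap against yesterday's close (assumed at 9.8 here).
if __name__ == "__main__":
    import pandas as pd

    bb = pd.DataFrame({"bb_open": [10.0], "bb_high": [10.6],
                       "bb_low": [9.9], "bb_close": [10.5]})
    bb.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
    bb.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
    bb.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
    bb["gap"] = bb["bb_open"] / 9.8 - 1
    print(bb.round(3).to_dict("records"))
    # bb_size 0.07, bb_close_range 0.857, bb_open_range 0.143, gap 0.02
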
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.33a: Improved email notification
v0.32a: Improved logging
v0.31a: Minor improvements and library implementation
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.33a
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import sklearn
import pandas as pd
pd.set_option('mode.use_inf_as_na', True)
from sklearn.ensemble import GradientBoostingClassifier
import utils as utl
from timeseriescv import TimeSeriesSplitGroups
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.tgt_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation
self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 11, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
# self.ObjectStore.Delete(self.store_model) # Deleting existing data
if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model):
self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model)))
else:
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.model.kelly = 0
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.x, self.y = None, None
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.x is None or self.y is None: return
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
days = idx.get_level_values("time")
if len(days.unique()) <= self.cv_splits: return # Days required for CV
y_bin = self.y.apply(lambda x: +1 if x >= self.tgt_gain and self.strategy >= 0 else
-1 if x <= -self.tgt_gain and self.strategy <= 0 else 0)
# Replace cross_val_score with custom loop to use sample weighted metrics
scores = []
for train_idx, test_idx in self.cv.split(self.x, groups=days):
model_tmp = sklearn.base.clone(self.model)
x_train, x_test = self.x.iloc[train_idx], self.x.iloc[test_idx]
y_train, y_test = y_bin.iloc[train_idx], y_bin.iloc[test_idx]
w_train, w_test = self.y.iloc[train_idx], self.y.iloc[test_idx]
model_tmp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by log returns
y_pred = model_tmp.predict(x_test)
scores += [utl.kelly_score(y_test, y_pred, returns=abs(w_test))] # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(self.y)))
self.model.kelly = np.clip(np.nanmean(scores), 0, 1)
if self.store_model is not None:
self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model))
self.print(f"Training: {y_bin.value_counts()} Kelly:{self.model.kelly:.1%}")
self.Plot("ML", "Kelly", self.model.kelly)
def enter_trades(self):
self.store_features()
if self.model.kelly <= 0: return
x_pred = self.x.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
self.print(f"Predictions: {y_proba.to_string()}")
self.Notify.Email("fbaldisserri@gmail.com", "Big Bertha Predictions", y_proba.to_html())
self.Notify.Email("hbrewer27@gmail.com", "Big Bertha Predictions", y_proba.to_html())
y_pred = y_proba.idxmax(axis=1)
sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2 # Doubling the probability edge over 0.5 into a 0-1 size
positions = y_pred * sizes * self.model.kelly # Sizing based on Kelly and individual probability
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty) # TODO: Change to Limit Order?
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
@utl.catch_errors
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_x = bertha_bar.add_prefix("bb_")
new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_x["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_x["gap"] = bertha_bar["open"] / yesterday_close - 1
self.x = pd.concat([new_x.dropna(), self.x])
self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}")
@utl.catch_errors
def store_targets(self):
last_x = self.x.query("time == @self.Time.date()")
tickers = list(last_x.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
trading_bar = minute_bars.droplevel("time").join(last_x)
new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return
self.y = pd.concat([new_y, self.y])
self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}")
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
self.Log(f"{self.Time} {msg}")
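# utils.catch_errors (used as a decorator above) is also not attached; a minimal
# sketch of such a decorator follows, assuming its job is to keep a failing
# feature/target update from crashing the algorithm, logging the error instead.
import functools

def catch_errors(method):
    @functools.wraps(method)
    def wrapper(self, *args, **kwargs):
        try:
            return method(self, *args, **kwargs)
        except Exception as e:  # deliberately broad: log any data hiccup and move on
            self.Debug(f"{self.Time} {method.__name__} failed: {e}")
    return wrapper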
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.34a: Storing features instead of model
v0.33a: Improved email notification
v0.32a: Improved logging
v0.31a: Minor improvements and library implementation
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.34a
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import sklearn
import pandas as pd
pd.set_option('mode.use_inf_as_na', True)
from sklearn.ensemble import GradientBoostingClassifier
import utils as utl
from timeseriescv import TimeSeriesSplitGroups
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.tgt_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 4) # Number of splits for model cross validation
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 11, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.kelly = 0
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.last_training = datetime(2000, 1, 1, 0, 0, 0)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(every_day, at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
self.x, self.y = None, None
if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved
self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data")))
self.print(f"Loaded data {self.x.shape} {self.y.shape}")
if not self.LiveMode: # Store data during backtest
self.Schedule.On(every_day, at(16, 0), self.save_data)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if ((self.Time - self.last_training).days < 30 and (self.kelly > 0)) \
or self.x is None or self.y is None: return
self.clean_data()
days = self.x.index.get_level_values("time")
if len(days.unique()) <= self.cv_splits * 10: return
y_bin = self.y.apply(lambda x: +1 if x >= self.tgt_gain and self.strategy >= 0 else
-1 if x <= -self.tgt_gain and self.strategy <= 0 else 0)
# Replace cross_val_score with custom loop to use sample weighted metrics
scores = []
days = self.x.index.get_level_values("time")
for train_idx, test_idx in self.cv.split(self.x, groups=days):
model_tmp = sklearn.base.clone(self.model)
x_train, x_test = self.x.iloc[train_idx], self.x.iloc[test_idx]
y_train, y_test = y_bin.iloc[train_idx], y_bin.iloc[test_idx]
w_train, w_test = self.y.iloc[train_idx], self.y.iloc[test_idx]
model_tmp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by log returns
y_pred = model_tmp.predict(x_test)
scores += [utl.kelly_score(y_test, y_pred, returns=abs(w_test))] # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(self.y)))
self.last_training = self.Time
self.kelly = np.clip(np.nanmean(scores), 0, 1)
self.print(f"Training: {y_bin.value_counts()} Kelly:{self.kelly:.1%}")
self.Plot("ML", "Kelly", self.kelly)
def enter_trades(self):
self.calc_features()
if self.kelly <= 0: return
x_pred = self.x.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
self.print(f"Predictions: {y_proba.to_string()}")
self.Notify.Email("fbaldisserri@gmail.com", "Big Bertha Predictions", y_proba.to_html())
self.Notify.Email("hbrewer27@gmail.com", "Big Bertha Predictions", y_proba.to_html())
y_pred = y_proba.idxmax(axis=1)
sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2 # Doubling the probability edge over 0.5 into a 0-1 size
positions = y_pred * sizes * self.kelly # Sizing based on Kelly and individual probability
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty) # TODO: Change to Limit Order?
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.calc_targets()
@utl.catch_errors
def calc_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_x = bertha_bar.add_prefix("bb_")
new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_x["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_x["gap"] = bertha_bar["open"] / yesterday_close - 1
self.x = pd.concat([new_x.dropna(), self.x])
self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}")
@utl.catch_errors
def calc_targets(self):
last_x = self.x.query("time == @self.Time.date()")
tickers = list(last_x.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
trading_bar = minute_bars.droplevel("time").join(last_x)
new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return
self.y = pd.concat([new_y, self.y])
self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}")
def clean_data(self):
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
def save_data(self):
mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True)
self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb")
self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y)))
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
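# timeseriescv.TimeSeriesSplitGroups (imported in the versions above) is a custom
# splitter that is not attached. A minimal sketch of the idea, assuming it mirrors
# sklearn's TimeSeriesSplit but walks forward over unique *groups* (trading days),
# so no single day ever straddles the train/test boundary:
import numpy as np

class TimeSeriesSplitGroups:
    def __init__(self, n_splits=5):
        self.n_splits = n_splits

    def split(self, X, y=None, groups=None):
        unique_days = np.unique(groups)  # sorted trading days
        folds = np.array_split(unique_days, self.n_splits + 1)
        groups = np.asarray(groups)
        for i in range(1, self.n_splits + 1):
            train_days = np.concatenate(folds[:i])  # expanding window of past days
            test_days = folds[i]                    # the next block of days
            yield (np.flatnonzero(np.isin(groups, train_days)),
                   np.flatnonzero(np.isin(groups, test_days)))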
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.35a: CV and training improvements
v0.34a: Storing features instead of model
v0.33a: Improved email notification
v0.32a: Improved logging
v0.31a: Minor improvements and library implementation
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.35a
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import sklearn
import pandas as pd
pd.set_option('mode.use_inf_as_na', True)
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
import utils as utl
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.target_gain = self.GetParameter("target_gain", 0.025) # Minimum target gain to enter the trade
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.SetStartDate(2021, 6, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.kelly = 0
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.last_training = datetime(2000, 1, 1, 0, 0, 0)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(every_day, at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
self.x, self.y = None, None
if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved
self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data")))
self.print(f"Loaded data {self.x.shape} {self.y.shape}")
if not self.LiveMode: # Store data during backtest
self.Schedule.On(every_day, at(16, 0), self.save_data)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if (self.Time - self.last_training).days < 7 \
or self.x is None or self.y is None: return
self.clean_data()
days = self.x.index.get_level_values("time")
if len(days.unique()) <= 21: return
y_bin = self.y.apply(lambda x: +1 if x >= self.target_gain and self.strategy >= 0 else
-1 if x <= -self.target_gain and self.strategy <= 0 else 0)
weight = self.y - self.target_gain # TODO: How to use log with -100% returns?
model_temp = sklearn.base.clone(self.model)
x_train, x_test, y_train, y_test, w_train, w_test = train_test_split(
self.x, y_bin, weight, train_size=0.5, shuffle=False)
model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by returns
y_pred = model_temp.predict(x_test)
score = utl.kelly_score(y_test, y_pred, sample_weight=abs(w_test)) # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(weight)))
self.kelly = np.nan_to_num(score).clip(0, 1)
self.Debug(f"{self.Time} Training - Pts.: {y_bin.value_counts()}\n"
f"Kelly: {self.kelly:.1%}\n")
self.Plot("ML", "Score", self.kelly)
def enter_trades(self): # TODO: Balance Long/Short Trades?
self.calc_features()
x_pred = self.x.query("time == @self.Time.date()")
if self.kelly <= 0 or len(x_pred) == 0: return
x_pred.index = x_pred.index.droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
self.print(f"Predictions: {y_proba.to_string()}")
self.Notify.Email("fbaldisserri@gmail.com", "Big Bertha Predictions", y_proba.to_html())
self.Notify.Email("hbrewer27@gmail.com", "Big Bertha Predictions", y_proba.to_html())
y_pred = y_proba.idxmax(axis=1)
sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2 # Doubling the probability edge over 0.5 into a 0-1 size
positions = y_pred * sizes * self.kelly # Sizing based on Kelly and individual probability
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty) # TODO: Change to Limit Order?
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.calc_targets()
def calc_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_x = bertha_bar.add_prefix("bb_")
new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_x["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_x["gap"] = bertha_bar["open"] / yesterday_close - 1
self.x = pd.concat([new_x.dropna(), self.x])
self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}")
def calc_targets(self):
last_x = self.x.query("time == @self.Time.date()")
if len(last_x) == 0: return
tickers = list(last_x.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
if len(minute_bars) == 0: return
trading_bar = minute_bars.droplevel("time").join(last_x)
new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return
self.y = pd.concat([new_y, self.y])
self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}")
def clean_data(self):
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
def save_data(self):
mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True)
self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb")
self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y)))
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
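# utl.kelly_score is the "cv metric based on Kelly criterion" from the changelog;
# it is not attached either. One plausible reading, sketched under that assumption:
# score a fold by the Kelly fraction f* = p - (1 - p) / b implied by the weighted
# hit rate p and the payoff ratio b of the predictions. The `returns=` keyword seen
# in some versions is treated here as an alias of `sample_weight=`.
import numpy as np

def kelly_score(y_true, y_pred, sample_weight=None, returns=None):
    w = returns if returns is not None else sample_weight
    w = np.ones(len(y_true)) if w is None else np.asarray(w, dtype=float)
    hit = np.asarray(y_true) == np.asarray(y_pred)
    wins, losses = w[hit], w[~hit]
    if wins.sum() == 0:
        return 0.0                      # no edge at all in this fold
    if len(losses) == 0 or losses.mean() == 0:
        return 1.0                      # never lost; full Kelly
    p = wins.sum() / w.sum()            # weighted hit rate
    b = wins.mean() / losses.mean()     # payoff ratio (avg win / avg loss)
    return p - (1 - p) / b              # Kelly fraction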
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.36a: Long Short version (market exposure parameter)
v0.35a: CV and training improvements
v0.34a: Storing features instead of model
v0.33a: Improved email notification
v0.32a: Improved logging
v0.31a: Minor improvements and library implementation
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.36a
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import sklearn
import pandas as pd
pd.set_option('mode.use_inf_as_na', True)
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
import utils as utl
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.exposure = self.GetParameter("exposure", 0.5) # 0 100% short, 1 100% long, 0.5 market neutral
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.n_positions = self.GetParameter("n_positions", 10) # Number of total positions per day
self.n_longs = int(self.n_positions * self.exposure)
self.n_shorts = self.n_positions - self.n_longs
self.SetStartDate(2021, 6, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.kelly = 0
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.last_training = datetime(2000, 1, 1, 0, 0, 0)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(every_day, at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
self.x, self.y = None, None
if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved
self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data")))
self.print(f"Loaded data {self.x.shape} {self.y.shape}")
if not self.LiveMode: # Store data during backtest
self.Schedule.On(every_day, at(16, 0), self.save_data)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if (self.Time - self.last_training).days < 7 \
or self.x is None or self.y is None: return
self.clean_data()
days = self.x.index.get_level_values("time")
if len(days.unique()) <= 21: return
y_bin = (self.y > 0).astype(float)
weight = self.y # TODO: How to use log with -100% returns?
model_temp = sklearn.base.clone(self.model)
x_train, x_test, y_train, y_test, w_train, w_test = train_test_split(
self.x, y_bin, weight, train_size=0.5, shuffle=False)
model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by returns
y_pred = model_temp.predict(x_test)
score = utl.kelly_score(y_test, y_pred, sample_weight=abs(w_test)) # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(weight)))
self.kelly = np.nan_to_num(score).clip(0, 1)
self.Debug(f"{self.Time} Training - Pts.: {y_bin.value_counts()}\n"
f"Kelly: {self.kelly:.1%}\n")
self.Plot("ML", "Score", self.kelly)
def enter_trades(self): # TODO: Balance Long/Short Trades?
self.calc_features()
x_pred = self.x.query("time == @self.Time.date()")
if self.kelly <= 0 or len(x_pred) == 0: return
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict_proba(x_pred)[:,1],
index=x_pred.index).sort_values(ascending=False)
self.print(f"Predictions: {y_pred.to_string()}")
self.Notify.Email("fbaldisserri@gmail.com", "Big Bertha Predictions", y_pred.to_string())
self.Notify.Email("hbrewer27@gmail.com", "Big Bertha Predictions", y_pred.to_string())
longs = y_pred.head(self.n_longs)
longs_pos = longs * self.exposure / longs.sum() # Normalizing by confidence (proba UP)
shorts = y_pred.tail(self.n_shorts)
shorts_pos = -(1 - shorts) * (1 - self.exposure) / (1 - shorts).sum() # Normalizing by confidence (1 - proba UP)
positions = pd.concat([longs_pos, shorts_pos]) * self.kelly
self.print(f"Positions {positions}")
for symbol, pos in positions.items():
self.SetHoldings(symbol, pos)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.calc_targets()
def calc_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_x = bertha_bar.add_prefix("bb_")
new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_x["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_x["gap"] = bertha_bar["open"] / yesterday_close - 1
self.x = pd.concat([new_x.dropna(), self.x])
self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}")
def calc_targets(self):
last_x = self.x.query("time == @self.Time.date()")
if len(last_x) == 0: return
tickers = list(last_x.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
if len(minute_bars) == 0: return
trading_bar = minute_bars.droplevel("time").join(last_x)
new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return
self.y = pd.concat([new_y, self.y])
self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}")
def clean_data(self):
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
def save_data(self):
mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True)
self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb")
self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y)))
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
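# A quick worked example of the v0.36a sizing above (illustrative numbers only):
# exposure=0.5 and n_positions=4 give n_longs=2, n_shorts=2; with kelly=0.4 and
# up-probabilities {A: 0.8, B: 0.7, C: 0.3, D: 0.2}:
import pandas as pd

y_pred = pd.Series({"A": 0.8, "B": 0.7, "C": 0.3, "D": 0.2}).sort_values(ascending=False)
exposure, kelly = 0.5, 0.4
longs = y_pred.head(2)
longs_pos = longs * exposure / longs.sum()                        # A: 0.267, B: 0.233
shorts = y_pred.tail(2)
shorts_pos = -(1 - shorts) * (1 - exposure) / (1 - shorts).sum()  # C: -0.233, D: -0.267
positions = pd.concat([longs_pos, shorts_pos]) * kelly
print(positions.round(3))  # A 0.107, B 0.093, C -0.093, D -0.107 -> net exposure ~0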
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.37a: Long Short with probability threshold
v0.36a: Long Short version (market exposure parameter)
v0.35a: CV and training improvements
v0.34a: Storing features instead of model
v0.33a: Improved email notification
v0.32a: Improved logging
v0.31a: Minor improvements and library implementation
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.37a
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import sklearn
import pandas as pd
pd.set_option('mode.use_inf_as_na', True)
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
import utils as utl
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.min_proba = self.GetParameter("min_proba", 0.2) # Minimum differential probability
self.SetStartDate(2021, 6, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.kelly = 0
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.last_training = datetime(2000, 1, 1, 0, 0, 0)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(every_day, at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
self.x, self.y = None, None
if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved
self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data")))
self.print(f"Loaded data {self.x.shape} {self.y.shape}")
if not self.LiveMode: # Store data during backtest
self.Schedule.On(every_day, at(16, 0), self.save_data)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if (self.Time - self.last_training).days < 7 \
or self.x is None or self.y is None: return
self.clean_data()
days = self.x.index.get_level_values("time")
if len(days.unique()) <= 21: return
y_bin = self.y.apply(lambda x: +1 if x >= 0 else -1) # Using +1/-1 labels so the Kelly score covers both long and short trades
weight = self.y # TODO: How to use log with -100% returns?
model_temp = sklearn.base.clone(self.model)
x_train, x_test, y_train, y_test, w_train, w_test = train_test_split(
self.x, y_bin, weight, train_size=0.5, shuffle=False)
model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by returns
y_pred = model_temp.predict(x_test)
score = utl.kelly_score(y_test, y_pred, sample_weight=abs(w_test)) # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(weight)))
self.kelly = np.nan_to_num(score).clip(0, 1)
self.Debug(f"{self.Time} Training - Pts.: {y_bin.value_counts()}\n"
f"Kelly: {self.kelly:.1%}\n")
self.Plot("ML", "Score", self.kelly)
def enter_trades(self): # TODO: Balance Long/Short Trades?
self.calc_features()
x_pred = self.x.query("time == @self.Time.date()")
if self.kelly <= 0 or len(x_pred) == 0: return
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict_proba(x_pred)[:,1],
index=x_pred.index)
self.print(f"Predictions: {y_pred.to_string()}")
self.Notify.Email("fbaldisserri@gmail.com", "Big Bertha Predictions", y_pred.to_string())
self.Notify.Email("hbrewer27@gmail.com", "Big Bertha Predictions", y_pred.to_string())
proba_desc = y_pred.sort_values(ascending=False)
proba_asc = y_pred.sort_values(ascending=True)
pairs = dict(proba=proba_desc.values-proba_asc.values, # Pairing the most bullish with the most bearish name; the gap is the tradeable edge
long_sym=proba_desc.index.get_level_values("symbol"),
short_sym=proba_asc.index.get_level_values("symbol"))
pairs = pd.DataFrame.from_dict(pairs).query("proba >= @self.min_proba")
pairs.eval("pos = proba * @self.kelly", inplace=True)
if pairs["pos"].sum() >= 1: pairs["pos"] /= pairs["pos"].sum()
self.print(f"Positions {pairs}")
for _, row in pairs.iterrows():
self.SetHoldings(row["long_sym"], row["pos"]/2)
self.SetHoldings(row["short_sym"], -row["pos"]/2)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.calc_targets()
def calc_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_x = bertha_bar.add_prefix("bb_")
new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_x["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_x["gap"] = bertha_bar["open"] / yesterday_close - 1
self.x = pd.concat([new_x.dropna(), self.x])
self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}")
def calc_targets(self):
last_x = self.x.query("time == @self.Time.date()")
if len(last_x) == 0: return
tickers = list(last_x.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
if len(minute_bars) == 0: return
trading_bar = minute_bars.droplevel("time").join(last_x)
new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return
self.y = pd.concat([new_y, self.y])
self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}")
def clean_data(self):
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
def save_data(self):
mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True)
self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb")
self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y)))
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
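# The v0.37a pairing above matches the most bullish name against the most bearish
# one and sizes each pair by the probability gap. Illustrative numbers only
# (kelly=0.5, min_proba=0.2), with up-probabilities {A: 0.9, B: 0.6, C: 0.4, D: 0.1}:
import pandas as pd

y_pred = pd.Series({"A": 0.9, "B": 0.6, "C": 0.4, "D": 0.1})
desc, asc = y_pred.sort_values(ascending=False), y_pred.sort_values()
pairs = pd.DataFrame({"proba": desc.values - asc.values,  # A-D: 0.8, B-C: 0.2, then mirrors
                      "long_sym": desc.index, "short_sym": asc.index})
pairs = pairs.query("proba >= 0.2").assign(pos=lambda d: d["proba"] * 0.5)
print(pairs)  # long A / short D at pos 0.40, long B / short C at pos 0.10
# Each leg is then entered at pos/2, so the book stays dollar-neutral pair by pair;
# the mirror rows (negative gaps) drop out at the min_proba threshold.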
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.38a: DAS Trader integration
v0.37a: Long Short with probability threshold
v0.36a: Long Short version (market exposure parameter)
v0.35a: CV and training improvements
v0.34a: Storing features instead of model
v0.33a: Improved email notification
v0.32a: Improved logging
v0.31a: Minor improvements and library implementation
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.38a
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import sklearn
import pandas as pd
pd.set_option('mode.use_inf_as_na', True)
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
import utils as utl
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.min_proba = self.GetParameter("min_proba", 0.2) # Minimum differential probability
self.SetStartDate(2021, 6, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.kelly = 0
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.last_training = datetime(2000, 1, 1, 0, 0, 0)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(every_day, at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
self.x, self.y = None, None
if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved
self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data")))
self.print(f"Loaded data {self.x.shape} {self.y.shape}")
if not self.LiveMode: # Store data during backtest
self.Schedule.On(every_day, at(16, 0), self.save_data)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if (self.Time - self.last_training).days < 7 \
or self.x is None or self.y is None: return
self.clean_data()
days = self.x.index.get_level_values("time")
if len(days.unique()) <= 21: return
y_bin = self.y.apply(lambda x: +1 if x >= 0 else -1) # Using +1/-1 labels so the Kelly score covers both long and short trades
weight = self.y # TODO: How to use log with -100% returns?
model_temp = sklearn.base.clone(self.model)
x_train, x_test, y_train, y_test, w_train, w_test = train_test_split(
self.x, y_bin, weight, train_size=0.5, shuffle=False)
model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by returns
y_pred = model_temp.predict(x_test)
score = utl.kelly_score(y_test, y_pred, sample_weight=abs(w_test)) # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(weight)))
self.kelly = np.nan_to_num(score).clip(0, 1)
self.Debug(f"{self.Time} Training - Pts.: {y_bin.value_counts()}\n"
f"Kelly: {self.kelly:.1%}\n")
self.Plot("ML", "Score", self.kelly)
def enter_trades(self): # TODO: Balance Long/Short Trades?
self.calc_features()
x_pred = self.x.query("time == @self.Time.date()")
if self.kelly <= 0 or len(x_pred) == 0: return
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict_proba(x_pred)[:,1],
index=x_pred.index)
self.print(f"Predictions: {y_pred.to_string()}")
self.Notify.Email("fbaldisserri@gmail.com", "Big Bertha Predictions", y_pred.to_string())
self.Notify.Email("hbrewer27@gmail.com", "Big Bertha Predictions", y_pred.to_string())
proba_desc = y_pred.sort_values(ascending=False)
proba_asc = y_pred.sort_values(ascending=True)
pairs = dict(proba=proba_desc.values-proba_asc.values,
long_sym=proba_desc.index.get_level_values("symbol"),
short_sym=proba_asc.index.get_level_values("symbol"))
pairs = pd.DataFrame.from_dict(pairs).query("proba >= @self.min_proba")
pairs.eval("pos = proba * @self.kelly", inplace=True)
if pairs["pos"].sum() >= 1: pairs["pos"] /= pairs["pos"].sum()
self.print(f"Positions {pairs}")
for _, row in pairs.iterrows():
self.SetHoldings(row["long_sym"], row["pos"]/2)
self.SetHoldings(row["short_sym"], -row["pos"]/2)
if self.LiveMode:
utl.das_send_order(self, row["long_sym"], position=row["pos"] / 2)
utl.das_send_order(self, row["short_sym"], position=-row["pos"] / 2)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
if self.LiveMode: utl.das_liquidate(self)
self.calc_targets()
def calc_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_x = bertha_bar.add_prefix("bb_")
new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_x["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_x["gap"] = bertha_bar["open"] / yesterday_close - 1
self.x = pd.concat([new_x.dropna(), self.x])
self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}")
def calc_targets(self):
last_x = self.x.query("time == @self.Time.date()")
if len(last_x) == 0: return
tickers = list(last_x.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
if len(minute_bars) == 0: return
trading_bar = minute_bars.droplevel("time").join(last_x)
new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return
self.y = pd.concat([new_y, self.y])
self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}")
def clean_data(self):
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
def save_data(self):
mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True)
self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb")
self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y)))
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
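# utils.das_send_order / das_liquidate belong to the unattached DAS Trader
# integration. The stub below is hypothetical end to end: it assumes a plain-text
# command API reachable over TCP, and the endpoint and command strings are
# placeholders rather than real DAS CMD syntax; it only shows the shape of the calls.
import socket

DAS_HOST, DAS_PORT = "127.0.0.1", 9800  # placeholder endpoint

def _das_send(algo, command: str):
    algo.Debug(f"{algo.Time} DAS >> {command}")
    with socket.create_connection((DAS_HOST, DAS_PORT), timeout=5) as sock:
        sock.sendall((command + "\n").encode())

def das_send_order(algo, ticker, quantity=None, position=None):
    if quantity is None:  # translate a target portfolio weight into shares
        quantity = algo.CalculateOrderQuantity(ticker, position)
    side = "BUY" if quantity > 0 else "SELL"
    _das_send(algo, f"ORDER {side} {ticker} {abs(int(quantity))} MKT")  # assumed format

def das_liquidate(algo):
    _das_send(algo, "FLATTEN ALL")  # assumed format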
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.39a: Minor improvements and changes to DAS Integration
v0.38a: DAS Trader integration
v0.37a: Long Short with probability threshold
v0.36a: Long Short version (market exposure parameter)
v0.35a: CV and training improvements
v0.34a: Storing features instead of model
v0.33a: Improved email notification
v0.32a: Improved logging
v0.31a: Minor improvements and library implementation
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.39a
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import sklearn
import pandas as pd
pd.set_option('mode.use_inf_as_na', True)
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
import utils as utl
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.min_proba = self.GetParameter("min_proba", 0.1) # Minimum differential probability
self.SetStartDate(2021, 6, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.kelly = 0
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.last_training = datetime(2000, 1, 1, 0, 0, 0)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(every_day, at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
self.x, self.y = None, None
if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved
self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data")))
self.print(f"Loaded data {self.x.shape} {self.y.shape}")
if not self.LiveMode: # Store data during backtest
self.Schedule.On(every_day, at(16, 0), self.save_data)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if (self.Time - self.last_training).days < 7 \
or self.x is None or self.y is None: return
self.clean_data()
days = self.x.index.get_level_values("time")
if len(days.unique()) <= 21: return
y_bin = self.y.apply(lambda x: +1 if x >= 0 else -1) # Using +1/-1 labels so the Kelly score covers both long and short trades
weight = self.y # TODO: How to use log with -100% returns?
model_temp = sklearn.base.clone(self.model)
x_train, x_test, y_train, y_test, w_train, w_test = train_test_split(
self.x, y_bin, weight, train_size=0.5, shuffle=False)
model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by returns
y_pred = model_temp.predict(x_test)
score = utl.kelly_score(y_test, y_pred, sample_weight=abs(w_test)) # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(weight)))
self.kelly = np.nan_to_num(score).clip(0, 1)
self.Debug(f"{self.Time} Training - Pts.: {y_bin.value_counts()}\n"
f"Kelly: {self.kelly:.1%}\n")
self.Plot("ML", "Score", self.kelly)
def enter_trades(self): # TODO: Balance Long/Short Trades?
self.calc_features()
x_pred = self.x.query("time == @self.Time.date()")
if self.kelly <= 0 or len(x_pred) == 0: return
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict_proba(x_pred)[:,1],
index=x_pred.index)
self.print(f"Predictions: {y_pred.to_string()}")
for email in ["fbaldisserri@gmail.com", "hbrewer27@gmail.com"]:
self.Notify.Email(email, "Big Bertha Predictions", y_pred.to_string())
proba_desc = y_pred.sort_values(ascending=False)
proba_asc = y_pred.sort_values(ascending=True)
pairs = dict(proba=proba_desc.values-proba_asc.values,
long_sym=proba_desc.index.get_level_values("symbol"),
short_sym=proba_asc.index.get_level_values("symbol"))
pairs = pd.DataFrame.from_dict(pairs).query("proba >= @self.min_proba")
pairs.eval("pos = proba * @self.kelly", inplace=True)
if pairs["pos"].sum() >= 1: pairs["pos"] /= pairs["pos"].sum()
self.print(f"Positions {pairs}")
for _, row in pairs.iterrows():
self.SetHoldings(row["long_sym"], row["pos"]/2)
self.SetHoldings(row["short_sym"], -row["pos"]/2)
#if self.LiveMode: # NOTE: guard disabled in this version, so DAS test orders fire in backtests too
long_qty = self.CalculateOrderQuantity(row["long_sym"], row["pos"]/2)
utl.das_send_order(self, row["long_sym"].split()[0], quantity=long_qty)
short_qty = self.CalculateOrderQuantity(row["short_sym"], -row["pos"] / 2)
utl.das_send_order(self, row["short_sym"].split()[0], quantity=short_qty)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
#if self.LiveMode: # NOTE: guard disabled here as well
utl.das_liquidate(self)
self.calc_targets()
def calc_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_x = bertha_bar.add_prefix("bb_")
new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_x["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_x["gap"] = bertha_bar["open"] / yesterday_close - 1
self.x = pd.concat([new_x.dropna(), self.x])
self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}")
def calc_targets(self):
last_x = self.x.query("time == @self.Time.date()")
if len(last_x) == 0: return
tickers = list(last_x.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
if len(minute_bars) == 0: return
trading_bar = minute_bars.droplevel("time").join(last_x)
new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return
self.y = pd.concat([new_y, self.y])
self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}")
def clean_data(self):
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
def save_data(self):
mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True)
self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb")
self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y)))
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
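# Why the .split()[0] above: a QuantConnect Symbol renders as "TICKER SECURITY-ID"
# (two space-separated tokens), while DAS expects the bare ticker. A minimal
# illustration with a placeholder string:
sym = "ABCD 0123456789AB"  # placeholder "TICKER SID" rendering, not a real asset
ticker = sym.split()[0]    # -> "ABCD"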
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.40a: Improvements to DAS Integration and logging
v0.39a: Minor improvements and changes to DAS Integration
v0.38a: DAS Trader integration
v0.37a: Long Short with probability threshold
v0.36a: Long Short version (market exposure parameter)
v0.35a: CV and training improvements
v0.34a: Storing features instead of model
v0.33a: Improved email notification
v0.32a: Improved logging
v0.31a: Minor improvements and library implementation
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.40a
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import sklearn
import pandas as pd
pd.set_option('mode.use_inf_as_na', True)
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
import utils as utl
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.min_proba = self.GetParameter("min_proba", 0.1) # Minimum differential probability
self.SetStartDate(2021, 6, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.kelly = 0
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.last_training = datetime(2000, 1, 1, 0, 0, 0)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(every_day, at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
self.Schedule.On(every_day, at(16, 0), self.save_data)
self.x, self.y = None, None
if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved
self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data")))
self.print(f"Loaded data {self.x.shape} {self.y.shape}")
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if (self.Time - self.last_training).days < 7 \
or self.x is None or self.y is None: return
self.clean_data()
days = self.x.index.get_level_values("time")
if len(days.unique()) <= 21: return
y_bin = self.y.apply(lambda x: +1 if x >= 0 else -1) # Using +1/-1 labels so the Kelly score covers both long and short trades
weight = self.y # TODO: How to use log with -100% returns?
model_temp = sklearn.base.clone(self.model)
x_train, x_test, y_train, y_test, w_train, w_test = train_test_split(
self.x, y_bin, weight, train_size=0.5, shuffle=False)
model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by returns
y_pred = model_temp.predict(x_test)
score = utl.kelly_score(y_test, y_pred, sample_weight=abs(w_test)) # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(weight)))
self.kelly = np.nan_to_num(score).clip(0, 1)
self.Debug(f"{self.Time} Training - Pts.: {y_bin.value_counts()}\n"
f"Kelly: {self.kelly:.1%}\n")
self.Plot("ML", "Score", self.kelly)
def enter_trades(self):
self.calc_features()
x_pred = self.x.query("time == @self.Time.date()")
if self.kelly <= 0 or len(x_pred) == 0: return
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict_proba(x_pred)[:,1],
index=x_pred.index)
self.print(f"Predictions: {y_pred.to_string()}")
for email in ["fbaldisserri@gmail.com", "hbrewer27@gmail.com"]:
self.Notify.Email(email, "Big Bertha Predictions", y_pred.to_string())
proba_desc = y_pred.sort_values(ascending=False)
proba_asc = y_pred.sort_values(ascending=True)
pairs = dict(proba=proba_desc.values-proba_asc.values,
long_sym=proba_desc.index.get_level_values("symbol"),
short_sym=proba_asc.index.get_level_values("symbol"))
pairs = pd.DataFrame.from_dict(pairs).query("proba >= @self.min_proba")
pairs.eval("pos = proba * @self.kelly", inplace=True)
if pairs["pos"].sum() >= 1: pairs["pos"] /= pairs["pos"].sum()
self.print(f"Positions {pairs}")
for _, row in pairs.iterrows():
self.SetHoldings(row["long_sym"], row["pos"]/2)
self.SetHoldings(row["short_sym"], -row["pos"]/2)
if self.LiveMode:
long_qty = self.CalculateOrderQuantity(row["long_sym"], row["pos"]/2)
utl.das_send_order(self, row["long_sym"].split()[0], quantity=long_qty)
short_qty = self.CalculateOrderQuantity(row["short_sym"], -row["pos"] / 2)
utl.das_send_order(self, row["short_sym"].split()[0], quantity=short_qty)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
if self.LiveMode: utl.das_liquidate(self)
self.calc_targets()
def calc_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_x = bertha_bar.add_prefix("bb_")
new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_x["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_x["gap"] = bertha_bar["open"] / yesterday_close - 1
self.x = pd.concat([new_x.dropna(), self.x])
self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}")
def calc_targets(self):
last_x = self.x.query("time == @self.Time.date()")
if len(last_x) == 0: return
tickers = list(last_x.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
if len(minute_bars) == 0: return
trading_bar = minute_bars.droplevel("time").join(last_x)
new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return
self.y = pd.concat([new_y, self.y])
self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}")
def clean_data(self):
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
def save_data(self):
mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True)
self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb")
self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y)))
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
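The kelly_score helper lives in utils.py, which is not part of this post. A minimal sketch of what it plausibly computes, given how train_model calls it (+1/-1 labels, absolute returns as sample weights); the signature and internals here are assumptions, not the author's code:

import numpy as np

def kelly_score(y_true, y_pred, sample_weight=None):
    # Kelly fraction f* = p - (1 - p) / b, with p the weighted hit rate and
    # b the payoff ratio (average win size / average loss size).
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    w = np.ones(len(y_true)) if sample_weight is None else np.asarray(sample_weight, dtype=float)
    hits = y_true == y_pred
    if hits.all():
        return 1.0
    if not hits.any():
        return 0.0
    p = w[hits].sum() / w.sum()            # weighted win rate
    b = w[hits].mean() / w[~hits].mean()   # payoff ratio from absolute returns
    return p - (1 - p) / b                 # Kelly edge; negative means no edge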
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.41a: Added use_kelly parameter and 9am training for easier live trading
v0.40a: Improvements to DAS Integration and logging
v0.39a: Minor improvements and changes to DAS Integration
v0.38a: DAS Trader integration
v0.37a: Long Short with probability threshold
v0.36a: Long Short version (market exposure parameter)
v0.35a: CV and training improvements
v0.34a: Storing features instead of model
v0.33a: Improved email notification
v0.32a: Improved logging
v0.31a: Minor improvements and library implementation
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.41a
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import sklearn
import pandas as pd
pd.set_option('mode.use_inf_as_na', True)
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
import utils as utl
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.min_proba = self.GetParameter("min_proba", 0.1) # Minimum differential probability
self.use_kelly = self.GetParameter("use_kelly", 1) # Whether to use the kelly criterion for sizing
self.SetStartDate(2021, 6, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.kelly = 0
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.last_training = datetime(2000, 1, 1, 0, 0, 0)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(every_day, at(9, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
self.Schedule.On(every_day, at(16, 0), self.save_data)
self.x, self.y = None, None
if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved
self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data")))
self.print(f"Loaded data {self.x.shape} {self.y.shape}")
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if (self.Time - self.last_training).days < 7 \
or self.x is None or self.y is None: return
self.clean_data()
days = self.x.index.get_level_values("time")
if len(days.unique()) <= 21: return
y_bin = self.y.apply(lambda x: +1 if x >= 0 else -1) # +1/-1 labels so the Kelly score covers both the long and the short side
weight = self.y # TODO: How to use log with -100% returns?
model_temp = sklearn.base.clone(self.model)
x_train, x_test, y_train, y_test, w_train, w_test = train_test_split(
self.x, y_bin, weight, train_size=0.5, shuffle=False)
model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by returns
y_pred = model_temp.predict(x_test)
score = utl.kelly_score(y_test, y_pred, sample_weight=abs(w_test)) \
if self.use_kelly else 1 # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(weight)))
self.last_training = self.Time # Record the training time so the weekly retraining check is effective
self.kelly = np.nan_to_num(score).clip(0, 1)
self.Debug(f"{self.Time} Training - Pts.: {y_bin.value_counts()}\n"
f"Kelly: {self.kelly:.1%}\n")
self.Plot("ML", "Score", self.kelly)
def enter_trades(self):
self.calc_features()
x_pred = self.x.query("time == @self.Time.date()")
if self.kelly <= 0 or len(x_pred) == 0: return
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict_proba(x_pred)[:,1],
index=x_pred.index)
self.print(f"Predictions: {y_pred.to_string()}")
for email in ["fbaldisserri@gmail.com", "hbrewer27@gmail.com"]:
self.Notify.Email(email, "Big Bertha Predictions", y_pred.to_string())
proba_desc = y_pred.sort_values(ascending=False)
proba_asc = y_pred.sort_values(ascending=True)
pairs = dict(proba=proba_desc.values-proba_asc.values,
long_sym=proba_desc.index.get_level_values("symbol"),
short_sym=proba_asc.index.get_level_values("symbol"))
pairs = pd.DataFrame.from_dict(pairs).query("proba >= @self.min_proba")
pairs.eval("pos = proba * @self.kelly", inplace=True)
if pairs["pos"].sum() >= 1: pairs["pos"] /= pairs["pos"].sum() # Cap gross exposure at 100% (each pair allocates pos/2 long and pos/2 short)
self.print(f"Positions {pairs}")
for _, row in pairs.iterrows():
self.SetHoldings(row["long_sym"], row["pos"]/2)
self.SetHoldings(row["short_sym"], -row["pos"]/2)
if self.LiveMode:
long_qty = self.CalculateOrderQuantity(row["long_sym"], row["pos"]/2)
utl.das_send_order(self, row["long_sym"].split()[0], quantity=long_qty)
short_qty = self.CalculateOrderQuantity(row["short_sym"], -row["pos"] / 2)
utl.das_send_order(self, row["short_sym"].split()[0], quantity=short_qty)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
if self.LiveMode: utl.das_liquidate(self)
self.calc_targets()
def calc_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_x = bertha_bar.add_prefix("bb_")
new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_x["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_x["gap"] = bertha_bar["open"] / yesterday_close - 1
self.x = pd.concat([new_x.dropna(), self.x])
self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}")
def calc_targets(self):
last_x = self.x.query("time == @self.Time.date()")
if len(last_x) == 0: return
tickers = list(last_x.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
if len(minute_bars) == 0: return
trading_bar = minute_bars.droplevel("time").join(last_x)
new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return
self.y = pd.concat([new_y, self.y])
self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}")
def clean_data(self):
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
def save_data(self):
mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True)
self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb")
self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y)))
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
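A toy run of the pairing logic in enter_trades above, with made-up probabilities (the real series carries a (symbol, time) MultiIndex, hence the get_level_values calls):

import pandas as pd

y_pred = pd.Series({"AAA": 0.8, "BBB": 0.6, "CCC": 0.4, "DDD": 0.1})  # P(up) per symbol
desc = y_pred.sort_values(ascending=False)  # long candidates, strongest first
asc = y_pred.sort_values(ascending=True)    # short candidates, weakest first
pairs = pd.DataFrame({"proba": desc.values - asc.values,
                      "long_sym": desc.index, "short_sym": asc.index})
print(pairs.query("proba >= 0.1"))  # min_proba filter
#    proba long_sym short_sym
# 0    0.7      AAA       DDD
# 1    0.2      BBB       CCC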
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.42a: Improved positions calculation for DAS Integration
v0.41a: Added use_kelly parameter and 9am training for easier live trading
v0.40a: Improvements to DAS Integration and logging
v0.39a: Minor improvements and changes to DAS Integration
v0.38a: DAS Trader integration
v0.37a: Long Short with probability threshold
v0.36a: Long Short version (market exposure parameter)
v0.35a: CV and training improvements
v0.34a: Storing features instead of model
v0.33a: Improved email notification
v0.32a: Improved logging
v0.31a: Minor improvements and library implementation
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.42a
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import sklearn
import pandas as pd
pd.set_option('mode.use_inf_as_na', True)
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
import utils as utl
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.capital = self.GetParameter("capital", 800000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.min_proba = self.GetParameter("min_proba", 0.1) # Minimum differential probability
self.use_kelly = self.GetParameter("use_kelly", 1) # Whether to use the kelly criterion for sizing
self.SetStartDate(2021, 6, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.kelly = 0
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.last_training = datetime(2000, 1, 1, 0, 0, 0)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(every_day, at(9, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
self.Schedule.On(every_day, at(16, 0), self.save_data)
self.x, self.y = None, None
if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved
self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data")))
self.print(f"Loaded data {self.x.shape} {self.y.shape}")
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if (self.Time - self.last_training).days < 7 \
or self.x is None or self.y is None: return
self.clean_data()
days = self.x.index.get_level_values("time")
if len(days.unique()) <= 21: return
y_bin = self.y.apply(lambda x: +1 if x >= 0 else -1) # +1/-1 labels so the Kelly score covers both the long and the short side
weight = self.y # TODO: How to use log with -100% returns?
model_temp = sklearn.base.clone(self.model)
x_train, x_test, y_train, y_test, w_train, w_test = train_test_split(
self.x, y_bin, weight, train_size=0.5, shuffle=False)
model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by returns
y_pred = model_temp.predict(x_test)
score = utl.kelly_score(y_test, y_pred, sample_weight=abs(w_test)) \
if self.use_kelly else 1 # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(weight)))
self.last_training = self.Time # Record the training time so the weekly retraining check is effective
self.kelly = np.nan_to_num(score).clip(0, 1)
self.Debug(f"{self.Time} Training - Pts.: {y_bin.value_counts()}\n"
f"Kelly: {self.kelly:.1%}\n")
self.Plot("ML", "Score", self.kelly)
def enter_trades(self):
self.calc_features()
x_pred = self.x.query("time == @self.Time.date()")
if self.kelly <= 0 or len(x_pred) == 0: return
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict_proba(x_pred)[:,1],
index=x_pred.index)
self.print(f"Predictions: {y_pred.to_string()}")
for email in ["fbaldisserri@gmail.com", "hbrewer27@gmail.com"]:
self.Notify.Email(email, "Big Bertha Predictions", y_pred.to_string())
proba_desc = y_pred.sort_values(ascending=False)
proba_asc = y_pred.sort_values(ascending=True)
pairs = dict(proba=proba_desc.values-proba_asc.values,
long_sym=proba_desc.index.get_level_values("symbol"),
short_sym=proba_asc.index.get_level_values("symbol"))
pairs = pd.DataFrame.from_dict(pairs).query("proba >= @self.min_proba")
pairs.eval("pos = proba * @self.kelly", inplace=True)
if pairs["pos"].sum() >= 1: pairs["pos"] /= pairs["pos"].sum() # Cap gross exposure at 100% (each pair allocates pos/2 long and pos/2 short)
self.print(f"Positions {pairs}")
for _, row in pairs.iterrows():
order_value = self.Portfolio.TotalPortfolioValue * row["pos"] / 2 # Dollar value per leg, used below for the DAS share quantities
self.SetHoldings(row["long_sym"], row["pos"]/2) # SetHoldings expects a portfolio fraction, not a dollar value
self.SetHoldings(row["short_sym"], -row["pos"]/2)
if self.LiveMode:
long_qty = int(order_value/self.Securities[row["long_sym"]].Price)
utl.das_send_order(self, row["long_sym"].split()[0], quantity=long_qty)
short_qty = int(order_value/self.Securities[row["short_sym"]].Price)
utl.das_send_order(self, row["short_sym"].split()[0], quantity=short_qty)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
if self.LiveMode: utl.das_liquidate(self)
self.calc_targets()
def calc_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_x = bertha_bar.add_prefix("bb_")
new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_x["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_x["gap"] = bertha_bar["open"] / yesterday_close - 1
self.x = pd.concat([new_x.dropna(), self.x])
self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}")
def calc_targets(self):
last_x = self.x.query("time == @self.Time.date()")
if len(last_x) == 0: return
tickers = list(last_x.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
if len(minute_bars) == 0: return
trading_bar = minute_bars.droplevel("time").join(last_x)
new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return
self.y = pd.concat([new_y, self.y])
self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}")
def clean_data(self):
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
def save_data(self):
mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True)
self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb")
self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y)))
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
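The Big Bertha features built in calc_features reduce to simple bar geometry; a worked example with made-up prices:

open_, high, low, close = 10.0, 11.0, 9.5, 10.8  # 9:31 to entry-time "Big Bertha" bar
prev_close = 9.8                                 # previous daily close
bb_size = (high - low) / open_                   # 0.150: bar range relative to the open
bb_close_range = (close - low) / (high - low)    # 0.867: where the close sits in the range
bb_open_range = (open_ - low) / (high - low)     # 0.333: where the open sits in the range
gap = open_ / prev_close - 1                     # 0.020: overnight gap vs yesterday's close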
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.29
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.dummy import DummyClassifier
from sklearn.cluster import DBSCAN
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, cross_val_score
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
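# GROUPER + AGG_OPS (below) collapse minute bars into one OHLCV bar per
# (symbol, day): first open, last close, max high, min low, summed volume.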
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
def catch_errors(func):
def wrap(self, *args, **kwargs):
try:
result = func(self, *args, **kwargs)
return result
except (KeyError, ValueError) as e:
self.print(e)
return
return wrap
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.target_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation
self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored
self.min_proba = self.GetParameter("min_proba", 0.5) # Threshold probability to trigger a long/short signal
self.entry_mns = self.GetParameter("entry_mn", 5) # Entry time (minutes after 9.30)
self.exit_mns = self.GetParameter("exit_mn", 385) # Exit time (minutes after 9.30)
self.sl_retr = self.GetParameter("sl_retr", 0) # Stop-loss retracement of the bar range (0 disables the SL check in calc_target)
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 9, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
#self.ObjectStore.Delete(self.store_model) # Deleting existing data
if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model):
self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model)))
else:
pipe = Pipeline([("scaling", None),
("clustering", None),
("model", LogisticRegression())])
params = dict(scaling=[None, StandardScaler()],
clustering=[None, DBSCAN()], # NB: DBSCAN has no transform(), so a Pipeline cannot run it as an intermediate step
model=[DummyClassifier(),
GradientBoostingClassifier(n_iter_no_change=3)])
self.model = GridSearchCV(pipe, param_grid=params)
self.model.kelly = 0 # Initialise the attribute read in enter_trades (model.kelly)
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.features, self.targets = None, None
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
entry_hr, entry_mn = 9 + self.entry_mns // 60, 30 + self.entry_mns % 60
exit_hr, exit_mn = 9 + self.exit_mns // 60, 30 + self.exit_mns % 60
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(entry_hr, entry_mn), self.enter_trades)
self.Schedule.On(every_day, at(exit_hr, exit_mn), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None or self.targets is None: return
idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)]
self.features = self.features.loc[idx]
self.targets = self.targets.loc[idx]
training_days = idx.get_level_values("time")
if len(training_days.unique()) <= 21: return # Require more than one month of training data
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=training_days,
scoring="balanced_accuracy")
self.model.fit(self.features, self.targets)
if self.store_model is not None:
self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model))
score = np.mean(cv_scores)
n_classes = len(self.model.classes_)
self.model.kelly = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes
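# Sanity check: a random classifier scores balanced accuracy 1/n_classes,
# giving edge (n * 1/n - 1) / (n - 1) = 0; a perfect one scores 1, giving
# (n - 1) / (n - 1) = 1. The formula rescales [1/n, 1] onto [0, 1].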
self.print(f"Training: {self.targets.value_counts()} Edge:{self.model.kelly:.1%}")
self.print(f"Best model: {self.model.best_estimator_}")
self.Log(pd.DataFrame(self.model.cv_results_).to_string())
self.Plot("ML", "Edge", self.model.kelly)
def enter_trades(self):
self.store_features()
if self.model.kelly <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
y_pred = y_proba.idxmax(axis=1)
scaling = 1 / (1 - self.min_proba)
sizes = (y_proba.max(axis=1) - self.min_proba).clip(0, 1) * scaling # Selecting only prob > min_proba and scaling
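# e.g. with min_proba = 0.5: p = 0.5 -> size 0.0, p = 0.75 -> 0.5, p = 1.0 -> 1.0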
positions = y_pred * sizes * self.model.kelly # Sizing based on Kelly and individual probability
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
@catch_errors
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_features = bertha_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = bertha_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
@catch_errors
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
self.Log(last_features)
tickers = list(last_features.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
self.Log(f"Target time: {last_minute}")
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
self.Log(minute_bars)
trading_bar = minute_bars.droplevel("time").join(last_features)
new_targets = trading_bar.apply(self.calc_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
def calc_target(self, price_bar):
entry_price, exit_price = price_bar.bb_close, price_bar.close
window = (price_bar.bb_high - price_bar.bb_low)
if exit_price >= entry_price * (1 + self.target_gain) and self.strategy >= 0: # long trade
if self.sl_retr > 0:
stop_loss = price_bar.bb_high - window * self.sl_retr
return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else:
return +1
elif exit_price <= entry_price * (1 - self.target_gain) and self.strategy <= 0: # short trade
if self.sl_retr > 0:
stop_loss = price_bar.bb_low + window * self.sl_retr
return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
else:
return -1
else:
return 0
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.34a: Backward features creation and no need for pre-trained model
v0.33a: Improved email notification
v0.32a: Improved logging
v0.31a: Minor improvements and library implementation
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.34a
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import sklearn
import pandas as pd
pd.set_option('mode.use_inf_as_na', True)
from sklearn.ensemble import GradientBoostingClassifier
import utils as utl
from timeseriescv import TimeSeriesSplitGroups
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.tgt_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation
self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored
self.entry_mns = self.GetParameter("entry_mn", 5) # Entry time (minutes after 9.30)
self.exit_mns = self.GetParameter("exit_mn", 385) # Exit time (minutes after 9.30)
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 11, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.kelly = 0
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.x, self.y = None, None
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.entry_hr, self.entry_mn = 9 + self.entry_mns // 60, 30 + self.entry_mns % 60
self.exit_hr, self.exit_mn = 9 + self.exit_mns // 60, 30 + self.exit_mns % 60
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(self.entry_hr, self.entry_mn), self.enter_trades)
self.Schedule.On(every_day, at(self.exit_hr, self.exit_mn), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if x.HasFundamentalData]
def train_model(self):
if self.x is None or self.y is None: return
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
days = idx.get_level_values("time")
if len(days.unique()) <= self.cv_splits: return # Days required for CV
y_bin = self.y.apply(lambda x: +1 if x >= self.tgt_gain and self.strategy >= 0 else
-1 if x <= -self.tgt_gain and self.strategy <= 0 else 0)
# Replace cross_val_score with custom loop to use sample weighted metrics
scores = []
for train_idx, test_idx in self.cv.split(self.x, groups=days):
model_tmp = sklearn.base.clone(self.model)
x_train, x_test = self.x.iloc[train_idx], self.x.iloc[test_idx]
y_train, y_test = y_bin.iloc[train_idx], y_bin.iloc[test_idx]
w_train, w_test = self.y.iloc[train_idx], self.y.iloc[test_idx]
model_tmp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by log returns
y_pred = model_tmp.predict(x_test)
scores += [utl.kelly_score(y_test, y_pred, returns=abs(w_test))] # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(self.y)))
self.kelly = np.clip(np.nanmean(scores), 0, 1)
self.print(f"Training: {y_bin.value_counts()} Kelly:{self.kelly:.1%}")
self.Plot("ML", "Kelly", self.kelly)
def enter_trades(self):
self.store_features()
if self.kelly <= 0: return
x_pred = self.x.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
self.print(f"Predictions: {y_proba.to_string()}")
self.Notify.Email("fbaldisserri@gmail.com", "Big Bertha Predictions", y_proba.to_html())
self.Notify.Email("hbrewer27@gmail.com", "Big Bertha Predictions", y_proba.to_html())
y_pred = y_proba.idxmax(axis=1)
sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2 # Keeping only prob > 0.5 and rescaling to [0, 1] (fixed threshold in this version)
positions = y_pred * sizes * self.kelly # Sizing based on Kelly and individual probability
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty) # TODO: Change to Limit Order?
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
@utl.catch_errors
def store_features(self):
tickers = list(self.ActiveSecurities.Keys)
#tickers = list(np.random.choice(tickers, size=100, replace=False))
today = self.Time.replace(hour=0, minute=0, second=0, microsecond=0)
start_day = today - timedelta(self.lookback) if self.x is None else \
self.x.index.get_level_values("time").max()
day_bars = self.History(tickers, start_day, today, Resolution.Daily)
time_idx = day_bars.index.get_level_values("time").shift(-1, freq="D")
symbol_idx = day_bars.index.get_level_values("symbol")
day_bars.set_index([symbol_idx, time_idx], inplace=True)
today_start = today.replace(hour=9, minute=30)
today_stop = today.replace(hour=self.entry_hr, minute=self.entry_mn)
today_bar = self.History(tickers, today_start, today_stop, Resolution.Minute)
day_bars = day_bars.append(utl.agg_bars(today_bar)).groupby("symbol").shift(1)
valid_bars = day_bars.query("close * volume >= @self.min_usd_volume")
universe = valid_bars.reset_index().groupby("time")["symbol"].apply(list)
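# The shift(1) two lines up re-stamps each row with the previous session's bar,
# so the close * volume filter reproduces the live coarse universe
# (previous-day dollar volume) for every historical day; universe maps each
# day to its qualifying symbols.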
for day, symbols in universe.items():
start = day.replace(hour=7, minute=1)
last_minute = day.replace(hour=self.entry_hr, minute=self.entry_mn)
minute_bars = self.History(symbols, start, last_minute, Resolution.Minute)
if len(minute_bars) == 0: continue
pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30")
bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{self.entry_hr}:{self.entry_mn}") # TODO: Check 9:31 filter
new_x = bertha_bar.add_prefix("bb_")
new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_x["pm_volume_usd"] = pm_bar.eval("close * volume")
"""prev_day_bar = self.History(tickers, 1, Resolution.Daily)
prev_day_close = prev_day_bar["close"].droplevel("time")
new_x["gap"] = bertha_bar["open"] / prev_day_close - 1"""
new_x = new_x.dropna()
self.x = pd.concat([new_x, self.x])
self.print(f"Stored features, total: {len(self.x)}")
@utl.catch_errors
def store_targets(self):
last_x = self.x if self.y is None else \
self.x.loc[self.x.index.difference(self.y.index)]
universe = last_x.reset_index().groupby("time")["symbol"].apply(list)
for day, symbols in universe.items():
entry_time = day.replace(hour=self.entry_hr, minute=self.entry_mn)
exit_time = day.replace(hour=self.exit_hr, minute=self.exit_mn)
minute_bars = self.History(symbols, entry_time, exit_time, Resolution.Minute)
trading_bar = utl.agg_bars(minute_bars)
new_y = trading_bar.eval("close / open - 1").dropna() # Calculate the trading return
self.y = pd.concat([new_y, self.y])
self.print(f"Stored targets, total: {len(self.y)}")
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
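timeseriescv is another local module not included in the post. Judging from its use above (split over the features with groups=days and chronological folds), TimeSeriesSplitGroups is presumably a forward-chaining splitter over unique trading days; a minimal sketch under that assumption, not the author's implementation:

import numpy as np

class TimeSeriesSplitGroups:
    def __init__(self, n_splits=10):
        self.n_splits = n_splits

    def split(self, X, y=None, groups=None):
        # Forward-chaining folds over ordered unique groups (trading days):
        # each fold trains on all earlier days and tests on the next block.
        days = np.unique(groups)  # np.unique also sorts
        folds = np.array_split(days, self.n_splits + 1)
        for i in range(1, self.n_splits + 1):
            train_days = np.concatenate(folds[:i])
            yield (np.flatnonzero(np.isin(groups, train_days)),
                   np.flatnonzero(np.isin(groups, folds[i])))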