| Overall Statistics | |
| --- | --- |
| Total Trades | 1636 |
| Average Win | 0.53% |
| Average Loss | -0.34% |
| Compounding Annual Return | 72.110% |
| Drawdown | 15.300% |
| Expectancy | 0.326 |
| Net Profit | 138.407% |
| Sharpe Ratio | 2.254 |
| Probabilistic Sharpe Ratio | 94.717% |
| Loss Rate | 48% |
| Win Rate | 52% |
| Profit-Loss Ratio | 1.57 |
| Alpha | 0.489 |
| Beta | 0.022 |
| Annual Standard Deviation | 0.217 |
| Annual Variance | 0.047 |
| Information Ratio | 1.834 |
| Tracking Error | 0.274 |
| Treynor Ratio | 21.878 |
| Total Fees | $24274.43 |
| Estimated Strategy Capacity | $4400000.00 |
| Lowest Capacity Asset | RBLX XMP3AJ4KU3C5 |
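As a quick sanity check on the reported statistics: expectancy is approximately win rate × profit-loss ratio − loss rate = 0.52 × 1.57 − 0.48 ≈ 0.34, in line with the reported 0.326 (the small difference comes from rounding of the inputs).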
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.42a: Improved positions calculation for DAS Integration
v0.41a: Added use_kelly parameter and 9am training for easier live trading
v0.40a: Improvements to DAS Integration and logging
v0.39a: Minor improvements and changes to DAS Integration
v0.38a: DAS Trader integration
v0.37a: Long Short with probability threshold
v0.36a: Long Short version (market exposure parameter)
v0.35a: CV and training improvements
v0.34a: Storing features instead of model
v0.33a: Improved email notification
v0.32a: Improved logging
v0.31a: Minor improvements and library implementation
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.42a
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import numpy as np
import sklearn
import pandas as pd
pd.set_option('mode.use_inf_as_na', True)
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
import utils as utl
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum dollar volume in the previous trading day
self.capital = self.GetParameter("capital", 800000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Calendar days of history kept for model training
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.min_proba = self.GetParameter("min_proba", 0.1) # Minimum differential probability
self.use_kelly = self.GetParameter("use_kelly", 1) # Whether to use the kelly criterion for sizing
self.SetStartDate(2021, 6, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.kelly = 0
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.last_training = datetime(2000, 1, 1, 0, 0, 0)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(every_day, at(9, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
self.Schedule.On(every_day, at(16, 0), self.save_data)
self.x, self.y = None, None
if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved
self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data")))
self.print(f"Loaded data {self.x.shape} {self.y.shape}")
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if (self.Time - self.last_training).days < 7 \
or self.x is None or self.y is None: return
self.clean_data()
days = self.x.index.get_level_values("time")
if len(days.unique()) <= 21: return
y_bin = self.y.apply(lambda x: +1 if x >= 0 else -1) # +1/-1 labels so the Kelly score can be computed for both long and short sides
weight = self.y # TODO: How to use log with -100% returns?
model_temp = sklearn.base.clone(self.model)
x_train, x_test, y_train, y_test, w_train, w_test = train_test_split(
self.x, y_bin, weight, train_size=0.5, shuffle=False)
model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by returns
y_pred = model_temp.predict(x_test)
score = utl.kelly_score(y_test, y_pred, sample_weight=abs(w_test)) \
if self.use_kelly else 1 # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(weight)))
self.kelly = np.nan_to_num(score).clip(0, 1)
self.Debug(f"{self.Time} Training - Pts.: {y_bin.value_counts()}\n"
f"Kelly: {self.kelly:.1%}\n")
self.Plot("ML", "Score", self.kelly)
def enter_trades(self):
self.calc_features()
x_pred = self.x.query("time == @self.Time.date()")
if self.kelly <= 0 or len(x_pred) == 0: return
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict_proba(x_pred)[:,1],
index=x_pred.index)
self.print(f"Predictions: {y_pred.to_string()}")
for email in ["fbaldisserri@gmail.com", "hbrewer27@gmail.com"]:
self.Notify.Email(email, "Big Bertha Predictions", y_pred.to_string())
proba_desc = y_pred.sort_values(ascending=False)
proba_asc = y_pred.sort_values(ascending=True)
pairs = dict(proba=proba_desc.values-proba_asc.values,
long_sym=proba_desc.index.get_level_values("symbol"),
short_sym=proba_asc.index.get_level_values("symbol"))
pairs = pd.DataFrame.from_dict(pairs).query("proba >= @self.min_proba")
pairs.eval("pos = proba * @self.kelly", inplace=True)
if pairs["pos"].sum() >= 1: pairs["pos"] /= pairs["pos"].sum()
self.print(f"Positions {pairs}")
for _, row in pairs.iterrows():
self.SetHoldings(row["long_sym"], row["pos"]/2)
self.SetHoldings(row["short_sym"], -row["pos"]/2)
if self.LiveMode:
order_value = self.Portfolio.TotalPortfolioValue*row["pos"]/2
long_qty = int(order_value/self.Securities[row["long_sym"]].Price)
utl.das_send_order(self, row["long_sym"].split()[0], quantity=long_qty)
short_qty = -int(order_value/self.Securities[row["short_sym"]].Price)
utl.das_send_order(self, row["short_sym"].split()[0], quantity=short_qty)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
if self.LiveMode: utl.das_liquidate(self)
self.calc_targets()
def calc_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr:02d}:{entry_mn:02d}")
new_x = bertha_bar.add_prefix("bb_")
new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_x["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_x["gap"] = bertha_bar["open"] / yesterday_close - 1
self.x = pd.concat([new_x.dropna(), self.x])
self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}")
def calc_targets(self):
last_x = self.x.query("time == @self.Time.date()")
if len(last_x) == 0: return
tickers = list(last_x.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
if len(minute_bars) == 0: return
trading_bar = minute_bars.droplevel("time").join(last_x)
new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return
self.y = pd.concat([new_y, self.y])
self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}")
def clean_data(self):
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # Keep only the last self.lookback calendar days
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
def save_data(self):
mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True)
self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb")
self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y)))
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
"""
Big Bertha Strategy with Machine Learning
@version: 0.1
@creation date: 05/07/2022
First prototype
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last",
"high": "max", "low": "min",
"volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.SetStartDate(2015, 1, 1)
self.SetCash(100000)
self.UniverseSettings.ExtendedMarketHours = True
self.model = GradientBoostingClassifier(warm_start=True,
n_iter_no_change=3)
self.test_acc = 0
self.train_days = timedelta(91) # Training on the last quarter
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.max_symbols = literal_eval(self.GetParameter("max_symbols"))
self.benchmark = self.GetParameter("benchmark")
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Daily)
self.SetBenchmark(self.benchmark)
self.Train(self.DateRules.MonthStart(),
self.TimeRules.At(0, 0),
self.train_model)
self.Schedule.On(self.DateRules.EveryDay(self.benchmark),
self.TimeRules.At(9, 35),
self.trade)
self.Schedule.On(self.DateRules.EveryDay(self.benchmark),
self.TimeRules.At(15, 55),
self.stop_trading)
def coarse_filter(self, coarse):
symbols = [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
return np.random.choice(symbols, size=self.max_symbols,
replace=False).tolist()
def train_model(self):
x, y = self.get_data(self.Time - self.train_days, self.Time)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5,
shuffle=True)
self.model.fit(x_train, (y_train > 0).astype(float),
sample_weight=abs(y_train)) # TODO: Use log returns for training weights
self.test_acc = self.model.score(x_test, (y_test > 0).astype(float),
sample_weight=abs(y_test))
self.Debug(f"Training Points: {len(x_train)} Test Accuracy: {self.test_acc:.1%}")
self.Plot("ML", "Test Score", self.test_acc)
def trade(self):
if self.test_acc > 0.5:
x_pred = self.get_data(self.Time-timedelta(10), self.Time,
with_target=False)
y_proba = pd.Series(self.model.predict_proba(x_pred)[:,1],
index=x_pred.index).groupby("symbol").last()
positions = y_proba.apply(lambda x: x if x > 0.5 else 0) # TODO: Implement shorting
if positions.sum() > 1: positions /= positions.sum() # Max portfolio size 100%
self.Debug(f"Trading\n{y_proba}\nPos: {positions}")
self.Plot("ML", "Prediction", y_proba.mean())
for symbol in y_proba.index:
self.SetHoldings(symbol, positions[symbol])
def stop_trading(self):
self.Debug("End of day")
self.Transactions.CancelOpenOrders()
self.Liquidate()
def get_data(self, start, end, with_target=True):
tickers = [t for t in list(self.ActiveSecurities.Keys)
if str(t) not in self.benchmark]
minute_bars = self.History(tickers, start, end, Resolution.Minute)
day_bars = idx.filter_bars(minute_bars, "09:30", "16:30")
day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS)
pm_bars = idx.filter_bars(minute_bars, "00:00", "09:30")
pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS)
min5_bars = idx.filter_bars(minute_bars, "09:30", "09:35")
min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS)
features = pd.DataFrame()
features["big_bertha"] = min5_bar.eval("(high-low)/open")
features["range"] = min5_bar.eval("(low+close)/(high-low)")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
features["gap"] = day_bar["open"]/yesterday_close
features["pm_volume_usd"] = pm_bar.eval("close*volume")
features.dropna(inplace=True)
if with_target:
trade_day_bars = idx.filter_bars(minute_bars, "09:30", "15:55")
trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS)
target = trade_day_bar.eval("close/open-1").dropna()
index = target.index.intersection(features.index)
return features.loc[index], target.loc[index]
else:
return features"""
Big Bertha Strategy with Machine Learning
@version: 0.2
@creation date: 05/07/2022
First prototype
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last",
"high": "max", "low": "min",
"volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.SetStartDate(2015, 1, 1)
self.UniverseSettings.ExtendedMarketHours = True
self.model = GradientBoostingClassifier(warm_start=True,
n_iter_no_change=1)
self.test_acc = 0
self.train_days = timedelta(30) # Training on the last month
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.max_symbols = literal_eval(self.GetParameter("max_symbols"))
self.benchmark = self.GetParameter("benchmark")
self.capital = literal_eval(self.GetParameter("capital"))
self.SetSecurityInitializer(lambda x: x.SetMarketPrice(self.GetLastKnownPrice(x)))
self.SetCash(self.capital)
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Daily)
self.SetBenchmark(self.benchmark)
self.Train(self.DateRules.MonthStart(),
self.TimeRules.At(0, 0),
self.train_model)
self.Schedule.On(self.DateRules.EveryDay(self.benchmark),
self.TimeRules.At(9, 35),
self.trade)
self.Schedule.On(self.DateRules.EveryDay(self.benchmark),
self.TimeRules.At(15, 55),
self.stop_trading)
def coarse_filter(self, coarse):
symbols = [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
return np.random.choice(symbols, size=self.max_symbols,
replace=False).tolist()
def train_model(self):
x, y = self.get_data(self.Time - self.train_days, self.Time)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5,
shuffle=True)
self.model.fit(x_train, (y_train > 0).astype(float),
sample_weight=abs(y_train))
self.test_acc = self.model.score(x_test, (y_test > 0).astype(float),
sample_weight=abs(y_test))
self.Debug(f"Training Points: {len(x_train)} Test Accuracy: {self.test_acc:.1%}")
self.Plot("ML", "Test Score", self.test_acc)
def trade(self):
if self.test_acc > 0.5:
x_pred = self.get_data(self.Time-timedelta(5), self.Time,
with_target=False)
y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1],
index=x_pred.index).groupby("symbol").last()
positions = y_proba.apply(lambda x: -1 if x < 0.25 else +1 if x > 0.75 else 0) # Short below 25%, long above 75%; near 50% means no position
if positions.abs().sum() > 1: positions /= positions.abs().sum() # Max portfolio size 100%
self.Debug(f"Trading\n{y_proba}\nPos: {positions}")
self.Plot("ML", "Prediction", y_proba.mean())
for symbol in positions.index:
self.SetHoldings(symbol, positions[symbol])
def stop_trading(self):
self.Debug("End of day")
self.Transactions.CancelOpenOrders()
self.Liquidate()
def get_data(self, start, end, with_target=True):
tickers = [t for t in list(self.ActiveSecurities.Keys)
if str(t) not in self.benchmark]
minute_bars = self.History(tickers, start, end, Resolution.Minute)
day_bars = idx.filter_bars(minute_bars, "09:30", "16:30")
day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS)
pm_bars = idx.filter_bars(minute_bars, "00:00", "09:30")
pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS)
min5_bars = idx.filter_bars(day_bars, "09:30", "09:35")
min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS)
features = pd.DataFrame()
features["big_bertha"] = min5_bar.eval("(high-low)/open")
features["close_range"] = min5_bar.eval("(close-low)/(high-low)")
features["open_range"] = min5_bar.eval("(open-low)/(high-low)")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
features["gap"] = day_bar["open"]/yesterday_close
features["pm_volume_usd"] = pm_bar.eval("close*volume")
features.dropna(inplace=True)
if with_target:
trade_day_bars = idx.filter_bars(day_bars, "09:35", "15:55")
trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS)
target = trade_day_bar.eval("close/open-1").apply(np.log1p)
index = target.dropna().index.intersection(features.index)
return features.loc[index], target.loc[index]
else:
return features"""
Big Bertha Strategy with Machine Learning
@version: 0.3
@creation date: 05/07/2022
First prototype
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last",
"high": "max", "low": "min",
"volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.SetStartDate(2015, 1, 1)
self.UniverseSettings.ExtendedMarketHours = True
self.model = GradientBoostingClassifier(warm_start=True,
n_iter_no_change=3)
self.test_acc = 0
self.train_days = timedelta(30) # Training on the last month
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.max_symbols = literal_eval(self.GetParameter("max_symbols"))
self.min_gap = literal_eval(self.GetParameter("min_gap"))
self.benchmark = self.GetParameter("benchmark")
self.capital = literal_eval(self.GetParameter("capital"))
self.SetSecurityInitializer(lambda x: x.SetMarketPrice(self.GetLastKnownPrice(x)))
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Daily)
self.SetBenchmark(self.benchmark)
self.Train(self.DateRules.MonthStart(),
self.TimeRules.At(0, 0),
self.train_model)
self.Schedule.On(self.DateRules.EveryDay(self.benchmark),
self.TimeRules.At(9, 35),
self.trade)
self.Schedule.On(self.DateRules.EveryDay(self.benchmark),
self.TimeRules.At(15, 55),
self.stop_trading)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
x, y = self.get_data(self.Time - self.train_days, self.Time,
min_gap=self.min_gap)
fit_params = dict(sample_weight=abs(y))
cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float),
scoring="accuracy", fit_params=fit_params)
self.test_acc = np.mean(cv_scores)
self.model.fit(x, (y > 0).astype(float), **fit_params)
self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.test_acc:.1%}")
self.Plot("ML", "Test Accuracy", self.test_acc)
def trade(self):
if self.test_acc <= 0.5: return
x_pred = self.get_data(self.Time-timedelta(5), self.Time,
with_target=False, min_gap=self.min_gap)
if x_pred is None: return
today = self.Time.replace(hour=0, minute=0, second=0)
x_pred.query("time == @today", inplace=True)
if len(x_pred) == 0: return
y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1],
index=x_pred.index)
positions = y_proba.apply(lambda x: -1 if x < 0.4 else +1 if x > 0.6 else 0) # Short below 40%, long above 60%; near 50% means no position
if positions.abs().sum() > 1: positions /= positions.abs().sum() # Max portfolio size 100%
self.Debug(f"Trading\n{y_proba}\nPos: {positions}")
self.Plot("ML", "Prediction", y_proba.mean())
for symbol in positions.index:
self.SetHoldings(symbol, positions[symbol])
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def get_data(self, start, end, with_target=True, min_gap=0):
tickers = [t for t in list(self.ActiveSecurities.Keys)
if str(t) not in self.benchmark]
daily_bars = self.History(tickers, start, end, Resolution.Daily)
features = pd.DataFrame()
features["gap"] = idx.gap(daily_bars).dropna()
features.query("abs(gap) >= @min_gap", inplace=True)
if len(features) == 0:
return (None, None) if with_target else None
minute_bars = pd.concat([self.History([s], d, d + timedelta(1),
Resolution.Minute)
for s, d in features.index])
pm_bars = idx.filter_bars(minute_bars, "00:00", "09:30")
pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS)
min5_bars = idx.filter_bars(minute_bars, "09:30", "09:35")
min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS)
features["big_bertha"] = min5_bar.eval("(high-low)/open")
features["close_range"] = min5_bar.eval("(close-low)/(high-low)")
features["open_range"] = min5_bar.eval("(open-low)/(high-low)")
features["pm_volume_usd"] = pm_bar.eval("close*volume")
features.dropna(inplace=True)
if not with_target:
return features
trade_day_bars = idx.filter_bars(minute_bars, "09:35", "15:55")
trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS)
target = trade_day_bar.eval("close/open-1").apply(np.log1p)
index = target.dropna().index.intersection(features.index)
return features.loc[index], target.loc[index]"""
Big Bertha Strategy with Machine Learning
@version: 0.4
@creation date: 05/07/2022
First prototype
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last",
"high": "max", "low": "min",
"volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.max_symbols = literal_eval(self.GetParameter("max_symbols"))
self.capital = literal_eval(self.GetParameter("capital"))
self.SetStartDate(2020, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.test_acc = 0
self.train_days = timedelta(30) # Training on the last month
self.model = GradientBoostingClassifier(warm_start=True,
n_iter_no_change=3)
self.Train(self.DateRules.MonthStart(),
self.TimeRules.At(0, 0),
self.train_model)
self.Schedule.On(self.DateRules.EveryDay(),
self.TimeRules.At(9, 35),
self.trade)
self.Schedule.On(self.DateRules.EveryDay(),
self.TimeRules.At(15, 55),
self.stop_trading)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData]
def train_model(self):
x, y = self.get_data(self.Time - self.train_days, self.Time)
fit_params = dict(sample_weight=abs(y))
cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float),
scoring="accuracy", fit_params=fit_params)
self.test_acc = np.mean(cv_scores)
self.model.fit(x, (y > 0).astype(float), **fit_params)
self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.test_acc:.1%}")
self.Plot("ML", "Test Accuracy", self.test_acc)
def trade(self):
model_confidence = max(self.test_acc-0.5, 0)*2 # 100% if accuracy 100%, 0% if below 50%
x_pred = self.get_data(self.Time-timedelta(5), self.Time,
with_target=False)
y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1],
index=x_pred.index).groupby("symbol").last()
positions = (y_proba - 0.5) * 2 * model_confidence # TODO: Fix risk management
if positions.abs().sum() > 1: positions /= positions.abs().sum() # Max portfolio size 100%
self.Plot("ML", "Prediction", y_proba.mean())
for symbol, position in positions[abs(positions) > 0.01].items():
self.Debug(f"{self.Time} - Trading {symbol} at {position:.1%}")
self.SetHoldings(symbol, position)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def get_data(self, start, end, with_target=True): # TODO: Add daily dataset update
tickers = np.random.choice(list(self.ActiveSecurities.Keys),
size=self.max_symbols,
replace=False).tolist()
minute_bars = self.History(tickers, start, end, Resolution.Minute)
day_bars = idx.filter_bars(minute_bars, "09:30", "16:30")
day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS)
pm_bars = idx.filter_bars(minute_bars, "00:00", "09:30")
pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS)
min5_bars = idx.filter_bars(day_bars, "09:30", "09:35")
min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS)
features = pd.DataFrame()
features["big_bertha"] = min5_bar.eval("(high-low)/open")
features["close_range"] = min5_bar.eval("(close-low)/(high-low)")
features["open_range"] = min5_bar.eval("(open-low)/(high-low)")
features["pm_volume_usd"] = pm_bar.eval("close*volume")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
features["gap"] = day_bar["open"] / yesterday_close
features = features.dropna().query("abs(gap) >= 0.02")
if with_target:
trade_day_bars = idx.filter_bars(day_bars, "09:35", "15:55")
trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS)
target = trade_day_bar.eval("close/open-1").apply(np.log1p)
index = target.dropna().index.intersection(features.index)
return features.loc[index], target.loc[index]
else:
return features"""
Big Bertha Strategy with Machine Learning
@version: 0.6
@creation date: 05/07/2022
First prototype
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last",
"high": "max", "low": "min",
"volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.max_symbols = literal_eval(self.GetParameter("max_symbols"))
self.capital = literal_eval(self.GetParameter("capital"))
self.SetStartDate(2020, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.benchmark = "SPY"
self.AddEquity(self.benchmark, Resolution.Daily)
self.SetBenchmark(self.benchmark)
self.history = pd.DataFrame()
self.test_acc = 0
self.train_days = timedelta(30) # Training on the last month
self.model = GradientBoostingClassifier(warm_start=True,
n_iter_no_change=3)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.MonthStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.update_history)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
self.Schedule.On(every_day, at(16, 0), self.update_history)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData]
def train_model(self):
if len(self.history) > 10000:
x, y = self.get_data(self.Time - self.train_days, self.Time)
fit_params = dict(sample_weight=abs(y))
cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float),
scoring="accuracy", fit_params=fit_params)
self.test_acc = np.mean(cv_scores)
self.model.fit(x, (y > 0).astype(float), **fit_params)
self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.test_acc:.1%}")
self.Plot("ML", "Test Accuracy", self.test_acc)
def trade(self):
if len(self.history) > 10000:
model_confidence = max(self.test_acc-0.5, 0)*2 # 100% if accuracy 100%, 0% if below 50%
x_pred = self.get_data(self.Time-timedelta(5), self.Time,
with_target=False)
y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1],
index=x_pred.index).groupby("symbol").last()
positions = (y_proba - 0.5) * 2 * model_confidence # TODO: Fix risk management
if positions.abs().sum() > 1: positions /= positions.abs().sum() # Max portfolio size 100%
self.Plot("ML", "Prediction", y_proba.mean())
for symbol, position in positions[abs(positions) > 0.01].items():
self.Debug(f"{self.Time} - Trading {symbol} at {position:.1%}")
self.SetHoldings(symbol, position)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def update_history(self):
tickers = list(self.ActiveSecurities.Keys)
new_data = self.History(tickers, 5, Resolution.Minute)
self.history = pd.concat([self.history, new_data])
def get_data(self, start, end, with_target=True): # TODO: Add daily dataset update
time_index = self.history.index.get_level_values("time")
time_filter = (time_index >= start) & (time_index <= end)
minute_bars = self.history.loc[time_filter]
day_bars = idx.filter_bars(minute_bars, "09:30", "16:30")
day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS)
pm_bars = idx.filter_bars(minute_bars, "00:00", "09:30")
pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS)
min5_bars = idx.filter_bars(day_bars, "09:30", "09:35")
min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS)
features = pd.DataFrame()
features["big_bertha"] = min5_bar.eval("(high-low)/open")
features["close_range"] = min5_bar.eval("(close-low)/(high-low)")
features["open_range"] = min5_bar.eval("(open-low)/(high-low)")
features["pm_volume_usd"] = pm_bar.eval("close*volume")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
features["gap"] = day_bar["open"] / yesterday_close
features = features.dropna().query("abs(gap) >= 0.02")
if with_target:
trade_day_bars = idx.filter_bars(day_bars, "09:35", "15:55")
trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS)
target = trade_day_bar.eval("close/open-1").apply(np.log1p)
index = target.dropna().index.intersection(features.index)
return features.loc[index], target.loc[index]
else:
return features"""
Big Bertha Strategy with Machine Learning
@version: 0.6
@creation date: 05/07/2022
First prototype
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last",
"high": "max", "low": "min",
"volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.max_symbols = literal_eval(self.GetParameter("max_symbols"))
self.capital = literal_eval(self.GetParameter("capital"))
self.SetStartDate(2020, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.test_acc = 0
self.train_days = timedelta(30) # Training on the last month
self.model = GradientBoostingClassifier(warm_start=True,
n_iter_no_change=3)
self.Train(self.DateRules.MonthStart(),
self.TimeRules.At(0, 0),
self.train_model)
self.Schedule.On(self.DateRules.EveryDay(),
self.TimeRules.At(9, 35),
self.trade)
self.Schedule.On(self.DateRules.EveryDay(),
self.TimeRules.At(15, 55),
self.stop_trading)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData]
def train_model(self):
x, y = self.get_data(self.Time - self.train_days, self.Time)
fit_params = dict(sample_weight=abs(y))
cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float),
scoring="accuracy", fit_params=fit_params)
self.test_acc = np.mean(cv_scores)
self.model.fit(x, (y > 0).astype(float), **fit_params)
self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.test_acc:.1%}")
self.Plot("ML", "Test Accuracy", self.test_acc)
def trade(self):
model_confidence = max(self.test_acc-0.5, 0)*2 # 100% if accuracy 100%, 0% if below 50%
x_pred = self.get_data(self.Time-timedelta(5), self.Time,
with_target=False)
y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1],
index=x_pred.index).groupby("symbol").last()
positions = (y_proba - 0.5) * 2 * model_confidence # TODO: Fix risk management
if positions.abs().sum() > 1: positions /= positions.abs().sum() # Max portfolio size 100%
self.Plot("ML", "Prediction", y_proba.mean())
for symbol, position in positions[abs(positions) > 0.01].items():
self.Debug(f"{self.Time} - Trading {symbol} at {position:.1%}")
self.SetHoldings(symbol, position)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def get_data(self, start, end, with_target=True):
tickers = np.random.choice(list(self.ActiveSecurities.Keys),
size=self.max_symbols,
replace=False).tolist()
minute_bars = self.History(tickers, start, end, Resolution.Minute)
day_bars = idx.filter_bars(minute_bars, "09:30", "16:30")
day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS)
pm_bars = idx.filter_bars(minute_bars, "00:00", "09:30")
pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS)
min5_bars = idx.filter_bars(day_bars, "09:30", "09:35")
min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS)
features = pd.DataFrame()
features["big_bertha"] = min5_bar.eval("(high-low)/open")
features["close_range"] = min5_bar.eval("(close-low)/(high-low)")
features["open_range"] = min5_bar.eval("(open-low)/(high-low)")
features["pm_volume_usd"] = pm_bar.eval("close*volume")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
features["gap"] = day_bar["open"] / yesterday_close
features = features.dropna().query("abs(gap) >= 0.02")
if with_target:
trade_day_bars = idx.filter_bars(day_bars, "09:35", "15:55")
trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS)
target = trade_day_bar.eval("close/open-1").apply(np.log1p)
index = target.dropna().index.intersection(features.index)
return features.loc[index], target.loc[index]
else:
return features"""
Big Bertha Strategy with Machine Learning
- Implementing offline data storage to avoid symbols limitation
@version: 0.7
@creation date: 05/07/2022
First prototype
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last",
"high": "max", "low": "min",
"volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.capital = literal_eval(self.GetParameter("capital"))
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2020, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.test_acc = 0
self.features, self.targets = None, None
self.train_days = 63 # Training on the last quarter
self.model = GradientBoostingClassifier(warm_start=True,
n_iter_no_change=3)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.MonthStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.update_data)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.get_dataset_days() < 21: return
self.Debug(f"{self.Time} Training")
x, y = self.get_data(self.train_days)
fit_params = dict(sample_weight=abs(y))
cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float),
scoring="accuracy", fit_params=fit_params)
self.test_acc = np.mean(cv_scores)
self.model.fit(x, (y > 0).astype(float), **fit_params)
self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.test_acc:.1%}")
self.Plot("ML", "Test Accuracy", self.test_acc)
def trade(self):
model_confidence = max(self.test_acc-0.5, 0)*2 # 100% if accuracy 100%, 0% if below 50%
if model_confidence <= 0: return
self.Debug(f"{self.Time} Trading")
x_pred = self.get_data(1, with_target=False)
x_pred = x_pred.query("time == @self.Time.date()")
y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1],
index=x_pred.index).groupby("symbol").last()
positions = (y_proba - 0.5) * 2 * model_confidence # TODO: Fix risk management
if positions.abs().sum() > 1: positions /= positions.abs().sum() # Max portfolio size 100%
self.Plot("ML", "Prediction", y_proba.mean())
for symbol, position in positions[abs(positions) > 0.01].items():
self.Debug(f"{self.Time} - Trading {symbol} at {position:.1%}")
self.SetHoldings(symbol, position)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def update_data(self):
trade_days = self.TradingCalendar.GetTradingDays(self.Time - timedelta(7),
self.Time - timedelta(1))
last_day = list(filter(lambda p: p.BusinessDay and not p.PublicHoliday,
trade_days))[-1].Date
start = last_day.replace(hour=9, minute=30, second=0)
end = self.Time.replace(hour=9, minute=35, second=0)
tickers = [ticker for ticker in list(self.ActiveSecurities.Keys)
if str(ticker) not in self.benchmark]
minute_bars = self.History(tickers, start, end, Resolution.Minute)
features = self.calculate_features(minute_bars).dropna()
self.features = pd.concat([self.features, features], copy=False)
targets = self.calculate_targets(minute_bars).dropna()
self.targets = pd.concat([self.targets, targets], copy=False)
memory = self.features.memory_usage(deep=True).sum()
memory += self.targets.memory_usage(deep=True)
self.Debug(f"{self.Time} Data updated: {len(tickers)} tickers {memory/10**6:.1f} MB")
def calculate_features(self, minute_bars):
day_bars = idx.filter_bars(minute_bars, "09:31", "16:30")
day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS)
pm_bars = idx.filter_bars(minute_bars, "00:01", "09:30")
pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS)
min5_bars = idx.filter_bars(day_bars, "09:31", "09:35")
min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS)
features = pd.DataFrame()
features["big_bertha"] = min5_bar.eval("(high-low)/open")
features["close_range"] = min5_bar.eval("(close-low)/(high-low)")
features["open_range"] = min5_bar.eval("(open-low)/(high-low)")
features["pm_volume_usd"] = pm_bar.eval("close*volume")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
features["gap"] = day_bar["open"] / yesterday_close
return features
def calculate_targets(self, minute_bars):
trade_day_bars = idx.filter_bars(minute_bars, "09:36", "15:55")
trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS)
return trade_day_bar.eval("close/open-1").apply(np.log1p)
def get_data(self, n_points, with_target=True):
if with_target:
common_index = self.targets.index.intersection(self.features.index)
y = self.targets.loc[common_index].groupby("symbol").tail(n_points)
x = self.features.loc[y.index]
return x, y
else:
return self.features.groupby("symbol").tail(n_points)
def get_dataset_days(self):
return len(self.features.index.get_level_values("time").unique()) \
if self.features is not None else 0
"""
Big Bertha Strategy with Machine Learning
- Implementing offline data storage to avoid symbols limitation
@version: 0.8
@creation date: 05/07/2022
First prototype
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last",
"high": "max", "low": "min",
"volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.capital = literal_eval(self.GetParameter("capital"))
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2020, 9, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.test_acc = 0
self.features, self.targets = None, None
self.train_days = 63 # Training on the last quarter
self.model = GradientBoostingClassifier(warm_start=True,
n_iter_no_change=3)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.MonthStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.update_data)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.get_dataset_days() < 21: return # At least a month of data to train the model
self.Debug(f"{self.Time} Training")
x, y = self.get_train_data()
fit_params = dict(sample_weight=abs(y))
cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float),
scoring="accuracy", fit_params=fit_params)
self.test_acc = np.mean(cv_scores)
self.model.fit(x, (y > 0).astype(float), **fit_params)
self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.test_acc:.1%}")
self.Plot("ML", "Test Accuracy", self.test_acc)
def trade(self):
model_confidence = max(self.test_acc-0.5, 0)*2 # 100% if accuracy 100%, 0% if below 50%
if model_confidence <= 0: return
self.Debug(f"{self.Time} Trading")
x_pred = self.get_pred_data()
y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1],
index=x_pred.index).groupby("symbol").last()
trades = y_proba[(y_proba < 0.3) | (y_proba > 0.7)]
positions = (trades - 0.5) * 2 * model_confidence # TODO: fix risk management; max portfolio size 100% including shorts
for symbol, position in positions.items():
self.Debug(f"{self.Time} - Trading {symbol} at {position:.1%}")
self.SetHoldings(symbol, position)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def update_data(self):
trade_days = self.TradingCalendar.GetTradingDays(self.Time - timedelta(7),
self.Time - timedelta(1))
last_day = list(filter(lambda p: p.BusinessDay and not p.PublicHoliday,
trade_days))[-1].Date
start = last_day.replace(hour=9, minute=30, second=0)
end = self.Time.replace(hour=9, minute=35, second=0)
tickers = [ticker for ticker in list(self.ActiveSecurities.Keys)
if str(ticker) not in self.benchmark]
minute_bars = self.History(tickers, start, end, Resolution.Minute)
features = self.calculate_features(minute_bars).dropna()
self.features = pd.concat([self.features, features]).drop_duplicates()
targets = self.calculate_targets(minute_bars).dropna()
self.targets = pd.concat([self.targets, targets]).drop_duplicates()
memory = self.features.memory_usage(deep=True).sum()
memory += self.targets.memory_usage(deep=True)
self.Debug(f"{self.Time} Data updated: {len(tickers)} tickers {memory/10**6:.1f} MB")
def calculate_features(self, minute_bars):
day_bars = idx.filter_bars(minute_bars, "09:31", "16:30")
day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS)
pm_bars = idx.filter_bars(minute_bars, "00:01", "09:30")
pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS)
min5_bars = idx.filter_bars(day_bars, "09:31", "09:35")
min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS)
features = pd.DataFrame()
features["big_bertha"] = min5_bar.eval("(high-low)/open")
features["close_range"] = min5_bar.eval("(close-low)/(high-low)")
features["open_range"] = min5_bar.eval("(open-low)/(high-low)")
features["pm_volume_usd"] = pm_bar.eval("close*volume")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
features["gap"] = day_bar["open"] / yesterday_close
return features
def calculate_targets(self, minute_bars):
trade_day_bars = idx.filter_bars(minute_bars, "09:36", "15:55")
trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS)
return trade_day_bar.eval("close/open-1").apply(np.log1p)
def get_train_data(self):
common_index = self.targets.index.intersection(self.features.index)
y = self.targets.loc[common_index].groupby("symbol").tail(self.train_days)
x = self.features.loc[y.index]
return x, y
def get_pred_data(self):
return self.features.query("time == @self.Time.date()")
def get_dataset_days(self):
return len(self.features.index.get_level_values("time").unique()) \
if self.features is not None else 0
"""
Big Bertha Strategy with Machine Learning
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
- Risk management with stop loss
@version: 0.9
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last",
"high": "max", "low": "min",
"volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.capital = literal_eval(self.GetParameter("capital"))
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2020, 10, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.test_acc = 0
self.features, self.targets = None, None
self.train_days = 252 # Training on the last year of data
self.model = GradientBoostingClassifier(warm_start=True,
n_iter_no_change=3)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.MonthStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.update_data)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.get_dataset_days() < 21: return # At least a month of data to train the model
self.Debug(f"{self.Time} Training")
x, y = self.get_train_data()
fit_params = dict(sample_weight=abs(y))
cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float),
scoring="accuracy", fit_params=fit_params)
self.test_acc = np.mean(cv_scores)
self.model.fit(x, (y > 0).astype(float), **fit_params)
self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.test_acc:.1%}")
self.Plot("ML", "Test Accuracy", self.test_acc)
def trade(self):
model_confidence = max(self.test_acc-0.5, 0)*2 # 100% if accuracy 100%, 0% if below 50%
if model_confidence <= 0: return
self.Debug(f"{self.Time} Trading")
x_pred = self.get_pred_data()
y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1],
index=x_pred.index).groupby("symbol").last()
trades = y_proba[(y_proba < 0.25) | (y_proba > 0.75)]
positions = (trades - 0.5) * 2 * model_confidence # TODO: fix risk management; max portfolio size 100% including shorts
for symbol, position in positions.items():
self.Debug(f"{self.Time} - Trading {symbol} at {position:.1%}")
qty = self.CalculateOrderQuantity(symbol, position)
limit_price = self.Securities[symbol].Price
self.LimitOrder(symbol, qty, limit_price)
stop_price = 0 if qty > 0 else 2 * limit_price # Far-from-market stop: placeholder only, not expected to trigger
self.StopMarketOrder(symbol, -qty, stop_price)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def update_data(self):
trade_days = self.TradingCalendar.GetTradingDays(self.Time - timedelta(7),
self.Time - timedelta(1))
last_day = list(filter(lambda p: p.BusinessDay and not p.PublicHoliday,
trade_days))[-1].Date
start = last_day.replace(hour=9, minute=30, second=0)
end = self.Time.replace(hour=9, minute=35, second=0)
tickers = [ticker for ticker in list(self.ActiveSecurities.Keys)
if str(ticker) not in self.benchmark]
minute_bars = self.History(tickers, start, end, Resolution.Minute)
features = self.calculate_features(minute_bars).dropna()
self.features = pd.concat([self.features, features]).drop_duplicates()
targets = self.calculate_targets(minute_bars).dropna()
self.targets = pd.concat([self.targets, targets]).drop_duplicates()
memory = self.features.memory_usage(deep=True).sum()
memory += self.targets.memory_usage(deep=True)
self.Debug(f"{self.Time} Data updated: {len(tickers)} tickers {memory/10**6:.1f} MB")
def calculate_features(self, minute_bars):
day_bars = idx.filter_bars(minute_bars, "09:31", "16:00")
day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS)
pm_bars = idx.filter_bars(minute_bars, "00:01", "09:30")
pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS)
min5_bars = idx.filter_bars(day_bars, "09:31", "09:35")
min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS)
features = pd.DataFrame()
features["big_bertha"] = min5_bar.eval("(high-low)/open")
features["close_range"] = min5_bar.eval("(close-low)/(high-low)")
features["open_range"] = min5_bar.eval("(open-low)/(high-low)")
features["pm_volume_usd"] = pm_bar.eval("close*volume")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
features["gap"] = day_bar["open"] / yesterday_close
return features
def calculate_targets(self, minute_bars):
trade_day_bars = idx.filter_bars(minute_bars, "09:36", "15:55")
trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS)
return trade_day_bar.eval("close/open-1").apply(np.log1p)
def get_train_data(self):
common_index = self.targets.index.intersection(self.features.index)
y = self.targets.loc[common_index].groupby("symbol").tail(self.train_days)
x = self.features.loc[y.index]
return x, y
def get_pred_data(self):
return self.features.query("time == @self.Time.date()")
def get_dataset_days(self):
return len(self.features.index.get_level_values("time").unique()) \
if self.features is not None else 0
"""
Big Bertha Strategy with Machine Learning
Done
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
Todo
- Risk management with stop loss
@version: 0.9
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last",
"high": "max", "low": "min",
"volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.capital = literal_eval(self.GetParameter("capital"))
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2020, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.confidence = 0
self.features, self.targets = None, None
self.train_days = 252 # Training on the last year of data
self.model = GradientBoostingClassifier(warm_start=True,
n_iter_no_change=3)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.MonthStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.update_data)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None: return
self.Debug(f"{self.Time} Training")
x, y = self.get_train_data()
fit_params = dict(sample_weight=abs(y))
cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float),
scoring="accuracy", fit_params=fit_params)
self.confidence = max(np.mean(cv_scores) - 0.5, 0) * 2 # 100% if accuracy 100%, 0% if below 50%
self.model.fit(x, (y > 0).astype(float), **fit_params)
self.Debug(f"{self.Time} Points:{len(x)} Confidence:{self.confidence:.1%}")
self.Plot("ML", "Confidence", self.confidence)
def trade(self):
if self.confidence <= 0: return
self.Debug(f"{self.Time} Trading")
x_pred = self.get_pred_data()
y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1],
index=x_pred.index).groupby("symbol").last()
trades = y_proba[(y_proba < 0.25) | (y_proba > 0.75)]
positions = (trades - 0.5) * 2 * self.confidence # TODO: fix risk management; max portfolio size 100% including shorts
for symbol, position in positions.items():
self.Debug(f"{self.Time} - Trading {symbol} at {position:.1%}")
self.SetHoldings(symbol, position)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def update_data(self):
trade_days = self.TradingCalendar.GetTradingDays(self.Time - timedelta(7),
self.Time - timedelta(1))
last_day = list(filter(lambda p: p.BusinessDay and not p.PublicHoliday,
trade_days))[-1].Date
start = last_day.replace(hour=9, minute=30, second=0)
end = self.Time.replace(hour=9, minute=35, second=0)
tickers = [ticker for ticker in list(self.ActiveSecurities.Keys)
if str(ticker) not in self.benchmark]
minute_bars = self.History(tickers, start, end, Resolution.Minute)
features = self.calculate_features(minute_bars).dropna()
self.features = pd.concat([self.features, features]).drop_duplicates()
targets = self.calculate_targets(minute_bars).dropna()
self.targets = pd.concat([self.targets, targets]).drop_duplicates()
memory = self.features.memory_usage(deep=True).sum()
memory += self.targets.memory_usage(deep=True)
self.Debug(f"{self.Time} Data updated: {len(tickers)} tickers {memory/10**6:.1f} MB")
def calculate_features(self, minute_bars):
day_bars = idx.filter_bars(minute_bars, "09:31", "16:00")
day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS)
pm_bars = idx.filter_bars(minute_bars, "00:01", "09:30")
pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS)
min5_bars = idx.filter_bars(day_bars, "09:31", "09:35")
min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS)
features = pd.DataFrame()
features["big_bertha"] = min5_bar.eval("(high-low)/open")
features["close_range"] = min5_bar.eval("(close-low)/(high-low)")
features["open_range"] = min5_bar.eval("(open-low)/(high-low)")
features["pm_volume_usd"] = pm_bar.eval("close*volume")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
features["gap"] = day_bar["open"] / yesterday_close
return features
def calculate_targets(self, minute_bars):
trade_day_bars = idx.filter_bars(minute_bars, "09:36", "15:55")
trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS)
return trade_day_bar.eval("close/open-1").apply(np.log1p)
def get_train_data(self):
common_index = self.targets.index.intersection(self.features.index)
y = self.targets.loc[common_index].groupby("symbol").tail(self.train_days)
x = self.features.loc[y.index]
return x, y
def get_pred_data(self):
return self.features.query("time == @self.Time.date()")
def get_dataset_days(self):
return len(self.features.index.get_level_values("time").unique()) \
if self.features is not None else 0
"""
Big Bertha Strategy with Machine Learning
Done
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
Todo
- Risk management with stop loss
@version: 0.10
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last",
"high": "max", "low": "min",
"volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.capital = literal_eval(self.GetParameter("capital"))
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2020, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.confidence = 0
self.features, self.targets = None, None
self.train_days = 252 # Training on the last year of data
self.model = GradientBoostingClassifier(warm_start=True,
n_iter_no_change=3)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.MonthStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.update_data)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None: return
self.Debug(f"{self.Time} Training")
x, y = self.get_train_data()
fit_params = dict(sample_weight=abs(y))
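# weight each training sample by the magnitude of its log-return target, so larger moves count more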
cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float),
scoring="accuracy", fit_params=fit_params)
self.confidence = max(np.mean(cv_scores) - 0.5, 0) * 2 # 100% if accuracy 100%, 0% if below 50%
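# e.g. mean CV accuracy 0.60 -> confidence (0.60 - 0.5) * 2 = 20%; accuracy <= 0.50 -> 0%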
self.model.fit(x, (y > 0).astype(float), **fit_params)
self.Debug(f"{self.Time} Points:{len(x)} Confidence:{self.confidence:.1%}")
self.Plot("ML", "Confidence", self.confidence)
def trade(self):
if self.confidence <= 0: return
self.Debug(f"{self.Time} Trading")
x_pred = self.get_pred_data()
y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1],
index=x_pred.index).groupby("symbol").last()
self.Debug(f"Predictions: {len(trades)} - Proba {min(trades):.0%}-{max(trades):.0%}")
trades = y_proba[(y_proba < 0.25) | (y_proba > 0.75)]
positions = (trades - 0.5) * 2 * self.confidence # TODO: Fix risk management: cap gross portfolio exposure at 100%, shorts included
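# e.g. proba 0.8 with confidence 0.4 -> (0.8 - 0.5) * 2 * 0.4 = 24% long; proba 0.2 -> 24% short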
for symbol, position in positions.items():
self.Debug(f"{self.Time} - Trading {symbol} at {position:.1%}")
self.SetHoldings(symbol, position)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def update_data(self):
trade_days = self.TradingCalendar.GetTradingDays(self.Time - timedelta(7),
self.Time - timedelta(1))
last_day = list(filter(lambda p: p.BusinessDay and not p.PublicHoliday,
trade_days))[-1].Date
start = last_day.replace(hour=9, minute=30, second=0)
end = self.Time.replace(hour=9, minute=35, second=0)
tickers = [ticker for ticker in list(self.ActiveSecurities.Keys)
if str(ticker) != self.benchmark]
minute_bars = self.History(tickers, start, end, Resolution.Minute)
features = self.calculate_features(minute_bars).dropna()
self.features = pd.concat([self.features, features]).drop_duplicates()
targets = self.calculate_targets(minute_bars).dropna()
self.targets = pd.concat([self.targets, targets]).drop_duplicates()
self.Debug(f"{self.Time} Data updated: {len(self.features)} datapoints")
def calculate_features(self, minute_bars):
day_bars = idx.filter_bars(minute_bars, "09:31", "16:00")
day_bar = day_bars.groupby(GROUPER).agg(AGG_OPS)
pm_bars = idx.filter_bars(minute_bars, "00:01", "09:30")
pm_bar = pm_bars.groupby(GROUPER).agg(AGG_OPS)
min5_bars = idx.filter_bars(day_bars, "09:31", "09:35")
min5_bar = min5_bars.groupby(GROUPER).agg(AGG_OPS)
features = pd.DataFrame()
features["big_bertha_size"] = min5_bar.eval("(high-low)/open")
features["big_bertha_volume"] = min5_bar["volume"]
features["big_bertha_open"] = min5_bar["open"]
features["close_range"] = min5_bar.eval("(close-low)/(high-low)")
features["open_range"] = min5_bar.eval("(open-low)/(high-low)")
features["pm_volume_usd"] = pm_bar.eval("close*volume")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
features["gap"] = day_bar["open"] / yesterday_close
return features
def calculate_targets(self, minute_bars):
trade_day_bars = idx.filter_bars(minute_bars, "09:36", "15:55")
trade_day_bar = trade_day_bars.groupby(GROUPER).agg(AGG_OPS)
return trade_day_bar.eval("close/open-1").apply(np.log1p)
def get_train_data(self):
common_index = self.targets.index.intersection(self.features.index)
y = self.targets.loc[common_index].groupby("symbol").tail(self.train_days)
x = self.features.loc[y.index]
return x, y
def get_pred_data(self):
return self.features.query("time == @self.Time.date()")
def get_dataset_days(self):
return len(self.features.index.get_level_values("time").unique()) \
if self.features is not None else 0
"""
Big Bertha Strategy with Machine Learning
Done
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
Todo
- Risk management with stop loss
@version: 0.11
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.capital = literal_eval(self.GetParameter("capital"))
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2021, 1, 1)
self.SetEndDate(2022, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.accuracy = None
self.features, self.targets = None, None
self.model = GradientBoostingClassifier(n_iter_no_change=3) # TODO: Evaluate Grid search for different parameters
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.store_data)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None or self.targets is None: return
self.Debug(f"{self.Time} Training")
x, y = self.get_train_data()
fit_params = dict(sample_weight=abs(y))
cv_scores = cross_val_score(self.model, X=x, y=(y > 0).astype(float),
cv=10, fit_params=fit_params)
self.accuracy = np.mean(cv_scores)
self.model.fit(x, (y > 0).astype(float), **fit_params)
self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.accuracy:.1%}")
self.Plot("ML", "Accuracy", self.accuracy)
def trade(self):
if self.accuracy is None: return
self.Debug(f"{self.Time} Trading")
x_pred = self.get_pred_data()
y_proba = pd.Series(self.model.predict_proba(x_pred)[:, 1],
index=x_pred.index).groupby("symbol").last()
self.Debug(f"Predictions: {len(y_proba)} - Proba {min(y_proba):.0%}-{max(y_proba):.0%}")
positions = (y_proba[(y_proba <= 0.4)|(y_proba >= 0.6)] - 0.5) * self.accuracy # Model and trade confidence
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
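# e.g. gross exposure 1.6 -> every position is scaled by 1/1.6, capping total exposure at 100%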
[self.SetHoldings(symbol, pos) for symbol, pos in positions.items()]
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def store_data(self):
trade_days = self.TradingCalendar.GetTradingDays(self.Time - timedelta(7),
self.Time - timedelta(1))
last_day = list(filter(lambda p: p.BusinessDay and not p.PublicHoliday,
trade_days))[-1].Date
start = last_day.replace(hour=9, minute=30, second=0)
end = self.Time.replace(hour=9, minute=35, second=0)
tickers = list(filter(lambda x: str(x) != self.benchmark,
self.ActiveSecurities.Keys))
minute_bars = self.History(tickers, start, end, Resolution.Minute)
self.add_features(minute_bars)
self.add_targets(minute_bars)
self.Debug(f"{self.Time} Data updated: {len(self.features)} datapoints")
def add_features(self, minute_bars):
day_bar = self.agg_bars(minute_bars, "09:31", "16:00")
pm_bar = self.agg_bars(minute_bars, "00:01", "09:30")
min5_bar = self.agg_bars(minute_bars, "09:31", "09:35")
features = min5_bar.add_prefix("bb_")
features["bb_size"] = min5_bar.eval("(high-low)/open")
features["bb_close_range"] = min5_bar.eval("(close-low)/(high-low)")
features["bb_open_range"] = min5_bar.eval("(open-low)/(high-low)")
features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
features["gap"] = day_bar["open"] / yesterday_close-1
features.dropna(inplace=True)
if self.features is not None:
new_idx = features.index.difference(self.features.index) # Removing potential duplicates
self.features = pd.concat([self.features, features.loc[new_idx]])
else:
self.features = features
def add_targets(self, minute_bars):
trading_bar = self.agg_bars(minute_bars, "09:36", "15:55")
targets = trading_bar.eval("close/open-1").dropna()
if self.targets is not None:
new_idx = targets.index.difference(self.targets.index) # Removing potential duplicates
self.targets = pd.concat([self.targets, targets.loc[new_idx]])
else:
self.targets = targets
def get_train_data(self):
train_idx = self.targets.index.intersection(self.features.index)
return self.features.loc[train_idx], self.targets.loc[train_idx]
def get_pred_data(self):
return self.features.query("time == @self.Time.date()")
def agg_bars(self, minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
"""
Big Bertha Strategy with Machine Learning
Done
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
Todo
- Risk management with stop loss
@version: 0.12
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
RETR_PCT = 0.7
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.capital = literal_eval(self.GetParameter("capital"))
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2021, 1, 1)
self.SetEndDate(2022, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.accuracy = 0
self.features, self.targets = None, None
self.model = GradientBoostingClassifier(n_iter_no_change=3)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.store_data)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None or self.targets is None: return
self.Debug(f"{self.Time} Training")
x, y = self.get_train_data()
cv_scores = cross_val_score(self.model, X=x, y=y, cv=10)
self.accuracy = np.mean(cv_scores)
self.model.fit(x, y)
self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.accuracy:.1%}")
self.Plot("ML", "Accuracy", self.accuracy)
def trade(self):
if self.accuracy <= 0.5: return
self.Debug(f"{self.Time} Trading")
x_pred = self.get_pred_data()
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_).groupby("symbol").last()
actions = y_proba.idxmax(axis=1)
positions = actions.apply(lambda x: 0.01 if x=="long" else -0.01 if x=="short" else 0)
self.Debug(f"Predictions: {len(y_proba)} - Proba {y_proba}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
feats = x_pred.loc[symbol].iloc[0] # TODO: Refactor
window = (feats.bb_high - feats.bb_low) * RETR_PCT
stop_loss = feats.bb_high - window if pos > 0 \
else feats.bb_low + window # TODO: Refactor
self.StopMarketOrder(symbol, -qty, stop_loss)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def store_data(self):
trade_days = self.TradingCalendar.GetTradingDays(self.Time - timedelta(7),
self.Time - timedelta(1))
last_day = list(filter(lambda p: p.BusinessDay and not p.PublicHoliday,
trade_days))[-1].Date
start = last_day.replace(hour=9, minute=30, second=0)
end = self.Time.replace(hour=9, minute=35, second=0)
tickers = list(filter(lambda x: str(x) != self.benchmark,
self.ActiveSecurities.Keys))
minute_bars = self.History(tickers, start, end, Resolution.Minute)
new_features = self.calc_features(minute_bars).dropna()
if self.features is not None:
new_idx = new_features.index.difference(self.features.index) # Removing potential duplicates
self.features = pd.concat([self.features, new_features.loc[new_idx]])
else:
self.features = new_features
new_targets = self.calc_targets(minute_bars).dropna()
if self.targets is not None:
new_idx = new_targets.index.difference(self.targets.index) # Removing potential duplicates
self.targets = pd.concat([self.targets, new_targets.loc[new_idx]])
else:
self.targets = new_targets
self.Debug(f"{self.Time} Data updated: {len(self.features)} datapoints")
def calc_features(self, minute_bars):
day_bar = agg_bars(minute_bars, "09:31", "16:00")
pm_bar = agg_bars(minute_bars, "00:01", "09:30")
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
features = min5_bar.add_prefix("bb_")
features["bb_size"] = min5_bar.eval("(high-low)/open")
features["bb_close_range"] = min5_bar.eval("(close-low)/(high-low)")
features["bb_open_range"] = min5_bar.eval("(open-low)/(high-low)")
features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
features["gap"] = day_bar["open"] / yesterday_close-1
return features
def calc_targets(self, minute_bars):
trading_bar = agg_bars(minute_bars, "09:36", "15:55")
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
trading_bar = trading_bar.join(min5_bar.add_prefix("bb_"))
return trading_bar.apply(calc_exit_price, axis=1)
def get_train_data(self):
train_idx = self.targets.index.intersection(self.features.index)
return self.features.loc[train_idx], self.targets.loc[train_idx]
def get_pred_data(self):
return self.features.query("time == @self.Time.date()")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
def calc_exit_price(row, retr_pct=RETR_PCT):
window = (row.bb_high-row.bb_low)*retr_pct
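# the stop sits retr_pct (70%) of the opening bar's range below its high for longs
# (above its low for shorts); the label is awarded only if price never touches it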
if row.close > row.open: # long trade
stop_loss = row.bb_high - window
target = "long" if row.low > stop_loss else "pass" # 1 if profitable long and not touching the SL
else: # short trade
stop_loss = row.bb_low + window
target = "short" if row.high < stop_loss else "pass" # -1 if profitable short and not touching the SL
return target"""
Big Bertha Strategy with Machine Learning
Done
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
Todo
- Risk management with stop loss
@version: 0.13
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
RETR_PCT = 0.7
EXT_PCT = 0.39
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.capital = literal_eval(self.GetParameter("capital"))
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2021, 1, 1)
self.SetEndDate(2022, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.accuracy = None
self.features, self.targets = None, None
self.model = GradientBoostingClassifier(n_iter_no_change=3)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.store_data)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None or self.targets is None: return
self.Debug(f"{self.Time} Training")
x, y = self.get_train_data()
cv_scores = cross_val_score(self.model, X=x, y=y, cv=10)
self.accuracy = np.mean(cv_scores)
self.model.fit(x, y)
self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.accuracy:.1%}")
self.Plot("ML", "Accuracy", self.accuracy)
def trade(self):
if self.accuracy is None: return
self.Debug(f"{self.Time} Trading")
x_pred = self.get_pred_data()
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_).groupby("symbol").last()
actions = y_proba.idxmax(axis=1)
positions = actions.apply(lambda x: 0.01 if x=="long" else -0.01 if x=="short" else 0)
self.Debug(f"Predictions: {len(y_proba)} - Proba {y_proba}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
feats = x_pred.loc[symbol].iloc[0] # TODO: Refactor
window = (feats.bb_high - feats.bb_low)
stop_loss = feats.bb_high - window * RETR_PCT if pos > 0 \
else feats.bb_low + window * RETR_PCT # TODO: Refactor
take_profit = feats.bb_high + window * EXT_PCT if pos > 0 \
else feats.bb_low - window * EXT_PCT
self.StopMarketOrder(symbol, -qty, stop_loss) # TODO: Need to cancel TP when SL fills and vice versa
self.LimitOrder(symbol, -qty, take_profit) # TODO: Need to cancel SL when TP fills and vice versa
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def store_data(self):
trade_days = self.TradingCalendar.GetTradingDays(self.Time - timedelta(7),
self.Time - timedelta(1))
last_day = list(filter(lambda p: p.BusinessDay and not p.PublicHoliday,
trade_days))[-1].Date
start = last_day.replace(hour=9, minute=30, second=0)
end = self.Time.replace(hour=9, minute=35, second=0)
tickers = list(filter(lambda x: str(x) != self.benchmark,
self.ActiveSecurities.Keys))
minute_bars = self.History(tickers, start, end, Resolution.Minute)
new_features = self.calc_features(minute_bars).dropna()
if self.features is not None:
new_idx = new_features.index.difference(self.features.index) # Removing potential duplicates
self.features = pd.concat([self.features, new_features.loc[new_idx]])
else:
self.features = new_features
new_targets = self.calc_targets(minute_bars).dropna()
if self.targets is not None:
new_idx = new_targets.index.difference(self.targets.index) # Removing potential duplicates
self.targets = pd.concat([self.targets, new_targets.loc[new_idx]])
else:
self.targets = new_targets
self.Debug(f"{self.Time} Data updated: {len(self.features)} datapoints")
def calc_features(self, minute_bars):
day_bar = agg_bars(minute_bars, "09:31", "16:00")
pm_bar = agg_bars(minute_bars, "00:01", "09:30")
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
features = min5_bar.add_prefix("bb_")
features["bb_size"] = min5_bar.eval("(high-low)/open")
features["bb_close_range"] = min5_bar.eval("(close-low)/(high-low)")
features["bb_open_range"] = min5_bar.eval("(open-low)/(high-low)")
features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
features["gap"] = day_bar["open"] / yesterday_close-1
return features
def calc_targets(self, minute_bars):
trading_bar = agg_bars(minute_bars, "09:36", "15:55")
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
trading_bar = trading_bar.join(min5_bar.add_prefix("bb_"))
return trading_bar.apply(calc_exit_target, axis=1)
def get_train_data(self):
train_idx = self.targets.index.intersection(self.features.index)
return self.features.loc[train_idx], self.targets.loc[train_idx]
def get_pred_data(self):
return self.features.query("time == @self.Time.date()")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
def calc_exit_target(price_bar, retr_pct=RETR_PCT, ext_pct=EXT_PCT):
window = (price_bar.bb_high - price_bar.bb_low)
if price_bar.close > price_bar.open: # long trade
# take_profit = data_bar.bb_high + window * ext_pct TODO: Not used yet, would need to compare TP/SL timing
stop_loss = price_bar.bb_high - window * retr_pct
profitable_long = (price_bar.low > stop_loss) \
and (price_bar.close > price_bar.open)
target = "long" if profitable_long else "pass" # 1 if profitable long and not touching the SL
else: # short trade
# take_profit = data_bar.bb_low - window * ext_pct TODO: Not used yet, would need to compare TP/SL timing
stop_loss = price_bar.bb_low + window * retr_pct
profitable_short = (price_bar.high < stop_loss) \
and (price_bar.close < price_bar.open)
target = "short" if profitable_short else "pass" # -1 if profitable short and not touching the SL
return target"""
Big Bertha Strategy with Machine Learning
Done
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
- Triple barrier target with TP and SL
@version: 0.14
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
RETRACEMENT_SL = 0.7
EXTENSION_TP = 0.39
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.capital = literal_eval(self.GetParameter("capital"))
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2021, 1, 1)
self.SetEndDate(2022, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.accuracy = None
self.features, self.targets = pd.DataFrame(), pd.DataFrame()
self.model = GradientBoostingClassifier(n_iter_no_change=3)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.store_data)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if len(self.features) == 0: return # No training data available
self.Debug(f"{self.Time} Training")
x, y = self.get_train_data()
cv_scores = cross_val_score(self.model, X=x, y=y, cv=10,
scoring="balanced_accuracy")
self.accuracy = np.mean(cv_scores)
self.model.fit(x, y)
self.Debug(f"{self.Time} Points:{len(x)} Accuracy:{self.accuracy:.1%}")
self.Plot("ML", "Accuracy", self.accuracy)
def trade(self):
if self.accuracy is None: return
self.Debug(f"{self.Time} Trading")
x_pred = self.get_pred_data().droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
positions_func = lambda x: x[1] if x[1] > 0.5 \
else -x[-1] if x[-1] > 0.5 else 0
positions = y_proba.apply(positions_func, axis=1) * self.accuracy
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.Debug(f"Predictions: {len(y_proba)} - Proba {y_proba}")
for symbol, pos in positions.items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low)
stop_loss = features.bb_high - window * RETRACEMENT_SL if pos > 0 \
else features.bb_low + window * RETRACEMENT_SL # TODO: Refactor
#take_profit = features.bb_high + window * EXTENSION_TP if pos > 0 \
# else features.bb_low - window * EXTENSION_TP
self.StopMarketOrder(symbol, -qty, stop_loss) # TODO: Need to cancel TP when SL and viceversa
#self.LimitOrder(symbol, -qty, take_profit) # TODO: Need to cancel TP when SL and viceversa
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def store_data(self):
trade_days = self.TradingCalendar.GetTradingDays(self.Time - timedelta(7),
self.Time - timedelta(1))
last_day = list(filter(lambda p: p.BusinessDay and not p.PublicHoliday,
trade_days))[-1].Date
start = last_day.replace(hour=9, minute=30, second=0)
end = self.Time.replace(hour=9, minute=35, second=0)
tickers = list(filter(lambda x: str(x) != self.benchmark,
self.ActiveSecurities.Keys))
minute_bars = self.History(tickers, start, end, Resolution.Minute)
new_features = self.calc_features(minute_bars).dropna()
new_idx = new_features.index.difference(self.features.index) # Removing potential duplicates
self.features = pd.concat([self.features, new_features.loc[new_idx]])
new_targets = self.calc_targets(minute_bars).dropna()
new_idx = new_targets.index.difference(self.targets.index) # Removing potential duplicates
self.targets = pd.concat([self.targets, new_targets.loc[new_idx]])
self.Debug(f"{self.Time} Data updated: {len(self.features)} datapoints")
def calc_features(self, minute_bars):
day_bar = agg_bars(minute_bars, "09:31", "16:00")
pm_bar = agg_bars(minute_bars, "00:01", "09:30")
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
features = min5_bar.add_prefix("bb_")
features["bb_size"] = min5_bar.eval("(high-low)/open")
features["bb_close_range"] = min5_bar.eval("(close-low)/(high-low)")
features["bb_open_range"] = min5_bar.eval("(open-low)/(high-low)")
features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
features["gap"] = day_bar["open"] / yesterday_close-1
return features
def calc_targets(self, minute_bars):
trading_bar = agg_bars(minute_bars, "09:36", "15:55")
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
trading_bar = trading_bar.join(min5_bar.add_prefix("bb_"))
return trading_bar.apply(calc_exit_target, axis=1)
def get_train_data(self):
train_idx = self.targets.index.intersection(self.features.index)
return self.features.loc[train_idx], self.targets.loc[train_idx]
def get_pred_data(self):
return self.features.query("time == @self.Time.date()")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
def calc_exit_target(price_bar, retr_pct=RETRACEMENT_SL, ext_pct=EXTENSION_TP):
window = (price_bar.bb_high - price_bar.bb_low)
if price_bar.close > price_bar.open: # long trade
# take_profit = data_bar.bb_high + window * ext_pct TODO: Not used yet, would need to compare TP/SL timing
stop_loss = price_bar.bb_high - window * retr_pct
target = 1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else: # short trade
# take_profit = data_bar.bb_low - window * ext_pct TODO: Not used yet, would need to compare TP/SL timing
stop_loss = price_bar.bb_low + window * retr_pct
target = -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
return target
"""
Big Bertha Strategy with Machine Learning
Done
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
- Double barrier target with SL
- Implemented Pipeline with clustering/dimensionality reduction
TODO:
- Implement Triple Barrier with TP
@version: 0.15
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.cluster import KMeans
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
RETRACEMENT_SL = 0.7
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.capital = literal_eval(self.GetParameter("capital"))
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2021, 1, 1)
self.SetEndDate(2022, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.score = 0
self.features, self.targets = pd.DataFrame(), pd.Series()
self.pipe = Pipeline([#("scaling", MinMaxScaler()),
#("clustering", KMeans(n_clusters=2)),
("model", GradientBoostingClassifier(n_iter_no_change=3,
n_estimators=100))])
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.MonthStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.store_features)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
self.Schedule.On(every_day, at(15, 55), self.store_targets)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if len(self.features) == 0: return
self.clean_data()
training_days = self.features.index.get_level_values("time").unique()
if len(training_days) < 20: return # Minimum of month of training
cv_scores = cross_val_score(self.pipe, X=self.features, y=self.targets,
cv=10, scoring="balanced_accuracy")
self.score = np.mean(cv_scores)
self.pipe.fit(self.features, self.targets)
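# mean/std of the fold scores: a Sharpe-like stability measure of the CV score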
self.print(f"CV Sharpe {self.score / np.std(cv_scores):.1f}")
self.print(f"Training: {self.targets.value_counts()} Score:{self.score:.1%}")
self.Plot("ML", "Score", self.score)
def trade(self):
if self.score <= 1/3: return
x_pred = self.features.query("time == @self.Time.date()").droplevel("time")
y_pred = pd.Series(self.pipe.predict(x_pred), index=x_pred.index)
positions_map = {"long": 0.1, "short": -0.1, "pass": 0}
positions = y_pred.apply(lambda x: positions_map[x])
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low) * RETRACEMENT_SL
stop_loss = features.bb_high - window if pos > 0 \
else features.bb_low + window # TODO: Refactor
self.StopMarketOrder(symbol, -qty, stop_loss)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def store_features(self):
last_day = self.get_last_day(self.Time)
start = last_day.replace(hour=9, minute=30, second=0)
tickers = self.get_active_tickers()
minute_bars = self.History(tickers, start, self.Time, Resolution.Minute)
day_bar = agg_bars(minute_bars, "09:31", "16:00")
pm_bar = agg_bars(minute_bars, "00:01", "09:30")
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
new_features = min5_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_close = day_bar["close"].groupby("symbol").shift(1)
new_features["gap"] = day_bar["open"] / yesterday_close - 1
self.features = pd.concat([self.features, new_features.dropna()])
def store_targets(self):
last_features = self.features.groupby("symbol").last()
tickers = list(last_features.index)
start = self.Time.replace(hour=9, minute=31, second=0)
end = self.Time.replace(hour=15, minute=54, second=0)
minute_bars = self.History(tickers, start, end, Resolution.Minute)
trading_bar = agg_bars(minute_bars, "09:36", "15:54")
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
trading_bar = trading_bar.join(min5_bar.add_prefix("bb_"))
new_targets = trading_bar.apply(calc_exit_target, axis=1)
self.targets = pd.concat([self.targets, new_targets.dropna()])
def clean_data(self):
self.features = self.features[~self.features.index.duplicated(keep='first')]
self.targets = self.targets[~self.targets.index.duplicated(keep='first')]
common_idx = self.targets.index.intersection(self.features.index)
self.features = self.features.loc[common_idx]
self.targets = self.targets.loc[common_idx]
def get_active_tickers(self):
return list(filter(lambda x: str(x) != self.benchmark,
self.ActiveSecurities.Keys))
def get_last_day(self, date):
start, end = date - timedelta(7), date - timedelta(1)
calendar_days = self.TradingCalendar.GetTradingDays(start, end)
return list(filter(lambda p: p.BusinessDay and not p.PublicHoliday,
calendar_days))[-1].Date
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
def calc_exit_target(price_bar, retr_pct=RETRACEMENT_SL):
window = (price_bar.bb_high - price_bar.bb_low)
if price_bar.close > price_bar.open: # long trade
stop_loss = price_bar.bb_high - window * retr_pct
target = "long" if price_bar.low > stop_loss else "pass" # 1 if profitable long and not touching the SL
else: # short trade
stop_loss = price_bar.bb_low + window * retr_pct
target = "short" if price_bar.high < stop_loss else "pass" # -1 if profitable short and not touching the SL
return target
"""
Big Bertha Strategy with Machine Learning
Done
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
- Double barrier target with SL
TODO: Implement Triple Barrier with TP
@version: 0.16
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
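# Editor's note: `timeseriescv` is a local module not shown in this listing. TimeSeriesSplitGroups
# is assumed to behave like sklearn's TimeSeriesSplit applied to the `groups` labels
# (trading days): all rows of a day stay in one fold, and test days always come after
# the training days, so the CV has no look-ahead.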
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
RETRACEMENT_SL = 1.0
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.capital = literal_eval(self.GetParameter("capital"))
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2021, 1, 1)
self.SetEndDate(2022, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.score = 0
self.cv = TimeSeriesSplitGroups(n_splits=10)
self.features, self.targets = None, None
self.model = GradientBoostingClassifier(n_iter_no_change=3)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.store_features)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
self.Schedule.On(every_day, at(15, 55), self.store_targets)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None: return
training_days = self.features.index.get_level_values("time").unique()
if len(training_days) < 20: return
time_groups = self.targets.index.get_level_values("time")
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=time_groups,
scoring="balanced_accuracy")
self.score = np.nanmean(cv_scores)
self.model.fit(self.features, self.targets)
self.print(f"Training score:{self.score:.1%}")
self.Plot("ML", "Score", self.score)
def trade(self):
kelly_size = (3 * self.score - 1) / 2 # calculating the edge like binary Kelly
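# random guessing on 3 classes scores 1/3, so (3 * score - 1) / 2 rescales [1/3, 1] to [0, 1];
# e.g. score 0.5 -> kelly_size 0.25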
if kelly_size <= 0: return
x_pred = self.features.groupby("symbol").last()
y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index)
positions = y_pred * kelly_size * 0.5 # Using 50% Kelly
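# fractional Kelly heuristic: betting half the Kelly fraction keeps roughly 3/4 of the
# growth rate at about half the volatility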
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low) * RETRACEMENT_SL
stop_loss = features.bb_high - window if pos > 0 \
else features.bb_low + window # TODO: Refactor
self.StopMarketOrder(symbol, -qty, stop_loss)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0)
tickers = list(self.ActiveSecurities.Keys)
minute_bars = self.History(tickers, start, self.Time, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
new_features = min5_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = min5_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
tickers = list(last_features.index.get_level_values("symbol"))
start = self.Time.replace(hour=9, minute=31, second=0)
end = self.Time.replace(hour=15, minute=55, second=0)
minute_bars = self.History(tickers, start, end, Resolution.Minute)
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
trading_bar = agg_bars(minute_bars, "09:36", "15:55")
trading_bar = trading_bar.join(min5_bar.add_prefix("bb_"))
new_targets = trading_bar.apply(calc_exit_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
def calc_exit_target(price_bar, retr_pct=RETRACEMENT_SL):
window = (price_bar.bb_high - price_bar.bb_low)
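# worked example with retr_pct=1.0: bb_high=10, bb_low=9 -> long stop at 9;
# the day labels +1 only if its low stays above 9 (no full retracement of the bar)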
if price_bar.close > price_bar.open: # long trade
stop_loss = price_bar.bb_high - window * retr_pct
target = +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else: # short trade
stop_loss = price_bar.bb_low + window * retr_pct
target = -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
return target
"""
Big Bertha Strategy with Machine Learning
Done
- Custom precision scoring
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
- Double barrier target with SL
TODO: Implement Triple Barrier with TP
@version: 0.17
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.metrics import make_scorer
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
def multi_precision(y_true, y_pred):
non_zero_pred = y_pred != 0
matches = y_true[non_zero_pred] == y_pred[non_zero_pred]
return np.mean(matches) if len(matches) > 0 else 0
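# precision over the non-"pass" predictions only, e.g.
# y_true=[1, -1, 0, 1], y_pred=[1, 0, 0, -1] -> scored pairs (1,1), (1,-1) -> 0.5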
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.retracement_sl = literal_eval(self.GetParameter("retracement_sl"))
self.capital = literal_eval(self.GetParameter("capital"))
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2021, 1, 1)
self.SetEndDate(2022, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.score = 0
self.scoring = make_scorer(multi_precision)
self.cv = TimeSeriesSplitGroups(n_splits=10)
self.features, self.targets = None, None
self.model = GradientBoostingClassifier(n_iter_no_change=3)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.store_features)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
self.Schedule.On(every_day, at(15, 55), self.store_targets)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None: return
training_days = self.features.index.get_level_values("time").unique()
if len(training_days) < 20: return
time_groups = self.targets.index.get_level_values("time")
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=time_groups,
scoring=self.scoring)
self.score = np.mean(cv_scores)
self.model.fit(self.features, self.targets)
self.print(f"Training: {self.targets.value_counts()} Score:{self.score:.1%}")
self.Plot("ML", "Score", self.score)
def trade(self):
if self.score == 0: return
x_pred = self.features.groupby("symbol").last()
y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index)
positions = y_pred * self.score * 0.1
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low) * self.retracement_sl
stop_loss = features.bb_high - window if pos > 0 \
else features.bb_low + window # TODO: Refactor
self.StopMarketOrder(symbol, -qty, stop_loss)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0)
tickers = list(self.ActiveSecurities.Keys)
minute_bars = self.History(tickers, start, self.Time, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
new_features = min5_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = min5_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
tickers = list(last_features.index.get_level_values("symbol"))
start = self.Time.replace(hour=9, minute=31, second=0)
end = self.Time.replace(hour=15, minute=55, second=0)
minute_bars = self.History(tickers, start, end, Resolution.Minute)
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
trading_bar = agg_bars(minute_bars, "09:36", "15:55")
trading_bar = trading_bar.join(min5_bar.add_prefix("bb_"))
new_targets = trading_bar.apply(self.calc_exit_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
def calc_exit_target(self, price_bar):
window = (price_bar.bb_high - price_bar.bb_low)
if price_bar.close > price_bar.open: # long trade
stop_loss = price_bar.bb_high - window * self.retracement_sl
target = +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else: # short trade
stop_loss = price_bar.bb_low + window * self.retracement_sl
target = -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
return target
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
"""
Big Bertha Strategy with Machine Learning
Done
- Custom precision scoring
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
- Double barrier target with SL
@version: 0.18
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.retracement_sl = literal_eval(self.GetParameter("retracement_sl"))
self.kelly_frac = literal_eval(self.GetParameter("kelly_frac"))
self.capital = literal_eval(self.GetParameter("capital"))
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2021, 1, 1)
self.SetEndDate(2022, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.score = 0
self.cv = TimeSeriesSplitGroups(n_splits=10)
self.features, self.targets = None, None
self.model = GradientBoostingClassifier(n_iter_no_change=3)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.store_features)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
self.Schedule.On(every_day, at(15, 55), self.store_targets)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None: return
training_days = self.features.index.get_level_values("time").unique()
if len(training_days) < 20: return
time_groups = self.targets.index.get_level_values("time")
fit_params = dict(sample_weight=abs(np.log1p(self.targets)))
y_binary = (self.targets > 0).astype(float)
cv_scores = cross_val_score(self.model, X=self.features, y=y_binary,
cv=self.cv, groups=time_groups,
fit_params=fit_params)
self.score = np.mean(cv_scores)
self.model.fit(self.features, y_binary, **fit_params)
self.print(f"Training: {y_binary.value_counts()} Score:{self.score:.1%}")
self.Plot("ML", "Score", self.score)
def trade(self):
edge = self.score - (1 - self.score)
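# binary Kelly edge with even odds: p - (1 - p) = 2p - 1; e.g. score 0.55 -> edge 0.10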
if edge <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index)
positions = y_pred * edge * self.kelly_frac
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions.items():
self.SetHoldings(symbol, pos)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0)
tickers = list(self.ActiveSecurities.Keys)
minute_bars = self.History(tickers, start, self.Time, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
new_features = min5_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = min5_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
tickers = list(last_features.index.get_level_values("symbol"))
start = self.Time.replace(hour=9, minute=36, second=0)
end = self.Time.replace(hour=15, minute=55, second=0)
minute_bars = self.History(tickers, start, end, Resolution.Minute)
trading_bar = agg_bars(minute_bars, "09:36", "15:55")
new_targets = trading_bar.eval("close/open - 1")
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
"""
Big Bertha Strategy with Machine Learning
Done
- Custom precision scoring
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
- Double barrier target with SL
@version: 0.18
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.retracement_sl = literal_eval(self.GetParameter("retracement_sl"))
self.kelly_frac = literal_eval(self.GetParameter("kelly_frac"))
self.capital = literal_eval(self.GetParameter("capital"))
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2021, 1, 1)
self.SetEndDate(2022, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.score = 0
self.cv = TimeSeriesSplitGroups(n_splits=10)
self.features, self.targets = None, None
self.model = GradientBoostingClassifier(n_iter_no_change=3)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.store_features)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
self.Schedule.On(every_day, at(15, 55), self.store_targets)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None: return
training_days = self.features.index.get_level_values("time").unique()
if len(training_days) < 20: return
time_groups = self.targets.index.get_level_values("time")
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=time_groups,
scoring="balanced_accuracy")
self.score = np.mean(cv_scores)
self.model.fit(self.features, self.targets)
self.print(f"Training: {self.targets.value_counts()} Score:{self.score:.1%}")
self.Plot("ML", "Score", self.score)
def trade(self):
edge = (3 * self.score - 1) / 2
if edge <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index)
positions = y_pred * edge * self.kelly_frac
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low) * self.retracement_sl
stop_loss = features.bb_high - window if pos > 0 \
else features.bb_low + window # TODO: Refactor
self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0)
tickers = list(self.ActiveSecurities.Keys)
minute_bars = self.History(tickers, start, self.Time, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
new_features = min5_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = min5_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
tickers = list(last_features.index.get_level_values("symbol"))
start = self.Time.replace(hour=9, minute=31, second=0)
end = self.Time.replace(hour=15, minute=55, second=0)
minute_bars = self.History(tickers, start, end, Resolution.Minute)
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
trading_bar = agg_bars(minute_bars, "09:36", "15:55")
trading_bar = trading_bar.join(min5_bar.add_prefix("bb_"))
new_targets = trading_bar.apply(self.calc_exit_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
def calc_exit_target(self, price_bar):
window = (price_bar.bb_high - price_bar.bb_low)
if price_bar.close > price_bar.open: # long trade
stop_loss = price_bar.bb_high - window * self.retracement_sl
target = +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else: # short trade
stop_loss = price_bar.bb_low + window * self.retracement_sl
target = -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
return target
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
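# `indicators.filter_bars` is referenced above but its source is not part of
# this listing. A minimal sketch of what it presumably does, inferred only
# from the call sites (an assumption, not the author's implementation): keep
# the minute bars whose intraday timestamp falls within [start_time, end_time].
import pandas as pd

def filter_bars(minute_bars, start_time, end_time):
    times = minute_bars.index.get_level_values("time")  # bar timestamps from the MultiIndex
    mask = (times.time >= pd.Timestamp(start_time).time()) & \
           (times.time <= pd.Timestamp(end_time).time())
    return minute_bars[mask]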
"""
Big Bertha Strategy with Machine Learning
Done
- Custom precision scoring
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
- Double barrier target with SL
@version: 0.19
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.retracement_sl = literal_eval(self.GetParameter("retracement_sl"))
self.kelly_frac = literal_eval(self.GetParameter("kelly_frac"))
self.capital = literal_eval(self.GetParameter("capital"))
self.use_sl = literal_eval(self.GetParameter("use_sl"))
self.target_gain = literal_eval(self.GetParameter("target_gain"))
self.strategy = self.GetParameter("strategy")
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2021, 1, 1)
self.SetEndDate(2022, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.edge = 0
self.cv = TimeSeriesSplitGroups(n_splits=10)
self.features, self.targets = None, None
self.model = GradientBoostingClassifier(n_iter_no_change=10)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.store_features)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
self.Schedule.On(every_day, at(15, 55), self.store_targets)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None: return
training_days = self.features.index.get_level_values("time").unique()
if len(training_days) <= 10: return
time_groups = self.targets.index.get_level_values("time")
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=time_groups,
scoring="balanced_accuracy")
self.model.fit(self.features, self.targets)
score = np.mean(np.nan_to_num(cv_scores, nan=0)) # Treat failed CV folds as zero-skill
n_classes = len(self.model.classes_)
self.edge = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes
self.print(f"Training: {self.targets.value_counts()} Edge:{self.edge:.1%}")
self.Plot("ML", "Edge", self.edge)
def trade(self):
if self.edge <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index)
positions = y_pred * (self.edge * self.kelly_frac).clip(0, 1)
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
if self.use_sl:
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low) * self.retracement_sl
stop_loss = features.bb_high - window if pos > 0 \
else features.bb_low + window # TODO: Refactor
self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0)
tickers = list(self.ActiveSecurities.Keys)
minute_bars = self.History(tickers, start, self.Time, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
new_features = min5_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = min5_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
tickers = list(last_features.index.get_level_values("symbol"))
start = self.Time.replace(hour=9, minute=31, second=0)
end = self.Time.replace(hour=15, minute=55, second=0)
minute_bars = self.History(tickers, start, end, Resolution.Minute)
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
trading_bar = agg_bars(minute_bars, "09:36", "15:55")
trading_bar = trading_bar.join(min5_bar.add_prefix("bb_"))
new_targets = trading_bar.apply(self.calc_exit_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
def calc_exit_target(self, price_bar):
window = (price_bar.bb_high - price_bar.bb_low)
if price_bar.close >= price_bar.open * (1+self.target_gain) \
and "long" in self.strategy: # long trade
if self.use_sl:
stop_loss = price_bar.bb_high - window * self.retracement_sl
return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else:
return +1
elif price_bar.close <= price_bar.open * (1 - self.target_gain) \
and "short" in self.strategy: # short trade
if self.use_sl:
stop_loss = price_bar.bb_low + window * self.retracement_sl
return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
else:
return -1
else:
return 0
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
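# Worked example of the edge formula in train_model (my illustration, not part
# of the original code). With n_classes = 3 the Kelly-style edge
# (n*score - 1)/(n - 1) maps balanced accuracy onto [0, 1]:
#   score = 1/3 (chance level) -> edge = 0.0, no trades are taken
#   score = 0.40               -> edge = 0.10
#   score = 0.50               -> edge = 0.25
#   score = 1.00               -> edge = 1.00 (full Kelly fraction)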
"""
Big Bertha Strategy with Machine Learning
Done
- Custom precision scoring
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
- Double barrier target with SL
@version: 0.20
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = literal_eval(self.GetParameter("min_usd_volume"))
self.retracement_sl = literal_eval(self.GetParameter("retracement_sl"))
self.target_gain = literal_eval(self.GetParameter("target_gain"))
self.kelly_frac = literal_eval(self.GetParameter("kelly_frac"))
self.capital = literal_eval(self.GetParameter("capital"))
self.use_sl = literal_eval(self.GetParameter("use_sl"))
self.strategy = self.GetParameter("strategy")
self.benchmark = self.GetParameter("benchmark")
self.SetStartDate(2021, 1, 1)
self.SetEndDate(2022, 1, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.edge = 0
self.cv = TimeSeriesSplitGroups(n_splits=10)
self.features, self.targets = None, None
self.model = GradientBoostingClassifier(n_iter_no_change=10)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.store_features)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
self.Schedule.On(every_day, at(15, 55), self.store_targets)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None: return
training_days = self.features.index.get_level_values("time").unique()
if len(training_days) <= 10: return
time_groups = self.targets.index.get_level_values("time")
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=time_groups,
scoring="balanced_accuracy")
self.model.fit(self.features, self.targets)
score = np.mean(np.nan_to_num(cv_scores, nan=0)) # Treat failed CV folds as zero-skill
n_classes = len(self.model.classes_)
self.edge = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes
self.print(f"Training: {self.targets.value_counts()} Edge:{self.edge:.1%}")
self.Plot("ML", "Edge", self.edge)
def trade(self):
if self.edge <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index)
positions = y_pred * (self.edge * self.kelly_frac).clip(0, 1)
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
if self.use_sl:
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low) * self.retracement_sl
stop_loss = features.bb_high - window if pos > 0 \
else features.bb_low + window # TODO: Simplify SL?
self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0)
tickers = list(self.ActiveSecurities.Keys)
minute_bars = self.History(tickers, start, self.Time, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
min5_bar = agg_bars(minute_bars, "09:31", "09:35")
new_features = min5_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = min5_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
def store_targets(self): # TODO: Run it only before training
last_features = self.features.query("time == @self.Time.date()")
tickers = list(last_features.index.get_level_values("symbol"))
start = self.Time.replace(hour=9, minute=36, second=0)
end = self.Time.replace(hour=15, minute=55, second=0)
minute_bars = self.History(tickers, start, end, Resolution.Minute)
trading_bar = agg_bars(minute_bars, "09:36", "15:55")
trading_bar = trading_bar.join(last_features)
new_targets = trading_bar.apply(self.calc_exit_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
def calc_exit_target(self, price_bar): # TODO: Simplify SL?
window = (price_bar.bb_high - price_bar.bb_low)
if price_bar.close >= price_bar.open * (1+self.target_gain) \
and "long" in self.strategy: # long trade
if self.use_sl:
stop_loss = price_bar.bb_high - window * self.retracement_sl
return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else:
return +1
elif price_bar.close <= price_bar.open * (1 - self.target_gain) \
and "short" in self.strategy: # short trade
if self.use_sl:
stop_loss = price_bar.bb_low + window * self.retracement_sl
return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
else:
return -1
else:
return 0
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
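# Sizing illustration for the normalization in trade() (hypothetical numbers,
# not the author's code). Each {-1, 0, +1} prediction is scaled by
# edge * kelly_frac; the division only kicks in when gross exposure would
# exceed 100%, capping the book at no leverage.
import pandas as pd

_signals = pd.Series({"AAA": 1, "BBB": -1, "CCC": 1})  # assumed predictions
_positions = _signals * 0.2 * 0.25                     # edge=0.2, kelly_frac=0.25 -> 5% per name
if sum(abs(_positions)) > 1:                           # gross here is 0.15, so no rescaling
    _positions /= sum(abs(_positions))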
"""
Big Bertha Strategy with Machine Learning
Done
- Custom precision scoring
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
- Double barrier target with SL
@version: 0.21
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 0)
self.target_gain = self.GetParameter("target_gain", 0.05)
self.kelly_frac = self.GetParameter("kelly_frac", 0.25)
self.capital = self.GetParameter("capital", 80000)
self.use_sl = self.GetParameter("use_sl", 0)
self.retracement_sl = self.GetParameter("retracement_sl", 1)
self.strategy = self.GetParameter("strategy", "long_short")
self.benchmark = self.GetParameter("benchmark", "SPY")
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 6, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.edge = 0
self.cv = TimeSeriesSplitGroups(n_splits=10)
self.features, self.targets = None, None
self.model = GradientBoostingClassifier(n_iter_no_change=10)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.store_features)
self.Schedule.On(every_day, at(9, 35), self.trade)
self.Schedule.On(every_day, at(15, 55), self.stop_trading)
self.Schedule.On(every_day, at(15, 55), self.store_targets)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None: return
common_idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets
self.features = self.features.loc[common_idx]
self.targets = self.targets.loc[common_idx]
training_days = self.features.index.get_level_values("time")
if len(training_days.unique()) <= 10: return # Require more than 10 days of training data
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=training_days,
scoring="balanced_accuracy")
self.model.fit(self.features, self.targets)
score = np.mean(cv_scores)
n_classes = len(self.model.classes_)
self.edge = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes
self.print(f"Training: {self.targets.value_counts()} Edge:{self.edge:.1%}")
self.Plot("ML", "Edge", self.edge)
def trade(self):
if self.edge <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index)
positions = y_pred * self.edge * self.kelly_frac
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
if self.use_sl:
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low) * self.retracement_sl
stop_loss = features.bb_high - window if pos > 0 \
else features.bb_low + window # TODO: Simplify SL?
self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss)
def stop_trading(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0)
tickers = list(self.ActiveSecurities.Keys)
minute_bars = self.History(tickers, start, self.Time, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
bertha_bar = agg_bars(minute_bars, "09:31", "09:35")
new_features = bertha_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = bertha_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
tickers = list(last_features.index.get_level_values("symbol"))
start = self.Time.replace(hour=9, minute=36, second=0)
end = self.Time.replace(hour=15, minute=55, second=0)
minute_bars = self.History(tickers, start, end, Resolution.Minute)
trading_bar = agg_bars(minute_bars, "09:36", "15:55")
trading_bar = trading_bar.join(last_features)
new_targets = trading_bar.apply(self.calc_exit_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
def calc_exit_target(self, price_bar):
window = (price_bar.bb_high - price_bar.bb_low)
if price_bar.close >= price_bar.open * (1 + self.target_gain) \
and "long" in self.strategy: # long trade
if self.use_sl:
stop_loss = price_bar.bb_high - window * self.retracement_sl
return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else:
return +1
elif price_bar.close <= price_bar.open * (1 - self.target_gain) \
and "short" in self.strategy: # short trade
if self.use_sl:
stop_loss = price_bar.bb_low + window * self.retracement_sl
return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
else:
return -1
else:
return 0
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
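# Alignment illustration for train_model (assumed data, not real output).
# Features are stored at 9:35 and targets at 15:55, so on any day the two
# frames can disagree (halts, missing history). Intersecting the
# (symbol, time) MultiIndexes keeps only rows present in both:
#   features index: [(AAA, 06-01), (BBB, 06-01), (AAA, 06-02)]
#   targets  index: [(AAA, 06-01), (AAA, 06-02)]
#   intersection  : [(AAA, 06-01), (AAA, 06-02)]  -> the BBB row is dropped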
"""
Big Bertha Strategy with Machine Learning
Done
- Custom precision scoring
- New Features (bb volume and open)
- Offline data storage to avoid symbols limitation
- Trade execution on high probability trades
- Double barrier target with SL
@version: 0.22
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 0)
self.target_gain = self.GetParameter("target_gain", 0.05)
self.kelly_frac = self.GetParameter("kelly_frac", 0.25)
self.capital = self.GetParameter("capital", 80000)
self.use_sl = self.GetParameter("use_sl", 0)
self.retracement_sl = self.GetParameter("retracement_sl", 1)
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY")
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 6, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.edge = 0
self.cv = TimeSeriesSplitGroups(n_splits=10)
self.features, self.targets = None, None
self.model = GradientBoostingClassifier(n_iter_no_change=10)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None: return
common_idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets
self.features = self.features.loc[common_idx]
self.targets = self.targets.loc[common_idx]
training_days = self.features.index.get_level_values("time")
if len(training_days.unique()) <= 10: return # Require more than 10 days of training data
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=training_days,
scoring="balanced_accuracy")
self.model.fit(self.features, self.targets)
score = np.mean(cv_scores)
n_classes = len(self.model.classes_)
self.edge = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes
self.print(f"Training: {self.targets.value_counts()} Edge:{self.edge:.1%}")
self.Plot("ML", "Edge", self.edge)
def enter_trades(self):
self.store_features()
if self.edge <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index)
positions = y_pred * self.edge * self.kelly_frac
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
if self.use_sl:
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low) * self.retracement_sl
stop_loss = features.bb_high - window if pos > 0 \
else features.bb_low + window
self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0)
tickers = list(self.ActiveSecurities.Keys)
minute_bars = self.History(tickers, start, self.Time, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = self.Time.hour, self.Time.minute
bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_features = bertha_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = bertha_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
tickers = list(last_features.index.get_level_values("symbol"))
now = self.Time
minute_bars = self.History(tickers, now - timedelta(minutes=1), now,
Resolution.Minute)
try:
trading_bar = minute_bars.droplevel("time").join(last_features)
new_targets = trading_bar.apply(self.calc_exit_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
except (KeyError, ValueError) as e:
self.print(e)
return
def calc_exit_target(self, price_bar):
entry_price, exit_price = price_bar.bb_close, price_bar.close
window = (price_bar.bb_high - price_bar.bb_low)
if exit_price >= entry_price * (1 + self.target_gain) and self.strategy >= 0: # long trade
if self.use_sl:
stop_loss = price_bar.bb_high - window * self.retracement_sl
return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else:
return +1
elif exit_price <= entry_price * (1 - self.target_gain) and self.strategy <= 0: # short trade
if self.use_sl:
stop_loss = price_bar.bb_low + window * self.retracement_sl
return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
else:
return -1
else:
return 0
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
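# Labeling illustration for calc_exit_target (hypothetical numbers). With
# target_gain = 0.05, entry at the bertha-bar close (bb_close) and exit at the
# 15:55 close:
#   entry 10.00, exit 10.60 -> +6.0% >= +5% and strategy >= 0 -> label +1
#   entry 10.00, exit  9.40 -> -6.0% <= -5% and strategy <= 0 -> label -1
#   entry 10.00, exit 10.20 -> +2.0%, inside the band         -> label  0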
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.23: Lookback parameter
@version: 0.23
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 0) # Minimum trading volume in previous trading day
self.target_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter in the trade
self.kelly_frac = self.GetParameter("kelly_frac", 0.25) # Kelly ratio to use for the position sizing
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.use_sl = self.GetParameter("use_sl", 0) # Use or not the Stop Loss (0/1)
self.retracement_sl = self.GetParameter("retracement_sl", 1) # Retracement percentage to use for the Stop Loss
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 6, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.edge = 0
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.features, self.targets = None, None
self.model = GradientBoostingClassifier(n_iter_no_change=10)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None: return
common_idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets
common_idx = common_idx[common_idx.get_level_values("time") > self.Time - timedelta(self.lookback)]
self.features = self.features.loc[common_idx]
self.targets = self.targets.loc[common_idx]
training_days = self.features.index.get_level_values("time")
if len(training_days.unique()) <= self.cv_splits: return # Require more training days than CV splits
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=training_days,
scoring="balanced_accuracy")
self.model.fit(self.features, self.targets)
score = np.mean(cv_scores)
n_classes = len(self.model.classes_)
self.edge = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes
self.print(f"Training: {self.targets.value_counts()} Edge:{self.edge:.1%}")
self.Plot("ML", "Edge", self.edge)
def enter_trades(self):
self.store_features()
if self.edge <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index)
positions = y_pred * self.edge * self.kelly_frac
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
if self.use_sl:
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low) * self.retracement_sl
stop_loss = features.bb_high - window if pos > 0 \
else features.bb_low + window
self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0)
tickers = list(self.ActiveSecurities.Keys)
minute_bars = self.History(tickers, start, self.Time, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = self.Time.hour, self.Time.minute
bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_features = bertha_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = bertha_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
tickers = list(last_features.index.get_level_values("symbol"))
now = self.Time
minute_bars = self.History(tickers, now - timedelta(minutes=1), now,
Resolution.Minute)
try:
trading_bar = minute_bars.droplevel("time").join(last_features)
new_targets = trading_bar.apply(self.calc_exit_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
except (KeyError, ValueError) as e:
self.print(e)
return
def calc_exit_target(self, price_bar):
entry_price, exit_price = price_bar.bb_close, price_bar.close
window = (price_bar.bb_high - price_bar.bb_low)
if exit_price >= entry_price * (1 + self.target_gain) and self.strategy >= 0: # long trade
if self.use_sl:
stop_loss = price_bar.bb_high - window * self.retracement_sl
return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else:
return +1
elif exit_price <= entry_price * (1 - self.target_gain) and self.strategy <= 0: # short trade
if self.use_sl:
stop_loss = price_bar.bb_low + window * self.retracement_sl
return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
else:
return -1
else:
return 0
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
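# Lookback illustration for train_model (my example). timedelta(self.lookback)
# is a calendar-day window, so with lookback = 365 and self.Time = 2022-06-01
# only rows stamped after 2021-06-01 survive the index filter; the model is
# retrained weekly on this rolling window rather than on the full history.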
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.24
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 0) # Minimum trading volume in previous trading day
self.target_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter in the trade
self.kelly_frac = self.GetParameter("kelly_frac", 0.25) # Kelly ratio to use for the position sizing
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.use_sl = self.GetParameter("use_sl", 0) # Use or not the Stop Loss (0/1)
self.retracement_sl = self.GetParameter("retracement_sl", 1) # Retracement percentage to use for the Stop Loss
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation
self.store_model = self.GetParameter("store_model", None) # Model name if needs to be stored
self.SetStartDate(2021, 6, 1)
#self.SetEndDate(2022, 6, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
#self.ObjectStore.Delete(self.store_model) # Deleting existing data
if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model):
self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model)))
else:
self.model = GradientBoostingClassifier(n_iter_no_change=10)
self.model.kelly = 0 # Read by enter_trades before the first training
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.features, self.targets = None, None
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None or self.targets is None: return
common_idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets
common_idx = common_idx[common_idx.get_level_values("time") > self.Time - timedelta(self.lookback)]
self.features = self.features.loc[common_idx]
self.targets = self.targets.loc[common_idx]
training_days = self.features.index.get_level_values("time")
if len(training_days.unique()) <= 21: return # Require more than one month of training data
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=training_days,
scoring="balanced_accuracy")
self.model.fit(self.features, self.targets)
if self.store_model is not None:
self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model))
score = np.mean(cv_scores)
n_classes = len(self.model.classes_)
self.model.kelly = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes
self.print(f"Training: {self.targets.value_counts()} Edge:{self.model.kelly:.1%}")
self.Plot("ML", "Edge", self.model.kelly)
def enter_trades(self):
self.store_features()
if self.model.kelly <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict(x_pred), index=x_pred.index)
positions = y_pred * self.model.kelly * self.kelly_frac
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
if self.use_sl:
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low) * self.retracement_sl
stop_loss = features.bb_high - window if pos > 0 \
else features.bb_low + window
self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0)
tickers = list(self.ActiveSecurities.Keys)
minute_bars = self.History(tickers, start, self.Time, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = self.Time.hour, self.Time.minute
bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_features = bertha_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = bertha_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
tickers = list(last_features.index.get_level_values("symbol"))
now = self.Time
minute_bars = self.History(tickers, now - timedelta(minutes=1), now,
Resolution.Minute)
try:
trading_bar = minute_bars.droplevel("time").join(last_features)
new_targets = trading_bar.apply(self.calc_exit_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
except (KeyError, ValueError) as e:
self.print(e)
return
def calc_exit_target(self, price_bar):
entry_price, exit_price = price_bar.bb_close, price_bar.close
window = (price_bar.bb_high - price_bar.bb_low)
if exit_price >= entry_price * (1 + self.target_gain) and self.strategy >= 0: # long trade
if self.use_sl:
stop_loss = price_bar.bb_high - window * self.retracement_sl
return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else:
return +1
elif exit_price <= entry_price * (1 - self.target_gain) and self.strategy <= 0: # short trade
if self.use_sl:
stop_loss = price_bar.bb_low + window * self.retracement_sl
return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
else:
return -1
else:
return 0
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
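# Persistence note for the ObjectStore logic above (restating what the code
# does, with an assumed key name): the fitted classifier is pickled together
# with the ad-hoc `kelly` attribute attached to it, so a warm-started run can
# trade immediately after loading:
#
#     self.ObjectStore.SaveBytes("bb_model", pickle.dumps(self.model))
#     model = pickle.loads(bytes(self.ObjectStore.ReadBytes("bb_model")))
#     model.kelly  # restored along with the estimator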
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.25
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 0) # Minimum trading volume in previous trading day
self.target_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.use_sl = self.GetParameter("use_sl", 0) # Use or not the Stop Loss (0/1)
self.retracement_sl = self.GetParameter("retracement_sl", 1) # Retracement percentage to use for the Stop Loss
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation
self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 9, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
# self.ObjectStore.Delete(self.store_model) # Deleting existing data
if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model):
self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model)))
else:
self.model = GradientBoostingClassifier(n_iter_no_change=10)
self.model.kelly = 0 # Read by enter_trades before the first training
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.features, self.targets = None, None
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None or self.targets is None: return
common_idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets
common_idx = common_idx[common_idx.get_level_values("time") > self.Time - timedelta(self.lookback)]
self.features = self.features.loc[common_idx]
self.targets = self.targets.loc[common_idx]
training_days = common_idx.get_level_values("time")
if len(training_days.unique()) <= 21: return # Require more than one month of training data
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=training_days,
scoring="balanced_accuracy")
self.model.fit(self.features, self.targets)
if self.store_model is not None:
self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model))
score = np.mean(cv_scores)
n_classes = len(self.model.classes_)
self.model.kelly = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes
self.print(f"Training: {self.targets.value_counts()} Edge:{self.model.kelly:.1%}")
self.Plot("ML", "Edge", self.model.kelly)
def enter_trades(self):
self.store_features()
if self.model.kelly <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2 # Selecting only prob > 50% and scaling to 100%
positions = y_proba.idxmax(axis=1) * sizes * self.model.kelly # Sizing based on Kelly and individual probability
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_proba.idxmax(axis=1).value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
if self.use_sl:
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low) * self.retracement_sl
stop_loss = features.bb_high - window if pos > 0 \
else features.bb_low + window
self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0)
tickers = list(self.ActiveSecurities.Keys)
minute_bars = self.History(tickers, start, self.Time, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = self.Time.hour, self.Time.minute
bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_features = bertha_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = bertha_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
self.Log(last_features)
tickers = list(last_features.index.get_level_values("symbol"))
now = self.Time
minute_bars = self.History(tickers, now - timedelta(minutes=1), now,
Resolution.Minute)
self.Log(minute_bars)
try:
trading_bar = minute_bars.droplevel("time").join(last_features)
new_targets = trading_bar.apply(self.calc_exit_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
except (KeyError, ValueError) as e:
self.print(e)
return
def calc_exit_target(self, price_bar):
entry_price, exit_price = price_bar.bb_close, price_bar.close
window = (price_bar.bb_high - price_bar.bb_low)
if exit_price >= entry_price * (1 + self.target_gain) and self.strategy >= 0: # long trade
if self.use_sl:
stop_loss = price_bar.bb_high - window * self.retracement_sl
return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else:
return +1
elif exit_price <= entry_price * (1 - self.target_gain) and self.strategy <= 0: # short trade
if self.use_sl:
stop_loss = price_bar.bb_low + window * self.retracement_sl
return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
else:
return -1
else:
return 0
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
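# Sizing illustration for enter_trades (my numbers). The per-trade size maps
# the winning-class probability from (0.5, 1.0] onto (0, 1]:
#   p = 0.50 -> size 0.0 (no position)
#   p = 0.55 -> size 0.1
#   p = 0.75 -> size 0.5
#   p = 1.00 -> size 1.0
# v0.28 below generalizes the 0.5 floor to a min_proba parameter, with
# scaling = 1/(1 - min_proba).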
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.26: Adding both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.26
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.target_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation
self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored
self.sl_retr = self.GetParameter("retracement_sl", 0) # Retracement percentage to use for the Stop Loss, disabled if 0
self.tp_ext = self.GetParameter("extension_tp", 0) # Extension percentage to use for the Take Profit, disabled if 0
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 9, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
#self.ObjectStore.Delete(self.store_model) # Deleting existing data
if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model):
self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model)))
else:
self.model = GradientBoostingClassifier(n_iter_no_change=10)
self.model.kelly = 0 # Read by enter_trades before the first training
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.features, self.targets = None, None
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None or self.targets is None: return
common_idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets
common_idx = common_idx[common_idx.get_level_values("time") > self.Time - timedelta(self.lookback)]
self.features = self.features.loc[common_idx]
self.targets = self.targets.loc[common_idx]
training_days = common_idx.get_level_values("time")
if len(training_days.unique()) <= 21: return # Require more than one month of training data
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=training_days,
scoring="balanced_accuracy")
self.model.fit(self.features, self.targets)
if self.store_model is not None:
self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model))
score = np.mean(cv_scores)
n_classes = len(self.model.classes_)
self.model.kelly = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes
self.print(f"Training: {self.targets.value_counts()} Edge:{self.model.kelly:.1%}")
self.Plot("ML", "Edge", self.model.kelly)
def enter_trades(self):
self.store_features()
if self.model.kelly <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
y_pred = y_proba.idxmax(axis=1)
sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2 # Selecting only prob > 50% and scaling to 100%
positions = y_pred * sizes * self.model.kelly # Sizing based on Kelly and individual probability
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low)
if self.sl_retr > 0:
stop_loss = features.bb_high - window * self.sl_retr if pos > 0 \
else features.bb_low + window * self.sl_retr
self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss)
if self.tp_ext > 0:
take_profit = features.bb_low + window * self.tp_ext if pos > 0 \
else features.bb_high - window * self.tp_ext
self.LimitOrder(symbol, -qty, take_profit)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_features = bertha_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = bertha_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
self.Log(last_features)
tickers = list(last_features.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
self.Log(f"Target time: {last_minute}")
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
self.Log(minute_bars)
try:
trading_bar = minute_bars.droplevel("time").join(last_features)
new_targets = trading_bar.apply(self.calc_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
except (KeyError, ValueError) as e:
self.print(e)
return
def calc_target(self, price_bar):
entry_price, exit_price = price_bar.bb_close, price_bar.close
window = (price_bar.bb_high - price_bar.bb_low)
if exit_price >= entry_price * (1 + self.target_gain) and self.strategy >= 0: # long trade
if self.sl_retr > 0:
stop_loss = price_bar.bb_high - window * self.sl_retr
return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else:
return +1
elif exit_price <= entry_price * (1 - self.target_gain) and self.strategy <= 0: # short trade
if self.sl_retr > 0:
stop_loss = price_bar.bb_low + window * self.sl_retr
return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
else:
return -1
else:
return 0
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.27
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
def catch_errors(func):
def wrap(self, *args, **kwargs):
try:
result = func(self, *args, **kwargs)
return result
except (KeyError, ValueError) as e:
self.print(e)
return
return wrap
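# Usage sketch for the decorator above (an assumption: it appears intended to
# replace the repeated try/except blocks in store_targets of earlier versions):
#
#     @catch_errors
#     def store_targets(self):
#         ...  # any KeyError/ValueError is logged via self.print and swallowed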
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.target_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation
self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored
self.min_proba = self.GetParameter("min_proba", 0.5) # Threshold probability to trigger a long/short signal
self.sl_retr = self.GetParameter("retracement_sl", 0) # Retracement percentage to use for the Stop Loss, disabled if 0
self.tp_ext = self.GetParameter("extension_tp", 0) # Extension percentage to use for the Take Profit, disabled if 0
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 9, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
#self.ObjectStore.Delete(self.store_model) # Deleting existing data
if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model):
self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model)))
else:
self.model = GradientBoostingClassifier(n_iter_no_change=10)
self.model.kelly = 0 # Default edge before the first training
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.features, self.targets = None, None
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None or self.targets is None: return
idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)]
self.features = self.features.loc[idx]
self.targets = self.targets.loc[idx]
training_days = idx.get_level_values("time")
if len(training_days.unique()) <= 21: return # Require more than one month of training data
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=training_days,
scoring="balanced_accuracy")
self.model.fit(self.features, self.targets)
if self.store_model is not None:
self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model))
score = np.mean(cv_scores)
n_classes = len(self.model.classes_)
self.model.kelly = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes (worked example below)
self.print(f"Training: {self.targets.value_counts()} Edge:{self.model.kelly:.1%}")
self.Plot("ML", "Edge", self.model.kelly)
def enter_trades(self):
self.store_features()
if self.model.kelly <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
y_pred = y_proba.idxmax(axis=1)
scaling = 1 / (1 - self.min_proba)
sizes = (y_proba.max(axis=1) - self.min_proba).clip(0, 1) * scaling # Selecting only prob > min_proba and scaling
positions = y_pred * sizes * self.model.kelly # Sizing based on Kelly and individual probability
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low)
if self.sl_retr > 0:
stop_loss = features.bb_high - window * self.sl_retr if pos > 0 \
else features.bb_low + window * self.sl_retr
self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss)
if self.tp_ext > 0:
take_profit = features.bb_low + window * self.tp_ext if pos > 0 \
else features.bb_high - window * self.tp_ext
self.LimitOrder(symbol, -qty, take_profit)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
@catch_errors
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_features = bertha_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = bertha_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
@catch_errors
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
self.Log(last_features)
tickers = list(last_features.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
self.Log(f"Target time: {last_minute}")
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
self.Log(minute_bars)
trading_bar = minute_bars.droplevel("time").join(last_features)
new_targets = trading_bar.apply(self.calc_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
def calc_target(self, price_bar):
entry_price, exit_price = price_bar.bb_close, price_bar.close
window = (price_bar.bb_high - price_bar.bb_low)
if exit_price >= entry_price * (1 + self.target_gain) and self.strategy >= 0: # long trade
if self.sl_retr > 0:
stop_loss = price_bar.bb_high - window * self.sl_retr
return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else:
return +1
elif exit_price <= entry_price * (1 - self.target_gain) and self.strategy <= 0: # short trade
if self.sl_retr > 0:
stop_loss = price_bar.bb_low + window * self.sl_retr
return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
else:
return -1
else:
return 0
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
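
# Worked example (sketch, not used by the algorithm): the Kelly edge formula
# in train_model rescales balanced accuracy so that random guessing scores 0
# and a perfect classifier scores 1. With n classes, chance level is 1/n.
if __name__ == "__main__":
    def edge(score, n_classes):
        return (n_classes * score - 1) / (n_classes - 1)

    print(edge(1 / 3, 3))  # 0.0 -> chance-level accuracy gives no edge
    print(edge(0.40, 3))   # 0.1 -> 40% balanced accuracy gives a 10% edge
    print(edge(1.00, 3))   # 1.0 -> perfect classification
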
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)

"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.28
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.dummy import DummyClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.cluster import DBSCAN, OPTICS, KMeans
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.gaussian_process import GaussianProcessClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
def catch_errors(func):
def wrap(self, *args, **kwargs):
try:
result = func(self, *args, **kwargs)
return result
except (KeyError, ValueError) as e:
self.print(e)
return
return wrap
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.target_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation
self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored
self.min_proba = self.GetParameter("min_proba", 0.5) # Threshold probability to trigger a long/short signal
self.sl_retr = self.GetParameter("retracement_sl", 0.0) # Retracement percentage to use for the Stop Loss, disabled if 0
self.tp_ext = self.GetParameter("extension_tp", 0.0) # Extension percentage to use for the Take Profit, disabled if 0
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 9, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
#self.ObjectStore.Delete(self.store_model) # Deleting existing data
if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model):
self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model)))
else:
pipe = Pipeline([("scaling", None),
("clustering", None),
("model", LogisticRegression())])
params = dict(scaling=[None,
# MinMaxScaler(),
StandardScaler()],
clustering=[None,
# KMeans(),
# OPTICS(),
DBSCAN()],
model=[DummyClassifier(),
# LogisticRegression(),
# KNeighborsClassifier(),
# GaussianProcessClassifier(),
# GaussianNB(),
GradientBoostingClassifier(n_iter_no_change=3)])
self.model = GridSearchCV(pipe, param_grid=params)
self.model.kelly = 0 # Default edge before the first training
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.features, self.targets = None, None
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None or self.targets is None: return
idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)]
self.features = self.features.loc[idx]
self.targets = self.targets.loc[idx]
training_days = idx.get_level_values("time")
if len(training_days.unique()) <= 21: return # Require more than one month of training data
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=training_days,
scoring="balanced_accuracy")
self.model.fit(self.features, self.targets)
if self.store_model is not None:
self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model))
score = np.mean(cv_scores)
n_classes = len(self.model.classes_)
self.model.kelly = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes
self.print(f"Training: {self.targets.value_counts()} Edge:{self.model.kelly:.1%}")
self.print(f"Best model: {self.model.best_estimator_}")
self.Log(pd.DataFrame(self.model.cv_results_).to_string())
self.Plot("ML", "Edge", self.model.kelly)
def enter_trades(self):
self.store_features()
if self.model.kelly <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
y_pred = y_proba.idxmax(axis=1)
scaling = 1 / (1 - self.min_proba)
sizes = (y_proba.max(axis=1) - self.min_proba).clip(0, 1) * scaling # Selecting only prob > min_proba and scaling
positions = y_pred * sizes * self.model.kelly # Sizing based on Kelly and individual probability (see sizing sketch below)
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low)
if self.sl_retr > 0:
stop_loss = features.bb_high - window * self.sl_retr if pos > 0 \
else features.bb_low + window * self.sl_retr
self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss)
if self.tp_ext > 0:
take_profit = features.bb_high + window * self.tp_ext if pos > 0 \
else features.bb_low - window * self.tp_ext
self.LimitOrder(symbol, -qty, take_profit)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
@catch_errors
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_features = bertha_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = bertha_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
@catch_errors
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
self.Log(last_features)
tickers = list(last_features.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
self.Log(f"Target time: {last_minute}")
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
self.Log(minute_bars)
trading_bar = minute_bars.droplevel("time").join(last_features)
new_targets = trading_bar.apply(self.calc_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
def calc_target(self, price_bar):
entry_price, exit_price = price_bar.bb_close, price_bar.close
window = (price_bar.bb_high - price_bar.bb_low)
if exit_price >= entry_price * (1 + self.target_gain) and self.strategy >= 0: # long trade
if self.sl_retr > 0:
stop_loss = price_bar.bb_high - window * self.sl_retr
return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else:
return +1
elif exit_price <= entry_price * (1 - self.target_gain) and self.strategy <= 0: # short trade
if self.sl_retr > 0:
stop_loss = price_bar.bb_low + window * self.sl_retr
return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
else:
return -1
else:
return 0
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
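
# Sizing sketch (illustrative numbers only): probabilities at or below
# min_proba get zero size, the rest are rescaled to [0, 1], and if the summed
# absolute positions exceed 1 they are normalized so no leverage is used.
if __name__ == "__main__":
    import pandas as pd

    min_proba, kelly = 0.5, 0.8
    y_proba = pd.Series({"AAA": 0.75, "BBB": 0.55, "CCC": 0.45})  # hypothetical
    y_pred = pd.Series({"AAA": 1, "BBB": -1, "CCC": 1})    # predicted class
    sizes = (y_proba - min_proba).clip(0, 1) / (1 - min_proba)
    positions = y_pred * sizes * kelly
    if positions.abs().sum() > 1:
        positions /= positions.abs().sum()
    print(positions.round(2).to_dict())  # {'AAA': 0.4, 'BBB': -0.08, 'CCC': 0.0}
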
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)

"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.29
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.dummy import DummyClassifier
from sklearn.cluster import DBSCAN
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, cross_val_score
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
def catch_errors(func):
def wrap(self, *args, **kwargs):
try:
result = func(self, *args, **kwargs)
return result
except (KeyError, ValueError) as e:
self.print(e)
return
return wrap
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.target_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation
self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored
self.min_proba = self.GetParameter("min_proba", 0.5) # Threshold probability to trigger a long/short signal
self.sl_retr = self.GetParameter("retracement_sl", 0.0) # Retracement percentage to use for the Stop Loss, disabled if 0
self.tp_ext = self.GetParameter("extension_tp", 0.0) # Extension percentage to use for the Take Profit, disabled if 0
self.entry_mns = self.GetParameter("entry_mn", 5) # Entry time (minutes after 9.30)
self.exit_mns = self.GetParameter("exit_mn", 385) # Exit time (minutes after 9.30)
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 9, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
#self.ObjectStore.Delete(self.store_model) # Deleting existing data
if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model):
self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model)))
else:
pipe = Pipeline([("scaling", None),
("clustering", None),
("model", LogisticRegression())])
params = dict(scaling=[None, StandardScaler()],
clustering=[None, DBSCAN()],
model=[DummyClassifier(),
GradientBoostingClassifier(n_iter_no_change=3)])
self.model = GridSearchCV(pipe, param_grid=params)
self.model.kelly = 0 # Default edge before the first training
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.features, self.targets = None, None
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
entry_hr, entry_mn = 9 + self.entry_mns // 60, 30 + self.entry_mns % 60 # minute term can exceed 59, see scheduling sketch below
exit_hr, exit_mn = 9 + self.exit_mns // 60, 30 + self.exit_mns % 60
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(entry_hr, entry_mn), self.enter_trades)
self.Schedule.On(every_day, at(exit_hr, exit_mn), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None or self.targets is None: return
idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)]
self.features = self.features.loc[idx]
self.targets = self.targets.loc[idx]
training_days = idx.get_level_values("time")
if len(training_days.unique()) <= 21: return # Require more than one month of training data
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=training_days,
scoring="balanced_accuracy")
self.model.fit(self.features, self.targets)
if self.store_model is not None:
self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model))
score = np.mean(cv_scores)
n_classes = len(self.model.classes_)
self.model.kelly = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes
self.print(f"Training: {self.targets.value_counts()} Edge:{self.model.kelly:.1%}")
self.print(f"Best model: {self.model.best_estimator_}")
self.Log(pd.DataFrame(self.model.cv_results_).to_string())
self.Plot("ML", "Edge", self.model.kelly)
def enter_trades(self):
self.store_features()
if self.model.kelly <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
y_pred = y_proba.idxmax(axis=1)
scaling = 1 / (1 - self.min_proba)
sizes = (y_proba.max(axis=1) - self.min_proba).clip(0, 1) * scaling # Selecting only prob > min_proba and scaling
positions = y_pred * sizes * self.model.kelly # Sizing based on Kelly and individual probability
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty) # TODO: Change to Limit Order?
features = x_pred.loc[symbol]
window = (features.bb_high - features.bb_low)
if self.sl_retr > 0:
stop_loss = features.bb_high - window * self.sl_retr if pos > 0 \
else features.bb_low + window * self.sl_retr
self.StopLimitOrder(symbol, -qty, stop_loss, stop_loss)
if self.tp_ext > 0:
take_profit = features.bb_high + window * self.tp_ext if pos > 0 \
else features.bb_low - window * self.tp_ext
self.LimitOrder(symbol, -qty, take_profit)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
@catch_errors
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_features = bertha_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = bertha_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
@catch_errors
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
self.Log(last_features)
tickers = list(last_features.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
self.Log(f"Target time: {last_minute}")
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
self.Log(minute_bars)
trading_bar = minute_bars.droplevel("time").join(last_features)
new_targets = trading_bar.apply(self.calc_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
def calc_target(self, price_bar):
entry_price, exit_price = price_bar.bb_close, price_bar.close
window = (price_bar.bb_high - price_bar.bb_low)
if exit_price >= entry_price * (1 + self.target_gain) and self.strategy >= 0: # long trade
if self.sl_retr > 0:
stop_loss = price_bar.bb_high - window * self.sl_retr
return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else:
return +1
elif exit_price <= entry_price * (1 - self.target_gain) and self.strategy <= 0: # short trade
if self.sl_retr > 0:
stop_loss = price_bar.bb_low + window * self.sl_retr
return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
else:
return -1
else:
return 0
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
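
# Scheduling sketch (an assumption, not verified against Lean internals): the
# hour/minute split in Initialize can produce minutes >= 60 (entry_mn=45 gives
# 9:75), which TimeRules.At presumably normalizes via .NET TimeSpan to 10:15.
# A divmod over total minutes sidesteps any reliance on that behaviour.
if __name__ == "__main__":
    def clock(minutes_after_open):
        hr, mn = divmod(9 * 60 + 30 + minutes_after_open, 60)
        return hr, mn

    print(clock(5))    # (9, 35)  -> default entry time
    print(clock(45))   # (10, 15) -> instead of the raw (9, 75)
    print(clock(385))  # (15, 55) -> default exit time
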
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)

"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.29a
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
def catch_errors(func):
def wrap(self, *args, **kwargs):
try:
result = func(self, *args, **kwargs)
return result
except (KeyError, ValueError) as e:
self.print(e)
return
return wrap
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.target_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation
self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored
self.min_proba = self.GetParameter("min_proba", 0.5) # Threshold probability to trigger a long/short signal
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 9, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
#self.ObjectStore.Delete(self.store_model) # Deleting existing data
if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model):
self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model)))
else:
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.model.edge = 0
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.features, self.targets = None, None
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None or self.targets is None: return
idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)]
self.features = self.features.loc[idx]
self.targets = self.targets.loc[idx]
days = idx.get_level_values("time")
if len(days.unique()) <= 21: return # Require more than one month of training data
params = dict(sample_weight=abs(self.targets)) # Weighting each sample by its importance (see labeling sketch below)
targets_bin = self.targets.apply(lambda x: +1 if x > self.target_gain else
-1 if x < -self.target_gain else 0)
cv_scores = cross_val_score(self.model, X=self.features, y=targets_bin,
cv=self.cv, groups=days, fit_params=params,
scoring="balanced_accuracy")
self.model.fit(self.features, targets_bin, **params)
if self.store_model is not None:
self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model))
score = np.mean(cv_scores)
n_classes = len(self.model.classes_)
self.model.kelly = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes
self.print(f"Training: {targets_bin.value_counts()} Edge:{self.model.kelly:.1%}")
self.Plot("ML", "Edge", self.model.kelly)
def enter_trades(self):
self.store_features()
if self.model.kelly <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
y_pred = y_proba.idxmax(axis=1)
scaling = 1 / (1 - self.min_proba)
sizes = (y_proba.max(axis=1) - self.min_proba).clip(0, 1) * scaling # Selecting only prob > min_proba and scaling
positions = y_pred * sizes * self.model.kelly # Sizing based on Kelly and individual probability
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty) # TODO: Change to Limit Order?
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
@catch_errors
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_features = bertha_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = bertha_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
@catch_errors
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
self.Log(last_features)
tickers = list(last_features.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
self.Log(f"Target time: {last_minute}")
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
self.Log(minute_bars)
trading_bar = minute_bars.droplevel("time").join(last_features)
new_targets = trading_bar.eval("close / bb_close - 1") # Calculate the trading return
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
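
# Labeling sketch (illustrative): raw trade returns are binned into {-1, 0, +1}
# classes with target_gain as the threshold, while the absolute return is kept
# as a sample weight so that larger moves count more during training.
if __name__ == "__main__":
    import pandas as pd

    target_gain = 0.05
    y = pd.Series([0.08, -0.06, 0.02, -0.01])  # close / bb_close - 1 per trade
    y_bin = y.apply(lambda r: +1 if r > target_gain else
                    -1 if r < -target_gain else 0)
    weights = y.abs()                          # passed as sample_weight to fit
    print(list(y_bin))    # [1, -1, 0, 0]
    print(list(weights))  # [0.08, 0.06, 0.02, 0.01]
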
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.30a
@creation date: 05/07/2022
"""
import numpy as np
from AlgorithmImports import *
import pickle
import sklearn
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
def catch_errors(func):
def wrap(self, *args, **kwargs):
try:
result = func(self, *args, **kwargs)
return result
except (KeyError, ValueError) as e:
self.print(e)
return
return wrap
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.tgt_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation
self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 9, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
#self.ObjectStore.Delete(self.store_model) # Deleting existing data
if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model):
self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model)))
else:
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.model.kelly = 0
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.x, self.y = None, None
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.x is None or self.y is None: return
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)]
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
days = idx.get_level_values("time")
if len(days.unique()) <= 21: return # Require more than one month of training data
y_bin = self.y.apply(lambda x: +1 if x >= self.tgt_gain and self.strategy >= 0 else
-1 if x <= -self.tgt_gain and self.strategy <= 0 else 0)
# Replace cross_val_score with custom loop to use sample weighted metrics
scores = []
for train_idx, test_idx in self.cv.split(self.x, groups=days):
model_temp = sklearn.base.clone(self.model)
x_train, x_test = self.x.iloc[train_idx], self.x.iloc[test_idx]
y_train, y_test = y_bin.iloc[train_idx], y_bin.iloc[test_idx]
w_train, w_test = self.y.iloc[train_idx], self.y.iloc[test_idx]
model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by log returns
y_pred = model_temp.predict(x_test)
scores += [kelly_pos(y_test, y_pred, sample_weight=abs(w_test))] # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(self.y)))
self.model.kelly = 0 if np.isnan(np.nanmean(scores)) else np.nanmean(scores)
if self.store_model is not None:
self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model))
self.print(f"Training: {y_bin.value_counts()} Edge:{self.model.kelly:.1%}")
self.Plot("ML", "Edge", self.model.kelly)
def enter_trades(self):
self.store_features()
if self.model.kelly <= 0: return
x_pred = self.x.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
y_pred = y_proba.idxmax(axis=1)
sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2 # Selecting only prob > 0.5 and scaling to [0, 1]
positions = y_pred * sizes * self.model.kelly.clip(0, 1) # Sizing based on Kelly and individual probability
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty) # TODO: Change to Limit Order?
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
@catch_errors
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_features = bertha_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = bertha_bar["open"] / yesterday_close - 1
self.x = pd.concat([new_features.dropna(), self.x])
self.print(f"Stored new features, total: {len(self.x)}")
@catch_errors
def store_targets(self):
last_x = self.x.query("time == @self.Time.date()")
self.Log(last_x)
tickers = list(last_x.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
self.Log(f"Target time: {last_minute}")
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
self.Log(minute_bars)
trading_bar = minute_bars.droplevel("time").join(last_x)
new_y = trading_bar.eval("close / bb_close - 1") # Calculate the trading return
self.y = pd.concat([new_y.dropna(), self.y])
self.print(f"Stored new targets, total: {len(self.y)}")
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
self.Log(f"{self.Time} {msg}")
def kelly_pos(y_true, y_pred, sample_weight=None): # TODO: differentiate between losses on 0 and on 1/-1
trades = y_pred!=0
wins = y_true[trades]==y_pred[trades]
win_rate = wins.mean()
loss_rate = 1-win_rate
avg_win = sample_weight[trades][wins].mean()
avg_loss = sample_weight[trades][~wins].mean()
return win_rate/avg_loss - loss_rate/avg_win
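
# Usage sketch for kelly_pos with made-up numbers: two winning long trades and
# one loser. With win rate p = 2/3, average win b = 3% and average loss a = 2%,
# the Kelly fraction f* = p/a - q/b comes out far above 1, which is why the
# strategy clips the Kelly value into [0, 1] before using it for sizing.
if __name__ == "__main__":
    import pandas as pd

    y_true = pd.Series([+1, +1, -1])      # realized direction of each trade
    y_pred = pd.Series([+1, +1, +1])      # predicted direction (all traded)
    rets = pd.Series([0.03, 0.03, 0.02])  # absolute returns per trade
    print(kelly_pos(y_true, y_pred, sample_weight=rets))
    # (2/3)/0.02 - (1/3)/0.03 ~ 22.2
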
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.31a: Minor improvements and library implementation
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.31a
@creation date: 05/07/2022
"""
import numpy as np
from AlgorithmImports import *
import pickle
import sklearn
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
import utils as utl
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.tgt_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation
self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 9, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
# self.ObjectStore.Delete(self.store_model) # Deleting existing data
if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model):
self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model)))
else:
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.model.kelly = 0
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.x, self.y = None, None
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.x is None or self.y is None: return
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
days = idx.get_level_values("time")
if len(days.unique()) <= self.cv_splits: return # Days required for CV
y_bin = self.y.apply(lambda x: +1 if x >= self.tgt_gain and self.strategy >= 0 else
-1 if x <= -self.tgt_gain and self.strategy <= 0 else 0)
# Replace cross_val_score with custom loop to use sample weighted metrics
scores = []
for train_idx, test_idx in self.cv.split(self.x, groups=days):
model_tmp = sklearn.base.clone(self.model)
x_train, x_test = self.x.iloc[train_idx], self.x.iloc[test_idx]
y_train, y_test = y_bin.iloc[train_idx], y_bin.iloc[test_idx]
w_train, w_test = self.y.iloc[train_idx], self.y.iloc[test_idx]
model_tmp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by log returns
y_pred = model_tmp.predict(x_test)
scores += [
utl.kelly_score(y_test, y_pred, returns=abs(w_test))] # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(self.y)))
self.model.kelly = np.clip(np.nanmean(scores), 0, 1)
if self.store_model is not None:
self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model))
self.print(f"Training: {y_bin.value_counts()} Kelly:{self.model.kelly:.1%}")
self.Plot("ML", "Kelly", self.model.kelly)
def enter_trades(self):
self.store_features()
if self.model.kelly <= 0: return
x_pred = self.x.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
y_pred = y_proba.idxmax(axis=1)
sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2 # Selecting only prob > 0.5 and scaling to [0, 1]
positions = y_pred * sizes * self.model.kelly # Sizing based on Kelly and individual probability
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty) # TODO: Change to Limit Order?
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
@utl.catch_errors
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_features = bertha_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = bertha_bar["open"] / yesterday_close - 1
self.x = pd.concat([new_features.dropna(), self.x])
self.print(f"Stored new features, total: {len(self.x)}")
@utl.catch_errors
def store_targets(self):
last_x = self.x.query("time == @self.Time.date()")
self.Log(last_x)
tickers = list(last_x.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
self.Log(f"Target time: {last_minute}")
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
self.Log(minute_bars)
trading_bar = minute_bars.droplevel("time").join(last_x)
new_y = trading_bar.eval("close / bb_close - 1") # Calculate the trading return
self.y = pd.concat([new_y.dropna(), self.y])
self.print(f"Stored new targets, total: {len(self.y)}")
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
self.Log(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
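
# Aggregation sketch (assumes agg_bars behaves like a between-time filter
# followed by this groupby): minute bars in a (symbol, time) MultiIndex
# collapse into one daily OHLCV bar per symbol via GROUPER-style grouping
# and AGG_OPS-style aggregation.
if __name__ == "__main__":
    import pandas as pd

    times = pd.to_datetime(["2022-06-01 09:31", "2022-06-01 09:32"])
    bars = pd.DataFrame({"open": [10.0, 10.2], "high": [10.3, 10.4],
                         "low": [9.9, 10.1], "close": [10.2, 10.3],
                         "volume": [1000, 1500]},
                        index=pd.MultiIndex.from_product(
                            [["XYZ"], times], names=["symbol", "time"]))
    grouper = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
    agg_ops = {"open": "first", "close": "last", "high": "max",
               "low": "min", "volume": "sum"}
    print(bars.groupby(grouper).agg(agg_ops))
    # one row for (XYZ, 2022-06-01): open 10.0, close 10.3, high 10.4,
    # low 9.9, volume 2500
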
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.32a: Improved logging
v0.31a: Minor improvements and library implementation
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.32a
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import sklearn
import pandas as pd
pd.set_option('mode.use_inf_as_na', True)
from sklearn.ensemble import GradientBoostingClassifier
import utils as utl
from timeseriescv import TimeSeriesSplitGroups
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.tgt_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation
self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 11, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
# self.ObjectStore.Delete(self.store_model) # Deleting existing data
if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model):
self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model)))
else:
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.model.kelly = 0
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.x, self.y = None, None
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.x is None or self.y is None: return
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
days = idx.get_level_values("time")
if len(days.unique()) <= self.cv_splits: return # Days required for CV
y_bin = self.y.apply(lambda x: +1 if x >= self.tgt_gain and self.strategy >= 0 else
-1 if x <= -self.tgt_gain and self.strategy <= 0 else 0)
# Replace cross_val_score with custom loop to use sample weighted metrics
scores = []
for train_idx, test_idx in self.cv.split(self.x, groups=days):
model_tmp = sklearn.base.clone(self.model)
x_train, x_test = self.x.iloc[train_idx], self.x.iloc[test_idx]
y_train, y_test = y_bin.iloc[train_idx], y_bin.iloc[test_idx]
w_train, w_test = self.y.iloc[train_idx], self.y.iloc[test_idx]
model_tmp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by log returns
y_pred = model_tmp.predict(x_test)
scores += [utl.kelly_score(y_test, y_pred, returns=abs(w_test))] # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(self.y)))
self.model.kelly = np.clip(np.nanmean(scores), 0, 1)
if self.store_model is not None:
self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model))
self.print(f"Training: {y_bin.value_counts()} Kelly:{self.model.kelly:.1%}")
self.Plot("ML", "Kelly", self.model.kelly)
def enter_trades(self):
self.store_features()
if self.model.kelly <= 0: return
x_pred = self.x.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
self.print(f"Predictions: {y_proba}")
self.Notify.Email("hb_beawai@googlegroups.com", "Big Bertha Predictions", y_proba)
y_pred = y_proba.idxmax(axis=1)
sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2 # Selecting only prob > 0.5 and scaling to [0, 1]
positions = y_pred * sizes * self.model.kelly # Sizing based on Kelly and individual probability
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty) # TODO: Change to Limit Order?
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
@utl.catch_errors
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_x = bertha_bar.add_prefix("bb_") # Big Bertha OHLCV features (see feature sketch below)
new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_x["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_x["gap"] = bertha_bar["open"] / yesterday_close - 1
self.x = pd.concat([new_x.dropna(), self.x])
self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}")
@utl.catch_errors
def store_targets(self):
last_x = self.x.query("time == @self.Time.date()")
tickers = list(last_x.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
trading_bar = minute_bars.droplevel("time").join(last_x)
new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return
self.y = pd.concat([new_y, self.y])
self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}")
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
self.Log(f"{self.Time} {msg}")
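
# Feature sketch (illustrative): the bb_* columns from the opening "Big
# Bertha" bar become scale-free features with DataFrame.eval, plus the
# overnight gap against yesterday's close (assumed at 9.8 here).
if __name__ == "__main__":
    import pandas as pd

    bb = pd.DataFrame({"bb_open": [10.0], "bb_high": [10.6],
                       "bb_low": [9.9], "bb_close": [10.5]})
    bb.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
    bb.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
    bb.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
    bb["gap"] = bb["bb_open"] / 9.8 - 1
    print(bb.round(3).to_dict("records"))
    # bb_size 0.07, bb_close_range 0.857, bb_open_range 0.143, gap 0.02
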
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.33a: Improved email notification
v0.32a: Improved logging
v0.31a: Minor improvements and library implementation
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.33a
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import sklearn
import pandas as pd
pd.set_option('mode.use_inf_as_na', True)
from sklearn.ensemble import GradientBoostingClassifier
import utils as utl
from timeseriescv import TimeSeriesSplitGroups
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.tgt_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation
self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 11, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
# self.ObjectStore.Delete(self.store_model) # Deleting existing data
if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model):
self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model)))
else:
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.model.kelly = 0
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.x, self.y = None, None
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.x is None or self.y is None: return
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
days = idx.get_level_values("time")
if len(days.unique()) <= self.cv_splits: return # Days required for CV
y_bin = self.y.apply(lambda x: +1 if x >= self.tgt_gain and self.strategy >= 0 else
-1 if x <= -self.tgt_gain and self.strategy <= 0 else 0)
# Replace cross_val_score with custom loop to use sample weighted metrics
scores = []
for train_idx, test_idx in self.cv.split(self.x, groups=days):
model_tmp = sklearn.base.clone(self.model)
x_train, x_test = self.x.iloc[train_idx], self.x.iloc[test_idx]
y_train, y_test = y_bin.iloc[train_idx], y_bin.iloc[test_idx]
w_train, w_test = self.y.iloc[train_idx], self.y.iloc[test_idx]
model_tmp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by log returns
y_pred = model_tmp.predict(x_test)
scores += [utl.kelly_score(y_test, y_pred, returns=abs(w_test))] # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(self.y)))
self.model.kelly = np.clip(np.nanmean(scores), 0, 1)
if self.store_model is not None:
self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model))
self.print(f"Training: {y_bin.value_counts()} Kelly:{self.model.kelly:.1%}")
self.Plot("ML", "Kelly", self.model.kelly)
def enter_trades(self):
self.store_features()
if self.model.kelly <= 0: return
x_pred = self.x.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
self.print(f"Predictions: {y_proba.to_string()}")
self.Notify.Email("fbaldisserri@gmail.com", "Big Bertha Predictions", y_proba.to_html())
self.Notify.Email("hbrewer27@gmail.com", "Big Bertha Predictions", y_proba.to_html())
y_pred = y_proba.idxmax(axis=1)
sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2 # Doubling the probability edge over 0.5 into a 0-1 size
positions = y_pred * sizes * self.model.kelly # Sizing based on Kelly and individual probability
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty) # TODO: Change to Limit Order?
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
@utl.catch_errors
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_x = bertha_bar.add_prefix("bb_")
new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_x["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_x["gap"] = bertha_bar["open"] / yesterday_close - 1
self.x = pd.concat([new_x.dropna(), self.x])
self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}")
@utl.catch_errors
def store_targets(self):
last_x = self.x.query("time == @self.Time.date()")
tickers = list(last_x.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
trading_bar = minute_bars.droplevel("time").join(last_x)
new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return
self.y = pd.concat([new_y, self.y])
self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}")
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
self.Log(f"{self.Time} {msg}")
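# utils.catch_errors (used as a decorator above) is also not attached; a minimal
# sketch of such a decorator follows, assuming its job is to keep a failing
# feature/target update from crashing the algorithm, logging the error instead.
import functools

def catch_errors(method):
    @functools.wraps(method)
    def wrapper(self, *args, **kwargs):
        try:
            return method(self, *args, **kwargs)
        except Exception as e:  # deliberately broad: log any data hiccup and move on
            self.Debug(f"{self.Time} {method.__name__} failed: {e}")
    return wrapper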
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.34a: Storing features instead of model
v0.33a: Improved email notification
v0.32a: Improved logging
v0.31a: Minor improvements and library implementation
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.34a
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import sklearn
import pandas as pd
pd.set_option('mode.use_inf_as_na', True)
from sklearn.ensemble import GradientBoostingClassifier
import utils as utl
from timeseriescv import TimeSeriesSplitGroups
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.tgt_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 4) # Number of splits for model cross validation
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 11, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.kelly = 0
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.last_training = datetime(2000, 1, 1, 0, 0, 0)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(every_day, at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
self.x, self.y = None, None
if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved
self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data")))
self.print(f"Loaded data {self.x.shape} {self.y.shape}")
if not self.LiveMode: # Store data during backtest
self.Schedule.On(every_day, at(16, 0), self.save_data)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if ((self.Time - self.last_training).days < 30 and (self.kelly > 0)) \
or self.x is None or self.y is None: return
self.clean_data()
days = self.x.index.get_level_values("time")
if len(days.unique()) <= self.cv_splits * 10: return
y_bin = self.y.apply(lambda x: +1 if x >= self.tgt_gain and self.strategy >= 0 else
-1 if x <= -self.tgt_gain and self.strategy <= 0 else 0)
# Replace cross_val_score with custom loop to use sample weighted metrics
scores = []
days = self.x.index.get_level_values("time")
for train_idx, test_idx in self.cv.split(self.x, groups=days):
model_tmp = sklearn.base.clone(self.model)
x_train, x_test = self.x.iloc[train_idx], self.x.iloc[test_idx]
y_train, y_test = y_bin.iloc[train_idx], y_bin.iloc[test_idx]
w_train, w_test = self.y.iloc[train_idx], self.y.iloc[test_idx]
model_tmp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by log returns
y_pred = model_tmp.predict(x_test)
scores += [utl.kelly_score(y_test, y_pred, returns=abs(w_test))] # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(self.y)))
self.last_training = self.Time
self.kelly = np.clip(np.nanmean(scores), 0, 1)
self.print(f"Training: {y_bin.value_counts()} Kelly:{self.kelly:.1%}")
self.Plot("ML", "Kelly", self.kelly)
def enter_trades(self):
self.calc_features()
if self.kelly <= 0: return
x_pred = self.x.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
self.print(f"Predictions: {y_proba.to_string()}")
self.Notify.Email("fbaldisserri@gmail.com", "Big Bertha Predictions", y_proba.to_html())
self.Notify.Email("hbrewer27@gmail.com", "Big Bertha Predictions", y_proba.to_html())
y_pred = y_proba.idxmax(axis=1)
sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2 # Doubling the probability edge over 0.5 into a 0-1 size
positions = y_pred * sizes * self.kelly # Sizing based on Kelly and individual probability
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty) # TODO: Change to Limit Order?
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.calc_targets()
@utl.catch_errors
def calc_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_x = bertha_bar.add_prefix("bb_")
new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_x["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_x["gap"] = bertha_bar["open"] / yesterday_close - 1
self.x = pd.concat([new_x.dropna(), self.x])
self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}")
@utl.catch_errors
def calc_targets(self):
last_x = self.x.query("time == @self.Time.date()")
tickers = list(last_x.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
trading_bar = minute_bars.droplevel("time").join(last_x)
new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return
self.y = pd.concat([new_y, self.y])
self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}")
def clean_data(self):
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
def save_data(self):
mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True)
self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb")
self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y)))
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
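# timeseriescv.TimeSeriesSplitGroups (imported in the versions above) is a custom
# splitter that is not attached. A minimal sketch of the idea, assuming it mirrors
# sklearn's TimeSeriesSplit but walks forward over unique *groups* (trading days),
# so no single day ever straddles the train/test boundary:
import numpy as np

class TimeSeriesSplitGroups:
    def __init__(self, n_splits=5):
        self.n_splits = n_splits

    def split(self, X, y=None, groups=None):
        unique_days = np.unique(groups)  # sorted trading days
        folds = np.array_split(unique_days, self.n_splits + 1)
        groups = np.asarray(groups)
        for i in range(1, self.n_splits + 1):
            train_days = np.concatenate(folds[:i])  # expanding window of past days
            test_days = folds[i]                    # the next block of days
            yield (np.flatnonzero(np.isin(groups, train_days)),
                   np.flatnonzero(np.isin(groups, test_days)))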
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.35a: CV and training improvements
v0.34a: Storing features instead of model
v0.33a: Improved email notification
v0.32a: Improved logging
v0.31a: Minor improvements and library implementation
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.35a
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import sklearn
import pandas as pd
pd.set_option('mode.use_inf_as_na', True)
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
import utils as utl
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.target_gain = self.GetParameter("target_gain", 0.025) # Minimum target gain to enter the trade
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.SetStartDate(2021, 6, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.kelly = 0
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.last_training = datetime(2000, 1, 1, 0, 0, 0)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(every_day, at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
self.x, self.y = None, None
if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved
self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data")))
self.print(f"Loaded data {self.x.shape} {self.y.shape}")
if not self.LiveMode: # Store data during backtest
self.Schedule.On(every_day, at(16, 0), self.save_data)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if (self.Time - self.last_training).days < 7 \
or self.x is None or self.y is None: return
self.clean_data()
days = self.x.index.get_level_values("time")
if len(days.unique()) <= 21: return
y_bin = self.y.apply(lambda x: +1 if x >= self.target_gain and self.strategy >= 0 else
-1 if x <= -self.target_gain and self.strategy <= 0 else 0)
weight = self.y - self.target_gain # TODO: How to use log with -100% returns?
model_temp = sklearn.base.clone(self.model)
x_train, x_test, y_train, y_test, w_train, w_test = train_test_split(
self.x, y_bin, weight, train_size=0.5, shuffle=False)
model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by returns
y_pred = model_temp.predict(x_test)
score = utl.kelly_score(y_test, y_pred, sample_weight=abs(w_test)) # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(weight)))
self.kelly = np.nan_to_num(score).clip(0, 1)
self.Debug(f"{self.Time} Training - Pts.: {y_bin.value_counts()}\n"
f"Kelly: {self.kelly:.1%}\n")
self.Plot("ML", "Score", self.kelly)
def enter_trades(self): # TODO: Balance Long/Short Trades?
self.calc_features()
x_pred = self.x.query("time == @self.Time.date()")
if self.kelly <= 0 or len(x_pred) == 0: return
x_pred.index = x_pred.index.droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
self.print(f"Predictions: {y_proba.to_string()}")
self.Notify.Email("fbaldisserri@gmail.com", "Big Bertha Predictions", y_proba.to_html())
self.Notify.Email("hbrewer27@gmail.com", "Big Bertha Predictions", y_proba.to_html())
y_pred = y_proba.idxmax(axis=1)
sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2 # Doubling the probability edge over 0.5 into a 0-1 size
positions = y_pred * sizes * self.kelly # Sizing based on Kelly and individual probability
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty) # TODO: Change to Limit Order?
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.calc_targets()
def calc_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_x = bertha_bar.add_prefix("bb_")
new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_x["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_x["gap"] = bertha_bar["open"] / yesterday_close - 1
self.x = pd.concat([new_x.dropna(), self.x])
self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}")
def calc_targets(self):
last_x = self.x.query("time == @self.Time.date()")
if len(last_x) == 0: return
tickers = list(last_x.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
if len(minute_bars) == 0: return
trading_bar = minute_bars.droplevel("time").join(last_x)
new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return
self.y = pd.concat([new_y, self.y])
self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}")
def clean_data(self):
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
def save_data(self):
mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True)
self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb")
self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y)))
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
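# utl.kelly_score is the "cv metric based on Kelly criterion" from the changelog;
# it is not attached either. One plausible reading, sketched under that assumption:
# score a fold by the Kelly fraction f* = p - (1 - p) / b implied by the weighted
# hit rate p and the payoff ratio b of the predictions. The `returns=` keyword seen
# in some versions is treated here as an alias of `sample_weight=`.
import numpy as np

def kelly_score(y_true, y_pred, sample_weight=None, returns=None):
    w = returns if returns is not None else sample_weight
    w = np.ones(len(y_true)) if w is None else np.asarray(w, dtype=float)
    hit = np.asarray(y_true) == np.asarray(y_pred)
    wins, losses = w[hit], w[~hit]
    if wins.sum() == 0:
        return 0.0                      # no edge at all in this fold
    if len(losses) == 0 or losses.mean() == 0:
        return 1.0                      # never lost; full Kelly
    p = wins.sum() / w.sum()            # weighted hit rate
    b = wins.mean() / losses.mean()     # payoff ratio (avg win / avg loss)
    return p - (1 - p) / b              # Kelly fraction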
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.36a: Long Short version (market exposure parameter)
v0.35a: CV and training improvements
v0.34a: Storing features instead of model
v0.33a: Improved email notification
v0.32a: Improved logging
v0.31a: Minor improvements and library implementation
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.36a
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import sklearn
import pandas as pd
pd.set_option('mode.use_inf_as_na', True)
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
import utils as utl
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.exposure = self.GetParameter("exposure", 0.5) # 0 100% short, 1 100% long, 0.5 market neutral
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.n_positions = self.GetParameter("n_positions", 10) # Number of total positions per day
self.n_longs = int(self.n_positions * self.exposure)
self.n_shorts = self.n_positions - self.n_longs
self.SetStartDate(2021, 6, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.kelly = 0
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.last_training = datetime(2000, 1, 1, 0, 0, 0)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(every_day, at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
self.x, self.y = None, None
if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved
self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data")))
self.print(f"Loaded data {self.x.shape} {self.y.shape}")
if not self.LiveMode: # Store data during backtest
self.Schedule.On(every_day, at(16, 0), self.save_data)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if (self.Time - self.last_training).days < 7 \
or self.x is None or self.y is None: return
self.clean_data()
days = self.x.index.get_level_values("time")
if len(days.unique()) <= 21: return
y_bin = (self.y > 0).astype(float)
weight = self.y # TODO: How to use log with -100% returns?
model_temp = sklearn.base.clone(self.model)
x_train, x_test, y_train, y_test, w_train, w_test = train_test_split(
self.x, y_bin, weight, train_size=0.5, shuffle=False)
model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by returns
y_pred = model_temp.predict(x_test)
score = utl.kelly_score(y_test, y_pred, sample_weight=abs(w_test)) # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(weight)))
self.kelly = np.nan_to_num(score).clip(0, 1)
self.Debug(f"{self.Time} Training - Pts.: {y_bin.value_counts()}\n"
f"Kelly: {self.kelly:.1%}\n")
self.Plot("ML", "Score", self.kelly)
def enter_trades(self): # TODO: Balance Long/Short Trades?
self.calc_features()
x_pred = self.x.query("time == @self.Time.date()")
if self.kelly <= 0 or len(x_pred) == 0: return
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict_proba(x_pred)[:,1],
index=x_pred.index).sort_values(ascending=False)
self.print(f"Predictions: {y_pred.to_string()}")
self.Notify.Email("fbaldisserri@gmail.com", "Big Bertha Predictions", y_pred.to_string())
self.Notify.Email("hbrewer27@gmail.com", "Big Bertha Predictions", y_pred.to_string())
longs = y_pred.head(self.n_longs)
longs_pos = longs * self.exposure / longs.sum() # Normalizing by confidence (proba UP)
shorts = y_pred.tail(self.n_shorts)
shorts_pos = -(1 - shorts) * (1 - self.exposure) / (1 - shorts).sum() # Normalizing by confidence (1 - proba UP)
positions = pd.concat([longs_pos, shorts_pos]) * self.kelly
self.print(f"Positions {positions}")
for symbol, pos in positions.items():
self.SetHoldings(symbol, pos)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.calc_targets()
def calc_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_x = bertha_bar.add_prefix("bb_")
new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_x["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_x["gap"] = bertha_bar["open"] / yesterday_close - 1
self.x = pd.concat([new_x.dropna(), self.x])
self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}")
def calc_targets(self):
last_x = self.x.query("time == @self.Time.date()")
if len(last_x) == 0: return
tickers = list(last_x.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
if len(minute_bars) == 0: return
trading_bar = minute_bars.droplevel("time").join(last_x)
new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return
self.y = pd.concat([new_y, self.y])
self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}")
def clean_data(self):
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
def save_data(self):
mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True)
self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb")
self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y)))
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
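# A quick worked example of the v0.36a sizing above (illustrative numbers only):
# exposure=0.5 and n_positions=4 give n_longs=2, n_shorts=2; with kelly=0.4 and
# up-probabilities {A: 0.8, B: 0.7, C: 0.3, D: 0.2}:
import pandas as pd

y_pred = pd.Series({"A": 0.8, "B": 0.7, "C": 0.3, "D": 0.2}).sort_values(ascending=False)
exposure, kelly = 0.5, 0.4
longs = y_pred.head(2)
longs_pos = longs * exposure / longs.sum()                        # A: 0.267, B: 0.233
shorts = y_pred.tail(2)
shorts_pos = -(1 - shorts) * (1 - exposure) / (1 - shorts).sum()  # C: -0.233, D: -0.267
positions = pd.concat([longs_pos, shorts_pos]) * kelly
print(positions.round(3))  # A 0.107, B 0.093, C -0.093, D -0.107 -> net exposure ~0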
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.37a: Long Short with probability threshold
v0.36a: Long Short version (market exposure parameter)
v0.35a: CV and training improvements
v0.34a: Storing features instead of model
v0.33a: Improved email notification
v0.32a: Improved logging
v0.31a: Minor improvements and library implementation
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.37a
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import sklearn
import pandas as pd
pd.set_option('mode.use_inf_as_na', True)
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
import utils as utl
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.min_proba = self.GetParameter("min_proba", 0.2) # Minimum differential probability
self.SetStartDate(2021, 6, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.kelly = 0
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.last_training = datetime(2000, 1, 1, 0, 0, 0)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(every_day, at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
self.x, self.y = None, None
if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved
self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data")))
self.print(f"Loaded data {self.x.shape} {self.y.shape}")
if not self.LiveMode: # Store data during backtest
self.Schedule.On(every_day, at(16, 0), self.save_data)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if (self.Time - self.last_training).days < 7 \
or self.x is None or self.y is None: return
self.clean_data()
days = self.x.index.get_level_values("time")
if len(days.unique()) <= 21: return
y_bin = self.y.apply(lambda x: +1 if x >= 0 else -1) # Using +1/-1 labels so the Kelly score covers both long and short trades
weight = self.y # TODO: How to use log with -100% returns?
model_temp = sklearn.base.clone(self.model)
x_train, x_test, y_train, y_test, w_train, w_test = train_test_split(
self.x, y_bin, weight, train_size=0.5, shuffle=False)
model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by returns
y_pred = model_temp.predict(x_test)
score = utl.kelly_score(y_test, y_pred, sample_weight=abs(w_test)) # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(weight)))
self.kelly = np.nan_to_num(score).clip(0, 1)
self.Debug(f"{self.Time} Training - Pts.: {y_bin.value_counts()}\n"
f"Kelly: {self.kelly:.1%}\n")
self.Plot("ML", "Score", self.kelly)
def enter_trades(self): # TODO: Balance Long/Short Trades?
self.calc_features()
x_pred = self.x.query("time == @self.Time.date()")
if self.kelly <= 0 or len(x_pred) == 0: return
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict_proba(x_pred)[:,1],
index=x_pred.index)
self.print(f"Predictions: {y_pred.to_string()}")
self.Notify.Email("fbaldisserri@gmail.com", "Big Bertha Predictions", y_pred.to_string())
self.Notify.Email("hbrewer27@gmail.com", "Big Bertha Predictions", y_pred.to_string())
proba_desc = y_pred.sort_values(ascending=False)
proba_asc = y_pred.sort_values(ascending=True)
pairs = dict(proba=proba_desc.values-proba_asc.values, # Pairing the most bullish with the most bearish name; the gap is the tradeable edge
long_sym=proba_desc.index.get_level_values("symbol"),
short_sym=proba_asc.index.get_level_values("symbol"))
pairs = pd.DataFrame.from_dict(pairs).query("proba >= @self.min_proba")
pairs.eval("pos = proba * @self.kelly", inplace=True)
if pairs["pos"].sum() >= 1: pairs["pos"] /= pairs["pos"].sum()
self.print(f"Positions {pairs}")
for _, row in pairs.iterrows():
self.SetHoldings(row["long_sym"], row["pos"]/2)
self.SetHoldings(row["short_sym"], -row["pos"]/2)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.calc_targets()
def calc_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_x = bertha_bar.add_prefix("bb_")
new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_x["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_x["gap"] = bertha_bar["open"] / yesterday_close - 1
self.x = pd.concat([new_x.dropna(), self.x])
self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}")
def calc_targets(self):
last_x = self.x.query("time == @self.Time.date()")
if len(last_x) == 0: return
tickers = list(last_x.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
if len(minute_bars) == 0: return
trading_bar = minute_bars.droplevel("time").join(last_x)
new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return
self.y = pd.concat([new_y, self.y])
self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}")
def clean_data(self):
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
def save_data(self):
mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True)
self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb")
self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y)))
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
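# The v0.37a pairing above matches the most bullish name against the most bearish
# one and sizes each pair by the probability gap. Illustrative numbers only
# (kelly=0.5, min_proba=0.2), with up-probabilities {A: 0.9, B: 0.6, C: 0.4, D: 0.1}:
import pandas as pd

y_pred = pd.Series({"A": 0.9, "B": 0.6, "C": 0.4, "D": 0.1})
desc, asc = y_pred.sort_values(ascending=False), y_pred.sort_values()
pairs = pd.DataFrame({"proba": desc.values - asc.values,  # A-D: 0.8, B-C: 0.2, then mirrors
                      "long_sym": desc.index, "short_sym": asc.index})
pairs = pairs.query("proba >= 0.2").assign(pos=lambda d: d["proba"] * 0.5)
print(pairs)  # long A / short D at pos 0.40, long B / short C at pos 0.10
# Each leg is then entered at pos/2, so the book stays dollar-neutral pair by pair;
# the mirror rows (negative gaps) drop out at the min_proba threshold.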
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.38a: DAS Trader integration
v0.37a: Long Short with probability threshold
v0.36a: Long Short version (market exposure parameter)
v0.35a: CV and training improvements
v0.34a: Storing features instead of model
v0.33a: Improved email notification
v0.32a: Improved logging
v0.31a: Minor improvements and library implementation
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.38a
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import sklearn
import pandas as pd
pd.set_option('mode.use_inf_as_na', True)
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
import utils as utl
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.min_proba = self.GetParameter("min_proba", 0.2) # Minimum differential probability
self.SetStartDate(2021, 6, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.kelly = 0
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.last_training = datetime(2000, 1, 1, 0, 0, 0)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(every_day, at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
self.x, self.y = None, None
if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved
self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data")))
self.print(f"Loaded data {self.x.shape} {self.y.shape}")
if not self.LiveMode: # Store data during backtest
self.Schedule.On(every_day, at(16, 0), self.save_data)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if (self.Time - self.last_training).days < 7 \
or self.x is None or self.y is None: return
self.clean_data()
days = self.x.index.get_level_values("time")
if len(days.unique()) <= 21: return
y_bin = self.y.apply(lambda x: +1 if x >= 0 else -1) # Using +1/-1 labels so the Kelly score covers both long and short trades
weight = self.y # TODO: How to use log with -100% returns?
model_temp = sklearn.base.clone(self.model)
x_train, x_test, y_train, y_test, w_train, w_test = train_test_split(
self.x, y_bin, weight, train_size=0.5, shuffle=False)
model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by returns
y_pred = model_temp.predict(x_test)
score = utl.kelly_score(y_test, y_pred, sample_weight=abs(w_test)) # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(weight)))
self.kelly = np.nan_to_num(score).clip(0, 1)
self.Debug(f"{self.Time} Training - Pts.: {y_bin.value_counts()}\n"
f"Kelly: {self.kelly:.1%}\n")
self.Plot("ML", "Score", self.kelly)
def enter_trades(self): # TODO: Balance Long/Short Trades?
self.calc_features()
x_pred = self.x.query("time == @self.Time.date()")
if self.kelly <= 0 or len(x_pred) == 0: return
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict_proba(x_pred)[:,1],
index=x_pred.index)
self.print(f"Predictions: {y_pred.to_string()}")
self.Notify.Email("fbaldisserri@gmail.com", "Big Bertha Predictions", y_pred.to_string())
self.Notify.Email("hbrewer27@gmail.com", "Big Bertha Predictions", y_pred.to_string())
proba_desc = y_pred.sort_values(ascending=False)
proba_asc = y_pred.sort_values(ascending=True)
pairs = dict(proba=proba_desc.values-proba_asc.values,
long_sym=proba_desc.index.get_level_values("symbol"),
short_sym=proba_asc.index.get_level_values("symbol"))
pairs = pd.DataFrame.from_dict(pairs).query("proba >= @self.min_proba")
pairs.eval("pos = proba * @self.kelly", inplace=True)
if pairs["pos"].sum() >= 1: pairs["pos"] /= pairs["pos"].sum()
self.print(f"Positions {pairs}")
for _, row in pairs.iterrows():
self.SetHoldings(row["long_sym"], row["pos"]/2)
self.SetHoldings(row["short_sym"], -row["pos"]/2)
if self.LiveMode:
utl.das_send_order(self, row["long_sym"], position=row["pos"] / 2)
utl.das_send_order(self, row["short_sym"], position=-row["pos"] / 2)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
if self.LiveMode: utl.das_liquidate(self)
self.calc_targets()
def calc_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_x = bertha_bar.add_prefix("bb_")
new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_x["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_x["gap"] = bertha_bar["open"] / yesterday_close - 1
self.x = pd.concat([new_x.dropna(), self.x])
self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}")
def calc_targets(self):
last_x = self.x.query("time == @self.Time.date()")
if len(last_x) == 0: return
tickers = list(last_x.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
if len(minute_bars) == 0: return
trading_bar = minute_bars.droplevel("time").join(last_x)
new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return
self.y = pd.concat([new_y, self.y])
self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}")
def clean_data(self):
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
def save_data(self):
mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True)
self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb")
self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y)))
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
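# utils.das_send_order / das_liquidate belong to the unattached DAS Trader
# integration. The stub below is hypothetical end to end: it assumes a plain-text
# command API reachable over TCP, and the endpoint and command strings are
# placeholders rather than real DAS CMD syntax; it only shows the shape of the calls.
import socket

DAS_HOST, DAS_PORT = "127.0.0.1", 9800  # placeholder endpoint

def _das_send(algo, command: str):
    algo.Debug(f"{algo.Time} DAS >> {command}")
    with socket.create_connection((DAS_HOST, DAS_PORT), timeout=5) as sock:
        sock.sendall((command + "\n").encode())

def das_send_order(algo, ticker, quantity=None, position=None):
    if quantity is None:  # translate a target portfolio weight into shares
        quantity = algo.CalculateOrderQuantity(ticker, position)
    side = "BUY" if quantity > 0 else "SELL"
    _das_send(algo, f"ORDER {side} {ticker} {abs(int(quantity))} MKT")  # assumed format

def das_liquidate(algo):
    _das_send(algo, "FLATTEN ALL")  # assumed format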
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.39a: Minor improvements and changes to DAS Integration
v0.38a: DAS Trader integration
v0.37a: Long Short with probability threshold
v0.36a: Long Short version (market exposure parameter)
v0.35a: CV and training improvements
v0.34a: Storing features instead of model
v0.33a: Improved email notification
v0.32a: Improved logging
v0.31a: Minor improvements and library implementation
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.39a
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import sklearn
import pandas as pd
pd.set_option('mode.use_inf_as_na', True)
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
import utils as utl
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.min_proba = self.GetParameter("min_proba", 0.1) # Minimum differential probability
self.SetStartDate(2021, 6, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.kelly = 0
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.last_training = datetime(2000, 1, 1, 0, 0, 0)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(every_day, at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
self.x, self.y = None, None
if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved
self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data")))
self.print(f"Loaded data {self.x.shape} {self.y.shape}")
if not self.LiveMode: # Store data during backtest
self.Schedule.On(every_day, at(16, 0), self.save_data)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if (self.Time - self.last_training).days < 7 \
or self.x is None or self.y is None: return
self.clean_data()
days = self.x.index.get_level_values("time")
if len(days.unique()) <= 21: return
y_bin = self.y.apply(lambda x: +1 if x >= 0 else -1) # Using +1/-1 labels so the Kelly score covers both long and short trades
weight = self.y # TODO: How to use log with -100% returns?
model_temp = sklearn.base.clone(self.model)
x_train, x_test, y_train, y_test, w_train, w_test = train_test_split(
self.x, y_bin, weight, train_size=0.5, shuffle=False)
model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by returns
y_pred = model_temp.predict(x_test)
score = utl.kelly_score(y_test, y_pred, sample_weight=abs(w_test)) # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(weight)))
self.kelly = np.nan_to_num(score).clip(0, 1)
self.Debug(f"{self.Time} Training - Pts.: {y_bin.value_counts()}\n"
f"Kelly: {self.kelly:.1%}\n")
self.Plot("ML", "Score", self.kelly)
def enter_trades(self): # TODO: Balance Long/Short Trades?
self.calc_features()
x_pred = self.x.query("time == @self.Time.date()")
if self.kelly <= 0 or len(x_pred) == 0: return
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict_proba(x_pred)[:,1],
index=x_pred.index)
self.print(f"Predictions: {y_pred.to_string()}")
for email in ["fbaldisserri@gmail.com", "hbrewer27@gmail.com"]:
self.Notify.Email(email, "Big Bertha Predictions", y_pred.to_string())
proba_desc = y_pred.sort_values(ascending=False)
proba_asc = y_pred.sort_values(ascending=True)
pairs = dict(proba=proba_desc.values-proba_asc.values,
long_sym=proba_desc.index.get_level_values("symbol"),
short_sym=proba_asc.index.get_level_values("symbol"))
pairs = pd.DataFrame.from_dict(pairs).query("proba >= @self.min_proba")
pairs.eval("pos = proba * @self.kelly", inplace=True)
if pairs["pos"].sum() >= 1: pairs["pos"] /= pairs["pos"].sum()
self.print(f"Positions {pairs}")
for _, row in pairs.iterrows():
self.SetHoldings(row["long_sym"], row["pos"]/2)
self.SetHoldings(row["short_sym"], -row["pos"]/2)
#if self.LiveMode: # NOTE: guard disabled in this version, so DAS test orders fire in backtests too
long_qty = self.CalculateOrderQuantity(row["long_sym"], row["pos"]/2)
utl.das_send_order(self, row["long_sym"].split()[0], quantity=long_qty)
short_qty = self.CalculateOrderQuantity(row["short_sym"], -row["pos"] / 2)
utl.das_send_order(self, row["short_sym"].split()[0], quantity=short_qty)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
#if self.LiveMode: # NOTE: guard disabled here as well
utl.das_liquidate(self)
self.calc_targets()
def calc_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_x = bertha_bar.add_prefix("bb_")
new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_x["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_x["gap"] = bertha_bar["open"] / yesterday_close - 1
self.x = pd.concat([new_x.dropna(), self.x])
self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}")
def calc_targets(self):
last_x = self.x.query("time == @self.Time.date()")
if len(last_x) == 0: return
tickers = list(last_x.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
if len(minute_bars) == 0: return
trading_bar = minute_bars.droplevel("time").join(last_x)
new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return
self.y = pd.concat([new_y, self.y])
self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}")
def clean_data(self):
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
def save_data(self):
mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True)
self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb")
self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y)))
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
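# Why the .split()[0] above: a QuantConnect Symbol renders as "TICKER SECURITY-ID"
# (two space-separated tokens), while DAS expects the bare ticker. A minimal
# illustration with a placeholder string:
sym = "ABCD 0123456789AB"  # placeholder "TICKER SID" rendering, not a real asset
ticker = sym.split()[0]    # -> "ABCD"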
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.40a: Improvements to DAS Integration and logging
v0.39a: Minor improvements and changes to DAS Integration
v0.38a: DAS Trader integration
v0.37a: Long Short with probability threshold
v0.36a: Long Short version (market exposure parameter)
v0.35a: CV and training improvements
v0.34a: Storing features instead of model
v0.33a: Improved email notification
v0.32a: Improved logging
v0.31a: Minor improvements and library implementation
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.40a
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import sklearn
import pandas as pd
pd.set_option('mode.use_inf_as_na', True)
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
import utils as utl
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.min_proba = self.GetParameter("min_proba", 0.1) # Minimum differential probability
self.SetStartDate(2021, 6, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.kelly = 0
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.last_training = datetime(2000, 1, 1, 0, 0, 0)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(every_day, at(0, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
self.Schedule.On(every_day, at(16, 0), self.save_data)
self.x, self.y = None, None
if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved
self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data")))
self.print(f"Loaded data {self.x.shape} {self.y.shape}")
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if (self.Time - self.last_training).days < 7 \
or self.x is None or self.y is None: return
self.clean_data()
days = self.x.index.get_level_values("time")
if len(days.unique()) <= 21: return
y_bin = self.y.apply(lambda x: +1 if x >= 0 else -1) # Using +1/-1 labels so the Kelly score covers both long and short trades
weight = self.y # TODO: How to use log with -100% returns?
model_temp = sklearn.base.clone(self.model)
x_train, x_test, y_train, y_test, w_train, w_test = train_test_split(
self.x, y_bin, weight, train_size=0.5, shuffle=False)
model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by returns
y_pred = model_temp.predict(x_test)
score = utl.kelly_score(y_test, y_pred, sample_weight=abs(w_test)) # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(weight)))
self.kelly = np.nan_to_num(score).clip(0, 1)
self.Debug(f"{self.Time} Training - Pts.: {y_bin.value_counts()}\n"
f"Kelly: {self.kelly:.1%}\n")
self.Plot("ML", "Score", self.kelly)
def enter_trades(self):
self.calc_features()
x_pred = self.x.query("time == @self.Time.date()")
if self.kelly <= 0 or len(x_pred) == 0: return
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict_proba(x_pred)[:,1],
index=x_pred.index)
self.print(f"Predictions: {y_pred.to_string()}")
for email in ["fbaldisserri@gmail.com", "hbrewer27@gmail.com"]:
self.Notify.Email(email, "Big Bertha Predictions", y_pred.to_string())
proba_desc = y_pred.sort_values(ascending=False)
proba_asc = y_pred.sort_values(ascending=True)
pairs = dict(proba=proba_desc.values-proba_asc.values,
long_sym=proba_desc.index.get_level_values("symbol"),
short_sym=proba_asc.index.get_level_values("symbol"))
pairs = pd.DataFrame.from_dict(pairs).query("proba >= @self.min_proba")
pairs.eval("pos = proba * @self.kelly", inplace=True)
if pairs["pos"].sum() >= 1: pairs["pos"] /= pairs["pos"].sum()
self.print(f"Positions {pairs}")
for _, row in pairs.iterrows():
self.SetHoldings(row["long_sym"], row["pos"]/2)
self.SetHoldings(row["short_sym"], -row["pos"]/2)
if self.LiveMode:
long_qty = self.CalculateOrderQuantity(row["long_sym"], row["pos"]/2)
utl.das_send_order(self, row["long_sym"].split()[0], quantity=long_qty)
short_qty = self.CalculateOrderQuantity(row["short_sym"], -row["pos"] / 2)
utl.das_send_order(self, row["short_sym"].split()[0], quantity=short_qty)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
if self.LiveMode: utl.das_liquidate(self)
self.calc_targets()
def calc_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_x = bertha_bar.add_prefix("bb_")
new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_x["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_x["gap"] = bertha_bar["open"] / yesterday_close - 1
self.x = pd.concat([new_x.dropna(), self.x])
self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}")
def calc_targets(self):
last_x = self.x.query("time == @self.Time.date()")
if len(last_x) == 0: return
tickers = list(last_x.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
if len(minute_bars) == 0: return
trading_bar = minute_bars.droplevel("time").join(last_x)
new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return
self.y = pd.concat([new_y, self.y])
self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}")
def clean_data(self):
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
def save_data(self):
mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True)
self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb")
self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y)))
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
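The kelly_score helper lives in utils.py, which is not part of this post. A minimal sketch of what it plausibly computes, given how train_model calls it (+1/-1 labels, absolute returns as sample weights); the signature and internals here are assumptions, not the author's code:

import numpy as np

def kelly_score(y_true, y_pred, sample_weight=None):
    # Kelly fraction f* = p - (1 - p) / b, with p the weighted hit rate and
    # b the payoff ratio (average win size / average loss size).
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    w = np.ones(len(y_true)) if sample_weight is None else np.asarray(sample_weight, dtype=float)
    hits = y_true == y_pred
    if hits.all():
        return 1.0
    if not hits.any():
        return 0.0
    p = w[hits].sum() / w.sum()            # weighted win rate
    b = w[hits].mean() / w[~hits].mean()   # payoff ratio from absolute returns
    return p - (1 - p) / b                 # Kelly edge; negative means no edge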
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.41a: Added use_kelly parameter and 9am training for easier live trading
v0.40a: Improvements to DAS Integration and logging
v0.39a: Minor improvements and changes to DAS Integration
v0.38a: DAS Trader integration
v0.37a: Long Short with probability threshold
v0.36a: Long Short version (market exposure parameter)
v0.35a: CV and training improvements
v0.34a: Storing features instead of model
v0.33a: Improved email notification
v0.32a: Improved logging
v0.31a: Minor improvements and library implementation
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.41a
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import sklearn
import pandas as pd
pd.set_option('mode.use_inf_as_na', True)
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
import utils as utl
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.min_proba = self.GetParameter("min_proba", 0.1) # Minimum differential probability
self.use_kelly = self.GetParameter("use_kelly", 1) # Whether to use the kelly criterion for sizing
self.SetStartDate(2021, 6, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.kelly = 0
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.last_training = datetime(2000, 1, 1, 0, 0, 0)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(every_day, at(9, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
self.Schedule.On(every_day, at(16, 0), self.save_data)
self.x, self.y = None, None
if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved
self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data")))
self.print(f"Loaded data {self.x.shape} {self.y.shape}")
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if (self.Time - self.last_training).days < 7 \
or self.x is None or self.y is None: return
self.clean_data()
days = self.x.index.get_level_values("time")
if len(days.unique()) <= 21: return
y_bin = self.y.apply(lambda x: +1 if x >= 0 else -1) # +1/-1 labels so the Kelly score covers both the long and the short side
weight = self.y # TODO: How to use log with -100% returns?
model_temp = sklearn.base.clone(self.model)
x_train, x_test, y_train, y_test, w_train, w_test = train_test_split(
self.x, y_bin, weight, train_size=0.5, shuffle=False)
model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by returns
y_pred = model_temp.predict(x_test)
score = utl.kelly_score(y_test, y_pred, sample_weight=abs(w_test)) \
if self.use_kelly else 1 # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(weight)))
self.last_training = self.Time # Record the training time so the weekly retraining check is effective
self.kelly = np.nan_to_num(score).clip(0, 1)
self.Debug(f"{self.Time} Training - Pts.: {y_bin.value_counts()}\n"
f"Kelly: {self.kelly:.1%}\n")
self.Plot("ML", "Score", self.kelly)
def enter_trades(self):
self.calc_features()
x_pred = self.x.query("time == @self.Time.date()")
if self.kelly <= 0 or len(x_pred) == 0: return
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict_proba(x_pred)[:,1],
index=x_pred.index)
self.print(f"Predictions: {y_pred.to_string()}")
for email in ["fbaldisserri@gmail.com", "hbrewer27@gmail.com"]:
self.Notify.Email(email, "Big Bertha Predictions", y_pred.to_string())
proba_desc = y_pred.sort_values(ascending=False)
proba_asc = y_pred.sort_values(ascending=True)
pairs = dict(proba=proba_desc.values-proba_asc.values,
long_sym=proba_desc.index.get_level_values("symbol"),
short_sym=proba_asc.index.get_level_values("symbol"))
pairs = pd.DataFrame.from_dict(pairs).query("proba >= @self.min_proba")
pairs.eval("pos = proba * @self.kelly", inplace=True)
if pairs["pos"].sum() >= 1: pairs["pos"] /= pairs["pos"].sum() # Cap gross exposure at 100% (each pair allocates pos/2 long and pos/2 short)
self.print(f"Positions {pairs}")
for _, row in pairs.iterrows():
self.SetHoldings(row["long_sym"], row["pos"]/2)
self.SetHoldings(row["short_sym"], -row["pos"]/2)
if self.LiveMode:
long_qty = self.CalculateOrderQuantity(row["long_sym"], row["pos"]/2)
utl.das_send_order(self, row["long_sym"].split()[0], quantity=long_qty)
short_qty = self.CalculateOrderQuantity(row["short_sym"], -row["pos"] / 2)
utl.das_send_order(self, row["short_sym"].split()[0], quantity=short_qty)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
if self.LiveMode: utl.das_liquidate(self)
self.calc_targets()
def calc_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_x = bertha_bar.add_prefix("bb_")
new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_x["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_x["gap"] = bertha_bar["open"] / yesterday_close - 1
self.x = pd.concat([new_x.dropna(), self.x])
self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}")
def calc_targets(self):
last_x = self.x.query("time == @self.Time.date()")
if len(last_x) == 0: return
tickers = list(last_x.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
if len(minute_bars) == 0: return
trading_bar = minute_bars.droplevel("time").join(last_x)
new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return
self.y = pd.concat([new_y, self.y])
self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}")
def clean_data(self):
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
def save_data(self):
mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True)
self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb")
self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y)))
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
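A toy run of the pairing logic in enter_trades above, with made-up probabilities (the real series carries a (symbol, time) MultiIndex, hence the get_level_values calls):

import pandas as pd

y_pred = pd.Series({"AAA": 0.8, "BBB": 0.6, "CCC": 0.4, "DDD": 0.1})  # P(up) per symbol
desc = y_pred.sort_values(ascending=False)  # long candidates, strongest first
asc = y_pred.sort_values(ascending=True)    # short candidates, weakest first
pairs = pd.DataFrame({"proba": desc.values - asc.values,
                      "long_sym": desc.index, "short_sym": asc.index})
print(pairs.query("proba >= 0.1"))  # min_proba filter
#    proba long_sym short_sym
# 0    0.7      AAA       DDD
# 1    0.2      BBB       CCC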
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.42a: Improved positions calculation for DAS Integration
v0.41a: Added use_kelly parameter and 9am training for easier live trading
v0.40a: Improvements to DAS Integration and logging
v0.39a: Minor improvements and changes to DAS Integration
v0.38a: DAS Trader integration
v0.37a: Long Short with probability threshold
v0.36a: Long Short version (market exposure parameter)
v0.35a: CV and training improvements
v0.34a: Storing features instead of model
v0.33a: Improved email notification
v0.32a: Improved logging
v0.31a: Minor improvements and library implementation
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.42a
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import sklearn
import pandas as pd
pd.set_option('mode.use_inf_as_na', True)
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
import utils as utl
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.capital = self.GetParameter("capital", 800000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.min_proba = self.GetParameter("min_proba", 0.1) # Minimum differential probability
self.use_kelly = self.GetParameter("use_kelly", 1) # Whether to use the kelly criterion for sizing
self.SetStartDate(2021, 6, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.kelly = 0
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.last_training = datetime(2000, 1, 1, 0, 0, 0)
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.Train(every_day, at(9, 0), self.train_model)
self.Schedule.On(every_day, at(9, 35), self.enter_trades)
self.Schedule.On(every_day, at(15, 55), self.exit_trades)
self.Schedule.On(every_day, at(16, 0), self.save_data)
self.x, self.y = None, None
if self.LiveMode and self.ObjectStore.ContainsKey("data"): # Load data when live and saved
self.x, self.y = pickle.loads(bytes(self.ObjectStore.ReadBytes("data")))
self.print(f"Loaded data {self.x.shape} {self.y.shape}")
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if (self.Time - self.last_training).days < 7 \
or self.x is None or self.y is None: return
self.clean_data()
days = self.x.index.get_level_values("time")
if len(days.unique()) <= 21: return
y_bin = self.y.apply(lambda x: +1 if x >= 0 else -1) # +1/-1 labels so the Kelly score covers both the long and the short side
weight = self.y # TODO: How to use log with -100% returns?
model_temp = sklearn.base.clone(self.model)
x_train, x_test, y_train, y_test, w_train, w_test = train_test_split(
self.x, y_bin, weight, train_size=0.5, shuffle=False)
model_temp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by returns
y_pred = model_temp.predict(x_test)
score = utl.kelly_score(y_test, y_pred, sample_weight=abs(w_test)) \
if self.use_kelly else 1 # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(weight)))
self.last_training = self.Time # Record the training time so the weekly retraining check is effective
self.kelly = np.nan_to_num(score).clip(0, 1)
self.Debug(f"{self.Time} Training - Pts.: {y_bin.value_counts()}\n"
f"Kelly: {self.kelly:.1%}\n")
self.Plot("ML", "Score", self.kelly)
def enter_trades(self):
self.calc_features()
x_pred = self.x.query("time == @self.Time.date()")
if self.kelly <= 0 or len(x_pred) == 0: return
x_pred.index = x_pred.index.droplevel("time")
y_pred = pd.Series(self.model.predict_proba(x_pred)[:,1],
index=x_pred.index)
self.print(f"Predictions: {y_pred.to_string()}")
for email in ["fbaldisserri@gmail.com", "hbrewer27@gmail.com"]:
self.Notify.Email(email, "Big Bertha Predictions", y_pred.to_string())
proba_desc = y_pred.sort_values(ascending=False)
proba_asc = y_pred.sort_values(ascending=True)
pairs = dict(proba=proba_desc.values-proba_asc.values,
long_sym=proba_desc.index.get_level_values("symbol"),
short_sym=proba_asc.index.get_level_values("symbol"))
pairs = pd.DataFrame.from_dict(pairs).query("proba >= @self.min_proba")
pairs.eval("pos = proba * @self.kelly", inplace=True)
if pairs["pos"].sum() >= 1: pairs["pos"] /= pairs["pos"].sum() # Cap gross exposure at 100% (each pair allocates pos/2 long and pos/2 short)
self.print(f"Positions {pairs}")
for _, row in pairs.iterrows():
order_value = self.Portfolio.TotalPortfolioValue * row["pos"] / 2 # Dollar value per leg, used below for the DAS share quantities
self.SetHoldings(row["long_sym"], row["pos"]/2) # SetHoldings expects a portfolio fraction, not a dollar value
self.SetHoldings(row["short_sym"], -row["pos"]/2)
if self.LiveMode:
long_qty = int(order_value/self.Securities[row["long_sym"]].Price)
utl.das_send_order(self, row["long_sym"].split()[0], quantity=long_qty)
short_qty = int(order_value/self.Securities[row["short_sym"]].Price)
utl.das_send_order(self, row["short_sym"].split()[0], quantity=short_qty)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
if self.LiveMode: utl.das_liquidate(self)
self.calc_targets()
def calc_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_x = bertha_bar.add_prefix("bb_")
new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_x["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_x["gap"] = bertha_bar["open"] / yesterday_close - 1
self.x = pd.concat([new_x.dropna(), self.x])
self.print(f"Stored features, total/new: {len(self.x)}/{len(new_x.dropna())}")
def calc_targets(self):
last_x = self.x.query("time == @self.Time.date()")
if len(last_x) == 0: return
tickers = list(last_x.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
if len(minute_bars) == 0: return
trading_bar = minute_bars.droplevel("time").join(last_x)
new_y = trading_bar.eval("close / bb_close - 1").dropna() # Calculate the trading return
self.y = pd.concat([new_y, self.y])
self.print(f"Stored targets, total/new: {len(self.y)}/{len(new_y)}")
def clean_data(self):
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
def save_data(self):
mem_x, mem_y = self.x.memory_usage(deep=True).sum(), self.y.memory_usage(deep=True)
self.print(f"Memory Used: {(mem_x + mem_y) / 10 ** 6:.2f} mb")
self.ObjectStore.SaveBytes("data", pickle.dumps((self.x, self.y)))
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
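The Big Bertha features built in calc_features reduce to simple bar geometry; a worked example with made-up prices:

open_, high, low, close = 10.0, 11.0, 9.5, 10.8  # 9:31 to entry-time "Big Bertha" bar
prev_close = 9.8                                 # previous daily close
bb_size = (high - low) / open_                   # 0.150: bar range relative to the open
bb_close_range = (close - low) / (high - low)    # 0.867: where the close sits in the range
bb_open_range = (open_ - low) / (high - low)     # 0.333: where the open sits in the range
gap = open_ / prev_close - 1                     # 0.020: overnight gap vs yesterday's close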
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.29
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import pickle
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.dummy import DummyClassifier
from sklearn.cluster import DBSCAN
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, cross_val_score
import indicators as idx
from timeseriescv import TimeSeriesSplitGroups
pd.set_option('mode.use_inf_as_na', True)
GROUPER = [pd.Grouper(level="symbol"), pd.Grouper(level="time", freq="1D")]
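# GROUPER + AGG_OPS (below) collapse minute bars into one OHLCV bar per
# (symbol, day): first open, last close, max high, min low, summed volume.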
AGG_OPS = {"open": "first", "close": "last", "high": "max",
"low": "min", "volume": "sum"}
def catch_errors(func):
def wrap(self, *args, **kwargs):
try:
result = func(self, *args, **kwargs)
return result
except (KeyError, ValueError) as e:
self.print(e)
return
return wrap
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.target_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation
self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored
self.min_proba = self.GetParameter("min_proba", 0.5) # Threshold probability to trigger a long/short signal
self.entry_mns = self.GetParameter("entry_mn", 5) # Entry time (minutes after 9.30)
self.exit_mns = self.GetParameter("exit_mn", 385) # Exit time (minutes after 9.30)
self.sl_retr = self.GetParameter("sl_retr", 0) # Stop-loss retracement of the bar range (0 disables the SL check in calc_target)
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 9, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
#self.ObjectStore.Delete(self.store_model) # Deleting existing data
if self.store_model is not None and self.ObjectStore.ContainsKey(self.store_model):
self.model = pickle.loads(bytes(self.ObjectStore.ReadBytes(self.store_model)))
else:
pipe = Pipeline([("scaling", None),
("clustering", None),
("model", LogisticRegression())])
params = dict(scaling=[None, StandardScaler()],
clustering=[None, DBSCAN()], # NB: DBSCAN has no transform(), so a Pipeline cannot run it as an intermediate step
model=[DummyClassifier(),
GradientBoostingClassifier(n_iter_no_change=3)])
self.model = GridSearchCV(pipe, param_grid=params)
self.model.kelly = 0 # Initialise the attribute read in enter_trades (model.kelly)
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.features, self.targets = None, None
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
entry_hr, entry_mn = 9 + self.entry_mns // 60, 30 + self.entry_mns % 60
exit_hr, exit_mn = 9 + self.exit_mns // 60, 30 + self.exit_mns % 60
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(entry_hr, entry_mn), self.enter_trades)
self.Schedule.On(every_day, at(exit_hr, exit_mn), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if
x.HasFundamentalData and
x.DollarVolume > self.min_usd_volume]
def train_model(self):
if self.features is None or self.targets is None: return
idx = self.features.index.intersection(self.targets.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)]
self.features = self.features.loc[idx]
self.targets = self.targets.loc[idx]
training_days = idx.get_level_values("time")
if len(training_days.unique()) <= 21: return # Require more than one month of training data
cv_scores = cross_val_score(self.model, X=self.features, y=self.targets,
cv=self.cv, groups=training_days,
scoring="balanced_accuracy")
self.model.fit(self.features, self.targets)
if self.store_model is not None:
self.ObjectStore.SaveBytes(self.store_model, pickle.dumps(self.model))
score = np.mean(cv_scores)
n_classes = len(self.model.classes_)
self.model.kelly = (n_classes * score - 1) / (n_classes - 1) # Kelly edge calculation with multiple classes
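# Sanity check: a random classifier scores balanced accuracy 1/n_classes,
# giving edge (n * 1/n - 1) / (n - 1) = 0; a perfect one scores 1, giving
# (n - 1) / (n - 1) = 1. The formula rescales [1/n, 1] onto [0, 1].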
self.print(f"Training: {self.targets.value_counts()} Edge:{self.model.kelly:.1%}")
self.print(f"Best model: {self.model.best_estimator_}")
self.Log(pd.DataFrame(self.model.cv_results_).to_string())
self.Plot("ML", "Edge", self.model.kelly)
def enter_trades(self):
self.store_features()
if self.model.kelly <= 0: return
x_pred = self.features.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
y_pred = y_proba.idxmax(axis=1)
scaling = 1 / (1 - self.min_proba)
sizes = (y_proba.max(axis=1) - self.min_proba).clip(0, 1) * scaling # Selecting only prob > min_proba and scaling
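# e.g. with min_proba = 0.5: p = 0.5 -> size 0.0, p = 0.75 -> 0.5, p = 1.0 -> 1.0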
positions = y_pred * sizes * self.model.kelly # Sizing based on Kelly and individual probability
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty)
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
@catch_errors
def store_features(self):
start = self.Time.replace(hour=7, minute=1, second=0, microsecond=0)
tickers = list(self.ActiveSecurities.Keys)
last_minute = self.Time.replace(second=0, microsecond=0)
minute_bars = self.History(tickers, start, last_minute, Resolution.Minute)
pm_bar = agg_bars(minute_bars, "07:01", "09:30")
entry_hr, entry_mn = last_minute.hour, last_minute.minute
bertha_bar = agg_bars(minute_bars, "09:31", f"{entry_hr}:{entry_mn}")
new_features = bertha_bar.add_prefix("bb_")
new_features.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_features.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_features.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_features["pm_volume_usd"] = pm_bar.eval("close * volume")
yesterday_bar = self.History(tickers, 1, Resolution.Daily)
yesterday_close = yesterday_bar["close"].droplevel("time")
new_features["gap"] = bertha_bar["open"] / yesterday_close - 1
self.features = pd.concat([new_features.dropna(), self.features])
self.print(f"Stored new features, total: {len(self.features)}")
@catch_errors
def store_targets(self):
last_features = self.features.query("time == @self.Time.date()")
self.Log(last_features)
tickers = list(last_features.index.get_level_values("symbol"))
last_minute = self.Time.replace(second=0, microsecond=0)
self.Log(f"Target time: {last_minute}")
minute_bars = self.History(tickers, last_minute - timedelta(minutes=1),
last_minute, Resolution.Minute)
self.Log(minute_bars)
trading_bar = minute_bars.droplevel("time").join(last_features)
new_targets = trading_bar.apply(self.calc_target, axis=1)
self.targets = pd.concat([new_targets.dropna(), self.targets])
self.print(f"Stored new targets, total: {len(self.targets)}")
def calc_target(self, price_bar):
entry_price, exit_price = price_bar.bb_close, price_bar.close
window = (price_bar.bb_high - price_bar.bb_low)
if exit_price >= entry_price * (1 + self.target_gain) and self.strategy >= 0: # long trade
if self.sl_retr > 0:
stop_loss = price_bar.bb_high - window * self.sl_retr
return +1 if price_bar.low > stop_loss else 0 # 1 if profitable long and not touching the SL
else:
return +1
elif exit_price <= entry_price * (1 - self.target_gain) and self.strategy <= 0: # short trade
if self.sl_retr > 0:
stop_loss = price_bar.bb_low + window * self.sl_retr
return -1 if price_bar.high < stop_loss else 0 # -1 if profitable short and not touching the SL
else:
return -1
else:
return 0
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
def agg_bars(minute_bars, start_time, end_time):
filtered_bars = idx.filter_bars(minute_bars, start_time, end_time)
return filtered_bars.groupby(GROUPER).agg(AGG_OPS)
"""
Big Bertha Strategy with Machine Learning
Last changes:
v0.34a: Backward features creation and no need for pre-trained model
v0.33a: Improved email notification
v0.32a: Improved logging
v0.31a: Minor improvements and library implementation
v0.30a: New cv metric based on Kelly criterion
v0.29a: Sample weight for training points (no TP/SL and gridsearch for performance)
v0.29: Parameter for entry/exit time
v0.28: Gridsearch pipeline with multiple models
v0.27: Minimum probability parameter
v0.26: Added both TP and SL capabilities (naive triple barrier targets)
v0.25: Individual probability-based sizing (in addition to general Kelly sizing)
v0.24: Offline model storage
v0.23: Lookback parameter
@version: 0.34a
@creation date: 05/07/2022
"""
from AlgorithmImports import *
import sklearn
import pandas as pd
pd.set_option('mode.use_inf_as_na', True)
from sklearn.ensemble import GradientBoostingClassifier
import utils as utl
from timeseriescv import TimeSeriesSplitGroups
class BigBerthaML(QCAlgorithm):
def Initialize(self):
self.min_usd_volume = self.GetParameter("min_usd_volume", 1e9) # Minimum trading volume in previous trading day
self.tgt_gain = self.GetParameter("target_gain", 0.05) # Minimum target gain to enter the trade
self.capital = self.GetParameter("capital", 80000) # Starting capital
self.lookback = self.GetParameter("lookback", 365) # Trading days used for model training
self.strategy = self.GetParameter("strategy", 0) # -1 short only, +1 long only, 0 long/short
self.benchmark = self.GetParameter("benchmark", "SPY") # Performance benchmark
self.cv_splits = self.GetParameter("cv_splits", 10) # Number of splits for model cross validation
self.store_model = self.GetParameter("store_model", None) # Model name if it needs to be stored
self.entry_mns = self.GetParameter("entry_mn", 5) # Entry time (minutes after 9.30)
self.exit_mns = self.GetParameter("exit_mn", 385) # Exit time (minutes after 9.30)
self.SetStartDate(2021, 6, 1)
self.SetEndDate(2022, 11, 1)
self.SetCash(self.capital)
self.UniverseSettings.Resolution = Resolution.Minute
self.UniverseSettings.ExtendedMarketHours = True
self.AddUniverse(self.coarse_filter)
self.AddEquity(self.benchmark, Resolution.Minute)
self.SetBenchmark(self.benchmark)
self.model = GradientBoostingClassifier(n_iter_no_change=3)
self.kelly = 0
self.cv = TimeSeriesSplitGroups(n_splits=self.cv_splits)
self.x, self.y = None, None
at = self.TimeRules.At
every_day = self.DateRules.EveryDay(self.benchmark)
self.entry_hr, self.entry_mn = 9 + self.entry_mns // 60, 30 + self.entry_mns % 60
self.exit_hr, self.exit_mn = 9 + self.exit_mns // 60, 30 + self.exit_mns % 60
self.Train(self.DateRules.WeekStart(), at(0, 0), self.train_model)
self.Schedule.On(every_day, at(self.entry_hr, self.entry_mn), self.enter_trades)
self.Schedule.On(every_day, at(self.exit_hr, self.exit_mn), self.exit_trades)
def coarse_filter(self, coarse):
return [x.Symbol for x in coarse if x.HasFundamentalData]
def train_model(self):
if self.x is None or self.y is None: return
idx = self.x.index.intersection(self.y.index) # Removing features without matching targets
idx = idx[idx.get_level_values("time") > self.Time - timedelta(self.lookback)] # keeping only last X days
self.x, self.y = self.x.loc[idx], self.y.loc[idx]
days = idx.get_level_values("time")
if len(days.unique()) <= self.cv_splits: return # Days required for CV
y_bin = self.y.apply(lambda x: +1 if x >= self.tgt_gain and self.strategy >= 0 else
-1 if x <= -self.tgt_gain and self.strategy <= 0 else 0)
# Replace cross_val_score with custom loop to use sample weighted metrics
scores = []
for train_idx, test_idx in self.cv.split(self.x, groups=days):
model_tmp = sklearn.base.clone(self.model)
x_train, x_test = self.x.iloc[train_idx], self.x.iloc[test_idx]
y_train, y_test = y_bin.iloc[train_idx], y_bin.iloc[test_idx]
w_train, w_test = self.y.iloc[train_idx], self.y.iloc[test_idx]
model_tmp.fit(x_train, y_train, sample_weight=abs(np.log1p(w_train))) # Training weighted by log returns
y_pred = model_tmp.predict(x_test)
scores += [utl.kelly_score(y_test, y_pred, returns=abs(w_test))] # Kelly calculated with normal returns
self.model.fit(self.x, y_bin, sample_weight=abs(np.log1p(self.y)))
self.kelly = np.clip(np.nanmean(scores), 0, 1)
self.print(f"Training: {y_bin.value_counts()} Kelly:{self.kelly:.1%}")
self.Plot("ML", "Kelly", self.kelly)
def enter_trades(self):
self.store_features()
if self.kelly <= 0: return
x_pred = self.x.query("time == @self.Time.date()")
x_pred.index = x_pred.index.droplevel("time")
y_proba = pd.DataFrame(self.model.predict_proba(x_pred),
index=x_pred.index,
columns=self.model.classes_)
self.print(f"Predictions: {y_proba.to_string()}")
self.Notify.Email("fbaldisserri@gmail.com", "Big Bertha Predictions", y_proba.to_html())
self.Notify.Email("hbrewer27@gmail.com", "Big Bertha Predictions", y_proba.to_html())
y_pred = y_proba.idxmax(axis=1)
sizes = (y_proba.max(axis=1) - 0.5).clip(0, 1) * 2 # Keeping only prob > 0.5 and rescaling to [0, 1] (fixed threshold in this version)
positions = y_pred * sizes * self.kelly # Sizing based on Kelly and individual probability
if sum(abs(positions)) > 1: positions /= sum(abs(positions)) # Ensuring no leverage is used
self.print(f"Trading: {y_pred.value_counts()}")
for symbol, pos in positions[positions != 0].items():
qty = self.CalculateOrderQuantity(symbol, pos)
self.MarketOrder(symbol, qty) # TODO: Change to Limit Order?
def exit_trades(self):
self.Transactions.CancelOpenOrders()
self.Liquidate()
self.store_targets()
@utl.catch_errors
def store_features(self):
tickers = list(self.ActiveSecurities.Keys)
#tickers = list(np.random.choice(tickers, size=100, replace=False))
today = self.Time.replace(hour=0, minute=0, second=0, microsecond=0)
start_day = today - timedelta(self.lookback) if self.x is None else \
self.x.index.get_level_values("time").max()
day_bars = self.History(tickers, start_day, today, Resolution.Daily)
time_idx = day_bars.index.get_level_values("time").shift(-1, freq="D")
symbol_idx = day_bars.index.get_level_values("symbol")
day_bars.set_index([symbol_idx, time_idx], inplace=True)
today_start = today.replace(hour=9, minute=30)
today_stop = today.replace(hour=self.entry_hr, minute=self.entry_mn)
today_bar = self.History(tickers, today_start, today_stop, Resolution.Minute)
day_bars = day_bars.append(utl.agg_bars(today_bar)).groupby("symbol").shift(1)
valid_bars = day_bars.query("close * volume >= @self.min_usd_volume")
universe = valid_bars.reset_index().groupby("time")["symbol"].apply(list)
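# The shift(1) two lines up re-stamps each row with the previous session's bar,
# so the close * volume filter reproduces the live coarse universe
# (previous-day dollar volume) for every historical day; universe maps each
# day to its qualifying symbols.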
for day, symbols in universe.items():
start = day.replace(hour=7, minute=1)
last_minute = day.replace(hour=self.entry_hr, minute=self.entry_mn)
minute_bars = self.History(symbols, start, last_minute, Resolution.Minute)
if len(minute_bars) == 0: continue
pm_bar = utl.agg_bars(minute_bars, "07:01", "09:30")
bertha_bar = utl.agg_bars(minute_bars, "09:31", f"{self.entry_hr}:{self.entry_mn}") # TODO: Check 9:31 filter
new_x = bertha_bar.add_prefix("bb_")
new_x.eval("bb_size = (bb_high-bb_low)/bb_open", inplace=True)
new_x.eval("bb_close_range = (bb_close-bb_low)/(bb_high-bb_low)", inplace=True)
new_x.eval("bb_open_range = (bb_open-bb_low)/(bb_high-bb_low)", inplace=True)
new_x["pm_volume_usd"] = pm_bar.eval("close * volume")
"""prev_day_bar = self.History(tickers, 1, Resolution.Daily)
prev_day_close = prev_day_bar["close"].droplevel("time")
new_x["gap"] = bertha_bar["open"] / prev_day_close - 1"""
new_x = new_x.dropna()
self.x = pd.concat([new_x, self.x])
self.print(f"Stored features, total: {len(self.x)}")
@utl.catch_errors
def store_targets(self):
last_x = self.x if self.y is None else \
self.x.loc[self.x.index.difference(self.y.index)]
universe = last_x.reset_index().groupby("time")["symbol"].apply(list)
for day, symbols in universe.items():
entry_time = day.replace(hour=self.entry_hr, minute=self.entry_mn)
exit_time = day.replace(hour=self.exit_hr, minute=self.exit_mn)
minute_bars = self.History(symbols, entry_time, exit_time, Resolution.Minute)
trading_bar = utl.agg_bars(minute_bars)
new_y = trading_bar.eval("close / open - 1").dropna() # Calculate the trading return
self.y = pd.concat([new_y, self.y])
self.print(f"Stored targets, total: {len(self.y)}")
def print(self, msg):
self.Debug(f"{self.Time} {msg}")
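timeseriescv is another local module not included in the post. Judging from its use above (split over the features with groups=days and chronological folds), TimeSeriesSplitGroups is presumably a forward-chaining splitter over unique trading days; a minimal sketch under that assumption, not the author's implementation:

import numpy as np

class TimeSeriesSplitGroups:
    def __init__(self, n_splits=10):
        self.n_splits = n_splits

    def split(self, X, y=None, groups=None):
        # Forward-chaining folds over ordered unique groups (trading days):
        # each fold trains on all earlier days and tests on the next block.
        days = np.unique(groups)  # np.unique also sorts
        folds = np.array_split(days, self.n_splits + 1)
        for i in range(1, self.n_splits + 1):
            train_days = np.concatenate(folds[:i])
            yield (np.flatnonzero(np.isin(groups, train_days)),
                   np.flatnonzero(np.isin(groups, folds[i])))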