# Overall Statistics
"""
Machine Learning Algorithm with the option to use meta-labeling for position sizing

@email: info@beawai.com
@creation date: 10/01/2023
"""

from AlgorithmImports import *

import copy
import random
import pandas as pd
pd.set_option('mode.use_inf_as_na', True)
from sklearn.exceptions import NotFittedError
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier


class MLCryptoAlgo(QCAlgorithm):
    """
    Machine-learning algorithm with optional meta-labeling for position sizing.

    A main gradient-boosting classifier predicts whether the next return is
    positive (side). When ``use_meta`` is truthy, a second classifier is
    trained to predict whether the main model is right, and its probability
    is used as the position size.
    """

    def Initialize(self):
        """ Configure parameters, the traded asset, the models and the schedules """
        self.SetStartDate(2008, 1, 1)
        self.SetCash(100000)
        self.lookback = self.GetParameter("lookback", 21)
        self.use_meta = self.GetParameter("use_meta", 1)  # Whether to use the meta-model for position sizing
        self.seed = self.GetParameter("seed", 42)

        random.seed(self.seed)
        self.training_len = 252 * 100
        self.resolution = Resolution.Daily
        self.ticker = "SPY"
        self.AddEquity(self.ticker, self.resolution)

        # scikit-learn does not use Python's `random` module, so the seed must be
        # passed explicitly; with n_iter_no_change set, the estimator holds out a
        # random validation split and is not reproducible without random_state.
        random_state = int(self.seed)
        self.main_model = GradientBoostingClassifier(
            n_iter_no_change=1, random_state=random_state)  # Model to predict direction (side)
        self.meta_model = GradientBoostingClassifier(
            n_iter_no_change=1, random_state=random_state)  # Meta-model to predict position (size)

        self.Train(self.DateRules.WeekStart(), self.TimeRules.At(9, 0), self.train)
        every_day = self.DateRules.EveryDay(self.ticker)
        at_market_open = self.TimeRules.AfterMarketOpen(self.ticker, 0)
        self.Schedule.On(every_day, at_market_open, self.trade)

    def train(self):
        """
        Fit the main model and, when meta-labeling is enabled, the meta-model.

        With meta-labeling the history is split in half without shuffling: the
        first half fits the main model, the second half is scored by the main
        model to build the meta-model's training set.
        """
        x, y = self.get_main_data(self.training_len, include_y=True)
        if x.empty:
            # Nothing to fit on; leave the models as they are
            self.Debug(f"{self.Time} No training data available; skipping training")
            return
        # Train models
        if self.use_meta:
            x_main, x_meta, y_main, y_meta = train_test_split(x, y, test_size=0.5, shuffle=False)
            self.main_model.fit(x_main, y_main)
            self.Debug(f"{self.Time} Main model score {self.main_model.score(x_main, y_main)}")
            x_meta, y_meta = self.get_meta_data(x_meta, y_meta)
            self.meta_model.fit(x_meta, y_meta)
            self.Debug(f"{self.Time} Meta model score {self.meta_model.score(x_meta, y_meta)}")
        else:
            self.main_model.fit(x, y)
            self.Debug(f"{self.Time} Main model score {self.main_model.score(x, y)}")

    def trade(self):
        """
        Cancel open orders, predict on the latest datapoint and set the holdings.

        Returns without trading when no features can be built or when a
        required model has not been fitted yet.
        """
        self.Transactions.CancelOpenOrders()
        x_main = self.get_main_data(self.lookback + 1, include_y=False).tail(1)  # getting last datapoint for prediction
        if x_main.empty:
            self.Debug(f"{self.Time} No features available; skipping trade")
            return

        y_meta = None
        try:
            y_main_proba = self.main_model.predict_proba(x_main)[:, 1]
            if self.use_meta:
                # The meta-model can also be unfitted, so predict inside the same guard
                x_meta = self.get_meta_data(x_main)
                y_meta = self.meta_model.predict_proba(x_meta)[:, 1]
        except NotFittedError as e:
            self.Debug(str(e))
            return

        if self.use_meta:  # using meta-model to decide the position size
            y_main = y_main_proba > 0.5  # Buy signal from main model
            y_pred = (y_meta * y_main)[0]  # Combining Buy signal with position size
            self.Debug(f"{self.Time} Predictions: Main {y_main} - Meta {y_meta}")
        else:  # using the main model probability for the position size
            y_pred = y_main_proba[0]
            self.Debug(f"{self.Time} Predictions: Main {y_main_proba}")
        # Pass a plain Python float rather than a NumPy array/scalar
        self.SetHoldings(self.ticker, float(y_pred))

    def get_main_data(self, datapoints, include_y=True):
        """
        Get features and target for the main model

        Features are the open-to-open percentage change, the (high-low)/close
        range and volume * close, each repeated for ``self.lookback`` lags.

        :param datapoints: number of history bars to request
        :param include_y: when True also return the binary target
        :return: ``x`` when ``include_y`` is False, otherwise ``(x, y)`` where
                 ``y`` is True when the next row's return is positive
        """
        tickers = list(self.ActiveSecurities.Keys)
        data = self.History(tickers, datapoints, self.resolution)
        if data.empty:
            # No history returned: hand back empty containers of the expected kind
            empty_x = pd.DataFrame()
            if include_y:
                return empty_x, pd.Series(dtype=bool)
            return empty_x
        features = data["open"].pct_change().to_frame("returns")
        features["range"] = data.eval("(high-low)/close")
        features["volume_usd"] = data.eval("volume * close")
        # One copy of the features per lag; rows with any missing lag are dropped
        x = pd.concat([features.shift(s) for s in range(self.lookback)],
                      axis=1).dropna()
        if include_y:
            # The last row has no next return, so it is dropped from both x and y
            y = features["returns"].shift(-1).reindex_like(x).dropna()
            return x.loc[y.index], y > 0  # the target of the main model is binary (up/down)
        else:
            return x

    def get_meta_data(self, x_main, y_main_true=None):
        """
        Get features and target for the meta-model
        Using the same features as the main model plus the main model
        predicted probability and whether it was right as a target

        :param x_main: features of the main model (left unmodified)
        :param y_main_true: true targets of the main model; when None only
                            the meta features are returned
        :return: ``x_meta`` or ``(x_meta, y_meta)``
        """
        x_meta = x_main.copy()
        x_meta["proba"] = self.main_model.predict_proba(x_main)[:, 1]  # Adding main model prediction to meta features
        if y_main_true is None:
            return x_meta
        else:
            y_main_pred = self.main_model.predict(x_main)
            return (x_meta, y_main_pred == y_main_true)