Backtest

Overall Statistics
Total Trades 2548 Average Win 0.13% Average Loss -0.10% Compounding Annual Return 17.296% Drawdown 8.000% Expectancy 0.128 Net Profit 17.296% Sharpe Ratio 1.193 Probabilistic Sharpe Ratio 54.983% Loss Rate 51% Win Rate 49% Profit-Loss Ratio 1.31 Alpha 0.147 Beta 0.115 Annual Standard Deviation 0.125 Annual Variance 0.016 Information Ratio 0.735 Tracking Error 0.175 Treynor Ratio 1.294 Total Fees $3387.84 Estimated Strategy Capacity $2800000.00 Lowest Capacity Asset LFC SUKR265DCACL

# Time-based K-fold splitter that keeps each group intact (the alpha models pass bar timestamps as groups)
import numpy as np
from sklearn.model_selection._split import _BaseKFold, indexable, _num_samples


class TimeSeriesSplitGroups(_BaseKFold):
    """Expanding-window cross-validator that splits on whole groups.

    The unique group labels are sorted and treated as the time axis: every
    test fold is a run of consecutive labels and its training set contains
    all samples whose label sorts before that run.  Samples sharing a label
    therefore never end up on both sides of a split.
    """

    def __init__(self, n_splits=5):
        super().__init__(n_splits, shuffle=False, random_state=None)

    def split(self, X, y=None, groups=None):
        """Generate (train, test) positional index arrays.

        Folds are yielded from the most recent test window to the oldest.

        Args:
            X: Samples; only the number of samples is used.
            y: Unused, accepted for API compatibility.
            groups: One orderable label per sample (for example the bar
                timestamp).  Any array-like is accepted: pandas Index or
                Series, numpy array or list.

        Yields:
            Tuples ``(train_indices, test_indices)`` of integer arrays.

        Raises:
            ValueError: If ``groups`` is missing, or if there are fewer
                distinct groups than ``n_splits + 1``.
        """
        if groups is None:
            raise ValueError("The 'groups' parameter should not be None.")
        X, y, groups = indexable(X, y, groups)
        n_samples = _num_samples(X)
        n_folds = self.n_splits + 1
        # codes[i] is the position of sample i's label among the sorted unique
        # labels, so group membership becomes a plain integer comparison and
        # no longer relies on the pandas-only ``isin`` method.
        group_list, codes = np.unique(np.asarray(groups), return_inverse=True)
        codes = np.ravel(codes)
        n_groups = len(group_list)
        if n_folds > n_groups:
            raise ValueError(
                ("Cannot have number of folds={0} greater"
                 " than the number of groups: {1}.").format(n_folds,
                                                            n_groups))
        indices = np.arange(n_samples)
        test_size = n_groups // n_folds
        # The first window absorbs the remainder so that all test folds have
        # exactly ``test_size`` groups.
        test_starts = range(test_size + n_groups % n_folds,
                            n_groups, test_size)
        for test_start in reversed(test_starts):
            test_stop = test_start + test_size
            in_train = codes < test_start
            in_test = (codes >= test_start) & (codes < test_stop)
            yield indices[in_train], indices[in_test]

# Technical ML Alpha Model with persistent object storage
from clr import AddReference

AddReference("QuantConnect.Common")
AddReference("QuantConnect.Algorithm")
AddReference("QuantConnect.Algorithm.Framework")

from QuantConnect import *
from QuantConnect.Algorithm import *
from QuantConnect.Algorithm.Framework import *
from QuantConnect.Algorithm.Framework.Alphas import AlphaModel, Insight, InsightDirection

import pickle
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import RandomizedSearchCV

import timeseriessplitgroups as tss

# Pipeline stages handed to sklearn's Pipeline: PCA followed by an MLP classifier.
STEPS = [
    ("pca", PCA()),
    ("mlp", MLPClassifier(
        solver="adam",
        max_iter=100,
        early_stopping=True,
        validation_fraction=0.2,
        n_iter_no_change=1,
        warm_start=True,
    )),
]
# Hyper-parameter candidates for the search, keyed as "<step>__<parameter>".
PARAMS = {
    "pca__n_components": [None, 0.9],
    "mlp__activation": ["logistic", "relu"],
    "mlp__alpha": [0.1, 0.01, 0.001, 0.0001, 0],
    "mlp__hidden_layer_sizes": [[96], [48, 48], [32, 32, 32]],
}


class MLTechnical(AlphaModel):
    """Alpha model driven by a cross-validated PCA + MLP classifier.

    The classifier estimates the probability that a security's next bar
    closes higher, using momentum and moving-average-normalised features of
    close, high-low range and volume.  The fitted search object is pickled
    into the algorithm's ObjectStore and reloaded on construction.
    """

    # Probability of an up move above which an insight is labelled Up.
    UP_THRESHOLD = 0.5

    def __init__(self, algorithm, model_key="mltechnical06"):
        """Store the algorithm handle and load a saved model if one exists.

        Args:
            algorithm: The QCAlgorithm instance used for history, logging,
                plotting and object storage.
            model_key: ObjectStore key under which the model is persisted.
        """
        self.algorithm = algorithm
        self.resolution = algorithm.UniverseSettings.Resolution
        self.model_key = model_key
        self.model = None
        # To discard the stored model: algorithm.ObjectStore.Delete(model_key)
        if algorithm.ObjectStore.ContainsKey(model_key):
            # NOTE(review): unpickling runs arbitrary code; this is only safe
            # while the ObjectStore content is written by this project alone.
            model_buffer = algorithm.ObjectStore.ReadBytes(model_key)
            self.model = pickle.loads(bytes(model_buffer))
        self.lookbacks = [1, 5, 10, 21]

    def Update(self, algorithm, data):
        """Emit one one-day price insight per predicted row.

        Args:
            algorithm: Unused; the instance given to ``__init__`` is used.
            data: Unused; features are rebuilt from a history request.

        Returns:
            List of insights, empty while no model is available.
        """
        insights = []
        if self.model is not None:
            predictions = self.predict()
            for idx, signal in predictions["Signal"].items():
                symbol = self.algorithm.Symbol(idx[0])
                # The signal is P(up) in [0, 1]; comparing it with 0 made
                # every insight Up.  The magnitude stays the raw probability,
                # so ordering by (direction, magnitude) still equals ordering
                # by probability.
                if signal > self.UP_THRESHOLD:
                    direction = InsightDirection.Up
                else:
                    direction = InsightDirection.Down
                insights.append(Insight.Price(symbol, timedelta(days=1),
                                              direction, float(signal), None))
        self.algorithm.Debug(f"{self.algorithm.Time} {len(insights)} insights")
        return insights

    def train(self):
        """Fit the randomized search on 252 bars of history and persist it.

        The search object is created on first use and refitted on later
        calls.  The best cross-validation accuracy is logged and plotted.
        """
        if self.model is None:
            cv = tss.TimeSeriesSplitGroups(n_splits=10)
            self.model = RandomizedSearchCV(Pipeline(steps=STEPS), PARAMS,
                                            scoring="accuracy", cv=cv,
                                            n_iter=10, n_jobs=1)
        x, y = self.get_data(252 * 1, include_y=True)
        # Bars sharing a timestamp form one CV group.
        groups = x.index.get_level_values("time")
        self.model.fit(x, y, groups=groups)
        self.algorithm.ObjectStore.SaveBytes(self.model_key, pickle.dumps(self.model))
        self.algorithm.Debug(f"{self.algorithm.Time} Model Validation {self.model.best_score_:.1%}")
        self.algorithm.Plot("Model", "Accuracy", self.model.best_score_)

    def predict(self):
        """Return a "Signal" frame holding P(up) for every feature row.

        Only ``max(lookbacks) + 1`` bars are requested, which is the minimum
        needed for the longest momentum feature.
        """
        x = self.get_data(max(self.lookbacks) + 1, include_y=False)
        return pd.DataFrame(self.model.predict_proba(x)[:, 1],
                            index=x.index,
                            columns=["Signal"])

    def get_data(self, datapoints=1, include_y=True):
        """Build the feature matrix (and optionally the target) from history.

        Args:
            datapoints: Number of bars requested per security.
            include_y: When true, also return the next-bar direction label.

        Returns:
            ``features`` alone, or ``(features, target)`` where ``target`` is
            1.0 when the following bar's close-to-close return is positive.
        """
        tickers = list(self.algorithm.ActiveSecurities.Keys)
        data = self.algorithm.History(tickers, datapoints, self.resolution)
        data["volatility"] = data["high"] - data["low"]
        data = data[["close", "volatility", "volume"]]
        groups = data.groupby("symbol")
        features = [groups.pct_change(p) for p in self.lookbacks]  # Momentum
        # ``transform`` always returns a frame indexed like ``data``, whereas
        # ``apply`` prepends the group key on recent pandas versions and
        # breaks the alignment of the division.
        features += [data / groups.transform(lambda x, p=p: x.rolling(p).mean())
                     for p in self.lookbacks]  # Normalized by average
        features = pd.concat(features, join="inner", axis="columns").dropna()
        if not include_y:
            return features
        # Shift within each symbol so a label can never come from another one.
        target = (groups["close"].pct_change(1)
                  .groupby(level="symbol").shift(-1))
        target = target.reindex_like(features).dropna()
        return features.loc[target.index], (target > 0).astype("float")

# First stable version

from clr import AddReference
AddReference("QuantConnect.Common")
AddReference("QuantConnect.Algorithm")
AddReference("QuantConnect.Algorithm.Framework")

from QuantConnect import *
from QuantConnect.Algorithm import *
from QuantConnect.Algorithm.Framework import *
from QuantConnect.Algorithm.Framework.Alphas import AlphaModel, Insight, InsightType, InsightDirection

import datetime as dt
from sklearn.linear_model import Ridge
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split


class MLTechnical(AlphaModel):
    """Alpha model turning a regression on past returns into price insights.

    A Ridge regression is fitted once on close-to-close returns over 1, 5, 10
    and 21 bars; its prediction for the latest bar becomes a one-day insight
    for every security.
    """

    def __init__(self, algorithm, resolution=Resolution.Daily, model=None):
        self.algorithm = algorithm
        self.resolution = resolution
        self.model = model

    def Update(self, algorithm, data):
        """Return one insight per security, or none while no model is set.

        Both arguments are unused: the algorithm stored at construction is
        queried for history instead.
        """
        insights = []
        if self.model is not None:
            latest = self.predict()
            tickers = latest.index.get_level_values("symbol")
            # ``latest`` holds a single date, hence one value per ticker.
            for ticker, signal in zip(tickers, latest["Signal"]):
                if signal > 0:
                    direction = InsightDirection.Up
                else:
                    direction = InsightDirection.Down
                insights.append(Insight.Price(self.algorithm.Symbol(ticker),
                                              timedelta(days=1), direction,
                                              abs(signal), None))
        self.algorithm.Debug(f"Generated {len(insights)} insights")
        return insights

    def OnSecuritiesChanged(self, algorithm, changes):
        """Log universe changes.

        Args:
            algorithm: The algorithm instance that experienced the change
            changes: The security additions and removals from the algorithm
        """
        self.algorithm.Debug(f"Security Changed: {changes}")

    def train(self):
        """Fit the Ridge model once; later calls do nothing."""
        if self.model is not None:
            return
        self.model = Ridge()
        x, y = self.get_data(252*10)
        y = y.dropna()
        splits = train_test_split(x.loc[y.index], y, shuffle=True)
        x_train, x_test, y_train, y_test = splits
        self.model.fit(x_train, y_train)
        self.algorithm.Debug(f"Model trained - Score {self.model.score(x_test, y_test):.2f}")

    def predict(self):
        """Predict from 22 bars of history and keep the latest date only."""
        x, _ = self.get_data(22)
        signals = pd.DataFrame(self.model.predict(x),
                               index=x.index,
                               columns=["Signal"])
        bar_times = signals.index.get_level_values("time")
        is_latest = bar_times == max(bar_times)
        return signals[is_latest]

    def get_data(self, datapoints=1):
        """Return ``(features, target)`` built from close prices.

        Features are per-symbol returns over 1, 5, 10 and 21 bars; the target
        is the one-bar return shifted back by one row and aligned to them.
        """
        tickers = list(self.algorithm.ActiveSecurities.Keys)
        history = self.algorithm.History(tickers, datapoints, self.resolution)
        closes = history["close"].groupby("symbol")
        momentum = [closes.pct_change(horizon) for horizon in (1, 5, 10, 21)]
        features = pd.concat(momentum, join="inner", axis="columns").dropna()
        next_return = closes.pct_change(1).shift(-1)
        return features, next_return.reindex(features.index)

# Flexible execution model with stop and limit price options
from clr import AddReference
AddReference("System")
AddReference("QuantConnect.Common")
AddReference("QuantConnect.Algorithm")
AddReference("QuantConnect.Algorithm.Framework")

from System import *
from QuantConnect import *
from QuantConnect.Orders import *
from QuantConnect.Algorithm import *
from QuantConnect.Algorithm.Framework import *
from QuantConnect.Algorithm.Framework.Execution import *
from QuantConnect.Algorithm.Framework.Portfolio import *

class CustomExecution(ExecutionModel):
    """Execution model placing market, limit, stop-market or stop-limit orders."""

    def __init__(self, limit_price=None, stop_price=None):
        """Initializes a new instance of the CustomExecution class.

        Equivalent to immediate (market order) execution when both
        ``limit_price`` and ``stop_price`` are None.

        Args:
            limit_price: Price margin added to the current price for buys and
                subtracted for sells.
                (Long: 0.01=101%*Current Price, -0.01=99%*Current Price
                 Short: 0.01=99%*Current Price, -0.01=101%*Current Price
                 None or 0=no limit price)
            stop_price: Fractional offset from the current price used for the
                stop trigger (None or 0=no stop price).
        """
        self.limit_price = limit_price
        self.stop_price = stop_price
        self.targetsCollection = PortfolioTargetCollection()

    def Execute(self, algorithm, targets):
        """Cancel all open orders, then order the unfilled part of each target.

        Args:
            algorithm: The algorithm instance
            targets: The portfolio targets to be ordered
        """
        for order in algorithm.Transactions.GetOpenOrders():
            algorithm.Transactions.CancelOrder(order.Id)

        self.targetsCollection.AddRange(targets)
        for target in self.targetsCollection.OrderByMarginImpact(algorithm):
            # calculate remaining quantity to be ordered
            quantity = OrderSizing.GetUnorderedQuantity(algorithm, target)
            if quantity == 0:
                # Target already reached: do not submit an empty order.
                continue
            price = algorithm.Securities[target.Symbol].Close
            sign = 1 if quantity > 0 else -1
            if self.limit_price:
                limit = price * (1 + self.limit_price * sign)
                if self.stop_price:
                    stop = price * (1 + self.stop_price * sign)
                    algorithm.StopLimitOrder(target.Symbol, quantity, stop, limit)
                else:
                    algorithm.LimitOrder(target.Symbol, quantity, limit)
            elif self.stop_price:
                # NOTE(review): the offset sign is the opposite of the
                # stop-limit branch above - confirm which one is intended.
                stop = price * (1 - self.stop_price * sign)
                algorithm.StopMarketOrder(target.Symbol, quantity, stop)
            else:
                algorithm.MarketOrder(target.Symbol, quantity)
        self.targetsCollection.ClearFulfilled(algorithm)

# Custom Portfolio with flexible Long/Short exposure and number of positions
from clr import AddReference
AddReference("QuantConnect.Common")
AddReference("QuantConnect.Algorithm.Framework")

from QuantConnect import *
from QuantConnect.Algorithm.Framework.Alphas import *
from QuantConnect.Algorithm.Framework.Portfolio import *
from itertools import groupby
from datetime import datetime, timedelta

class CustomPortfolio(PortfolioConstructionModel):
    """Equal weighted portfolio with flexible portfolio size and long/short exposure."""

    def __init__(self, long_short_ratio = 1.0, portfolio_size=1,
                 rebalance = Resolution.Daily):
        """Set up parameters for portfolio creation.

        Args:
            long_short_ratio: Fraction of the positions held long
                (1.0 = long only, 0.0 = short only).
            portfolio_size: Total number of positions; each one is given a
                weight of ``1 / portfolio_size``.
            rebalance: Rebalancing period as a Resolution, a timedelta, or a
                function mapping the current time to the next rebalance time.
        """
        self.long_short_ratio = long_short_ratio
        self.portfolio_size = portfolio_size
        self.long_pos = int(portfolio_size*long_short_ratio)
        self.short_pos = portfolio_size - self.long_pos
        self.pos_size = 1.0/portfolio_size

        # A Resolution is converted to a time span, a time span to a function,
        # and whatever function results is registered.  These must be three
        # independent checks: chained with ``elif`` the function was never
        # registered for Resolution or timedelta arguments.
        rebalancingFunc = rebalance
        if isinstance(rebalance, int):
            rebalance = Extensions.ToTimeSpan(rebalance)
        if isinstance(rebalance, timedelta):
            rebalancingFunc = lambda dt: dt + rebalance
        if rebalancingFunc:
            self.SetRebalancingFunc(rebalancingFunc)

    def DetermineTargetPercent(self, activeInsights):
        """Rank insights by direction and magnitude to identify long and short trades.

        The best ranked insights become longs, the worst ranked become shorts
        and everything in between is given a zero target.

        Args:
            activeInsights: The insights to build targets for.

        Returns:
            Dictionary mapping every insight to its target portfolio weight.
        """
        def rank_key(insight):
            magnitude = insight.Magnitude
            # An insight without magnitude ranks as the weakest of its direction.
            return (insight.Direction, 0.0 if magnitude is None else magnitude)

        rank = sorted(activeInsights, key=rank_key, reverse=True)
        n_longs = min(self.long_pos, len(rank))
        # With fewer insights than positions the short slice must neither wrap
        # around through a negative index nor overlap the longs.
        shorts_idx = max(len(rank) - self.short_pos, n_longs)
        longs = {insight: self.pos_size for insight in rank[:n_longs]}
        shorts = {insight: -self.pos_size for insight in rank[shorts_idx:]}
        exits = {insight: 0 for insight in rank[n_longs:shorts_idx]}
        return {**longs, **shorts, **exits}

# Top X stocks by market capitalization with Volume and Price > 0 and fundamentals
from clr import AddReference
AddReference("System")
AddReference("QuantConnect.Common")
AddReference("QuantConnect.Algorithm.Framework")

from QuantConnect.Data.UniverseSelection import *
from Selection.FundamentalUniverseSelectionModel import FundamentalUniverseSelectionModel


class TopMktCapStocks(FundamentalUniverseSelectionModel):
    """
    Select top stocks by market cap which have fundamentals,
    positive volume and price. The selection is refreshed once per month.
    """

    def __init__(self, size=50, filterFineData = True, universeSettings = None):
        """Universe init

        Args:
            size: Maximum number of stocks kept in the universe.
            filterFineData: Forwarded to the base selection model.
            universeSettings: Forwarded to the base selection model.
        """
        super().__init__(filterFineData, universeSettings)
        self.size = size
        # Month of the last completed selection; -1 until the first one.
        self.lastMonth = -1

    def SelectCoarse(self, algorithm, coarse):
        """Select stocks with fundamental data, volume>0 and price>0"""
        month = algorithm.Time.month
        if month == self.lastMonth:
            return Universe.Unchanged

        coarse_stocks = [x.Symbol for x in coarse if x.HasFundamentalData
                         and x.Volume > 0 and x.Price > 0]
        if coarse_stocks:
            # Record the month only after a usable selection; without this
            # assignment the check above never matched and the universe was
            # rebuilt on every call.
            self.lastMonth = month
        return coarse_stocks

    def SelectFine(self, algorithm, fine):
        """Select top stocks by market cap"""
        fine_stocks = sorted([x for x in fine if x.MarketCap > 0],
                             key=lambda x: x.MarketCap, reverse=True)

        return [x.Symbol for x in fine_stocks[:self.size]]

# Implementing GridSearch

from clr import AddReference
AddReference("QuantConnect.Common")
AddReference("QuantConnect.Algorithm")
AddReference("QuantConnect.Algorithm.Framework")

from QuantConnect import *
from QuantConnect.Algorithm import *
from QuantConnect.Algorithm.Framework import *
from QuantConnect.Algorithm.Framework.Alphas import AlphaModel, Insight, InsightType, InsightDirection

import numpy as np
import datetime as dt
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import RandomizedSearchCV

import timeseriessplitgroups as tss


# Pipeline stages handed to sklearn's Pipeline: PCA followed by an MLP regressor.
STEPS = [
    ("pca", PCA()),
    ("mlp", MLPRegressor(
        solver="adam",
        max_iter=100,
        early_stopping=True,
        validation_fraction=0.1,
        n_iter_no_change=1,
        warm_start=True,
    )),
]
# Hyper-parameter candidates for the search, keyed as "<step>__<parameter>".
PARAMS = {
    "pca__n_components": [None, 0.9],
    "mlp__activation": ["logistic", "relu"],
    "mlp__alpha": [0.1, 0.01, 0.001, 0.0001, 0],
    "mlp__hidden_layer_sizes": [[96], [48, 48], [32, 32, 32]],
}


class MLTechnical(AlphaModel):
    """Alpha model using a randomized search over a PCA + MLP regressor.

    The regressor predicts the next bar's return from per-symbol returns over
    the horizons listed in ``self.lookbacks``.
    """

    def __init__(self, algorithm, model=None):
        self.algorithm = algorithm
        self.resolution = algorithm.UniverseSettings.Resolution
        self.model = model
        self.lookbacks = [1, 5, 10, 21, 63]

    def Update(self, algorithm, data):
        """Return one one-day insight per security, or none without a model.

        Both arguments are unused: the algorithm stored at construction is
        queried for history instead.
        """
        insights = []
        if self.model is not None:
            latest = self.predict()
            for key, signal in latest["Signal"].items():
                if signal > 0:
                    direction = InsightDirection.Up
                else:
                    direction = InsightDirection.Down
                # key is the (symbol, time) index entry of the prediction.
                insights.append(Insight.Price(self.algorithm.Symbol(key[0]),
                                              timedelta(days=1),
                                              direction, abs(signal), None))
        self.algorithm.Debug(f"{self.algorithm.Time} Generated {len(insights)} insights")
        return insights

    def OnSecuritiesChanged(self, algorithm, changes):
        """Universe changes need no handling here."""
        pass

    def train(self):
        """Fit the search on 252 bars of history and log the best CV score.

        The search object is created on the first call and refitted on
        subsequent ones; scoring is the negated mean absolute error.
        """
        if self.model is None:
            splitter = tss.TimeSeriesSplitGroups(n_splits=10)
            self.model = RandomizedSearchCV(Pipeline(steps=STEPS), PARAMS,
                                            scoring="neg_mean_absolute_error",
                                            cv=splitter, n_iter=10, n_jobs=1)
        x, y = self.get_data(252*1)
        y = y.dropna()
        x = x.loc[y.index]
        # Bars sharing a timestamp form one CV group.
        bar_times = x.index.get_level_values("time")
        self.model.fit(x, y, groups=bar_times)
        self.algorithm.Debug(f"{self.algorithm.Time} Training score {self.model.best_score_}")

    def predict(self):
        """Predict from just enough history and keep the latest date only."""
        x, _ = self.get_data(max(self.lookbacks)+1)
        signals = pd.DataFrame(self.model.predict(x),
                               index=x.index,
                               columns=["Signal"])
        bar_times = signals.index.get_level_values("time")
        is_latest = bar_times == max(bar_times)
        return signals[is_latest]  # Get last period prediction

    def get_data(self, datapoints=1):
        """Return ``(features, target)`` built from close prices.

        Features are per-symbol returns over ``self.lookbacks``; the target is
        the one-bar return shifted back by one row and aligned to them.
        """
        tickers = list(self.algorithm.ActiveSecurities.Keys)
        history = self.algorithm.History(tickers, datapoints, self.resolution)
        closes = history["close"].groupby("symbol")
        momentum = [closes.pct_change(horizon) for horizon in self.lookbacks]
        features = pd.concat(momentum, join="inner", axis="columns").dropna()
        next_return = closes.pct_change(1).shift(-1)
        return features, next_return.reindex(features.index)

# Implementing GridSearch

from clr import AddReference
AddReference("QuantConnect.Common")
AddReference("QuantConnect.Algorithm")
AddReference("QuantConnect.Algorithm.Framework")

from QuantConnect import *
from QuantConnect.Algorithm import *
from QuantConnect.Algorithm.Framework import *
from QuantConnect.Algorithm.Framework.Alphas import AlphaModel, Insight, InsightType, InsightDirection

import datetime as dt
from scipy.stats import spearmanr
from sklearn.decomposition import PCA
from sklearn.linear_model import Ridge
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import make_scorer, mean_squared_error
from sklearn.pipeline import Pipeline
from sklearn.model_selection import RandomizedSearchCV

import timeseriessplitgroups as tss


# Pipeline stages handed to sklearn's Pipeline: PCA followed by an MLP regressor.
STEPS = [
    ("pca", PCA()),
    ("mlp", MLPRegressor(
        solver="adam",
        max_iter=100,
        early_stopping=True,
        validation_fraction=0.5,
        n_iter_no_change=1,
        warm_start=True,
    )),
]
# Hyper-parameter candidates for the search, keyed as "<step>__<parameter>".
PARAMS = {
    "pca__n_components": [None, 0.9],
    "mlp__activation": ["logistic", "relu"],
    "mlp__alpha": [0.1, 0.01, 0.001, 0.0001, 0],
    "mlp__hidden_layer_sizes": [[96], [48, 48], [32, 32, 32]],
}


class MLTechnical(AlphaModel):
    """Alpha model using a randomized search over a PCA + MLP regressor.

    The regressor predicts the next bar's return from per-symbol returns over
    1, 5, 10 and 21 bars; candidates are scored with R2.
    """

    def __init__(self, algorithm, model=None):
        self.algorithm = algorithm
        self.resolution = algorithm.UniverseSettings.Resolution
        self.model = model

    def Update(self, algorithm, data):
        """Return one one-day insight per security, or none without a model.

        Both arguments are unused: the algorithm stored at construction is
        queried for history instead.
        """
        insights = []
        if self.model is not None:
            latest = self.predict()
            for key, signal in latest["Signal"].items():
                if signal > 0:
                    direction = InsightDirection.Up
                else:
                    direction = InsightDirection.Down
                # key is the (symbol, time) index entry of the prediction.
                insights.append(Insight.Price(self.algorithm.Symbol(key[0]),
                                              timedelta(days=1), direction,
                                              abs(signal), None))
        self.algorithm.Debug(f"Generated {len(insights)} insights")
        return insights

    def OnSecuritiesChanged(self, algorithm, changes):
        """Universe changes need no handling here."""
        pass

    def train(self):
        """Fit the search on 252 bars of history and log the ranked results.

        The search object is created on the first call and refitted on
        subsequent ones.
        """
        if self.model is None:
            splitter = tss.TimeSeriesSplitGroups(n_splits=10)
            self.model = RandomizedSearchCV(Pipeline(steps=STEPS),
                                            PARAMS, scoring="r2",
                                            cv=splitter, n_iter=10, n_jobs=1)
        x, y = self.get_data(252*1)
        y = y.dropna()
        x = x.loc[y.index]
        # Bars sharing a timestamp form one CV group.
        bar_times = x.index.get_level_values("time")
        fitted = self.model.fit(x, y, groups=bar_times)
        report_columns = ["params", "mean_test_score",
                          "std_test_score", "mean_fit_time"]
        results = pd.DataFrame(fitted.cv_results_)
        results = results.sort_values("rank_test_score")[report_columns]
        self.algorithm.Debug(f"Training results\n{results.to_string()}")

    def predict(self):
        """Predict from 64 bars of history and keep the latest date only."""
        x, _ = self.get_data(64)
        signals = pd.DataFrame(self.model.predict(x),
                               index=x.index,
                               columns=["Signal"])
        bar_times = signals.index.get_level_values("time")
        is_latest = bar_times == max(bar_times)
        return signals[is_latest]

    def get_data(self, datapoints=1):
        """Return ``(features, target)`` built from close prices.

        Features are per-symbol returns over 1, 5, 10 and 21 bars; the target
        is the one-bar return shifted back by one row and aligned to them.
        """
        tickers = list(self.algorithm.ActiveSecurities.Keys)
        history = self.algorithm.History(tickers, datapoints, self.resolution)
        closes = history["close"].groupby("symbol")
        momentum = [closes.pct_change(horizon) for horizon in (1, 5, 10, 21)]
        features = pd.concat(momentum, join="inner", axis="columns").dropna()
        next_return = closes.pct_change(1).shift(-1)
        return features, next_return.reindex(features.index)

# Algorithm with ML Alpha based on Technical Features
# Rolling monthly training, limit price execution 
# Flexible Long/Short exposure

from Risk.MaximumDrawdownPercentPerSecurity import MaximumDrawdownPercentPerSecurity
from alpha.mltechnical06 import MLTechnical
from universe.topmktcap import TopMktCapStocks
from portfolio.customportfolio import CustomPortfolio
from execution.customexec import CustomExecution


class DancingFluorescentYellowZebra(QCAlgorithm):
    """Framework algorithm wiring universe, alpha, portfolio and execution models."""

    def Initialize(self):
        """Configure the backtest and schedule monthly model training."""
        # Backtest window, capital and brokerage
        self.SetStartDate(2015, 1, 1)
        self.SetEndDate(2016, 1, 1)
        self.SetCash(100000)
        self.SetBrokerageModel(BrokerageName.AlphaStreams)

        # Universe: top 50 stocks by market cap with fundamentals, daily bars
        universe = TopMktCapStocks(size=50)
        self.AddUniverseSelection(universe)
        self.UniverseSettings.Resolution = Resolution.Daily

        # Portfolio: ten equally weighted positions, all of them long
        portfolio = CustomPortfolio(long_short_ratio=1.0, portfolio_size=10)
        self.SetPortfolioConstruction(portfolio)

        # Execution: plain market orders (no limit price, no stop price).
        # Risk management is not enabled; MaximumDrawdownPercentPerSecurity
        # is imported for that purpose but not added.
        execution = CustomExecution(limit_price=None, stop_price=None)
        self.SetExecution(execution)

        # Alpha: ML model trained at midnight on the first day of each month
        alpha = MLTechnical(self)
        self.AddAlpha(alpha)
        training_dates = self.DateRules.MonthStart()
        training_time = self.TimeRules.At(0, 0)
        self.Train(training_dates, training_time, alpha.train)

# New Technical features and chart
from clr import AddReference

AddReference("QuantConnect.Common")
AddReference("QuantConnect.Algorithm")
AddReference("QuantConnect.Algorithm.Framework")

from QuantConnect import *
from QuantConnect.Algorithm import *
from QuantConnect.Algorithm.Framework import *
from QuantConnect.Algorithm.Framework.Alphas import AlphaModel, Insight, InsightType, InsightDirection

from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import RandomizedSearchCV

import timeseriessplitgroups as tss

# Pipeline stages handed to sklearn's Pipeline: PCA followed by an MLP classifier.
STEPS = [
    ("pca", PCA()),
    ("mlp", MLPClassifier(
        solver="adam",
        max_iter=100,
        early_stopping=True,
        validation_fraction=0.2,
        n_iter_no_change=1,
        warm_start=True,
    )),
]
# Hyper-parameter candidates for the search, keyed as "<step>__<parameter>".
PARAMS = {
    "pca__n_components": [None, 0.9],
    "mlp__activation": ["logistic", "relu"],
    "mlp__alpha": [0.1, 0.01, 0.001, 0.0001, 0],
    "mlp__hidden_layer_sizes": [[96], [48, 48], [32, 32, 32]],
}


class MLTechnical(AlphaModel):
    """Alpha model driven by a cross-validated PCA + MLP classifier.

    The classifier estimates the probability that a security's next bar
    closes higher, using momentum and moving-average-normalised features of
    the close price.  The validation accuracy is plotted after each training.
    """

    # Probability of an up move above which an insight is labelled Up.
    UP_THRESHOLD = 0.5

    def __init__(self, algorithm, model=None):
        """Store the algorithm handle and an optional pre-built search object."""
        self.algorithm = algorithm
        self.resolution = algorithm.UniverseSettings.Resolution
        self.model = model
        self.lookbacks = [1, 5, 10, 21, 63]

    def Update(self, algorithm, data):
        """Emit one one-day price insight per security.

        Args:
            algorithm: Unused; the instance given to ``__init__`` is used.
            data: Unused; features are rebuilt from a history request.

        Returns:
            List of insights, empty while no model is available.
        """
        insights = []
        if self.model is not None:
            predictions = self.predict()
            for idx, signal in predictions["Signal"].items():
                symbol = self.algorithm.Symbol(idx[0])
                # The signal is P(up) in [0, 1]; comparing it with 0 made
                # every insight Up.  The magnitude stays the raw probability,
                # so ordering by (direction, magnitude) still equals ordering
                # by probability.
                if signal > self.UP_THRESHOLD:
                    direction = InsightDirection.Up
                else:
                    direction = InsightDirection.Down
                insights.append(Insight.Price(symbol, timedelta(days=1),
                                              direction, float(signal), None))
        self.algorithm.Debug(f"{self.algorithm.Time} {len(insights)} insights")
        return insights

    def train(self):
        """Fit the randomized search on 252 bars of history.

        The search object is created on first use and refitted on later
        calls.  The best cross-validation accuracy is logged and plotted.
        """
        if self.model is None:
            cv = tss.TimeSeriesSplitGroups(n_splits=10)
            self.model = RandomizedSearchCV(Pipeline(steps=STEPS), PARAMS,
                                            scoring="accuracy", cv=cv,
                                            n_iter=10, n_jobs=1)
        x, y = self.get_data(252 * 1, include_y=True)
        # Bars sharing a timestamp form one CV group.
        groups = x.index.get_level_values("time")
        self.model.fit(x, y, groups=groups)
        self.algorithm.Debug(f"{self.algorithm.Time} Model Validation {self.model.best_score_:.1%}")
        self.algorithm.Plot("Model", "Accuracy", self.model.best_score_)

    def predict(self):
        """Return a "Signal" frame with P(up) for the latest date only."""
        x = self.get_data(max(self.lookbacks) + 1, include_y=False)
        y = pd.DataFrame(self.model.predict_proba(x)[:, 1],
                         index=x.index,
                         columns=["Signal"])
        dates = y.index.get_level_values("time")
        return y[dates == max(dates)]  # Get last period prediction

    def get_data(self, datapoints=1, include_y=True):
        """Build the feature matrix (and optionally the target) from history.

        Args:
            datapoints: Number of bars requested per security.
            include_y: When true, also return the next-bar direction label.

        Returns:
            ``features`` alone, or ``(features, target)`` where ``target`` is
            1.0 when the following bar's close-to-close return is positive.
        """
        tickers = list(self.algorithm.ActiveSecurities.Keys)
        data = self.algorithm.History(tickers, datapoints, self.resolution)
        # Only the close is used; a high-low range and the volume used to be
        # computed here and were thrown away again before reaching the model.
        data = data[["close"]]
        groups = data.groupby("symbol")
        features = [groups.pct_change(p) for p in self.lookbacks]  # Momentum
        # ``transform`` always returns a frame indexed like ``data``, whereas
        # ``apply`` prepends the group key on recent pandas versions and
        # breaks the alignment of the division.
        features += [data / groups.transform(lambda x, p=p: x.rolling(p).mean())
                     for p in self.lookbacks]  # Normalized average
        features = pd.concat(features, join="inner", axis="columns").dropna()
        if not include_y:
            return features
        # Shift within each symbol so a label can never come from another one.
        target = (groups["close"].pct_change(1)
                  .groupby(level="symbol").shift(-1))
        target = target.reindex_like(features).dropna()
        return features.loc[target.index], (target > 0).astype("float")

# Improving target and CV
from clr import AddReference

AddReference("QuantConnect.Common")
AddReference("QuantConnect.Algorithm")
AddReference("QuantConnect.Algorithm.Framework")

from QuantConnect import *
from QuantConnect.Algorithm import *
from QuantConnect.Algorithm.Framework import *
from QuantConnect.Algorithm.Framework.Alphas import AlphaModel, Insight, InsightType, InsightDirection

from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import RandomizedSearchCV

import timeseriessplitgroups as tss

# Pipeline stages handed to sklearn's Pipeline: PCA followed by an MLP classifier.
STEPS = [
    ("pca", PCA()),
    ("mlp", MLPClassifier(
        solver="adam",
        max_iter=100,
        early_stopping=True,
        validation_fraction=0.1,
        n_iter_no_change=1,
        warm_start=True,
    )),
]
# Hyper-parameter candidates for the search, keyed as "<step>__<parameter>".
PARAMS = {
    "pca__n_components": [None, 0.9],
    "mlp__activation": ["logistic", "relu"],
    "mlp__alpha": [0.1, 0.01, 0.001, 0.0001, 0],
    "mlp__hidden_layer_sizes": [[96], [48, 48], [32, 32, 32]],
}


class MLTechnical(AlphaModel):
    """Alpha model driven by a cross-validated PCA + MLP classifier.

    The classifier estimates the probability that a security's next bar
    closes higher, using per-symbol returns over ``self.lookbacks`` bars.
    """

    # Probability of an up move above which an insight is labelled Up.
    UP_THRESHOLD = 0.5

    def __init__(self, algorithm, model=None):
        """Store the algorithm handle and an optional pre-built search object."""
        self.algorithm = algorithm
        self.resolution = algorithm.UniverseSettings.Resolution
        self.model = model
        self.lookbacks = [1, 5, 10, 21, 63]

    def Update(self, algorithm, data):
        """Emit one one-day price insight per security.

        Args:
            algorithm: Unused; the instance given to ``__init__`` is used.
            data: Unused; features are rebuilt from a history request.

        Returns:
            List of insights, empty while no model is available.
        """
        insights = []
        if self.model is not None:
            predictions = self.predict()
            for idx, signal in predictions["Signal"].items():
                symbol = self.algorithm.Symbol(idx[0])
                # The signal is P(up) in [0, 1]; comparing it with 0 made
                # every insight Up.  The magnitude stays the raw probability,
                # so ordering by (direction, magnitude) still equals ordering
                # by probability.
                if signal > self.UP_THRESHOLD:
                    direction = InsightDirection.Up
                else:
                    direction = InsightDirection.Down
                insights.append(Insight.Price(symbol, timedelta(days=1),
                                              direction, float(signal), None))
        self.algorithm.Debug(f"{self.algorithm.Time} {len(insights)} insights")
        return insights

    def train(self):
        """Fit the randomized search on 2520 bars of history.

        The search object is created on first use and refitted on later
        calls; the estimator's default score is used to rank candidates.
        """
        if self.model is None:
            cv = tss.TimeSeriesSplitGroups(n_splits=10)
            self.model = RandomizedSearchCV(Pipeline(steps=STEPS), PARAMS,
                                            cv=cv, n_iter=10, n_jobs=1)
        x, y = self.get_data(252 * 10, include_y=True)
        # Bars sharing a timestamp form one CV group.
        groups = x.index.get_level_values("time")
        self.model.fit(x, y, groups=groups)
        self.algorithm.Debug(f"{self.algorithm.Time} Model Validation {self.model.best_score_:.1%}")

    def predict(self):
        """Return a "Signal" frame with P(up) for the latest date only."""
        x = self.get_data(max(self.lookbacks) + 1, include_y=False)
        y = pd.DataFrame(self.model.predict_proba(x)[:, 1],
                         index=x.index,
                         columns=["Signal"])
        dates = y.index.get_level_values("time")
        return y[dates == max(dates)]  # Get last period prediction

    def get_data(self, datapoints=1, include_y=True):
        """Build the feature matrix (and optionally the target) from history.

        Args:
            datapoints: Number of bars requested per security.
            include_y: When true, also return the next-bar direction label.

        Returns:
            ``features`` alone, or ``(features, target)`` where ``target`` is
            1.0 when the following bar's close-to-close return is positive.
        """
        tickers = list(self.algorithm.ActiveSecurities.Keys)
        data = self.algorithm.History(tickers, datapoints, self.resolution)
        groups = data["close"].groupby("symbol")
        features = pd.concat([groups.pct_change(p)
                              for p in self.lookbacks],
                             join="inner", axis="columns").dropna()
        if not include_y:
            return features
        # Shift within each symbol so a label can never come from another one.
        target = groups.pct_change(1).groupby(level="symbol").shift(-1)
        target = target.reindex_like(features).dropna()
        return features.loc[target.index], (target > 0).astype("float")