Backtest

Overall Statistics
Total Trades 2306 Average Win 0.40% Average Loss -0.28% Compounding Annual Return 7.589% Drawdown 16.400% Expectancy 0.169 Net Profit 62.056% Sharpe Ratio 0.586 Probabilistic Sharpe Ratio 10.426% Loss Rate 52% Win Rate 48% Profit-Loss Ratio 1.45 Alpha 0.069 Beta -0.005 Annual Standard Deviation 0.116 Annual Variance 0.014 Information Ratio -0.307 Tracking Error 0.2 Treynor Ratio -13.831 Total Fees $2769.99 Estimated Strategy Capacity $430000000.00 Lowest Capacity Asset GOOG T1AZ164W5VTX

from SignalProccesor import SignalProccesor

# Module-level SignalProccesor instance created at import time; every SymbolData
# method below delegates its model fitting, scoring and prediction to this one object.
signalProccesor = SignalProccesor()
        
class SymbolData:
    ''' Per-symbol container for a fitted PCA/SVM pair, with thin wrappers that
    forward the work to the module-level signalProccesor instance '''

    def __init__(self, symbol, edge):
        ''' Create the container for one symbol; both models start out unfitted (None)
        Args:
            symbol: symbol this instance belongs to
            edge: required model edge (correct % minus wrong %); a model scoring
                  below 0.5 + edge/2 is flagged for recalibration by CheckModel'''
        self.symbol, self.edge = symbol, edge

        # fitted Principal Component Analysis model and Support Vector Machine classifier,
        # both populated by UpdateModel
        self.PCA = self.SVM = None

    def CheckModel(self, rankingDf, priceSeries, benchmark):
        ''' Decide whether the models have to be (re)calibrated
        Args:
            rankingDf: daily historical data of Brain's ML Stock Ranking 2, 3, 5 days
            priceSeries: daily close price of specific symbol
            benchmark: daily close price of benchmark
        Return:
            truthy when either model is missing or the current score is below 0.5 + edge/2'''
        # nothing to score yet: a missing model always requires fitting
        modelsMissing = self.PCA is None or self.SVM is None
        if modelsMissing:
            return True

        accuracy = signalProccesor.ModelScore(rankingDf, priceSeries, benchmark, self.PCA, self.SVM)
        requiredAccuracy = 0.5 + self.edge/2
        return accuracy < requiredAccuracy

    def UpdateModel(self, rankingDf, priceSeries, benchmark):
        ''' Refit both models and store them on this instance
        Args:
            rankingDf: daily historical data of Brain's ML Stock Ranking 2, 3, 5 days
            priceSeries: daily close price of specific symbol
            benchmark: daily close price of benchmark
        Update:
            self.PCA and self.SVM'''
        fitted = signalProccesor.ModelFitting(rankingDf, priceSeries, benchmark)
        self.PCA, self.SVM = fitted

    def CheckBias(self, rankingDf, priceSeries, benchmark):
        ''' Ask the signal processor which side(s) the current SVM model may be traded on
        Args:
            rankingDf: daily historical data of Brain's ML Stock Ranking 2, 3, 5 days
            priceSeries: daily close price of specific symbol
            benchmark: daily close price of benchmark
        Return:
            whatever signalProccesor.ModelBias returns for the stored models and edge'''
        return signalProccesor.ModelBias(
            rankingDf, priceSeries, benchmark, self.PCA, self.SVM, self.edge
        )

    def Prediction(self, rankingData):
        ''' Forecast the next direction with the stored models
        Args:
            rankingData: latest Brain's ML Stock Ranking 2, 3, 5 days observation
        Return:
            directional forecast produced by signalProccesor.ModelPrediction'''
        return signalProccesor.ModelPrediction(rankingData, self.PCA, self.SVM)

from QuantConnect.DataSource import *
import pandas as pd
from PortfolioOptimization import PortfolioOptimization
from SymbolData import SymbolData

class BrainMLRankPortfolio(QCAlgorithm):
    ''' Weekly long/short algorithm: per-symbol PCA + SVM models fed with Brain's ML
    Stock Ranking data classify each equity as long or short candidate, the two books
    are sized by PortfolioOptimization and cross-weighted by each other's beta '''

    def Initialize(self):
        ''' Set dates, cash, parameters, subscriptions, framework models and the weekly schedule '''
        self.SetStartDate(2015, 1, 1)
        self.SetCash(100000)

        # Parameters:
        #     self.numberOfDays: number of days of daily return data to be used in model check and risk parity
        #     self.trainingDataLength: number of days of daily data to be used in calibrating model
        #     self.edge: required percentage of model's given self.edge, calculated by correct accuracy % - wrong %, if less than that, recalibrate
        #     self.min_: minimum weight for individual asset weighting (0 - max)
        #     self.max_: maximum weight for individual asset weighting (min to inf)
        #     self.total_: target total weight for all assets (max to inf)
        self.numberOfDays = 252
        self.trainingDataLength = 5*252
        self.edge = 0.05
        self.min_ = 0.
        self.max_ = 1.
        self.total_ = 1.

        # FAANG + benchmark from research
        tickers = ["FB", "AAPL", "AMZN", "NFLX", "GOOGL", "SPY"]
        # Subscribe to the tickers
        for ticker in tickers:
            self.AddEquity(ticker, Resolution.Daily)

        # Add Portfolio Construction Model
        # InsightWeightingPortfolioConstructionModel is used as the position sizing will be determined by calculation in algorithm
        # weekly rebalance as we predict weekly active return
        self.SetPortfolioConstruction(InsightWeightingPortfolioConstructionModel(lambda time: Expiry.EndOfWeek(time)))

        # Add Execution Function
        # We stick with our research assuming immediate execution
        self.SetExecution(ImmediateExecutionModel())

        # Null Risk Management
        # We don't want to expose to systematic risk tempering the beta-neutral balance
        self.AddRiskManagement(NullRiskManagementModel())

        # schedule our weekly insight generation
        self.Schedule.On(self.DateRules.WeekStart("SPY"),
                         self.TimeRules.AfterMarketOpen("SPY"),
                         self.InsightGenerator)

        # dictionary holding symbol of Brain's data
        self.brainStockRank = {}
        # dictionary holding custom symbolData class
        self.data = {}
        # dictionary holding trading side allowed
        self.side = {}
        # dict contain corresponding historical trade bar data
        self.history = {}

        # initialize PortfolioOptimization class for position size and beta calculation
        self.portfolioOptimization = PortfolioOptimization(self.min_, self.max_, self.total_)

    def _ClosePriceFrame(self, symbol, length):
        ''' Turn the newest `length` bars of a symbol's rolling window into a close price frame
        Args:
            symbol: key into self.history
            length: maximum number of bars to take from the window
        Return:
            single-column dataframe of close prices in chronological order, indexed by
            bar end time; an empty dataframe when the window holds no bars'''
        # the window is sliced from its start and then reversed into chronological order
        bars = pd.DataFrame(self.history[symbol], columns=[symbol]).iloc[:length][::-1]
        if bars.empty:
            return bars

        closes = bars.applymap(lambda bar: bar.Close)
        closes.index = pd.to_datetime(bars.applymap(lambda bar: bar.EndTime).values.flatten().tolist())
        return closes

    def InsightGenerator(self):
        ''' Creates insights: check/refit each symbol's models, classify symbols into a long
        and a short book, size both books and emit beta cross-weighted insights '''
        # local import so this method does not depend on the import order of the file
        import numpy as np

        insights = []

        # dictionaries holding symbols and their historical data
        longData = {}
        shortData = {}

        benchmark = self.History(self.Symbol("SPY"), self.numberOfDays, Resolution.Daily)
        if benchmark.empty: return
        benchmark = benchmark.close.unstack("symbol")

        for symbol, symbolData in self.data.items():
            # get Brain's ML Stock Ranking data, close price, benchmark's close price for testing models
            rankingDf = self.History(self.brainStockRank[symbol], self.numberOfDays, Resolution.Daily)
            priceSeries = self._ClosePriceFrame(symbol, self.numberOfDays)

            # discontinue if no data available
            if rankingDf.empty or priceSeries.empty: continue

            rankingDf = rankingDf['rank'].unstack("symbol")

            # update models if one or more of the recalibrating criteria was/were met
            if symbolData.CheckModel(rankingDf, priceSeries, benchmark):
                # get 5 year Brain's ML Stock Ranking data, close price, benchmark's close price for training models
                rankingTrain = self.History(self.brainStockRank[symbol], self.trainingDataLength, Resolution.Daily)
                benchmarkTrain = self.History(self.Symbol("SPY"), self.trainingDataLength, Resolution.Daily)
                priceSeriesTrain = self._ClosePriceFrame(symbol, self.trainingDataLength)

                # an empty history frame has no 'rank'/'close' column: skip the symbol instead of raising
                if rankingTrain.empty or benchmarkTrain.empty or priceSeriesTrain.empty: continue

                # update models
                symbolData.UpdateModel(rankingTrain['rank'].unstack("symbol"),
                                       priceSeriesTrain,
                                       benchmarkTrain.close.unstack("symbol"))

                # select the side (long/short/both) the model is suitable to trade for
                self.side[symbol] = symbolData.CheckBias(rankingDf, priceSeries, benchmark)

            # predict the coming week close price direction
            predict = symbolData.Prediction(rankingDf.iloc[-1].values.reshape(1, -1))

            # classification according to prediction; a symbol without an accepted side is not traded
            allowedSide = self.side.get(symbol)
            if predict == 1 and allowedSide in ("long", "both"):
                longData[symbol] = priceSeries
            elif predict == 0 and allowedSide in ("short", "both"):
                shortData[symbol] = priceSeries

        # preset beta values to avoid errors
        longBeta = 1
        shortBeta = 1

        if longData:
            # get a dataframe for long symbols's historical data
            longDf = pd.concat(list(longData.values()), axis=1)
            # positional sizing for long symbols; a single asset (one column) takes the whole book.
            # The fallback is an (n, 1) array like the optimizer output because CalculateBeta reshapes it
            if longDf.shape[1] > 1:
                longWeights = self.portfolioOptimization.CalculatePositionSize(longDf, benchmark)
            else:
                longWeights = np.array([[1.]])
            # calculate beta
            longBeta = self.portfolioOptimization.CalculateBeta(longDf, longWeights, benchmark)
        # if all equities in short list, we long benchmark SPY
        else:
            longDf = benchmark
            longWeights = [1.]

        if shortData:
            # get a dataframe for short symbols's historical data
            shortDf = pd.concat(list(shortData.values()), axis=1)
            # positional sizing for short symbols
            # NOTE(review): CalculatePositionSize works on pct_change(), and pct_change() of negated
            # prices equals pct_change() of the prices themselves, so the negation below does not
            # invert returns -- confirm the intended sizing of the short book
            if shortDf.shape[1] > 1:
                shortWeights = self.portfolioOptimization.CalculatePositionSize(-shortDf, -benchmark)
            else:
                shortWeights = np.array([[1.]])
            # calculate beta
            shortBeta = self.portfolioOptimization.CalculateBeta(shortDf, shortWeights, benchmark)
        # if all equities in long list, we short benchmark SPY
        else:
            shortDf = benchmark
            shortWeights = [1.]

        for column, weight in zip(longDf.columns, longWeights):
            # use beta to have inter-portfolio weighting as to wash out systematic risk;
            # the product may be a one-element array/Series, so collapse it to a plain float
            insightWeight = float(np.asarray(shortBeta*weight, dtype=float).ravel()[0])
            insights.append(Insight(column, Expiry.EndOfWeek, InsightType.Price, InsightDirection.Up,
                                    None, None, None, insightWeight))

        for column, weight in zip(shortDf.columns, shortWeights):
            # use beta to have inter-portfolio weighting as to wash out systematic risk
            insightWeight = float(np.asarray(longBeta*weight, dtype=float).ravel()[0])
            insights.append(Insight(column, Expiry.EndOfWeek, InsightType.Price, InsightDirection.Down,
                                    None, None, None, insightWeight))

        # emit insights
        self.EmitInsights(insights)

    def OnSecuritiesChanged(self, changes):
        ''' For every added security except SPY: subscribe to the Brain ranking datasets, create
        its SymbolData, and set up, warm up and keep updating its rolling window of daily bars
        Args:
            changes: security changes object; only AddedSecurities is read'''
        for change in changes.AddedSecurities:
            # skip benchmark
            if change.Symbol == self.Symbol("SPY"): continue

            # Subscribe to Brain's ML stock ranking datasets (2, 3, 5 days) for the tickers
            brainStockRank2Days = self.AddData(BrainStockRanking2Day, change.Symbol).Symbol
            brainStockRank3Days = self.AddData(BrainStockRanking3Day, change.Symbol).Symbol
            brainStockRank5Days = self.AddData(BrainStockRanking5Day, change.Symbol).Symbol
            self.brainStockRank[change.Symbol] = [brainStockRank2Days, brainStockRank3Days, brainStockRank5Days]
            # holding custom symbolData class for symbol
            self.data[change.Symbol] = SymbolData(change.Symbol, self.edge)

            # set up historical data rolling window
            windowSize = max(self.numberOfDays, self.trainingDataLength) + 1
            self.history[change.Symbol] = RollingWindow[TradeBar](windowSize)

            # warm up rolling windows; an empty history frame cannot be indexed by symbol, so skip it
            data = self.History(change.Symbol, windowSize, Resolution.Daily)
            if not data.empty:
                for time, bar in data.loc[change.Symbol].iterrows():
                    tradeBar = TradeBar(time, change.Symbol, bar.open, bar.high, bar.low, bar.close, bar.volume)
                    self.history[change.Symbol].Add(tradeBar)

            # set up consolidator for future auto-update
            self.Consolidate(change.Symbol, Resolution.Daily, self.DailyBarHandler)

    def DailyBarHandler(self, bar):
        ''' Consolidator callback: push the new daily bar into its symbol's rolling window '''
        self.history[bar.Symbol].Add(bar)

import numpy as np
import pandas as pd
from pandas.tseries.holiday import USFederalHolidayCalendar
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix
from sklearn.svm import SVC
# Seed NumPy's global random number generator once at import time so that any
# NumPy-based randomness is repeatable between runs.
np.random.seed(0)

class SignalProccesor:
    ''' Uses Brain's ML Stock Ranking data and SVM to predict weekly active return direction '''

    def __init__(self):
        ''' Initialize a new instance of SignalProccesor class; the instance holds no state '''
        pass

    @staticmethod
    def _WeekStartSample(frame, offset):
        ''' Take the first observation of every Monday-anchored weekly bin and move each
        bin label forward by `offset`
        Args:
            frame: datetime-indexed dataframe/series
            offset: date offset added to every resampled label
        Returns:
            resampled data with shifted labels'''
        sampled = frame.resample("W-MON").first()
        # the `loffset` argument of resample is deprecated and removed in pandas 2;
        # adding the offset to the resulting labels is the documented replacement
        sampled.index = pd.DatetimeIndex([stamp + offset for stamp in sampled.index])
        return sampled

    def DataProcessing(self, rankingDf, priceSeries, benchmark):
        ''' Process raw Brain's ML Stock Ranking data, close price series of specific symbol and benchmark
        Args:
            rankingDf: daily historical data of Brain's ML Stock Ranking 2, 3, 5 days
            priceSeries: daily close price of specific symbol (single column)
            benchmark: daily close price of benchmark (single column)
        Returns:
            week-start Brain's ML Stock Ranking data and the matching directional
            (1 for up, 0 for no change/down) forward weekly active return series'''
        # business day data for resample and offset
        bd = pd.offsets.CustomBusinessDay(calendar=USFederalHolidayCalendar())

        # we're interested in weekly active return change on week start
        priceSeries = self._WeekStartSample(priceSeries, bd)
        benchmark = self._WeekStartSample(benchmark, bd)
        rankingDf = self._WeekStartSample(rankingDf, bd)

        # for training model, we should use log return by Palomar's signal processing book
        # shift it back 1 bar so every row carries the return of the following period
        priceSeries = np.log(priceSeries/priceSeries.shift(1)).shift(-1)
        benchmark = np.log(benchmark/benchmark.shift(1)).shift(-1)

        # matching date for DateTime index
        priceSeries.index = pd.to_datetime(priceSeries.index, utc=True).date
        benchmark.index = pd.to_datetime(benchmark.index, utc=True).date
        rankingDf.index = pd.to_datetime(rankingDf.index, utc=True).date

        # we would only compare those dates with data in price, ML datasets and benchmark, dropping those did not
        df = pd.concat([rankingDf, priceSeries, benchmark], axis=1).dropna()

        # we want active return only: second-to-last column is the symbol, last column the benchmark
        activeReturn = df.iloc[:, -2].subtract(df.iloc[:, -1], axis=0)

        # change into directional return series (1 for up, 0 for no change/down)
        activeReturn = (activeReturn > 0).astype(float)

        return df.iloc[:, :-2], activeReturn

    def ModelFitting(self, rankingDf, priceSeries, benchmark):
        ''' Fitting the PCA and SVM models for specific symbol by given Brain's ML Stock Ranking data
        Args:
            rankingDf: daily historical data of Brain's ML Stock Ranking 2, 3, 5 days
            priceSeries: daily close price of specific symbol
            benchmark: daily close price of benchmark
        Returns:
            fitted PCA model & SVM classifier'''
        # process our data to get week-start Brain's ML Stock Ranking data and weekly directional return
        rankingDf, binaryReturn = self.DataProcessing(rankingDf, priceSeries, benchmark)

        # initialize a PCA model to reduce dimension to avoid overfitting, uses MLE for estimate number of components
        pca = PCA(n_components = 'mle')
        # no need to scale data as they're already scaled
        reduced = pca.fit_transform(rankingDf.fillna(0))
        reduced = pd.DataFrame(reduced)

        # initialize a SVM classifier to predict directional weekly return, we'll be using rbf kernel as we want best dimensional soft margin
        svm = SVC(kernel='rbf')
        # no need to scale data as they're already scaled, we'll be using binary return
        svm.fit(reduced, binaryReturn.fillna(0))

        return pca, svm

    def ModelPrediction(self, rankingData, pca, svm):
        ''' Making directional prediction on coming week by Brain's ML Stock Ranking data
        Args:
            rankingData: Brain's ML Stock Ranking 2, 3, 5 days of this instance, shaped (1, n_features)
            pca: fitted PCA tranform model for specific symbol
            svm: fitted SVM classifier model for specific symbol
        Returns:
            directional prediction (1 for up, 0 for down)'''
        # missing rankings are replaced by 0 BEFORE the transformation, as in the other methods;
        # filling afterwards is too late because the gaps have already gone through the transform
        features = pd.DataFrame(rankingData).fillna(0).values

        # reduce data dimension from PCA model transformation
        reduced = pd.DataFrame(pca.transform(features))

        # predict returns a one-element array for the single observation
        return int(svm.predict(reduced)[0])

    def ModelScore(self, rankingDf, priceSeries, benchmark, pca, svm):
        ''' Get score of the SVM model for specific symbol by current data
        Args:
            rankingDf: daily historical data of Brain's ML Stock Ranking 2, 3, 5 days
            priceSeries: daily close price of specific symbol
            benchmark: daily close price of benchmark
            pca: fitted PCA tranform model for specific symbol
            svm: fitted SVM classifier model for specific symbol
        Returns:
            accuracy score of SVM classfier'''
        # process our data to get week-start Brain's ML Stock Ranking data and weekly directional return
        rankingDf, binaryReturn = self.DataProcessing(rankingDf, priceSeries, benchmark)

        # reduce data dimension from PCA model transformation
        reduced = pca.transform(rankingDf.fillna(0))
        reduced = pd.DataFrame(reduced)

        return svm.score(reduced, binaryReturn.fillna(0))

    def ModelBias(self, rankingDf, priceSeries, benchmark, pca, svm, edge):
        ''' Get sensitivity and specificity of model to decide it should trade for long/short/both side signals
        Args:
            rankingDf: daily historical data of Brain's ML Stock Ranking 2, 3, 5 days
            priceSeries: daily close price of specific symbol
            benchmark: daily close price of benchmark
            pca: fitted PCA tranform model for specific symbol
            svm: fitted SVM classifier model for specific symbol
            edge: required percentage of model's given edge, calculated by correct accuracy % - wrong %, if less than that, abandon side
        Returns:
            "long", "short" or "both" for the side(s) whose hit rate exceeds 0.5 + edge/2, else None'''
        # process our data to get week-start Brain's ML Stock Ranking data and weekly directional return
        rankingDf, binaryReturn = self.DataProcessing(rankingDf, priceSeries, benchmark)

        # reduce data dimension from PCA model transformation
        reduced = pca.transform(rankingDf.fillna(0))
        reduced = pd.DataFrame(reduced)

        # get predicted results
        predicted = svm.predict(reduced)

        # contrast observed and predicted results
        contrast = confusion_matrix(binaryReturn.fillna(0), predicted)

        # do not proceed if no differential prediction
        if contrast.shape[0] <= 1: return None

        # get sensitivity and specificity; a class that was never observed gets a rate of 0
        # (never accepted) instead of a division by zero
        observedUp = contrast[1, :].sum()
        observedDown = contrast[0, :].sum()
        sensitivity = contrast[1, 1]/observedUp if observedUp else 0.
        specificity = contrast[0, 0]/observedDown if observedDown else 0.

        # return the signal accepted side
        threshold = 0.5 + edge/2
        acceptLong = sensitivity > threshold
        acceptShort = specificity > threshold
        if acceptLong and acceptShort:
            return "both"
        if acceptLong:
            return "long"
        if acceptShort:
            return "short"
        return None

import cvxopt as cvx
import numpy as np

class PortfolioOptimization:
    ''' Calculate insight weights with a bounded quadratic program over asset returns,
    and the beta of a weighted portfolio against a benchmark. '''

    def __init__(self, min_=0., max_=1., total_=1.):
        ''' Initialize a new instance of PortfolioOptimization class
        Args:
            min_: minimum weight for individual asset weighting (0 - max)
            max_: maximum weight for individual asset weighting (min to inf)
            total_: target total weight for all assets (max to inf)'''
        # clamp the inputs so that 0 <= minWeight <= maxWeight <= totalWeight
        self.minWeight = max(0, min_)
        self.maxWeight = max(max_, min_)
        self.totalWeight = max(max_, total_)

    def CalculatePositionSize(self, df, benchmark):
        ''' Calculate the positional size for imported assets by solving
        minimize (1/2) x'Cx - m'x  subject to  minWeight <= x <= maxWeight, sum(x) == totalWeight
        where C and m are the covariance matrix and mean of the assets' daily returns
        Args:
            df: dataframe of close prices, m x n size of m days and n assets
            benchmark: benchmark close prices; currently not used by the optimization,
                       kept so existing callers keep working
        Return:
            (n, 1) size array containing position sizes for n assets'''
        # number of assets
        n = int(df.shape[1])

        # change to return series
        returns = df.pct_change()[1:]
        # NOTE(review): the previous version also derived active returns (asset minus benchmark)
        # here but never passed them to the solver; that dead computation was removed.
        # Confirm whether the optimization was meant to run on active returns.

        # inequality constraints: Gx <= h
        # we want each individual assets bounded by min and max weights allowed
        # negative identity matrix as to flip to Ix >= h
        G = cvx.matrix(np.concatenate((-np.eye(n), np.eye(n)), axis=0))
        # -x <= -minWeight is x >= minWeight, hence the negated lower bound;
        # the upper bound is never below 1/n so that n assets can still reach a total of 1
        upperBound = max(1./n, self.maxWeight)
        h = cvx.matrix(np.concatenate((-self.minWeight*np.ones((n, 1)), upperBound*np.ones((n, 1))), axis=0))

        # equality constraints: Ax == b
        # we want total weights sum up to target total weights
        A = cvx.matrix(1.0, (1, n))
        # total weight must be positive; cast to float so an integer total does not
        # create an integer-typed matrix
        b = cvx.matrix(float(max(0.001, self.totalWeight)))

        # Quadratic programming
        solution = cvx.solvers.qp(cvx.matrix(returns.cov().values), -cvx.matrix(returns.mean().values),
                                  G=G, h=h, A=A, b=b)
        return np.asarray(solution['x'])

    def CalculateBeta(self, df, weights, benchmark):
        ''' Calculate beta of portfolio
        Args:
            df: dataframe of historical close price data, one column per asset
            weights: corresponding weights of each asset in portfolio (list or array, any shape with n entries)
            benchmark: benchmark historical close price data with the same number of rows as df;
                       only its first column is used
        Return:
            beta value of portfolio as a float'''
        # get return series
        assetReturns = df.pct_change()[1:]
        benchmarkReturns = benchmark.pct_change()[1:]

        # distribute the weight; asarray lets plain lists be passed as well as arrays
        weightColumn = np.asarray(weights, dtype=float).reshape(-1, 1)
        portfolioReturns = np.dot(np.asarray(assetReturns, dtype=float), weightColumn).ravel()

        # first benchmark column as a flat array, whether a series or a dataframe was given
        benchmarkValues = np.asarray(benchmarkReturns, dtype=float)
        benchmarkValues = benchmarkValues.reshape(len(benchmarkValues), -1)[:, 0]

        # get covariance with benchmark (sample covariance, ddof=1)
        cov = np.cov(portfolioReturns, benchmarkValues)[0, 1]

        # get variance of benchmark with the same degrees of freedom
        var = np.var(benchmarkValues, ddof=1)

        return float(cov/var)