Backtest

Overall Statistics
Total Trades 4245 Average Win 0.07% Average Loss -0.03% Compounding Annual Return 0.778% Drawdown 21.800% Expectancy 0.031 Net Profit 1.456% Sharpe Ratio 0.113 Probabilistic Sharpe Ratio 9.028% Loss Rate 68% Win Rate 32% Profit-Loss Ratio 2.18 Alpha 0.011 Beta 0.019 Annual Standard Deviation 0.109 Annual Variance 0.012 Information Ratio -0.436 Tracking Error 0.174 Treynor Ratio 0.629 Total Fees $26540.19

from QuantConnect.Data.UniverseSelection import *
from QuantConnect.Data.Custom.USTreasury import *

import numpy as np
import statsmodels.api as sm
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge, LinearRegression
from sklearn.model_selection import cross_val_score, GridSearchCV

# Import our custom functions
from RegressionFunction import FitRegressionModel

class VerticalParticleShield(QCAlgorithm):
    """Weekly treasury-ETF signal built from a PCA + regression forecast.

    Every Monday, five minutes after the TLT market open, the algorithm pulls
    the last 100 daily US Treasury yield curve observations, converts them to
    percentage changes, and asks ``FitRegressionModel`` for a one-step-ahead
    forecast.  The sign of that forecast decides whether the long or the
    inverse treasury ETFs of ``LiquidETFUniverse`` receive an ``Up`` insight;
    the other group receives a ``Flat`` insight.
    """

    def Initialize(self):
        """Configure dates, cash, framework models, universe, data and the weekly schedule."""
        self.SetStartDate(2018, 1, 1)  # Set Start Date
        self.SetCash(1000000)  # Set Strategy Cash

        self.SetBrokerageModel(AlphaStreamsBrokerageModel())

        self.SetBenchmark('SPY')

        self.SetExecution(ImmediateExecutionModel())

        self.SetPortfolioConstruction(EqualWeightingPortfolioConstructionModel())

        self.UniverseSettings.Resolution = Resolution.Minute
        self.SetUniverseSelection(LiquidETFUniverse())

        # TLT is added so its market hours can drive the scheduled event below
        self.AddEquity('TLT')
        self.Schedule.On(self.DateRules.Every(DayOfWeek.Monday), self.TimeRules.AfterMarketOpen('TLT', 5), self.RunRegression)

        # Symbol of the custom yield curve data; used as the history key in RunRegression
        self.yieldCurve = self.AddData(USTreasuryYieldCurveRate, "USTYCR", Resolution.Daily).Symbol

    def RunRegression(self):
        """Forecast the next yield-curve change and emit treasury ETF insights.

        Returns without emitting anything when the history request yields no
        data or too few rows to build a lagged training set.
        """
        # Get history
        history = self.History(self.yieldCurve, 100, Resolution.Daily)
        if history.empty:
            # Indexing an empty frame by symbol would raise and stop the algorithm
            self.Log("RunRegression: no yield curve history available, skipping")
            return

        # Daily percentage changes of each yield curve field.  A change from a
        # zero yield is +/-inf, which PCA cannot digest, so treat it as missing
        # before filling gaps forward, then backward, then with zero.
        bonds = (
            history.loc[self.yieldCurve]
            .pct_change()
            .replace([np.inf, -np.inf], np.nan)
            .ffill()
            .bfill()
            .fillna(0)
        )

        # Need at least two training rows (one lagged pair) plus one testing row
        if len(bonds) < 3:
            self.Log("RunRegression: not enough yield curve history, skipping")
            return

        #### Prepare data set -- training set is everything but the last row,
        #### testing set is the most recent row only
        training = bonds.iloc[:-1].copy()
        testing = bonds.iloc[-1:].copy()

        # Find number of components to explain > 95% of variance of the yield changes
        pca = PCA(n_components=0.95)

        # Fit the PCA model to our training data
        pca.fit(training)

        # Initialize the regression model selected in the research notebook
        model = RandomForestRegressor(random_state=0, n_estimators = 100)

        # Fit the regression model and return predictions
        results = FitRegressionModel(self, pca, model, training, testing)

        # Decide direction from the sign of the prediction.
        # NOTE(review): `results` has a single 'Predicted' column indexed by
        # yield curve field, so mean(axis=1).values[0] is the prediction for
        # the FIRST field only -- confirm an average over all fields was not
        # intended.
        # NOTE(review): the inputs are yield changes; a predicted rise in
        # yields currently selects the long treasury ETFs -- confirm the
        # direction is intended.
        insights = []
        if results.mean(axis = 1).values[0] > 0:
            insights += [Insight.Price(symbol, timedelta(days = 7), InsightDirection.Up) for symbol in LiquidETFUniverse.Treasuries.Long]
            insights += [Insight.Price(symbol, timedelta(1), InsightDirection.Flat) for symbol in LiquidETFUniverse.Treasuries.Inverse]
        else:
            insights += [Insight.Price(symbol, timedelta(1), InsightDirection.Flat) for symbol in LiquidETFUniverse.Treasuries.Long]
            insights += [Insight.Price(symbol, timedelta(days = 7), InsightDirection.Up) for symbol in LiquidETFUniverse.Treasuries.Inverse]

        # Emit Insights
        self.EmitInsights(insights)

import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge, LinearRegression
from sklearn.model_selection import cross_val_score, GridSearchCV

def FitRegressionModel(algorithm, pca, model, training, testing, alpha = False):
    """Fit one lag-1 regression per principal component and forecast the next row.

    Each principal-component series of ``training`` is regressed on its own
    previous value (with an intercept).  The fitted per-component estimators
    are then applied to the most recent row of ``testing`` and the predicted
    component values are mapped back to the original feature space.

    Args:
        algorithm: Object exposing ``Log(str)``; receives the per-component
            R2 and the training sum of squared errors.
        pca: Fitted decomposition exposing ``n_components_``, ``transform``
            and ``inverse_transform``.
        model: Regressor template exposing ``fit``, ``predict`` and ``score``.
            It is copied for every component, so the object passed in is not
            fitted by this function.
        training: 2-D data (rows are consecutive observations) used for fitting.
        testing: DataFrame whose last row is the observation to forecast
            from; its ``columns`` become the index of the result.
        alpha: When true, run a grid search over ``alpha`` in 0..9 for every
            component and use the best refitted estimator.  Only meaningful
            for models that have an ``alpha`` hyperparameter (e.g. Ridge).

    Returns:
        DataFrame with a single ``'Predicted'`` column indexed by
        ``testing.columns``.
    """
    # Local import keeps this block self-contained; `copy` is standard library.
    import copy

    estimators = []
    y_predicted = []
    y_actual = []

    # Project the data onto the components
    x_projected = pca.transform(training)

    # Iterate over all principal components:
    for i in range(pca.n_components_):

        # Here, we lag X compared to Y. X will be one time period behind Y
        lagged = x_projected[:-1, i]
        Y = x_projected[1:, i]
        y_actual.append(Y)

        # Always prepend the intercept column explicitly so the design matrix
        # has exactly two columns, even when the lagged series is constant.
        X = np.column_stack((np.ones(len(lagged)), lagged))

        if alpha:
            # Tune the regularisation strength and keep the best estimator,
            # which GridSearchCV has already refitted on all of (X, Y).
            param_grid = {"alpha": np.arange(10)}
            grid_search = GridSearchCV(model, param_grid)
            grid_search.fit(X, Y)
            est = grid_search.best_estimator_
        else:
            # Each component needs its own estimator: refitting one shared
            # object would leave only the last component's fit available.
            est = copy.deepcopy(model)
            est.fit(X, Y)

        estimators.append(est)
        y_predicted.append(est.predict(X))
        algorithm.Log("Estimator {}: R2 = {:.3f}\n".format(i, est.score(X, Y)))

    y_predicted = np.array(y_predicted).transpose()
    y_actual = np.array(y_actual).transpose()

    # Transform back into original space
    y_actual_original_space = pca.inverse_transform(y_actual)
    y_predicted_original_space = pca.inverse_transform(y_predicted)

    # Compute sum of squared error:
    train_sse = np.sum((y_predicted_original_space - y_actual_original_space) ** 2)
    algorithm.Log(f'Sum of squared error: {train_sse}\n')

    # Transform testing data and predict each component with ITS OWN estimator
    testing_proj = pca.transform(testing)
    testing_prediction = []
    for i, est in enumerate(estimators):
        # One data row: [intercept, latest component value]
        row = np.array([[1.0, testing_proj[-1, i]]])
        testing_prediction.append(est.predict(row)[0])

    # Project predictions back to original space (as a single 2-D sample)
    component_row = np.array(testing_prediction).reshape(1, -1)
    predictions = pca.inverse_transform(component_row)[0]
    actual_pred = pd.DataFrame({'Predicted': predictions}, index = testing.columns)
    return actual_pred