# Backtest
#
# Overall Statistics
#   Total Orders: 152
#   Average Win: 22.14%
#   Average Loss: -7.96%
#   Compounding Annual Return: 77.357%
#   Drawdown: 66.900%
#   Expectancy: 0.941
#   Start Equity: 10000
#   End Equity: 306228.66
#   Net Profit: 2962.287%
#   Sharpe Ratio: 1.292
#   Sortino Ratio: 1.296
#   Probabilistic Sharpe Ratio: 53.005%
#   Loss Rate: 49%
#   Win Rate: 51%
#   Profit-Loss Ratio: 2.78
#   Alpha: 0.582
#   Beta: 0.748
#   Annual Standard Deviation: 0.511
#   Annual Variance: 0.261
#   Information Ratio: 1.118
#   Tracking Error: 0.497
#   Treynor Ratio: 0.882
#   Total Fees: $555.59
#   Estimated Strategy Capacity: $390000000.00
#   Lowest Capacity Asset: MSTR RBGP9S2961YD
#   Portfolio Turnover: 7.01%
# random_forest_01 (ChatGPT)
from AlgorithmImports import *
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd

class MLTradingAlgorithm(QCAlgorithm):
    """Daily long/flat strategy on MSTR driven by a random-forest classifier.

    A rolling window of the most recent ``LOOKBACK`` daily bars is turned into
    technical-indicator features (SMA, RSI, MACD, historical volatility). The
    classifier is retrained on a weekly schedule to predict whether the next
    close will be higher than the current one; ``OnData`` goes fully long on
    a positive prediction and liquidates on a negative one.
    """

    # Number of daily bars kept in the rolling window and used for warm-up.
    LOOKBACK = 200
    # Minimum number of labeled rows required before a train/test split.
    MIN_TRAINING_ROWS = 50

    def Initialize(self):
        """Configure the backtest, the data subscription, the model and the
        weekly training schedule."""
        # Step 1: backtest period and starting capital.
        self.SetStartDate(2019, 1, 1)  # Start date
        self.SetEndDate(2024, 12, 31)  # End date
        self.SetCash(10000)  # Initial capital

        # Step 2: subscribe to daily MSTR bars.
        self.symbol = self.AddEquity("MSTR", Resolution.Daily).Symbol

        # Step 3: rolling window holding the last LOOKBACK trade bars.
        self.data = RollingWindow[TradeBar](self.LOOKBACK)

        # Step 4: warm up so the window is populated before trading starts.
        self.SetWarmUp(self.LOOKBACK)

        # Step 5: model state. The flag is set here so that other methods can
        # read it directly instead of probing with hasattr().
        self.model = RandomForestClassifier(n_estimators=100, random_state=42)
        self.is_model_trained = False
        self.training_count = 0  # Number of completed training runs

        # Step 6: retrain every Monday at 10:00.
        self.Schedule.On(self.DateRules.Every(DayOfWeek.Monday),
                         self.TimeRules.At(10, 0),
                         self.TrainModel)

    def OnData(self, data):
        """Record the newest bar and trade on the model's prediction.

        Args:
            data: The slice of market data delivered for the current time step.
        """
        # Step 7: make sure a bar for our symbol is present.
        if not data.ContainsKey(self.symbol):
            return  # Skip if no data

        trade_bar = data[self.symbol]
        if trade_bar is None:
            return

        # Step 8: store the bar (also during warm-up, to fill the window).
        self.data.Add(trade_bar)

        # Step 9: no predictions or orders while warming up or before the
        # window is full; orders submitted during warm-up are rejected.
        if self.IsWarmingUp or not self.data.IsReady:
            return

        if not self.is_model_trained:
            self.Debug("Model is not trained yet. Skipping prediction.")
            return

        # Step 10: build features; the last row describes the latest bar.
        df = self.GetFeatureDataFrame()
        if df is None or df.empty:
            return

        # Keep this as a one-row DataFrame (not a bare ndarray) so the column
        # names match those the model was fitted with.
        latest_features = df.iloc[[-1], :-1]

        try:
            prediction = self.model.predict(latest_features)[0]  # 1 = Buy, 0 = Sell
        except Exception as err:
            # Report the real cause instead of silently swallowing everything.
            self.Debug(f"Error: prediction failed: {err}")
            return

        # Step 11: long when an up-move is predicted, flat otherwise.
        holdings = self.Portfolio[self.symbol].Quantity

        if prediction == 1 and holdings <= 0:
            self.SetHoldings(self.symbol, 1)  # Buy full allocation
        elif prediction == 0 and holdings > 0:
            self.Liquidate(self.symbol)  # Sell position

    def TrainModel(self):
        """Fit the classifier on the current window and log its accuracy.

        The labeled rows are split chronologically: the first 80% are used for
        fitting, the last 20% for an out-of-sample accuracy estimate.
        """
        # Step 12: only rows whose next-day outcome is actually known.
        df = self.GetFeatureDataFrame(labeled_only=True)
        if df is None or len(df) < self.MIN_TRAINING_ROWS:
            self.Debug("Insufficient data for training.")
            return

        # Step 13: features are every column but the last; the last is Target.
        X = df.iloc[:, :-1]
        y = df.iloc[:, -1]

        # No shuffling, to preserve time-series order (random_state is unused
        # when shuffle=False, so it is omitted).
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, shuffle=False
        )

        # Step 14: fit on the training portion.
        self.model.fit(X_train, y_train)
        self.is_model_trained = True

        # Step 15: in-sample and out-of-sample accuracy.
        train_accuracy = accuracy_score(y_train, self.model.predict(X_train))
        test_accuracy = accuracy_score(y_test, self.model.predict(X_test))

        self.training_count += 1
        self.Debug(f"Training #{self.training_count}: Training Accuracy: {train_accuracy:.2%}, Testing Accuracy: {test_accuracy:.2%}")

    def GetFeatureDataFrame(self, labeled_only=False):
        """Build the feature/target table from the rolling window.

        Args:
            labeled_only: When True, the most recent bar is dropped because
                its next-day close (and therefore its Target) is not known
                yet. Use this for training. When False (default) the most
                recent bar is kept as the last row so its features can be
                used for prediction; its Target value is a placeholder 0.

        Returns:
            A DataFrame in chronological order (oldest row first) with the
            columns Close, SMA_10, SMA_50, RSI, MACD, MACD_Signal, HV_30 and
            Target (always the last column), or None if the window holds
            fewer than LOOKBACK bars.
        """
        # Step 16: require a full window.
        if self.data.Count < self.LOOKBACK:
            return None  # Not enough data

        # RollingWindow enumerates newest-first (index 0 is the latest bar).
        # Reverse it so rolling/EWM indicators run forward in time and
        # shift(-1) really refers to the following day.
        close_prices = [float(bar.Close) for bar in self.data][::-1]
        df = pd.DataFrame(close_prices, columns=["Close"])

        # Step 17: feature engineering.
        df["SMA_10"] = df["Close"].rolling(window=10).mean()
        df["SMA_50"] = df["Close"].rolling(window=50).mean()

        # RSI (14-period, simple moving averages of gains and losses).
        delta = df["Close"].diff()
        gain = (delta.where(delta > 0, 0)).rolling(14).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(14).mean()
        rs = gain / loss
        df["RSI"] = 100 - (100 / (1 + rs))

        # MACD (12/26 EMA difference) and its 9-period signal line.
        df["MACD"] = df["Close"].ewm(span=12, adjust=False).mean() - df["Close"].ewm(span=26, adjust=False).mean()
        df["MACD_Signal"] = df["MACD"].ewm(span=9, adjust=False).mean()

        # Annualised 30-day historical volatility.
        df["HV_30"] = df["Close"].pct_change().rolling(window=30).std() * np.sqrt(252)

        # Step 18: target is 1 when the next close is higher, else 0.
        # For the last row the next close is NaN, the comparison is False and
        # the row would silently get Target 0 - a label that does not exist.
        df["Target"] = (df["Close"].shift(-1) > df["Close"]).astype(int)
        if labeled_only:
            df = df.iloc[:-1]

        # Step 19: drop rows where indicators are still warming up.
        return df.dropna()