Overall Statistics
Total Orders
949
Average Win
0.39%
Average Loss
-0.30%
Compounding Annual Return
16.892%
Drawdown
10.900%
Expectancy
0.116
Start Equity
100000
End Equity
149733.49
Net Profit
49.733%
Sharpe Ratio
0.615
Sortino Ratio
0.815
Probabilistic Sharpe Ratio
44.635%
Loss Rate
51%
Win Rate
49%
Profit-Loss Ratio
1.28
Alpha
0.052
Beta
0.289
Annual Standard Deviation
0.124
Annual Variance
0.015
Information Ratio
-0.043
Tracking Error
0.154
Treynor Ratio
0.263
Total Fees
$1303.48
Estimated Strategy Capacity
$9000.00
Lowest Capacity Asset
AWX RBSIMWGA33VP
Portfolio Turnover
1.01%
# region imports
from AlgorithmImports import *
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from xgboost import XGBClassifier


class XGBoostClassifierLongShortAlgorithm(QCAlgorithm):
    """Monthly long/short equity algorithm driven by an XGBoost classifier.

    Once per calendar month the universe selection builds a table of
    per-stock factors (moving averages, value, size, quality, volatility and
    turnover) from 20 daily bars plus fundamental data.  The previous
    month's factor table is paired with this month's 20-bar log returns,
    bucketed into ``num_groups`` quantile classes, to train a classifier.
    The classifier is then applied to the current factor table: stocks
    predicted to fall in the top class are bought and stocks predicted to
    fall in the bottom class are shorted, each side equally weighted to
    50% of the portfolio.
    """

    # Column order of the per-stock factor table built each month.
    FEATURE_COLUMNS = (
        'Sma_20', 'Sma_5', 'Value', 'Size', 'Quality',
        'Volatility', 'turnover_rate_20', 'turnover_rate_5',
    )

    def Initialize(self):
        """Configure the backtest window, cash, universe and strategy state."""
        # Set algorithm start and end dates
        self.SetStartDate(2022, 5, 1)
        self.SetEndDate(2024, 12, 1)
        self.SetCash(100000)  # Starting cash balance

        # Set universe resolution and add custom universe functions
        self.UniverseSettings.Resolution = Resolution.Daily
        self.AddUniverse(self.CoarseSelectionFunction, self.FineSelectionFunction)

        # Configuration parameters
        self.num_stocks = 500  # Number of top-ranked stocks passed to fine selection
        self.num_groups = 5  # Number of return-based groups for classification
        self.current_month = -1  # Month of the last universe refresh
        self.model = None  # Trained classifier (None until the first fit)
        self.last_month_features = pd.DataFrame()  # Previous month's factor table
        self.symbols = []  # All symbols selected for trading (longs followed by shorts)
        self.long_symbols = []  # Symbols predicted to be in the highest return group
        self.short_symbols = []  # Symbols predicted to be in the lowest return group
        self.initial_portfolio_value = self.Portfolio.TotalPortfolioValue
        self.stop_loss_threshold = -0.07  # Stop loss at -7% unrealized losses
        self.SetWarmup(30)  # Warmup period of 30 days

    def CoarseSelectionFunction(self, coarse):
        """Return the ``num_stocks`` largest stocks, refreshed once per month.

        Returns ``Universe.Unchanged`` when called again within the month of
        the last refresh.
        """
        # Only update the universe once per month
        if self.Time.month == self.current_month:
            return Universe.Unchanged
        self.current_month = self.Time.month

        candidates = [x for x in coarse if x.HasFundamentalData]

        # Rank by market cap.  The attribute is chosen once for the whole list
        # so every candidate is compared on the same measure; dollar volume is
        # the fallback if the coarse objects do not expose MarketCap.
        if candidates and hasattr(candidates[0], 'MarketCap'):
            rank_attr = 'MarketCap'
        else:
            rank_attr = 'DollarVolume'
        ranked = sorted(candidates, key=lambda x: getattr(x, rank_attr), reverse=True)
        return [x.Symbol for x in ranked[:self.num_stocks]]

    def _ComputeFactors(self, stock):
        """Compute the factor row and 20-bar log return for one stock.

        Returns a ``(factors, log_return)`` tuple, where ``factors`` is a dict
        keyed by ``FEATURE_COLUMNS``, or ``None`` when the stock must be
        skipped because a required input is missing or out of range.
        """
        # Get 20 days of historical data for factor calculations
        history = self.History(stock.Symbol, 20, Resolution.Daily)
        if history.empty or 'volume' not in history.columns:
            return None

        closes = history['close']

        # Volatility: annualized standard deviation of daily returns
        daily_returns = closes.pct_change().dropna()
        volatility = daily_returns.std() * np.sqrt(252)
        if np.isnan(volatility) or np.isinf(volatility):
            return None

        # Turnover: daily volume relative to shares outstanding
        shares_outstanding = stock.CompanyProfile.SharesOutstanding
        if shares_outstanding is None or shares_outstanding <= 0:
            return None
        turnover_rates = history['volume'] / shares_outstanding

        # Value: inverse of P/E, restricted to 0 < P/E < 100 (NaN fails the test)
        pe_ratio = stock.ValuationRatios.PERatio
        if not 0 < pe_ratio < 100:
            return None

        # Size: logarithm of market cap
        market_cap = stock.MarketCap
        if not market_cap > 0:
            return None

        # Quality: return on equity
        quality = stock.OperationRatios.ROE.Value
        if np.isnan(quality) or np.isinf(quality):
            return None

        # Log return over the history window; a non-finite value (for example
        # from a zero first price) would break the quantile bucketing later.
        log_return = np.log(closes.iloc[-1] / closes.iloc[0])
        if not np.isfinite(log_return):
            return None

        factors = {
            'Sma_20': closes.mean(),
            'Sma_5': closes.iloc[-5:].mean(),
            'Value': 1 / pe_ratio,
            'Size': np.log(market_cap),
            'Quality': quality,
            'Volatility': volatility,
            'turnover_rate_20': turnover_rates.mean(),
            'turnover_rate_5': turnover_rates.iloc[-5:].mean(),
        }
        return factors, log_return

    def FineSelectionFunction(self, fine):
        """Build this month's factors, retrain the model and pick the universe.

        Returns the symbols predicted to be in the highest and lowest return
        groups (longs first, then shorts).  Returns an empty list when there
        is no fine data, no previous month's factors to train on, or when
        training/prediction raises.
        """
        fine_list = list(fine)
        if not fine_list:
            return []

        feature_rows = {}
        return_rows = {}
        for stock in fine_list:
            # Resolved outside the try so the error log always names this stock.
            symbol = str(stock.Symbol)
            try:
                result = self._ComputeFactors(stock)
            except Exception as e:
                self.Log(f"Error processing {symbol}: {str(e)}")
                continue
            if result is None:
                continue
            feature_rows[symbol], return_rows[symbol] = result

        # Build each table once instead of growing it cell by cell.
        current_month_features = pd.DataFrame(
            list(feature_rows.values()),
            index=list(feature_rows.keys()),
            columns=list(self.FEATURE_COLUMNS),
        )
        current_month_returns = pd.DataFrame(
            {'Returns': pd.Series(return_rows, dtype=float)}
        )

        # Roll the stored factors forward unconditionally so that a failed
        # training month cannot leave a stale snapshot that would be paired
        # with the wrong month's returns next time.
        previous_features = self.last_month_features
        self.last_month_features = current_month_features

        # Nothing to train on until a previous month's factors exist
        if previous_features.empty:
            return []

        try:
            # Align the previous month's factors with the current month's returns
            common_symbols = previous_features.index.intersection(current_month_returns.index)
            X_train = previous_features.loc[common_symbols]
            y_train = current_month_returns.loc[common_symbols]

            # Standardize features and encode returns into quantile-based classes
            scaler = StandardScaler()
            X_train_scaled = scaler.fit_transform(X_train.fillna(X_train.median()))
            y_classes = pd.qcut(y_train['Returns'], q=self.num_groups, labels=False)

            # Train the XGBoost classifier
            self.model = XGBClassifier(n_estimators=50, learning_rate=0.1, max_depth=5, random_state=42)
            self.model.fit(X_train_scaled, y_classes)

            # Predict groups for the current month's features
            predictions = self.PredictGroups(current_month_features)
            highest_group = predictions[predictions['predicted_group'] == self.num_groups - 1].index
            lowest_group = predictions[predictions['predicted_group'] == 0].index

            # Map symbol strings back to Symbol objects; iterating in reverse
            # keeps the first occurrence if two stocks share a string.
            symbol_by_name = {str(stock.Symbol): stock.Symbol for stock in reversed(fine_list)}

            # Keep the two sides separate so OnData never has to guess which
            # side a symbol belongs to.
            self.long_symbols = [symbol_by_name[name] for name in highest_group if name in symbol_by_name]
            self.short_symbols = [symbol_by_name[name] for name in lowest_group if name in symbol_by_name]
            self.symbols = self.long_symbols + self.short_symbols
            return self.symbols

        except Exception as e:
            self.Log(f"Error in model training: {str(e)}")
            return []

    def PredictGroups(self, features):
        """Predict the return group of every row in ``features``.

        Missing values are filled with column means and the features are
        standardized with a scaler fitted on ``features`` itself.  Returns a
        DataFrame indexed like ``features`` with the most probable class
        (``predicted_group``) and its probability (``confidence``).
        """
        features = features.fillna(features.mean())
        scaler = StandardScaler()
        features_scaled = scaler.fit_transform(features)
        class_probs = self.model.predict_proba(features_scaled)
        predicted_classes = np.argmax(class_probs, axis=1)
        return pd.DataFrame({
            'predicted_group': predicted_classes,
            'confidence': np.max(class_probs, axis=1)
        }, index=features.index)

    def OnData(self, data):
        """Apply the stop loss, then trade the most recent selection once."""
        # Skip execution during the warmup period
        if self.IsWarmingUp:
            return

        # Trigger stop-loss if unrealized losses, measured against the
        # portfolio value recorded in Initialize, exceed the threshold
        if self.Portfolio.TotalUnrealizedProfit / self.initial_portfolio_value <= self.stop_loss_threshold:
            self.Log("Stop loss triggered. Liquidating all positions.")
            self.Liquidate()
            return

        # If no symbols are selected, skip trading
        if not self.symbols:
            return

        # Only trade symbols that have data in this slice
        long_symbols = [symbol for symbol in self.long_symbols if data.ContainsKey(symbol)]
        short_symbols = [symbol for symbol in self.short_symbols if data.ContainsKey(symbol)]

        # Set equal weights within each side: +50% long, -50% short
        long_weight = 0.5 / len(long_symbols) if long_symbols else 0
        short_weight = -0.5 / len(short_symbols) if short_symbols else 0

        # Place trades
        # NOTE(review): holdings in symbols that are not part of this selection
        # are not closed here; only the stop loss liquidates them.
        for symbol in long_symbols:
            self.SetHoldings(symbol, long_weight)
        for symbol in short_symbols:
            self.SetHoldings(symbol, short_weight)

        # Clear the selection so it is traded only once
        self.symbols = []
        self.long_symbols = []
        self.short_symbols = []