| Overall Statistics | |
| --- | --- |
| Total Orders | 949 |
| Average Win | 0.39% |
| Average Loss | -0.30% |
| Compounding Annual Return | 16.892% |
| Drawdown | 10.900% |
| Expectancy | 0.116 |
| Start Equity | 100000 |
| End Equity | 149733.49 |
| Net Profit | 49.733% |
| Sharpe Ratio | 0.615 |
| Sortino Ratio | 0.815 |
| Probabilistic Sharpe Ratio | 44.635% |
| Loss Rate | 51% |
| Win Rate | 49% |
| Profit-Loss Ratio | 1.28 |
| Alpha | 0.052 |
| Beta | 0.289 |
| Annual Standard Deviation | 0.124 |
| Annual Variance | 0.015 |
| Information Ratio | -0.043 |
| Tracking Error | 0.154 |
| Treynor Ratio | 0.263 |
| Total Fees | $1303.48 |
| Estimated Strategy Capacity | $9000.00 |
| Lowest Capacity Asset | AWX RBSIMWGA33VP |
| Portfolio Turnover | 1.01% |
# region imports
from AlgorithmImports import *
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from xgboost import XGBClassifier
# endregion


class XGBoostClassifierLongShortAlgorithm(QCAlgorithm):

    def Initialize(self):
        # Set algorithm start and end dates
        self.SetStartDate(2022, 5, 1)
        self.SetEndDate(2024, 12, 1)
        self.SetCash(100000)  # Starting cash balance

        # Set universe resolution and add custom universe functions
        self.UniverseSettings.Resolution = Resolution.Daily
        self.AddUniverse(self.CoarseSelectionFunction, self.FineSelectionFunction)

        # Configuration parameters
        self.num_stocks = 500    # Select the top 500 stocks by market cap
        self.num_groups = 5      # Number of return-based groups for classification
        self.current_month = -1  # Track the current month for universe updates
        self.model = None        # Machine learning model placeholder
        self.last_month_features = pd.DataFrame()  # Storage for the previous month's factors
        self.symbols = []        # List of selected symbols for trading
        self.initial_portfolio_value = self.Portfolio.TotalPortfolioValue
        self.stop_loss_threshold = -0.07  # Stop loss at -7% unrealized losses
        self.SetWarmup(30)       # Warm-up period of 30 days to gather historical data
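
    # Monthly workflow: CoarseSelectionFunction keeps the largest stocks with
    # fundamental data; FineSelectionFunction computes factor features, trains an
    # XGBoost classifier on last month's features against the returns realized
    # since, and returns the names predicted in the top and bottom quintiles;
    # OnData then holds the predicted top quintile long and the bottom quintile short.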
    def CoarseSelectionFunction(self, coarse):
        # Only update the universe once per month
        if self.Time.month == self.current_month:
            return Universe.Unchanged
        self.current_month = self.Time.month

        # Filter stocks with fundamental data and sort by market cap (descending)
        sorted_by_market_cap = sorted([x for x in coarse if x.HasFundamentalData],
                                      key=lambda x: x.MarketCap, reverse=True)
        return [x.Symbol for x in sorted_by_market_cap[:self.num_stocks]]

    def FineSelectionFunction(self, fine):
        fine_list = list(fine)
        if not fine_list:
            return []

        # Initialize DataFrames for factor data and returns
        current_month_features = pd.DataFrame()
        current_month_returns = pd.DataFrame()

        for stock in fine_list:
            try:
                symbol = str(stock.Symbol)

                # Get 20 days of historical data for factor calculations
                history = self.History(stock.Symbol, 20, Resolution.Daily)
                if history.empty or 'volume' not in history.columns:
                    continue  # Skip if history is empty or 'volume' is missing

                # Calculate volatility (annualized standard deviation of daily returns)
                daily_returns = history['close'].pct_change().dropna()
                volatility = daily_returns.std() * np.sqrt(252)
                if np.isnan(volatility) or np.isinf(volatility):
                    continue

                # Calculate momentum features (5-day and 20-day simple moving averages)
                sma_20 = history['close'].mean()
                sma_5 = history['close'].iloc[-5:].mean()

                # Calculate turnover rates (volume relative to shares outstanding)
                shares_outstanding = stock.CompanyProfile.SharesOutstanding
                if shares_outstanding is None or shares_outstanding <= 0:
                    continue
                turnover_rates = history['volume'] / shares_outstanding
                turnover_rate_20 = turnover_rates.mean()
                turnover_rate_5 = turnover_rates.iloc[-5:].mean()

                # Calculate value (inverse of P/E ratio)
                pe_ratio = stock.ValuationRatios.PERatio
                value = 1 / pe_ratio if 0 < pe_ratio < 100 else None
                if value is None:
                    continue

                # Calculate size (logarithm of market cap)
                market_cap = stock.MarketCap
                size = np.log(market_cap) if market_cap > 0 else None
                if size is None:
                    continue

                # Extract quality (return on equity)
                quality = stock.OperationRatios.ROE.Value
                if np.isnan(quality) or np.isinf(quality):
                    continue

                # Add calculated factors to the features DataFrame
                current_month_features.loc[symbol, 'Sma_20'] = sma_20
                current_month_features.loc[symbol, 'Sma_5'] = sma_5
                current_month_features.loc[symbol, 'Value'] = value
                current_month_features.loc[symbol, 'Size'] = size
                current_month_features.loc[symbol, 'Quality'] = quality
                current_month_features.loc[symbol, 'Volatility'] = volatility
                current_month_features.loc[symbol, 'turnover_rate_20'] = turnover_rate_20
                current_month_features.loc[symbol, 'turnover_rate_5'] = turnover_rate_5

                # Calculate the 20-day log return for the stock
                first_price = history['close'].iloc[0]
                last_price = history['close'].iloc[-1]
                log_return = np.log(last_price / first_price)
                current_month_returns.loc[symbol, 'Returns'] = log_return
            except Exception as e:
                self.Log(f"Error processing {stock.Symbol}: {str(e)}")
                continue
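
        # Walk-forward training setup: the features computed at last month's selection
        # (stored in self.last_month_features) are paired with the returns realized
        # over the current 20-day window, so the classifier learns which factor
        # profiles preceded the strongest and weakest subsequent returns.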
        # If no previous month's data is available, store the current month's data and return
        if self.last_month_features.empty:
            self.last_month_features = current_month_features
            return []

        try:
            # Align the previous month's factors with the current month's returns
            X_train = self.last_month_features
            y_train = current_month_returns
            common_symbols = X_train.index.intersection(y_train.index)
            X_train = X_train.loc[common_symbols]
            y_train = y_train.loc[common_symbols]

            # Standardize features and encode returns into quantile-based classes
            scaler = StandardScaler()
            X_train_scaled = scaler.fit_transform(X_train.fillna(X_train.median()))
            y_classes = pd.qcut(y_train['Returns'], q=self.num_groups, labels=False)

            # Train the XGBoost classifier
            self.model = XGBClassifier(n_estimators=50, learning_rate=0.1, max_depth=5, random_state=42)
            self.model.fit(X_train_scaled, y_classes)

            # Predict groups for the current month's features
            predictions = self.PredictGroups(current_month_features)
            highest_group = predictions[predictions['predicted_group'] == self.num_groups - 1].index
            lowest_group = predictions[predictions['predicted_group'] == 0].index

            # Select stocks from the highest and lowest groups
            selected_symbols = []
            for symbol_str in list(highest_group) + list(lowest_group):
                for stock in fine_list:
                    if str(stock.Symbol) == symbol_str:
                        selected_symbols.append(stock.Symbol)
                        break

            self.last_month_features = current_month_features  # Update for next month
            self.symbols = selected_symbols
            return self.symbols
        except Exception as e:
            self.Log(f"Error in model training: {str(e)}")
            return []
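
    # Note: PredictGroups fits a fresh StandardScaler on the prediction cross-section
    # instead of reusing the scaler fitted during training, so the model is given
    # cross-sectional z-scores in both cases rather than features scaled by the
    # training statistics.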
    def PredictGroups(self, features):
        # Predict return groups for the given features using the trained XGBoost model
        features = features.fillna(features.mean())
        scaler = StandardScaler()
        features_scaled = scaler.fit_transform(features)
        class_probs = self.model.predict_proba(features_scaled)
        predicted_classes = np.argmax(class_probs, axis=1)
        return pd.DataFrame({
            'predicted_group': predicted_classes,
            'confidence': np.max(class_probs, axis=1)
        }, index=features.index)
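
    # OnData rebalances into the most recent selections: roughly 50% of equity is
    # spread equally across the predicted top-quintile names (long) and 50% across
    # the predicted bottom-quintile names (short), with a portfolio-level stop loss
    # that liquidates everything once unrealized losses reach 7% of starting equity.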
    def OnData(self, data):
        # Skip execution during the warm-up period
        if self.IsWarmingUp:
            return

        # Trigger the stop loss if portfolio losses exceed the threshold
        if self.Portfolio.TotalUnrealizedProfit / self.initial_portfolio_value <= self.stop_loss_threshold:
            self.Log("Stop loss triggered. Liquidating all positions.")
            self.Liquidate()
            return

        # If no symbols are selected, skip trading
        if not self.symbols:
            return

        # Separate selected symbols into long and short positions
        valid_symbols = [symbol for symbol in self.symbols if data.ContainsKey(symbol)]
        mid_point = len(valid_symbols) // 2
        long_symbols = valid_symbols[:mid_point]
        short_symbols = valid_symbols[mid_point:]

        # Set equal weights for long and short positions
        long_weight = 0.5 / len(long_symbols) if long_symbols else 0
        short_weight = -0.5 / len(short_symbols) if short_symbols else 0

        # Place trades
        for symbol in long_symbols:
            self.SetHoldings(symbol, long_weight)
        for symbol in short_symbols:
            self.SetHoldings(symbol, short_weight)

        # Clear selected symbols so positions are only re-entered after the next universe selection
        self.symbols = []
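
For readers who want to inspect the modeling step outside of LEAN, here is a minimal, self-contained sketch of the same pipeline: quintile labels from pd.qcut, standardized features, an XGBClassifier, and an argmax over predict_proba. The data and names (factors, fwd_returns, new_factors) are synthetic and purely illustrative; they are not part of the algorithm above.

# Standalone sketch (synthetic data, not part of the algorithm): illustrates the
# qcut-label + XGBClassifier steps used in FineSelectionFunction and PredictGroups.
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier

rng = np.random.default_rng(42)
n_stocks, n_features, n_groups = 200, 8, 5

# Synthetic stand-ins for last month's factor table and the returns realized since
factors = pd.DataFrame(rng.normal(size=(n_stocks, n_features)),
                       columns=[f"factor_{i}" for i in range(n_features)])
fwd_returns = factors["factor_0"] * 0.02 + rng.normal(scale=0.05, size=n_stocks)

# Encode returns into quintile labels 0..4 (0 = weakest, 4 = strongest)
labels = pd.qcut(fwd_returns, q=n_groups, labels=False)

# Standardize features and train the classifier, as the algorithm does each month
X = StandardScaler().fit_transform(factors.fillna(factors.median()))
model = XGBClassifier(n_estimators=50, learning_rate=0.1, max_depth=5, random_state=42)
model.fit(X, labels)

# Predict group membership for a new cross-section and pick longs/shorts
new_factors = pd.DataFrame(rng.normal(size=(n_stocks, n_features)), columns=factors.columns)
probs = model.predict_proba(StandardScaler().fit_transform(new_factors))
predicted_group = probs.argmax(axis=1)
longs = np.where(predicted_group == n_groups - 1)[0]   # predicted top quintile
shorts = np.where(predicted_group == 0)[0]             # predicted bottom quintile
print(len(longs), "longs,", len(shorts), "shorts")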