Overall Statistics
Total Orders
36
Average Win
0%
Average Loss
0.00%
Compounding Annual Return
0.000%
Drawdown
17.300%
Expectancy
-1
Start Equity
1000000.00
End Equity
999999.51
Net Profit
0.000%
Sharpe Ratio
0.118
Sortino Ratio
0.069
Probabilistic Sharpe Ratio
20.826%
Loss Rate
100%
Win Rate
0%
Profit-Loss Ratio
0
Alpha
0.044
Beta
0.163
Annual Standard Deviation
0.457
Annual Variance
0.209
Information Ratio
-0.017
Tracking Error
0.481
Treynor Ratio
0.331
Total Fees
$0.00
Estimated Strategy Capacity
$19000000.00
Lowest Capacity Asset
BTCUSD 2XR
Portfolio Turnover
2.25%
Drawdown Recovery
55
# region imports
from AlgorithmImports import *
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from datetime import timedelta
# endregion

class EnsembleMLTrader(QCAlgorithm):

    def initialize(self):
        """Configure the backtest: dates, cash, the BTCUSD subscription, strategy
        parameters, indicators, the feature list, warm-up and the daily schedule."""
        # Backtest window and starting capital.
        self.set_start_date(2020, 1, 1)
        self.set_end_date(2022, 1, 1)
        self.set_cash(100000)

        # Subscribe to daily BTCUSD data and keep the symbol for later lookups.
        btc_security = self.add_crypto("BTCUSD", Resolution.DAILY)
        self.btc = btc_security.symbol

        # Model training / prediction settings.
        self.training_period = 365       # bars of history requested per training run
        self.prediction_horizon = 10     # bars ahead used to build the up/down label
        self.return_lookbacks = [3, 5, 10, 21]
        self.retrain_frequency = 30      # days between retraining runs
        self.last_retrain_time = None

        # The three ensemble members; they stay None until train_models() fits them.
        self.model_xgb = self.model_rf = self.model_lr = None

        # Entry filter, exit distances and position sizing.
        self.confidence_threshold = 0.60   # minimum averaged "up" probability to enter
        self.stop_loss_multiplier = 2.0    # stop distance = ATR * this
        self.take_profit_multiplier = 3.0  # profit distance = stop distance * this
        self.risk_per_trade = 0.05         # fraction of portfolio value risked per entry
        self.max_allocation = 0.8          # cap on the target portfolio weight

        # Indicators plus the tickets of the currently resting exit orders.
        self.atr_indicator = self.atr(self.btc, 14)
        self.macd_indicator = self.macd(
            self.btc, 12, 26, 9, MovingAverageType.EXPONENTIAL, Resolution.DAILY
        )
        self.stop_loss_ticket = self.take_profit_ticket = None

        # Column names the models are trained on, in training order.
        return_features = [f'return_{n}d' for n in self.return_lookbacks]
        engineered_features = [
            'log_return_accel_3d', 'log_return_accel_10d', 'volume_zscore_21d',
            'volatility_14d_ann', 'zscore_3d',
        ]
        self.features = return_features + engineered_features

        # Warm up for the longest requirement. 42 matches the history length used
        # by get_latest_features(); 35 is presumably MACD slow + signal (26 + 9).
        self.set_warm_up(max(self.training_period, 42, 35))

        # Run the trading routine once per day, one minute after midnight.
        self.schedule.on(
            self.date_rules.every_day(self.btc),
            self.time_rules.at(0, 1),
            self.trade_logic,
        )

    def trade_logic(self):
        """
        Retrain the ensemble when due, then open or close the BTC position.

        Entry requires all of: the averaged "up" probability of the three models
        is at least ``self.confidence_threshold``, the MACD line is above its
        signal line, and there is no existing position. The target weight is
        derived from an ATR-based stop distance, scaled by the confidence and
        capped at ``self.max_allocation``.

        Exit happens when the averaged probability is 0.5 or lower while a
        position is held.
        """
        # Skip the warm-up phase: there is nothing to trade yet, and models
        # trained here would be stale by the time live bars arrive.
        # NOTE(review): LEAN is understood to reject orders during warm-up — confirm.
        if self.is_warming_up:
            return

        retrain_due = (
            self.last_retrain_time is None
            or (self.time - self.last_retrain_time).days >= self.retrain_frequency
        )
        if retrain_due:
            self.train_models()

        # Compare against None explicitly: estimator truthiness is not a reliable
        # "is trained" signal (ensemble estimators define __len__).
        models = (self.model_xgb, self.model_rf, self.model_lr)
        if any(model is None for model in models):
            return
        if not (self.atr_indicator.is_ready and self.macd_indicator.is_ready):
            return

        latest_features_df = self.get_latest_features()
        if latest_features_df is None or latest_features_df.isnull().values.any():
            self.log("Features for prediction contain NaN. Skipping.")
            return

        # Probability of class 1 ("price higher after the horizon") per model.
        up_probabilities = [
            model.predict_proba(latest_features_df)[0][1] for model in models
        ]
        avg_confidence = sum(up_probabilities) / len(up_probabilities)
        prediction = 1 if avg_confidence > 0.5 else 0

        is_invested = self.portfolio[self.btc].invested
        is_bullish_trend = (
            self.macd_indicator.current.value > self.macd_indicator.signal.current.value
        )

        if avg_confidence >= self.confidence_threshold and is_bullish_trend and not is_invested:
            atr_value = self.atr_indicator.current.value
            current_price = self.securities[self.btc].price
            portfolio_value = self.portfolio.total_portfolio_value

            # Validate every sizing input before announcing the entry; a zero ATR
            # or portfolio value would otherwise divide by zero below.
            if atr_value <= 0 or current_price <= 0 or portfolio_value <= 0:
                return

            self.log(f"Avg Confidence {avg_confidence:.2%} & MACD confirm bullish trend. Entering Long Position.")

            # Units such that hitting the stop loses `risk_per_trade` of the
            # portfolio, then scaled down by the ensemble confidence.
            stop_distance = atr_value * self.stop_loss_multiplier
            portfolio_risk = portfolio_value * self.risk_per_trade
            final_size_shares = (portfolio_risk / stop_distance) * avg_confidence

            position_value = final_size_shares * current_price
            allocation = min(self.max_allocation, position_value / portfolio_value)

            # on_order_event() looks for the "Entry" tag to attach exit orders.
            self.set_holdings(self.btc, allocation, tag="Entry")

        elif prediction == 0 and is_invested:
            self.log("Ensemble Prediction: DOWN. Exiting Position.")
            self.liquidate(self.btc)

    def train_models(self):
        """Fetch history, engineer features and fit all three ensemble members.

        The label for a bar is 1 when the close ``self.prediction_horizon`` bars
        later is higher than that bar's close, otherwise 0. Bars whose future
        close is not yet known get no label and are dropped, so the models never
        train on fabricated "down" labels at the end of the window.

        On success the three models and ``self.last_retrain_time`` are replaced
        together. On any early return the previous models (if any) stay in place.
        """
        self.log(f"Training ensemble models at {self.time}...")
        history = self.history(self.btc, self.training_period, Resolution.DAILY)
        if history.empty:
            self.log("Not enough history to train models yet.")
            return

        df = self._create_features(history)

        close_prices = history['close'].unstack(level=0)[self.btc]
        future_close = close_prices.shift(-self.prediction_horizon)

        # shift(-h) leaves NaN in the last h rows. Comparing against NaN is False,
        # which would silently label those rows 0, so mark them NaN instead and
        # let dropna() below remove them.
        label_known = future_close.notna() & close_prices.notna()
        df['target'] = np.where(
            label_known, (future_close > close_prices).astype(float), np.nan
        )

        df.dropna(inplace=True)
        if df.empty:
            self.log("DataFrame is empty after cleaning. Skipping training.")
            return

        X = df[self.features]
        y = df['target'].astype(int)

        # A classifier cannot be fitted on a single class.
        if y.nunique() < 2:
            self.log("Training labels contain a single class. Skipping training.")
            return

        # Fit into locals first so a failure cannot leave a half-updated ensemble.
        model_xgb = xgb.XGBClassifier(
            n_estimators=75, max_depth=3, learning_rate=0.1,
            random_state=42, use_label_encoder=False, eval_metric='logloss'
        )
        model_xgb.fit(X, y)

        model_rf = RandomForestClassifier(n_estimators=75, max_depth=5, random_state=42)
        model_rf.fit(X, y)

        model_lr = LogisticRegression(random_state=42, solver='liblinear')
        model_lr.fit(X, y)

        self.model_xgb, self.model_rf, self.model_lr = model_xgb, model_rf, model_lr
        self.last_retrain_time = self.time
        self.log("Ensemble model training complete.")

    def get_latest_features(self):
        """Return a one-row DataFrame holding the newest bar's model features.

        Requests the last 42 daily bars, runs them through ``_create_features``
        and keeps only the final row, restricted to the columns in
        ``self.features``. Returns None when the history request is empty.
        """
        recent_bars = self.history(self.btc, 42, Resolution.DAILY)
        if not recent_bars.empty:
            all_rows = self._create_features(recent_bars)
            # Only the most recent row is used for prediction.
            return all_rows.tail(1)[self.features]
        return None

    def _create_features(self, history_df: pd.DataFrame) -> pd.DataFrame:
        """Helper method to create all specified features from a raw QC history DataFrame."""
        df_unstacked = history_df.unstack(level=0)
        
        close = df_unstacked[('close', self.btc)]
        volume = df_unstacked[('volume', self.btc)]
        
        features = pd.DataFrame(index=close.index)

        # --- Return and Momentum Features ---
        for n in self.return_lookbacks:
            features[f'return_{n}d'] = (close / close.shift(n)) - 1
            
        log_return = np.log(close / close.shift(1))
        features['log_return_accel_3d'] = log_return.diff(3)
        features['log_return_accel_10d'] = log_return.diff(10)
        
        # --- Volume Features ---
        vol_mean_21 = volume.rolling(21, min_periods=21).mean()
        vol_std_21 = volume.rolling(21, min_periods=21).std()
        features['volume_zscore_21d'] = (volume - vol_mean_21) / vol_std_21.replace(0, np.nan)

        # --- NEW: Volatility and Mean Reversion Features ---
        
        # 1. Rolling 14-day Annualized Volatility
        rolling_std_14d = log_return.rolling(window=14).std()
        features['volatility_14d_ann'] = rolling_std_14d * np.sqrt(365)
        
        # 2. Mean Reversion z-Score (3-day)
        daily_returns = close.pct_change()
        mean_3d = daily_returns.rolling(window=3).mean()
        std_3d = daily_returns.rolling(window=3).std()
        features['zscore_3d'] = (daily_returns - mean_3d) / std_3d.replace(0, np.nan)

        return features

    def on_order_event(self, order_event):
        """React to fills: bracket a new long entry with exits, or clean up once flat.

        Events that are not complete fills are ignored. When an order tagged
        "Entry" fills and the BTC holding is long, any previous exit tickets are
        cancelled and a stop-market order plus a limit order are placed for the
        whole position, with distances based on the current ATR. For any other
        fill that leaves the holding flat, the remaining exit orders are cancelled.
        """
        if order_event.status != OrderStatus.FILLED:
            return

        filled_order = self.transactions.get_order_by_id(order_event.order_id)
        holding = self.portfolio[self.btc]
        tag = filled_order.tag

        entry_filled = tag is not None and "Entry" in tag and holding.is_long
        if not entry_filled:
            # A fill that closed the position (stop, target or liquidation):
            # remove whichever exit order is still resting.
            if not holding.invested:
                self.cancel_sl_tp()
            return

        entry_price = order_event.fill_price
        self.log(f"Entry order filled at ${entry_price:.2f}. Placing Stop Loss and Take Profit.")

        # Clear out any exit orders left from an earlier position first.
        self.cancel_sl_tp()

        stop_distance = self.atr_indicator.current.value * self.stop_loss_multiplier
        profit_distance = stop_distance * self.take_profit_multiplier

        # Negative quantity: both exits sell the entire long position.
        exit_quantity = -holding.absolute_quantity
        self.stop_loss_ticket = self.stop_market_order(
            self.btc, exit_quantity, entry_price - stop_distance
        )
        self.take_profit_ticket = self.limit_order(
            self.btc, exit_quantity, entry_price + profit_distance
        )
            
    def cancel_sl_tp(self):
        """Cancel any still-open Stop Loss / Take Profit order and forget both tickets.

        A ticket is cancelled when its status shows the order is still live:
        new, submitted, partially filled, or with an update pending. Tickets
        that are already closed (filled, cancelled, invalid) are left alone.
        Either way the stored reference is reset to None, so a stale ticket is
        never carried over to the next position.
        """
        # Partially filled and update-pending orders are still resting at the
        # broker; leaving them out would keep a live exit order after the
        # position is gone.
        open_statuses = (
            OrderStatus.NEW,
            OrderStatus.SUBMITTED,
            OrderStatus.PARTIALLY_FILLED,
            OrderStatus.UPDATE_SUBMITTED,
        )

        for ticket_attr in ("stop_loss_ticket", "take_profit_ticket"):
            ticket = getattr(self, ticket_attr)
            if ticket is None:
                continue
            if ticket.status in open_statuses:
                ticket.cancel()
            setattr(self, ticket_attr, None)

    def on_end_of_algorithm(self):
        """Liquidate on algorithm end and print feature importances for the main model."""
        if self.model_xgb:
            feature_importances = pd.Series(self.model_xgb.feature_importances_, index=self.features)
            feature_importances.sort_values(ascending=False, inplace=True)
            self.log("--- XGBoost Feature Importances ---")
            self.log(str(feature_importances))
            self.log("-----------------------------------")
        
        self.liquidate()