Backtest

Overall Statistics
Total Orders 521 Average Win 1.68% Average Loss -1.84% Compounding Annual Return 9.914% Drawdown 53.300% Expectancy 0.113 Start Equity 100000 End Equity 160367.92 Net Profit 60.368% Sharpe Ratio 0.263 Sortino Ratio 0.291 Probabilistic Sharpe Ratio 5.234% Loss Rate 42% Win Rate 58% Profit-Loss Ratio 0.92 Alpha -0.011 Beta 1.182 Annual Standard Deviation 0.251 Annual Variance 0.063 Information Ratio 0.003 Tracking Error 0.19 Treynor Ratio 0.056 Total Fees $687.90 Estimated Strategy Capacity $120000000.00 Lowest Capacity Asset GDX TIU4L3M37HPH Portfolio Turnover 5.52% Drawdown Recovery 1485

#region imports
from AlgorithmImports import *

import statsmodels.api as sm
from sklearn.decomposition import PCA
#endregion


class PcaStatArbitrageAlgorithm(QCAlgorithm):
    '''
    Monthly PCA-based mean-reversion strategy on liquid US Equities.

    On each universe selection the most liquid stocks are regressed on the
    leading principal components of their centered log prices. Stocks whose
    latest residual z-score is below `_zscore_threshold` are kept, and the
    rebalance takes positions opposite in sign to their (negative) weights.
    '''

    def initialize(self):
        '''
        Set the backtest window, cash, strategy parameters, universe and warm-up.
        '''
        self.set_start_date(self.end_date - timedelta(5*365))
        self.set_cash(100_000)
        # Define some parameters.
        self._lookback = 60              # Length(days) of historical data
        self._components = 3             # Number of principal components in PCA
        self._universe_size = 20         # Number of the equities pool
        self._zscore_threshold = -1      # Number of std to trigger a trade
        # Target weight of each asset, indexed by symbol. Start with an empty
        # Series so `_rebalance` is safe before the first universe selection.
        self._weights = pd.Series(dtype=float)
        # Add a universe of US Equities.
        self._date_rule = self.date_rules.month_start('SPY')
        self.universe_settings.resolution = Resolution.HOUR
        self.universe_settings.schedule.on(self._date_rule)
        self.add_universe(self._select_assets)
        # Add warm up so that the algorithm trades on deployment.
        self.set_warm_up(timedelta(45))

    def on_warmup_finished(self):
        '''
        Schedule the monthly rebalance and trigger one for the current day.
        '''
        # Add a Scheduled Event to rebalance the portfolio monthly.
        time_rule = self.time_rules.after_market_open('SPY', 30)
        self.schedule.on(self._date_rule, time_rule, self._rebalance)
        # Rebalance today too: immediately when live, otherwise through a
        # one-off Scheduled Event that uses the same time rule.
        if self.live_mode:
            self._rebalance()
        else:
            self.schedule.on(self.date_rules.today, time_rule, self._rebalance)

    def _select_assets(self, fundamentals: List[Fundamental]):
        '''
        Drop securities priced at $5 or less, keep the `_universe_size` with
        the highest dollar volume, then run the PCA model on their history.

        Stores the resulting target weights in `self._weights` and returns the
        symbols that received a weight. Returns an empty list (and stores
        empty weights) when no history is available.
        '''
        # Select the most liquid Equities trading above $5.
        filtered = sorted([f for f in fundamentals if f.price > 5], key=lambda f: f.dollar_volume)
        symbols = [f.symbol for f in filtered[-self._universe_size:]]
        # Get historical data of the selected assets.
        history = self.history(symbols, self._lookback, Resolution.DAILY)
        if history.empty:
            # An empty frame has no `close` column; there is nothing to model.
            self._weights = pd.Series(dtype=float)
            return []
        closes = history.close.unstack(level=0)
        # Determine the weight of each asset.
        self._weights = self._get_weights(closes)
        return list(self._weights.index)

    def _get_weights(self, history):
        '''
        Get the finalized selected symbols and their weights according to their level of deviation
        of the residuals from the linear regression after PCA for each symbol.

        `history` is a DataFrame of prices with one column per asset. Columns
        containing any missing value are dropped. The result is a Series of
        the selected z-scores scaled so their absolute values sum to 1; it is
        empty when there is no usable data or nothing passes the threshold.
        '''
        # Sample data for PCA (smooth it using np.log function).
        sample = np.log(history.dropna(axis=1))
        # Without at least one complete column and two rows neither the PCA
        # nor the residual standard deviation is defined.
        if sample.shape[1] == 0 or sample.shape[0] < 2:
            return pd.Series(dtype=float)
        sample -= sample.mean() # Center it column-wise.
        # Fit the PCA model for sample data.
        model = PCA().fit(sample)
        # Get the first n_components factors.
        factors = np.dot(sample, model.components_.T)[:,:self._components]
        # Add 1's to fit the linear regression (intercept).
        factors = sm.add_constant(factors)
        # Train an Ordinary Least Squares linear model for each stock.
        ols_models = {ticker: sm.OLS(sample[ticker], factors).fit() for ticker in sample.columns}
        # Get the residuals from the linear regression after PCA for each stock.
        resids = pd.DataFrame({ticker: fit.resid for ticker, fit in ols_models.items()})
        # Standardize the residuals column-wise and keep the most recent row.
        zscores = ((resids - resids.mean()) / resids.std()).iloc[-1]
        # Keep the stocks whose latest residual is below the threshold.
        selected = zscores[zscores < self._zscore_threshold]
        if selected.empty:
            # Avoid dividing by a zero sum when nothing was selected.
            return selected
        # Return the weights for each selected stock.
        return selected * (1 / selected.abs().sum())

    def _rebalance(self):
        '''
        Rebalance the portfolio based on calculated weights
        '''
        # Open positions based on calculated weights.
        # If the residual is way deviated from 0, we enter
        # the position in the opposite way (mean reversion).
        targets = [PortfolioTarget(symbol, -weight) for symbol, weight in self._weights.items()]
        # The second argument asks set_holdings to liquidate existing holdings.
        self.set_holdings(targets, True)

    def on_securities_changed(self, changes):
        '''
        Liquidate when the symbols are not in the universe
        '''
        for security in changes.removed_securities:
            if security.invested:
                self.liquidate(security, 'Removed from Universe')