Overall Statistics
Total Orders
521
Average Win
1.68%
Average Loss
-1.84%
Compounding Annual Return
9.914%
Drawdown
53.300%
Expectancy
0.113
Start Equity
100000
End Equity
160367.92
Net Profit
60.368%
Sharpe Ratio
0.263
Sortino Ratio
0.291
Probabilistic Sharpe Ratio
5.234%
Loss Rate
42%
Win Rate
58%
Profit-Loss Ratio
0.92
Alpha
-0.011
Beta
1.182
Annual Standard Deviation
0.251
Annual Variance
0.063
Information Ratio
0.003
Tracking Error
0.19
Treynor Ratio
0.056
Total Fees
$687.90
Estimated Strategy Capacity
$120000000.00
Lowest Capacity Asset
GDX TIU4L3M37HPH
Portfolio Turnover
5.52%
Drawdown Recovery
1485
#region imports
from AlgorithmImports import *

import statsmodels.api as sm
from sklearn.decomposition import PCA
#endregion


class PcaStatArbitrageAlgorithm(QCAlgorithm):
    '''Monthly mean-reversion strategy driven by PCA regression residuals.

    At the start of each month the most liquid US Equities are regressed on
    their first principal components. Stocks whose latest residual z-score is
    below ``_zscore_threshold`` are kept in the universe, and the portfolio
    is rebalanced against the sign of that deviation. With the default
    negative threshold the resulting targets are long positions.
    '''

    def initialize(self) -> None:
        '''Set the backtest window, cash, parameters, universe, and warm-up.'''
        self.set_start_date(self.end_date - timedelta(5*365))
        self.set_cash(100_000)
        # Define some parameters.
        self._lookback = 60              # Length (days) of historical data
        self._components = 3             # Number of principal components in PCA
        self._universe_size = 20         # Number of the equities pool
        self._zscore_threshold = -1      # Number of std to trigger a trade
        # Target weight of each asset, indexed by Symbol. It starts empty so
        # that _rebalance is safe to call before the first universe selection.
        self._weights = pd.Series(dtype=float)
        # Add a universe of US Equities.
        self._date_rule = self.date_rules.month_start('SPY')
        self.universe_settings.resolution = Resolution.HOUR
        self.universe_settings.schedule.on(self._date_rule)
        self.add_universe(self._select_assets)
        # Add warm up so that the algorithm trades on deployment.
        self.set_warm_up(timedelta(45))

    def on_warmup_finished(self) -> None:
        '''Schedule the monthly rebalance and trigger one for the current day.'''
        # Add a Scheduled Event to rebalance the portfolio monthly.
        time_rule = self.time_rules.after_market_open('SPY', 30)
        self.schedule.on(self._date_rule, time_rule, self._rebalance)
        # Rebalance today too.
        if self.live_mode:
            self._rebalance()
        else:
            self.schedule.on(self.date_rules.today, time_rule, self._rebalance)

    def _select_assets(self, fundamentals: List[Fundamental]):
        '''Select the universe and compute the target weights.

        Drops securities priced at $5 or below, keeps the
        ``_universe_size`` names with the highest dollar volume, then runs
        the PCA/regression step on their daily closes.

        Stores the resulting weights in ``self._weights`` and returns the
        symbols that received a weight (an empty list when none did or when
        no history is available).
        '''
        # Select the most liquid Equities trading above $5.
        filtered = sorted([f for f in fundamentals if f.price > 5], key=lambda f: f.dollar_volume)
        symbols = [f.symbol for f in filtered[-self._universe_size:]]
        # Get historical data of the selected assets.
        history = self.history(symbols, self._lookback, Resolution.DAILY)
        if history.empty:
            # No price data to analyze: select nothing and clear stale weights
            # instead of failing on the missing `close` column.
            self._weights = pd.Series(dtype=float)
            return []
        closes = history.close.unstack(level=0)
        # Determine the weight of each asset.
        self._weights = self._get_weights(closes)
        return list(self._weights.index)

    def _get_weights(self, history):
        '''Return normalized weights for the stocks that deviate from the PCA fit.

        ``history`` is a DataFrame of close prices with one column per asset.
        Each asset's centered log-price series is regressed on the first
        ``_components`` principal components; the z-score of the most recent
        residual decides whether the asset is selected.

        Returns a Series indexed by asset whose values are the selected
        z-scores scaled so their absolute values sum to 1. The Series is
        empty when there is not enough data or nothing crosses the threshold.
        '''
        # Sample data for PCA (smooth it using np.log function).
        sample = np.log(history.dropna(axis=1))
        # PCA and the residual standard deviation need at least two
        # observations and at least one asset with a complete history.
        if sample.shape[0] < 2 or sample.shape[1] == 0:
            return pd.Series(dtype=float)
        sample -= sample.mean() # Center it column-wise.
        # Fit the PCA model for sample data.
        pca = PCA().fit(sample)
        # Get the first n_components factors.
        factors = np.dot(sample, pca.components_.T)[:,:self._components]
        # Add 1's to fit the linear regression (intercept).
        factors = sm.add_constant(factors)
        # Train an Ordinary Least Squares linear model for each stock.
        ols_models = {ticker: sm.OLS(sample[ticker], factors).fit() for ticker in sample.columns}
        # Get the residuals from the linear regression after PCA for each stock.
        resids = pd.DataFrame({ticker: fit.resid for ticker, fit in ols_models.items()})
        # Standardize the residuals and keep the z-scores of the most recent day.
        zscores = ((resids - resids.mean()) / resids.std()).iloc[-1]
        # Get the stocks far from mean (for mean reversion).
        selected = zscores[zscores < self._zscore_threshold]
        if selected.empty:
            # Nothing deviates enough: return early rather than dividing by a zero sum.
            return selected
        # Return the weights for each selected stock.
        return selected * (1 / selected.abs().sum())

    def _rebalance(self) -> None:
        '''Rebalance the portfolio based on the most recently calculated weights.

        Every weight is negated so the position opposes the residual's
        deviation (mean reversion). Holdings that are not among the targets
        are liquidated by ``set_holdings``.
        '''
        # Open positions based on calculated weights.
        # If the residual deviates far from 0, we enter
        # the position in the opposite direction (mean reversion).
        targets = [PortfolioTarget(symbol, -weight) for symbol, weight in self._weights.items()]
        self.set_holdings(targets, True)

    def on_securities_changed(self, changes) -> None:
        '''Liquidate invested securities that were removed from the universe.'''
        for security in changes.removed_securities:
            if security.invested:
                self.liquidate(security, 'Removed from Universe')