Overall Statistics
"""
Statistical Arbitrage strategy (modernized from Quantopian original).
Credit: "Aqua Rooster" https://www.quantopian.com/posts/a-very-simple-1-dot-25-sharpe-algorithm

Pipeline:
1. Universe: top-N liquid primary manufacturing shares above min price
2. Risk factors: PCA (1 component) on log returns
3. Factor exposures: OLS regression to get betas
4. Portfolio weights: convex optimisation maximising alpha (z-score residual signal)
   subject to: zero net exposure, gross exposure <= 100%, neutralised factor betas
"""
# region imports
import scipy as sp
import cvxpy as cvx
from sklearn.decomposition import PCA
import statsmodels.api as sm
from AlgorithmImports import *
# endregion


class StatisticalArbitrage_v1(QCAlgorithm):

    def initialize(self):
        self.set_start_date(self.end_date - timedelta(5*365))
        self.set_cash(1_000_000)
        self.settings.seed_initial_prices = True
        self.settings.min_absolute_portfolio_target_percentage = 0
        # Define some parameters.
        self._min_price = 10
        self._universe_size = 100
        self._lookback = 90
        self._spy = self.add_equity("SPY", Resolution.DAILY)
        # Rebalance the portfolio quarterly.
        self._date_rule = FuncDateRule(
            name="quarter_start_spy_trading_day",             
            get_dates_function=lambda start, end: [
                datetime(year, month, 1)
                for year in range(start.year, end.year + 1)
                for month in (1, 4, 7, 10)
                if start <= datetime(year, month, 1) <= end
            ],
        )
        self.universe_settings.resolution = Resolution.DAILY
        self.universe_settings.asynchronous = True
        self.universe_settings.schedule.on(self._date_rule)
        # Add universe of top liquid primary manufacturing shares above min price.
        self._universe = self.add_universe(
            lambda fundamentals: [
                f.symbol for f in sorted(
                    (f for f in fundamentals
                     if f.price > self._min_price and
                     f.security_reference.is_primary_share and
                     f.company_reference.industry_template_code == 'N'),
                    key=lambda f: f.dollar_volume
                )[-self._universe_size:]
            ]
        )
        self.set_warm_up(timedelta(120))

    def on_warmup_finished(self):
        rebalance_time_rule = self.time_rules.at(8, 0)
        # Rebalance on the first SPY trading day of each quarter.
        self.schedule.on(self._date_rule, rebalance_time_rule, self._rebalance)
        # Rebalance the portfolio today too.
        if self.live_mode:
            self._rebalance()
        else:
            self.schedule.on(self.date_rules.today, rebalance_time_rule, self._rebalance)

    def _rebalance(self):
        # Build targets from the current universe selection.
        weight_by_symbol = self._compute_weight_by_symbol(self._universe.selected)
        if not weight_by_symbol:
            return
        targets = [PortfolioTarget(sym, weight) for sym, weight in weight_by_symbol.items()]
        self.set_holdings(targets, True)

    def _compute_weight_by_symbol(self, symbols):
        # Extract close prices and reshape to asset-by-time matrix.
        history = self.history(list(symbols), self._lookback, Resolution.DAILY).close.unstack(0).dropna(axis=1)
        # Compute log returns from prices.
        log_returns = np.log(history / history.shift(1)).dropna()
        # Extract common risk factor using PCA.
        X = PCA(1).fit_transform(log_returns)
        # Add constant term for OLS regression.
        X = sm.add_constant(X)
        # Fit OLS model to get factor exposures (betas).
        ols = sm.OLS(log_returns, X).fit()
        # Extract regression parameters.
        ols_params = np.asarray(ols.params)
        factor_betas = ols_params.T[:, 1:]
        # Get residuals which represent unexplained returns (alpha signal).
        residuals = np.asarray(ols.resid)
        # Compute mean-reversion signal from recent vs. older residuals.
        signal = sp.stats.zscore(residuals[-2:, :].sum(axis=0)) - sp.stats.zscore(residuals[-20:, :].sum(axis=0))
        # Optimize portfolio weights to maximize alpha signal subject to constraints.
        weights = self._optimize_portfolio(signal, factor_betas)
        # Normalize weights to 100% gross exposure.
        total_weight = np.sum(np.abs(weights)) or 1.0
        return dict(zip(log_returns.columns, weights / total_weight))

    def _optimize_portfolio(self, signal, factor_betas):
        n_assets = factor_betas.shape[0]
        weights_var = cvx.Variable(n_assets)
        # Maximize the dot product of signal and weights (alpha).
        objective = cvx.Maximize(signal @ weights_var)
        # Define portfolio constraint: net exposure approximately zero (market neutral).
        constraints = [
            cvx.abs(cvx.sum(weights_var)) <= 0.001,
            cvx.sum(cvx.abs(weights_var)) <= 1,
            weights_var <= 3.5 / n_assets,
            weights_var >= -3.5 / n_assets,
        ]
        # Neutralize factor betas to eliminate systematic risk.
        for i in range(factor_betas.shape[1]):
            constraints.append(cvx.abs(factor_betas[:, i] @ weights_var) <= 0.001)
        # Solve the convex optimization problem.
        problem = cvx.Problem(objective, constraints)
        problem.solve(solver=cvx.CVXOPT)
        return np.asarray(weights_var.value).flatten()