"""
Statistical Arbitrage strategy (modernized from Quantopian original).
Credit: "Aqua Rooster" https://www.quantopian.com/posts/a-very-simple-1-dot-25-sharpe-algorithm
Pipeline:
1. Universe: top-N liquid primary manufacturing shares above min price
2. Risk factors: PCA (1 component) on log returns
3. Factor exposures: OLS regression to get betas
4. Portfolio weights: convex optimisation maximising alpha (z-score residual signal)
subject to: zero net exposure, gross exposure <= 100%, neutralised factor betas
"""
# region imports
import cvxpy as cvx
import scipy as sp
import scipy.stats  # ensure sp.stats is loaded; `import scipy` alone does not import submodules
import statsmodels.api as sm
from sklearn.decomposition import PCA

from AlgorithmImports import *
# endregion
class StatisticalArbitrage_v1(QCAlgorithm):
    """Market-neutral statistical arbitrage over liquid manufacturing shares.

    Quarterly, the algorithm extracts one common risk factor from daily log
    returns via PCA, fits OLS to obtain per-asset factor betas, builds a
    mean-reversion alpha signal from the regression residuals, and solves a
    convex program for dollar-neutral, beta-neutral weights with gross
    exposure <= 100%.
    """

    def initialize(self):
        """Set up the backtest window, parameters, universe, and schedule."""
        # 5-year backtest ending at the project's end date.
        self.set_start_date(self.end_date - timedelta(5 * 365))
        self.set_cash(1_000_000)
        self.settings.seed_initial_prices = True
        # Allow arbitrarily small targets so tiny optimized weights aren't discarded.
        self.settings.min_absolute_portfolio_target_percentage = 0
        # Strategy parameters.
        self._min_price = 10        # minimum share price for universe membership
        self._universe_size = 100   # number of most-liquid names to select
        self._lookback = 90         # trading days of history for the factor model
        self._spy = self.add_equity("SPY", Resolution.DAILY)
        # First calendar day of each quarter (Jan/Apr/Jul/Oct) within [start, end].
        self._date_rule = FuncDateRule(
            name="quarter_start_spy_trading_day",
            get_dates_function=lambda start, end: [
                datetime(year, month, 1)
                for year in range(start.year, end.year + 1)
                for month in (1, 4, 7, 10)
                if start <= datetime(year, month, 1) <= end
            ],
        )
        self.universe_settings.resolution = Resolution.DAILY
        self.universe_settings.asynchronous = True
        self.universe_settings.schedule.on(self._date_rule)
        # Universe: most-liquid primary manufacturing shares (industry template
        # code 'N') trading above the minimum price.
        self._universe = self.add_universe(
            lambda fundamentals: [
                f.symbol for f in sorted(
                    (f for f in fundamentals
                     if f.price > self._min_price and
                     f.security_reference.is_primary_share and
                     f.company_reference.industry_template_code == 'N'),
                    key=lambda f: f.dollar_volume
                )[-self._universe_size:]
            ]
        )
        self.set_warm_up(timedelta(120))

    def on_warmup_finished(self):
        """Schedule quarterly rebalances once warm-up has completed."""
        rebalance_time_rule = self.time_rules.at(8, 0)
        # Rebalance on the first SPY trading day of each quarter.
        self.schedule.on(self._date_rule, rebalance_time_rule, self._rebalance)
        # Also rebalance today so the portfolio isn't flat until next quarter.
        if self.live_mode:
            self._rebalance()
        else:
            self.schedule.on(self.date_rules.today, rebalance_time_rule, self._rebalance)

    def _rebalance(self):
        """Recompute weights for the current universe and submit targets."""
        weight_by_symbol = self._compute_weight_by_symbol(self._universe.selected)
        if not weight_by_symbol:
            return  # model couldn't produce weights; keep current holdings
        targets = [PortfolioTarget(sym, weight) for sym, weight in weight_by_symbol.items()]
        # True => liquidate positions that are no longer targeted.
        self.set_holdings(targets, True)

    def _compute_weight_by_symbol(self, symbols):
        """Return {symbol: weight} from the PCA/OLS residual mean-reversion model.

        Returns an empty dict when there isn't enough data to fit the model or
        the optimizer fails, so the caller can skip the rebalance gracefully.
        """
        if not symbols:
            return {}  # universe not selected yet (or empty selection)
        # Time-by-asset close price matrix; drop assets with incomplete history.
        history = self.history(
            list(symbols), self._lookback, Resolution.DAILY
        ).close.unstack(0).dropna(axis=1)
        if history.shape[1] < 2:
            # Need >= 2 assets: zscore of a single element is NaN and a
            # one-name "market neutral" book is meaningless.
            return {}
        # Daily log returns.
        log_returns = np.log(history / history.shift(1)).dropna()
        if len(log_returns) < 20:
            return {}  # the signal below needs a 20-row residual window
        # One common risk factor via PCA, plus an intercept for the regression.
        X = sm.add_constant(PCA(1).fit_transform(log_returns))
        # OLS of each asset's returns on the factor gives the exposures (betas).
        ols = sm.OLS(log_returns, X).fit()
        factor_betas = np.asarray(ols.params).T[:, 1:]  # drop the intercept column
        # Residuals are the returns the factor doesn't explain (the alpha source).
        residuals = np.asarray(ols.resid)
        # Mean reversion: names whose recent (2-day) residual move is rich
        # relative to their 20-day residual trend are shorted, and vice versa.
        signal = (sp.stats.zscore(residuals[-2:, :].sum(axis=0))
                  - sp.stats.zscore(residuals[-20:, :].sum(axis=0)))
        weights = self._optimize_portfolio(signal, factor_betas)
        if weights is None:
            return {}  # solver failed; skip this rebalance
        # Normalize to 100% gross exposure. The "or 1.0" avoids dividing by
        # zero when the optimizer returns the all-zero portfolio.
        total_weight = np.sum(np.abs(weights)) or 1.0
        return dict(zip(log_returns.columns, weights / total_weight))

    def _optimize_portfolio(self, signal, factor_betas):
        """Maximize signal exposure subject to neutrality constraints.

        Returns a flat numpy array of weights, or None when the solver raises
        or fails to produce a solution (previously this crashed on
        `np.asarray(None).flatten()`).
        """
        n_assets = factor_betas.shape[0]
        weights_var = cvx.Variable(n_assets)
        # Objective: maximize alpha (dot product of the signal and weights).
        objective = cvx.Maximize(signal @ weights_var)
        constraints = [
            cvx.abs(cvx.sum(weights_var)) <= 0.001,  # (approximately) dollar neutral
            cvx.sum(cvx.abs(weights_var)) <= 1,      # gross exposure <= 100%
            weights_var <= 3.5 / n_assets,           # per-name concentration caps
            weights_var >= -3.5 / n_assets,
        ]
        # Neutralize every factor beta to strip systematic risk.
        for i in range(factor_betas.shape[1]):
            constraints.append(cvx.abs(factor_betas[:, i] @ weights_var) <= 0.001)
        problem = cvx.Problem(objective, constraints)
        try:
            problem.solve(solver=cvx.CVXOPT)
        except cvx.SolverError:
            return None  # solver unavailable or numerically failed
        # The solver can also fail without raising, leaving .value as None.
        if weights_var.value is None:
            return None
        return np.asarray(weights_var.value).flatten()