Overall Statistics
Total Trades
43
Average Win
0.45%
Average Loss
-0.98%
Compounding Annual Return
-7.428%
Drawdown
4.900%
Expectancy
-0.062
Net Profit
-3.775%
Sharpe Ratio
-0.911
Loss Rate
36%
Win Rate
64%
Profit-Loss Ratio
0.46
Alpha
-0.053
Beta
-0.031
Annual Standard Deviation
0.067
Annual Variance
0.004
Information Ratio
-2.551
Tracking Error
0.123
Treynor Ratio
1.946
Total Fees
$4931.65
"""
A simple Statistial Arbitrage strategy from Quantopian
(credit to: "Aqua Rooster" https://www.quantopian.com/posts/a-very-simple-1-dot-25-sharpe-algorithm)


1. Select shares universe (e.g. i. a static list or ii. a dynamic one, like most liquid shares in same sector)
   and get historical prices

2. Find risk factors (i.e. common drivers for the returns), using e.g. PCA, ICA, ...

3. Regress returns vs. risk factors to get individual factor exposure (betas)

4. find shares weights such that 
        i.  maximise your alpha (e.g. z-score of regression residuals * weights) 
        ii. subject to some constraints, e.g. 
                zero net exposure, 
                gross exposure <= 100%, 
                neutralised betas 

"""
import numpy as np
import pandas as pd
import scipy as sp
import cvxpy as cvx
from sklearn.covariance import OAS
from sklearn.decomposition import PCA
import statsmodels.api as smapi # deprecation warning on pandas.core.datetools
# from sklearn.linear_model import LinearRegression # first X then Y, vs. statsmodels where Y is first

# from decimal import Decimal
# from datetime import datetime, timedelta

from clr import AddReference
AddReference("System")
AddReference("QuantConnect.Algorithm")
AddReference("QuantConnect.Indicators")
AddReference("QuantConnect.Common")

from System import *
from QuantConnect import *
from QuantConnect.Data import *
from QuantConnect.Algorithm import *
from QuantConnect.Indicators import *
from System.Collections.Generic import List


class StatisticalArbitrage_v1(QCAlgorithm):
    def Initialize(self):
        self.SetStartDate(2012,12,1)    #Set Start Date
        self.SetEndDate(2013,6,1)     #Set End Date
        self.SetCash(10000000)            #Set Strategy Cash
        self.coarse_symbols=[]; self.fine_symbols = []
        self.rebalence_flag =True # to rebalance the fist time anyway 
        self.trade_flag = True
        


        self.SPY = self.AddEquity("SPY").Symbol  #NB: Self.SPY = SPY R735QTJ8XC9X vs. Self.SPY.Value = SPY
        
        # charts: these lines will make it appear together with Strategy
        stPlot = Chart('Strat Plot')
        stPlot.AddSeries(Series('Longs', SeriesType.Line, 2))
        stPlot.AddSeries(Series('Shorts', SeriesType.Line, 2))
        self.AddChart(stPlot)

        self.UniverseSettings.Resolution = Resolution.Daily #; self.UniverseSettings.Leverage = 2
        self.AddUniverse(self.CoarseSelectionFunction, self.FineSelectionFunction)
 
        # params
        self.min_price = 10.0
        self.number_coarse = 100    # should be 500, but keeps getting maxout errors
        self.back_period = 90
        
        # rebalancing every 3 months (via counter in Rebalancing body fncts)
        self.counter = 2 # no. of months 
        self.Schedule.On(self.DateRules.MonthStart("SPY"), 
                         self.TimeRules.AfterMarketOpen(self.SPY, 0), 
                         Action(self.Rebalancing))
        
        self.Schedule.On(self.DateRules.EveryDay(self.SPY), 
                         self.TimeRules.AfterMarketOpen(self.SPY, 10), 
                         Action(self.Trade))
  

    def Rebalancing(self):
        # every 3rd month (e.g. Jan, March, June, ...) 
        if self.counter < 2:
            self.counter += 1
            self.rebalence_flag = False
            return
        
        self.counter = 0
        self.rebalence_flag = True
   

    def CoarseSelectionFunction(self, coarse):
        """ Get universe selection, i.e. shares s.t.
              (i)  min price > self.min_price" and 
              (ii) top #(self.number_coarse) liquid """
              
        if self.rebalence_flag:
            
            AboveMinPrice = [x for x in coarse if float(x.Price) > self.min_price]
            
            sortedByDollarVolume = sorted(AboveMinPrice, key=lambda x: x.DollarVolume, reverse=True) 
            top = sortedByDollarVolume[:self.number_coarse]

            self.coarse_symbols = [i.Symbol for i in top]
            
            # degug
            self.Debug("num of COARSE shares: %d" %len(self.coarse_symbols) )

            return self.coarse_symbols
        else: 
            return [] if (not self.coarse_symbols) else self.coarse_symbols

 
    def FineSelectionFunction(self, fine):
    
        if self.rebalence_flag:
            
            # get only Primary & Manufacturing shares
            filtered_fine = [x for x in fine if x.SecurityReference.IsPrimaryShare 
                                            and x.CompanyReference.IndustryTemplateCode=='N' 
                             ]  # more at: https://www.quantconnect.com/data#fundamentals/usa/morningstar

            self.fine_symbols =  [i.Symbol for i in filtered_fine]
            
            # degug
            self.Debug("num of FINE shares: %d" %len(self.fine_symbols) )
            
            # reset
            self.rebalence_flag = False
            self.trade_flag = True
            
            return self.fine_symbols
        else:
            return [] if (not self.fine_symbols) else self.fine_symbols


    def Trade(self):

        # checks
        if not self.fine_symbols: return
        if not self.trade_flag: return
        
        # get log returns for the universe selection (NB: hist should be a pandas Panel)
        symbols = [x.Value for x in self.fine_symbols] # x.Symbol.Value
        for tkr in symbols: 
            self.AddEquity(tkr, Resolution.Daily)
        hist = self.History(symbols, self.back_period, Resolution.Daily)
        if hist is None: return
    
        prices = hist["close"].unstack(level=0).dropna(axis=1)

        logRtrn = np.log(prices / prices.shift(1)).dropna() # (num_rtrn, symbols)
              # = np.diff(np.log(prices.values), axis=0)
              
        # model: #1 risk factor
        factors = PCA(1).fit_transform(logRtrn)    # fit + transform # (num_rtrn, [1])
        
        X_c = smapi.add_constant(factors) #  factor(s) + const
        model = smapi.OLS(logRtrn, X_c).fit()
                                                    # NB: need np.asarray(), contrarily to Quantopian (otherwise rtrns a Python list which is not hashable) 
        betas = np.asarray(model.params.T)[:, 1:]   # model.params is (2, symbols) array; returns beta (but not constant) for all symbols [[beta0], [beta1], ...,]

        # model.resid is (days, symbols) array :=> 
        #   sum across days (.sum(axis=0)) for two periods [-x:,:],
        #   standardise resulting 1-d array across symbols (by zscore) and 
        #   subtract older score from more recent
        signal = sp.stats.zscore(np.asarray(model.resid)[-2:, :].sum(axis=0)) \
               - sp.stats.zscore(np.asarray(model.resid)[-20:, :].sum(axis=0))

        # optimised weights
        w = self.get_weights(signal, betas)
        
        # get some charts
        self.ShowChart()

        # w_i re-based to 100% gross
        denom = np.sum(np.abs(w))
        if denom == 0: denom = 1.
        w = w / denom

        # remove tkrs we don't have (NB: prices.columns == self.fine_symbols)
        for tkr in self.Portfolio.Values:
            if (tkr.Invested) and  (tkr not in self.fine_symbols): 
                self.Liquidate(tkr.Symbol)
        
        # submit orders
        for i, tkr in enumerate(prices.columns):
            self.SetHoldings(tkr, w[i])
     
        self.trade_flag = False

    def get_weights(self, signal, betas):
        (m, n) = betas.shape    # (symbols, 1) if PCA(1)
        x = cvx.Variable(m)
        objective = cvx.Maximize(signal.T * x)

        constraints = [cvx.abs(sum(x)) < 0.001,     # net ~= 0
                       sum(cvx.abs(x)) <= 1,        # gross <= 100%
                       x <= 3.5 / m, -x <= 3.5 / m] # weights upper/lower bounds
        
        # neutralise all risk factors exposures (here just #1): abs(beta * weights) = 0
        for i in range(0, n):
            constraints.append(cvx.abs(betas[:, i].T * x) < 0.001)

        probl = cvx.Problem(objective, constraints)
        probl.solve(solver=cvx.CVXOPT)
        
        # if no solution
        if probl.status <> 'optimal':
            print prob.status
            return np.asarray([0.0] * m) # return 0. array
        
        return np.asarray(x.value).flatten()       

        
    def OnData(self, data):
        pass

    def ShowChart(self):
        longs=shorts=0
        longs = np.random.randint(0, 10, size=1)
        shorts = np.random.randint(0, 10, size=1)
        for tkr in self.Portfolio.Values:
            if tkr.Quantity > 0: longs += 1
            if tkr.Quantity < 0: shorts += 1
        self.Plot("Strat Plot", 'Longs', 1.*longs); self.Log("Longs: %d" %longs)
        self.Plot("Strat Plot", 'Shorts', 1.*shorts); self.Log("Shorts: %d" %shorts)
        # show chart