Overall Statistics Total Trades 4824 Average Win 0.02% Average Loss -0.01% Compounding Annual Return -12.729% Drawdown 12.700% Expectancy -0.601 Net Profit -12.718% Sharpe Ratio -10.248 Loss Rate 87% Win Rate 13% Profit-Loss Ratio 2.08 Alpha -0.144 Beta 0.429 Annual Standard Deviation 0.013 Annual Variance 0 Information Ratio -11.748 Tracking Error 0.013 Treynor Ratio -0.316 Total Fees \$6352.60
```import numpy as np

np.warnings.filterwarnings("ignore")
import pandas as pd
import datetime as datetime
import statsmodels.formula.api as sm
import statsmodels.tsa.stattools as ts
np.warnings.resetwarnings()

# generates all possible pairs
# calculates correlation and cointegration for all pairs every day
# every day, tries to enter trades for pairs with good correlation and cointegration whose deviation is too big/too small
# every day, tries to exit trades for pairs whose deviation is too small/too big

def __init__(self):
    """Set the strategy parameters and the containers the algorithm works on.

    Every value is stored as an attribute on ``self`` and read later by
    ``Initialize``, ``OnData`` and the pair-selection helpers.
    """
    # tickers to trade; Initialize replaces each entry with the Symbol
    # returned by AddEquity
    self.symbols = ['CORN', 'SOYB', 'PALL', 'BAC', 'BXS', 'T', 'F', 'GM', 'MSFT', 'AAPL', 'IBM']

    # upper bound on the number of selected pairs OnData keeps
    self.maximalNumberOfPairs = 100000

    # number of bars each pair keeps in its rolling price window
    self.numberOfBars = 40

    # correlation selection criteria
    self.minimalCorrelation = 0.9

    # co-integration selection criteria
    self.maximalBIC = -3.5

    # number of standard deviations to open
    self.open_size = 2

    # number of standard deviations to close
    self.close_size = 1

    # number of standard deviations at which a trade is closed as a stop loss
    self.stop_loss = 6

    # pairs that currently hold a position; OnData iterates this list in its
    # close section, so it has to exist before the first bar arrives
    self.trading_pairs = []

def Initialize(self):
    """Configure the backtest window and cash, subscribe the equities and build the pairs.

    Each ticker in ``self.symbols`` is replaced in place by the ``Symbol`` of
    the subscription returned by ``AddEquity``; the resulting list is then
    handed to ``generatePairs`` together with ``self.numberOfBars``.
    """
    self.SetStartDate(2014, 1, 1)
    self.SetEndDate(2015, 1, 1)
    self.SetCash(50000)

    # swap every ticker for its subscribed Symbol, keeping the same list object
    for position, ticker in enumerate(self.symbols):
        self.symbols[position] = self.AddEquity(ticker, Resolution.Minute).Symbol

    self.generatedPairs = self.generatePairs(self.symbols, self.numberOfBars)

def OnData(self, data):
    """Feed the new data to every pair, then open and close pair trades.

    Entry is a two-step signal tracked in ``pair.touch``:

    * ``touch == 0``: the error crossing outside the lower / upper open band
      (``mean_error -/+ open_size * standardDeviation``) arms the pair
      (``touch`` becomes -1 / +1).
    * ``touch == -1`` or ``+1``: the error crossing back inside that same band
      opens the trade and resets ``touch`` to 0.

    When a trade opens, the pair's model, mean error and standard deviation
    are stored in ``record_*`` and the pair is added to ``self.trading_pairs``.
    Every pair in that list is then checked against its recorded bands: it is
    liquidated when the error crosses back inside a ``close_size`` band, or
    when it lies beyond a ``stop_loss`` band. The exit checks run on every
    call, including calls on which no pair passes selection.

    Parameters:
        data: the data object for the current bar; it is only forwarded to
            ``pair.update``.
    """
    # update indicators in pairs
    for pair in self.generatedPairs:
        pair.update(data)

    # Pairs with an open position. The list is created here on first use so
    # the close section below always has something valid to iterate.
    tradingPairs = getattr(self, 'trading_pairs', None)
    if tradingPairs is None:
        tradingPairs = []
        self.trading_pairs = tradingPairs

    selectedPairs = []
    if self.generatedPairs:
        selectedPairs = self.selectPairsByCorrelation(self.generatedPairs, self.minimalCorrelation, self.maximalNumberOfPairs)
    if selectedPairs:
        selectedPairs = self.selectPairsByCointegration(selectedPairs, self.maximalBIC, self.maximalNumberOfPairs)

    ## enter
    if selectedPairs:
        self.Log('selectPairsByCointegration pairs= %s' % str(len(selectedPairs)))

        # select top maximalNumberOfPairs pairs
        selectedPairs = selectedPairs[:self.maximalNumberOfPairs]

        # each leg gets an equal slice of 20% of the portfolio
        weight = 0.2 / len(selectedPairs) if selectedPairs else 0.0

        for pair in selectedPairs:
            lowerBand = pair.mean_error - self.open_size * pair.standardDeviation
            upperBand = pair.mean_error + self.open_size * pair.standardDeviation

            # (long leg, short leg) once an entry is confirmed
            legs = None

            if pair.touch == 0:
                # first step: the error leaves the band, arm the pair
                if pair.error < lowerBand and pair.last_error > lowerBand:
                    pair.touch = -1
                elif pair.error > upperBand and pair.last_error < upperBand:
                    pair.touch = 1
            elif pair.touch == -1:
                # second step: the error comes back up through the lower band
                if pair.error > lowerBand and pair.last_error < lowerBand:
                    legs = (pair.a, pair.b)
            elif pair.touch == 1:
                # second step: the error comes back down through the upper band
                if pair.error < upperBand and pair.last_error > upperBand:
                    legs = (pair.b, pair.a)

            if legs is None:
                continue

            longLeg, shortLeg = legs
            self.Log('long %s and short %s' % (str(longLeg), str(shortLeg)))

            # freeze the statistics the exit rules are evaluated against
            pair.record_model = pair.model
            pair.record_mean_error = pair.mean_error
            pair.record_sd = pair.standardDeviation

            # NOTE(review): SetHoldings/Liquidate are called per symbol, so two
            # pairs sharing a symbol presumably overwrite each other's target -
            # confirm this is acceptable.
            self.SetHoldings(longLeg, weight)
            self.SetHoldings(shortLeg, -weight)

            pair.touch = 0

            # remember the open trade so the close section can manage it
            if pair not in tradingPairs:
                tradingPairs.append(pair)

    # close
    # iterate over a copy because closed pairs are removed from the list
    for pair in list(tradingPairs):
        upperExit = pair.record_mean_error + self.close_size * pair.record_sd
        lowerExit = pair.record_mean_error - self.close_size * pair.record_sd
        upperStop = pair.record_mean_error + self.stop_loss * pair.record_sd
        lowerStop = pair.record_mean_error - self.stop_loss * pair.record_sd

        revertedFromAbove = pair.error < upperExit and pair.last_error > upperExit
        revertedFromBelow = pair.error > lowerExit and pair.last_error < lowerExit

        if revertedFromAbove or revertedFromBelow:
            self.Log('close %s' % str(pair.name))
        elif pair.error < lowerStop or pair.error > upperStop:
            self.Log('close %s to stop loss' % str(pair.name))
        else:
            continue

        self.Liquidate(pair.a)
        self.Liquidate(pair.b)
        tradingPairs.remove(pair)

    return

def generatePairs(self, equities, numberOfBars):
    """Build one ``Pair`` for every unordered combination of two equities.

    Parameters:
        equities: sequence of the instruments to combine.
        numberOfBars: window length handed to each ``Pair``.

    Returns:
        A list of ``Pair`` objects, ordered by the position of the first and
        then the second equity in ``equities``. The number of pairs is logged.
    """
    # each equity is combined only with the ones that follow it, so no pair
    # appears twice and no equity is paired with itself
    generatedPairs = [
        Pair(self, first, second, numberOfBars)
        for position, first in enumerate(equities)
        for second in equities[position + 1:]
    ]

    self.Log('generated pairs= %s' % str(len(generatedPairs)))

    return generatedPairs

def selectPairsByCorrelation(self, pairs, minimalCorrelation, maximalNumberOfPairs):
    """Return the ready pairs whose correlation exceeds ``minimalCorrelation``.

    Parameters:
        pairs: iterable of pair objects exposing ``isReady()`` and ``correlation``.
        minimalCorrelation: exclusive lower bound on ``pair.correlation``.
        maximalNumberOfPairs: accepted for signature compatibility; not used here.

    Returns:
        A new list of the matching pairs, highest correlation first.
    """
    def strongEnough(candidate):
        # readiness is tested first so correlation is only read on ready pairs
        return candidate.isReady() and candidate.correlation > minimalCorrelation

    return sorted(
        filter(strongEnough, pairs),
        key=lambda candidate: candidate.correlation,
        reverse=True,
    )

def selectPairsByCointegration(self, pairs, maximalBIC, maximalNumberOfPairs):
    """Return the ready pairs whose ``adf`` value is below ``maximalBIC``.

    Parameters:
        pairs: iterable of pair objects exposing ``isReady()`` and ``adf``.
        maximalBIC: exclusive upper bound on ``pair.adf``.
        maximalNumberOfPairs: accepted for signature compatibility; not used here.

    Returns:
        A new list of the matching pairs, lowest ``adf`` first.
    """
    def cointegrated(candidate):
        # readiness is tested first so adf is only read on ready pairs
        return candidate.isReady() and candidate.adf < maximalBIC

    return sorted(filter(cointegrated, pairs), key=lambda candidate: candidate.adf)

class Pair(object):

def __init__(self, algorithm, a, b, numberOfBars):
    """Create the pair (a, b) with an empty price window and a neutral state.

    Parameters:
        algorithm: the owning algorithm object, stored as ``self.algorithm``.
        a: first instrument of the pair.
        b: second instrument of the pair.
        numberOfBars: number of data points the price window keeps.
    """
    self.algorithm = algorithm
    self.a, self.b = a, b

    # window length for the price DataFrame
    self.numberOfBars = numberOfBars

    # pair name in the form "<a>:<b>"
    self.name = ':'.join([str(a), str(b)])

    # price DataFrame; None until the first successful update
    self.df = None

    # current and previous error value, both starting at zero
    self.error = 0
    self.last_error = 0

    # entry state: 0 neutral, set to -1 / +1 by the algorithm's entry logic
    self.touch = 0

def calculateCorrelation(self):
    """Store the correlation between the two price columns in ``self.correlation``.

    ``self.df`` must hold the price DataFrame with the two instruments as its
    first and second column.
    """
    # Off-diagonal cell of the 2x2 correlation matrix, addressed by position.
    # ``.iloc`` replaces the ``.ix`` indexer, which no longer exists in pandas >= 1.0.
    self.correlation = self.df.corr().iloc[0, 1]

def calculateCointegration(self):
    """Regress a on b over the price window and store the residual statistics.

    Sets ``self.model`` (the fitted OLS result), ``self.mean_error`` (mean of
    the residuals) and ``self.standardDeviation`` (standard deviation of the
    residuals).

    Returns:
        The result of ``isReady()``, i.e. whether the price window is full.
    """
    # NOTE(review): ``adf``, which the algorithm's co-integration filter reads,
    # is not assigned anywhere in the visible code - confirm where it is set.
    self.model = sm.ols(formula='%s ~ %s' % (str(self.a), str(self.b)), data=self.df).fit()
    self.mean_error = np.mean(self.model.resid)
    self.standardDeviation = np.std(self.model.resid)

    return self.isReady()


def isReady(self):
    """Return True when the price window exists and holds exactly ``numberOfBars`` rows."""
    return self.df is not None and len(self.df) == self.numberOfBars

def update(self, data):
    """Append the latest close of both instruments to the rolling price window.

    The window (``self.df``) is reset to ``None`` when either instrument is
    missing from ``data`` or when a close price cannot be read as a float.
    Otherwise one row, indexed by the end time of a's bar, is appended and the
    window is trimmed to the last ``numberOfBars`` rows.

    Parameters:
        data: object offering ``ContainsKey(symbol)`` and ``data[symbol]``,
            where each item exposes ``Close`` and ``EndTime``.
    """
    # both legs must be present, otherwise the window starts over
    if not (data.ContainsKey(self.a) and data.ContainsKey(self.b)):
        self.df = None
        return

    data_a = data[self.a]
    data_b = data[self.b]

    try:
        priceOfStockA = float(data_a.Close)
        priceOfStockB = float(data_b.Close)
    except (AttributeError, TypeError, ValueError):
        # missing bar object or non-numeric close: treat like missing data,
        # while letting unrelated exceptions (e.g. KeyboardInterrupt) propagate
        self.df = None
        return

    # One-row frame for this bar. The index is a flat list holding the bar's
    # end time, so the window gets an ordinary index rather than the
    # one-level MultiIndex a nested list would create.
    new_df = pd.DataFrame(
        {str(self.a): [priceOfStockA], str(self.b): [priceOfStockB]},
        index=[data_a.EndTime],
    ).dropna()

    if self.df is None:
        self.df = new_df
    else:
        # concatenate existing DataFrame with new data
        self.df = pd.concat([self.df, new_df])

    # keep numberOfBars of data points
    self.df = self.df.tail(self.numberOfBars)