| Overall Statistics | |
| --- | --- |
| Total Trades | 2306 |
| Average Win | 0.40% |
| Average Loss | -0.28% |
| Compounding Annual Return | 7.589% |
| Drawdown | 16.400% |
| Expectancy | 0.169 |
| Net Profit | 62.056% |
| Sharpe Ratio | 0.586 |
| Probabilistic Sharpe Ratio | 10.426% |
| Loss Rate | 52% |
| Win Rate | 48% |
| Profit-Loss Ratio | 1.45 |
| Alpha | 0.069 |
| Beta | -0.005 |
| Annual Standard Deviation | 0.116 |
| Annual Variance | 0.014 |
| Information Ratio | -0.307 |
| Tracking Error | 0.2 |
| Treynor Ratio | -13.831 |
| Total Fees | $2769.99 |
| Estimated Strategy Capacity | $430000000.00 |
| Lowest Capacity Asset | GOOG T1AZ164W5VTX |
# SymbolData.py
from SignalProccesor import SignalProccesor
# shared SignalProccesor instance used for model fitting and prediction
signalProccesor = SignalProccesor()
class SymbolData:
''' Class object storing machine learning models of symbol and usage methods '''
def __init__(self, symbol, edge):
''' Initialize a new instance of SymbolData for the given symbol
Args:
symbol: input symbol
edge: required edge of the model, measured as correct-prediction % minus wrong-prediction %; if the fitted model's edge falls below this, recalibrate'''
self.symbol = symbol
self.edge = edge
# initiate a new Principal Component Analysis model
self.PCA = None
# initiate a new Support Vector Machine Classifier
self.SVM = None
def CheckModel(self, rankingDf, priceSeries, benchmark):
''' Check whether the model should be recalibrated
Args:
rankingDf: daily historical data of Brain's ML Stock Ranking 2, 3, 5 days
priceSeries: daily close price of specific symbol
benchmark: daily close price of benchmark
Return:
(boolean) True if the recalibration criteria are met'''
# recalibrate if either model is unfitted, or if the model's accuracy falls below the required edge threshold
return self.PCA is None or self.SVM is None \
or signalProccesor.ModelScore(rankingDf, priceSeries, benchmark, self.PCA, self.SVM) < 0.5 + self.edge/2
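# e.g. with the algorithm's edge of 0.05, the model must score above 0.5 + 0.05/2 = 0.525 on the recent window
# to be kept; otherwise it is refitted on the longer training window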
def UpdateModel(self, rankingDf, priceSeries, benchmark):
''' Update the PCA and SVM models
Args:
rankingDf: daily historical data of Brain's ML Stock Ranking 2, 3, 5 days
priceSeries: daily close price of specific symbol
benchmark: daily close price of benchmark
Update:
PCA and SVM models'''
# update the inaccurate models
self.PCA, self.SVM = signalProccesor.ModelFitting(rankingDf, priceSeries, benchmark)
def CheckBias(self, rankingDf, priceSeries, benchmark):
''' Check which side (long/short/both) the SVM model is reliable enough to predict
Args:
rankingDf: daily historical data of Brain's ML Stock Ranking 2, 3, 5 days
priceSeries: daily close price of specific symbol
benchmark: daily close price of benchmark
Return:
the side(s) of the market the SVM model is suitable to predict'''
return signalProccesor.ModelBias(rankingDf, priceSeries, benchmark, self.PCA, self.SVM, self.edge)
def Prediction(self, rankingData):
''' Predict the next directional move
Args:
rankingData: previous day's Brain's ML Stock Ranking 2, 3, 5 days data
Return:
directional forecast of next week's close price'''
return signalProccesor.ModelPrediction(rankingData, self.PCA, self.SVM)

# main.py
from AlgorithmImports import *
from QuantConnect.DataSource import *
import numpy as np
import pandas as pd
from PortfolioOptimization import PortfolioOptimization
from SymbolData import SymbolData
class BrainMLRankPortfolio(QCAlgorithm):
def Initialize(self):
self.SetStartDate(2015, 1, 1)
self.SetCash(100000)
'''
Parameters:
self.numberOfDays: number of days of daily data used for model checking and position sizing
self.trainingDataLength: number of days of daily data used for model calibration
self.edge: required edge of the model, measured as correct-prediction % minus wrong-prediction %; if the model's edge falls below this, recalibrate
self.min_: minimum weight for an individual asset (0 to max)
self.max_: maximum weight for an individual asset (min to inf)
self.total_: target total weight across all assets (max to inf)
'''
self.numberOfDays = 252
self.trainingDataLength = 5*252
self.edge = 0.05
self.min_ = 0.
self.max_ = 1.
self.total_ = 1.
# FAANG + benchmark from research
tickers = ["FB", "AAPL", "AMZN", "NFLX", "GOOGL", "SPY"]
# Subscribe to the tickers
for symbol in tickers:
self.AddEquity(symbol, Resolution.Daily)
# Add Portfolio Construction Model
# InsightWeightingPortfolioConstructionModel is used because position sizes are computed inside the algorithm and passed as insight weights
# rebalance weekly, since we predict weekly active return
self.SetPortfolioConstruction(InsightWeightingPortfolioConstructionModel(lambda time: Expiry.EndOfWeek(time)))
# Add Execution Function
# as in the research, we assume immediate execution
self.SetExecution(ImmediateExecutionModel())
# Null Risk Management
# we don't add risk management that could disturb the beta-neutral balance
self.AddRiskManagement(NullRiskManagementModel())
# schedule our weekly insight generation
self.Schedule.On(self.DateRules.WeekStart("SPY"), \
self.TimeRules.AfterMarketOpen("SPY"), \
self.InsightGenerator)
# dictionary holding the Brain dataset symbols for each equity
self.brainStockRank = {}
# dictionary holding custom symbolData class
self.data = {}
# dictionary holding trading side allowed
self.side = {}
# dictionary holding corresponding historical trade bar data
self.history = {}
# initialize the PortfolioOptimization class for position-size calculation
self.portfolioOptimization = PortfolioOptimization(self.min_, self.max_, self.total_)
def InsightGenerator(self):
''' Creates insights '''
insights = []
# dictionaries holding symbols and their historical data
longData = {}; shortData = {}
benchmark = self.History(self.Symbol("SPY"), self.numberOfDays, Resolution.Daily)
if benchmark.empty: return
benchmark = benchmark.close.unstack("symbol")
for symbol, symbolData in self.data.items():
# get Brain's ML Stock Ranking data, close price, benchmark's close price for testing models
rankingDf = self.History(self.brainStockRank[symbol], self.numberOfDays, Resolution.Daily)
history = pd.DataFrame(self.history[symbol], columns=[symbol]).iloc[:self.numberOfDays][::-1]
# discontinue if no data available
if rankingDf.empty or history.empty: continue
rankingDf = rankingDf['rank'].unstack("symbol")
priceSeries = history.applymap(lambda bar: bar.Close)
priceSeries.index = pd.to_datetime(history.applymap(lambda bar: bar.EndTime).values.flatten().tolist())
# check whether the model recalibration criteria are met
check = symbolData.CheckModel(rankingDf, priceSeries, benchmark)
# update the models if any recalibration criterion was met
if check:
# get 5 year Brain's ML Stock Ranking data, close price, benchmark's close price for training models
rankingDfTrain = self.History(self.brainStockRank[symbol], self.trainingDataLength, Resolution.Daily)['rank'].unstack("symbol")
history = pd.DataFrame(self.history[symbol], columns=[symbol]).iloc[:self.trainingDataLength][::-1]
priceSeriesTrain = history.applymap(lambda bar: bar.Close)
priceSeriesTrain.index = pd.to_datetime(history.applymap(lambda bar: bar.EndTime).values.flatten().tolist())
benchmarkTrain = self.History(self.Symbol("SPY"), self.trainingDataLength, Resolution.Daily).close.unstack("symbol")
# update models
symbolData.UpdateModel(rankingDfTrain, priceSeriesTrain, benchmarkTrain)
# select the side (long/short/both) the model is suitable to trade for
self.side[symbol] = symbolData.CheckBias(rankingDf, priceSeries, benchmark)
# predict the coming week close price direction
predict = symbolData.Prediction(rankingDf.iloc[-1].values.reshape(1, -1))
# classification according to prediction
if predict == 1 and (self.side[symbol] == "long" or self.side[symbol] == "both"):
longData[symbol] = priceSeries
elif predict == 0 and (self.side[symbol] == "short" or self.side[symbol] == "both"):
shortData[symbol] = priceSeries
# preset both betas to 1; if a side falls back to the SPY benchmark, its beta is 1 by definition
longBeta = 1; shortBeta = 1
if longData:
# build a dataframe of the long symbols' historical prices
longDf = pd.concat(longData.values(), axis=1)
# position sizing for the long symbols; a single-asset book simply gets full weight
longWeights = self.portfolioOptimization.CalculatePositionSize(longDf, benchmark) if longDf.shape[1] > 1 else np.array([[1.]])
# calculate beta
longBeta = self.portfolioOptimization.CalculateBeta(longDf, longWeights, benchmark)
# if there are no long candidates, go long the benchmark SPY instead
else:
longDf = benchmark; longWeights = [1.]
if shortData:
# build a dataframe of the short symbols' historical prices
shortDf = pd.concat(shortData.values(), axis=1)
# position sizing for the short symbols (prices negated so returns are inverted); a single-asset book gets full weight
shortWeights = self.portfolioOptimization.CalculatePositionSize(-shortDf, -benchmark) if shortDf.shape[1] > 1 else np.array([[1.]])
# calculate beta
shortBeta = self.portfolioOptimization.CalculateBeta(shortDf, shortWeights, benchmark)
# if there are no short candidates, short the benchmark SPY instead
else:
shortDf = benchmark; shortWeights = [1.]
for n in range(len(longWeights)):
# scale long weights by the short book's beta so the combined long/short book is approximately beta-neutral
insights.append(Insight(longDf.columns[n], Expiry.EndOfWeek, InsightType.Price, InsightDirection.Up,
None, None, None, shortBeta*longWeights[n]))
for n in range(len(shortWeights)):
# scale short weights by the long book's beta so the combined long/short book is approximately beta-neutral
insights.append(Insight(shortDf.columns[n], Expiry.EndOfWeek, InsightType.Price, InsightDirection.Down,
None, None, None, longBeta*shortWeights[n]))
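# why this washes out systematic risk: each side's weights sum to 1, so the long book carries beta longBeta and the
# short book carries beta -shortBeta; after the cross-scaling above, the net beta is roughly
# shortBeta*longBeta - longBeta*shortBeta = 0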
# emit insights
self.EmitInsights(insights)
def OnSecuritiesChanged(self, changes):
for change in changes.AddedSecurities:
# skip benchmark
if change.Symbol == self.Symbol("SPY"): continue
# Subscribe to Brain's ML stock ranking datasets (2, 3, 5 days) for the tickers
brainStockRank2Days = self.AddData(BrainStockRanking2Day, change.Symbol).Symbol
brainStockRank3Days = self.AddData(BrainStockRanking3Day, change.Symbol).Symbol
brainStockRank5Days = self.AddData(BrainStockRanking5Day, change.Symbol).Symbol
self.brainStockRank[change.Symbol] = [brainStockRank2Days, brainStockRank3Days, brainStockRank5Days]
# holding custom symbolData class for symbol
self.data[change.Symbol] = SymbolData(change.Symbol, self.edge)
# set up historical data rolling window
windowSize = max(self.numberOfDays, self.trainingDataLength) + 1
self.history[change.Symbol] = RollingWindow[TradeBar](windowSize)
# warm up rolling windows
data = self.History(change.Symbol, windowSize, Resolution.Daily)
for time, bar in data.loc[change.Symbol].iterrows():
tradeBar = TradeBar(time, change.Symbol, bar.open, bar.high, bar.low, bar.close, bar.volume)
self.history[change.Symbol].Add(tradeBar)
# set up consolidator for future auto-update
self.Consolidate(change.Symbol, Resolution.Daily, self.DailyBarHandler)
def DailyBarHandler(self, bar):
self.history[bar.Symbol].Add(bar)

# SignalProccesor.py
import numpy as np
import pandas as pd
from pandas.tseries.holiday import USFederalHolidayCalendar
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix
from sklearn.svm import SVC
np.random.seed(0)
class SignalProccesor:
''' Uses Brain's ML Stock Ranking data and SVM to predict weekly active return direction '''
def __init__(self):
''' Initialize a new instance of the SignalProccesor class '''
pass
def DataProcessing(self, rankingDf, priceSeries, benchmark):
''' Process the raw Brain's ML Stock Ranking data and the close price series of a specific symbol and the benchmark
Args:
rankingDf: daily historical data of Brain's ML Stock Ranking 2, 3, 5 days
priceSeries: daily close prices of the specific symbol
benchmark: daily close prices of the benchmark
Returns:
week-start Brain's ML Stock Ranking data and the weekly directional active return (1 for up, 0 for flat/down)'''
# business day data for resample and offset
bd = pd.offsets.CustomBusinessDay(calendar=USFederalHolidayCalendar())
# we're interested in weekly active return change on week start
priceSeries = priceSeries.resample("W-MON", loffset=bd).first()
benchmark = benchmark.resample("W-MON", loffset=bd).first()
rankingDf = rankingDf.resample("W-MON", loffset=bd).first()
# use log returns for model training (following Palomar's signal-processing treatment)
# shift back 1 bar so each week's features are paired with the following week's return
priceSeries = np.log(priceSeries/priceSeries.shift(1)).shift(-1)
benchmark = np.log(benchmark/benchmark.shift(1)).shift(-1)
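# after the shift, the row stamped Monday of week t holds the log return from that Monday to the next week's Monday,
# so the week-t ranking snapshot is used to predict the following week's move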
# matching date for DateTime index
priceSeries.index = pd.to_datetime(priceSeries.index, utc=True).date
benchmark.index = pd.to_datetime(benchmark.index, utc=True).date
rankingDf.index = pd.to_datetime(rankingDf.index, utc=True).date
# keep only the dates present in the price, ML ranking and benchmark series; drop the rest
df = pd.concat([rankingDf, priceSeries, benchmark], axis=1).dropna()
# we want active return only
activeReturn = df.iloc[:, -2].subtract(df.iloc[:, -1], axis=0)
# change into directional return series (1 for up, 0 for no change/down)
activeReturn[activeReturn <= 0] = 0
activeReturn[activeReturn > 0] = 1
return df.iloc[:, :-2], activeReturn
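# illustrative example (hypothetical numbers): if the stock's weekly log return is +1.2% and the benchmark's is +0.5%,
# the active return is +0.7% > 0 and that week's label is 1; if the stock underperforms (or exactly matches) the
# benchmark, the label is 0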
def ModelFitting(self, rankingDf, priceSeries, benchmark):
''' Fitting the SVM model for specific symbol by given Brain's ML Stock Ranking data
Args:
rankingDf: daily historical data of Brain's ML Stock Ranking 2, 3, 5 days
priceSeries: daily close prices of the specific symbol
benchmark: daily close prices of the benchmark
Returns:
fitted PCA model & SVM classifier'''
# process our data to get week-start Brain's ML Stock Ranking data and weekly directional close price return
rankingDf, binaryReturn = self.DataProcessing(rankingDf, priceSeries, benchmark)
# initialize a PCA model to reduce dimensionality and avoid overfitting; 'mle' estimates the number of components
pca = PCA(n_components = 'mle')
# no need to scale data as they're already scaled
reduced = pca.fit_transform(rankingDf.fillna(0))
reduced = pd.DataFrame(reduced)
# initialize an SVM classifier to predict the directional weekly return; the RBF kernel gives a non-linear soft-margin boundary
svm = SVC(kernel='rbf')
# no need to scale data as they're already scaled, we'll be using binary return
svm.fit(reduced, binaryReturn.fillna(0))
return pca, svm
def ModelPrediction(self, rankingData, pca, svm):
''' Making directional prediction on coming week by Brain's ML Stock Ranking data
Args:
rankingData: Brain's ML Stock Ranking 2, 3, 5 days of this instance
pca: fitted PCA transform model for specific symbol
svm: fitted SVM classifier model for specific symbol
Returns:
directional prediction (1 for up, 0 for down)'''
# reduce data dimension from PCA model transformation
reduced = pca.transform(rankingData)
reduced = pd.DataFrame(reduced).fillna(0)
return int(svm.predict(reduced))
def ModelScore(self, rankingDf, priceSeries, benchmark, pca, svm):
''' Get score of the SVM model for specific symbol by current data
Args:
rankingDf: daily historical data of Brain's ML Stock Ranking 2, 3, 5 days
priceSeries: daily close prices of the specific symbol
benchmark: daily close prices of the benchmark
pca: fitted PCA transform model for specific symbol
svm: fitted SVM classifier model for specific symbol
Returns:
accuracy score of the SVM classifier'''
# process our data to get week-start Brain's ML Stock Ranking data and weekly directional close price return
rankingDf, binaryReturn = self.DataProcessing(rankingDf, priceSeries, benchmark)
# reduce data dimension from PCA model transformation
reduced = pca.transform(rankingDf.fillna(0))
reduced = pd.DataFrame(reduced)
return svm.score(reduced, binaryReturn.fillna(0))
def ModelBias(self, rankingDf, priceSeries, benchmark, pca, svm, edge):
''' Get the sensitivity and specificity of the model to decide whether it should trade long, short, or both sides
Args:
rankingDf: daily historical data of Brain's ML Stock Ranking 2, 3, 5 days
priceSeries: daily close prices of the specific symbol
benchmark: daily close prices of the benchmark
pca: fitted PCA transform model for specific symbol
svm: fitted SVM classifier model for specific symbol
edge: required edge per side, measured as correct-prediction % minus wrong-prediction %; a side below this is abandoned
Returns:
the side(s) the SVM model is suitable to predict'''
# process our data to get week-start Brain's ML Stock Ranking data and weekly directional close price return
rankingDf, binaryReturn = self.DataProcessing(rankingDf, priceSeries, benchmark)
# reduce data dimension from PCA model transformation
reduced = pca.transform(rankingDf.fillna(0))
reduced = pd.DataFrame(reduced)
# get predicted results
predicted = svm.predict(reduced)
# contrast observed and predicted results
contrast = confusion_matrix(binaryReturn.fillna(0), predicted)
# do not proceed if only one class appears in the observed and predicted labels
if contrast.shape[0] <= 1: return None
# get sensitivity and specificity
sensitivity = contrast[1, 1]/sum(contrast[1, :])
specificity = contrast[0, 0]/sum(contrast[0, :])
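# sensitivity = TP / (TP + FN): fraction of up weeks correctly identified, gating the long side
# specificity = TN / (TN + FP): fraction of flat/down weeks correctly identified, gating the short side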
# return the signal accepted side
if sensitivity > 0.5 + edge/2:
if specificity > 0.5 + edge/2:
return "both"
return "long"
elif specificity > 0.5 + edge/2:
return "short"
return None

# PortfolioOptimization.py
import cvxopt as cvx
import numpy as np
class PortfolioOptimization:
''' Calculate insight weights via constrained mean-variance (quadratic programming) optimization. '''
def __init__(self, min_=0., max_=1., total_=1.):
''' Initialize a new instance of the PortfolioOptimization class
Args:
min_: minimum weight for individual asset weighting (0 - max)
max_: maximum weight for individual asset weighting (min to inf)
total_: target total weight for all assets (max to inf)'''
self.minWeight = max(0, min_)
self.maxWeight = max(max_, min_)
self.totalWeight = max(max_, total_)
def CalculatePositionSize(self, df, benchmark):
''' Calculate the position sizes for the given assets
Args:
df: DataFrame/2-d array of daily close prices, an m x n matrix of m days and n assets
benchmark: DataFrame of the benchmark's daily close prices
Return:
(n, 1) array containing position sizes for the n assets'''
# number of assets
n = int(df.shape[1])
# change to return series
df = df.pct_change()[1:]
benchmark = benchmark.pct_change()[1:]
# active returns relative to the benchmark (computed for reference; not used in the optimization below)
activeReturn = df.sub(benchmark.iloc[:, 0], axis=0)
# inequality constraints: Gx <= h
# bound each individual weight between the minimum and maximum allowed
# stack -I (lower bounds: -x <= -min, i.e. x >= min) on top of I (upper bounds: x <= max)
G = cvx.matrix(np.concatenate((-np.eye(n), np.eye(n)), axis=0))
h = cvx.matrix(np.concatenate((-self.minWeight*np.ones((n, 1)), max(1./n, self.maxWeight)*np.ones((n, 1))), axis=0))
# equality constraints: Ax == b
# the weights must sum to the target total weight
A = cvx.matrix(1.0, (1, n))
# total weight must be positive
b = cvx.matrix(max(0.001, self.totalWeight))
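# the solver call below minimizes the mean-variance objective
#   (1/2) w' Sigma w - mu' w
# subject to Gw <= h (per-asset bounds) and Aw == b (target total weight),
# where Sigma and mu are the sample covariance and mean of the daily returns in df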
# Quadratic programming
return np.asarray(cvx.solvers.qp(cvx.matrix(df.cov().values), -cvx.matrix(df.mean().values), G=G, h=h, A=A, b=b)['x'])
def CalculateBeta(self, df, weights, benchmark):
''' Calculate beta of portfolio
Args:
df: dataframe/2-d array historical close price data
weights: corresponding weights of each asset in portfolio
benchmark: benchmark historical close price data
Return:
beta value of portfolio'''
# get return series
df = df.pct_change()[1:]
benchmark = benchmark.pct_change()[1:]
# apply the weights to obtain the portfolio return series
df = np.dot(df, weights.reshape(-1, 1))
# get covariance with benchmark
cov = np.cov(np.concatenate((df, benchmark), axis=1).T)[0, 1]
# get variance of benchmark
var = benchmark.var()
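# beta = Cov(weighted portfolio return, benchmark return) / Var(benchmark return)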
return cov/var