Overall Statistics |
Total Trades 2306 Average Win 0.40% Average Loss -0.28% Compounding Annual Return 7.589% Drawdown 16.400% Expectancy 0.169 Net Profit 62.056% Sharpe Ratio 0.586 Probabilistic Sharpe Ratio 10.426% Loss Rate 52% Win Rate 48% Profit-Loss Ratio 1.45 Alpha 0.069 Beta -0.005 Annual Standard Deviation 0.116 Annual Variance 0.014 Information Ratio -0.307 Tracking Error 0.2 Treynor Ratio -13.831 Total Fees $2769.99 Estimated Strategy Capacity $430000000.00 Lowest Capacity Asset GOOG T1AZ164W5VTX |
from SignalProccesor import SignalProccesor

# single shared SignalProccesor instance used by every SymbolData for fitting, scoring and prediction
signalProccesor = SignalProccesor()


class SymbolData:
    ''' Per-symbol holder of the fitted PCA/SVM pair, with thin wrappers around the shared SignalProccesor '''

    def __init__(self, symbol, edge):
        ''' Create the container for one symbol; both models start unfitted (None)
        Args:
            symbol: the symbol these models belong to
            edge: required model edge (correct % - wrong %); accuracy below 0.5 + edge/2 triggers recalibration'''
        self.symbol = symbol
        self.edge = edge

        # Principal Component Analysis model, assigned by UpdateModel
        self.PCA = None
        # Support Vector Machine classifier, assigned by UpdateModel
        self.SVM = None

    def CheckModel(self, rankingDf, priceSeries, benchmark):
        ''' Decide whether the models need to be (re)calibrated
        Args:
            rankingDf: daily historical data of Brain's ML Stock Ranking 2, 3, 5 days
            priceSeries: daily close price of specific symbol
            benchmark: daily close price of benchmark
        Return:
            truthy when either model is missing or the accuracy score is below 0.5 + edge/2'''
        # nothing fitted yet: calibration is mandatory, and scoring would be impossible anyway
        if self.PCA is None or self.SVM is None:
            return True

        requiredAccuracy = 0.5 + self.edge/2
        currentAccuracy = signalProccesor.ModelScore(rankingDf, priceSeries, benchmark, self.PCA, self.SVM)
        return currentAccuracy < requiredAccuracy

    def UpdateModel(self, rankingDf, priceSeries, benchmark):
        ''' Refit the PCA and SVM models and store them on this instance
        Args:
            rankingDf: daily historical data of Brain's ML Stock Ranking 2, 3, 5 days
            priceSeries: daily close price of specific symbol
            benchmark: daily close price of benchmark
        Update:
            self.PCA and self.SVM'''
        fittedPca, fittedSvm = signalProccesor.ModelFitting(rankingDf, priceSeries, benchmark)
        self.PCA = fittedPca
        self.SVM = fittedSvm

    def CheckBias(self, rankingDf, priceSeries, benchmark):
        ''' Ask the processor which side(s) the current SVM model is suitable to predict for
        Args:
            rankingDf: daily historical data of Brain's ML Stock Ranking 2, 3, 5 days
            priceSeries: daily close price of specific symbol
            benchmark: daily close price of benchmark
        Return:
            whatever signalProccesor.ModelBias returns for the stored models and edge'''
        side = signalProccesor.ModelBias(rankingDf, priceSeries, benchmark, self.PCA, self.SVM, self.edge)
        return side

    def Prediction(self, rankingData):
        ''' Forecast the next direction from the latest ranking observation
        Args:
            rankingData: latest Brain's ML Stock Ranking 2, 3, 5 days values
        Return:
            whatever signalProccesor.ModelPrediction returns for the stored models'''
        forecast = signalProccesor.ModelPrediction(rankingData, self.PCA, self.SVM)
        return forecast
from QuantConnect.DataSource import *
import pandas as pd
from PortfolioOptimization import PortfolioOptimization
from SymbolData import SymbolData


class BrainMLRankPortfolio(QCAlgorithm):
    ''' Weekly long/short portfolio driven by per-symbol PCA+SVM models on Brain's ML Stock Ranking data '''

    def Initialize(self):
        ''' Configure dates, cash, parameters, subscriptions, framework models and the weekly schedule '''
        self.SetStartDate(2015, 1, 1)
        self.SetCash(100000)

        # Parameters:
        #   self.numberOfDays: number of days of daily return data to be used in model check and position sizing
        #   self.trainingDataLength: number of days of daily data to be used in calibrating model
        #   self.edge: required percentage of model's given edge, calculated by
        #              correct accuracy % - wrong %, if less than that, recalibrate
        #   self.min_: minimum weight for individual asset weighting (0 - max)
        #   self.max_: maximum weight for individual asset weighting (min to inf)
        #   self.total_: target total weight for all assets (max to inf)
        self.numberOfDays = 252
        self.trainingDataLength = 5*252
        self.edge = 0.05
        self.min_ = 0.
        self.max_ = 1.
        self.total_ = 1.

        # FAANG + benchmark from research
        tickers = ["FB", "AAPL", "AMZN", "NFLX", "GOOGL", "SPY"]

        # Subscribe to the tickers
        for ticker in tickers:
            self.AddEquity(ticker, Resolution.Daily)

        # Add Portfolio Construction Model
        # InsightWeightingPortfolioConstructionModel is used as the position sizing will be determined by calculation in algorithm
        # weekly rebalance as we predict weekly active return
        self.SetPortfolioConstruction(InsightWeightingPortfolioConstructionModel(lambda time: Expiry.EndOfWeek(time)))

        # Add Execution Function
        # We stick with our research assuming immediate execution
        self.SetExecution(ImmediateExecutionModel())

        # Null Risk Management
        # We don't want to expose to systematic risk tempering the beta-neutral balance
        self.AddRiskManagement(NullRiskManagementModel())

        # schedule our weekly insight generation
        self.Schedule.On(self.DateRules.WeekStart("SPY"),
                         self.TimeRules.AfterMarketOpen("SPY"),
                         self.InsightGenerator)

        # dictionary holding symbols of Brain's data, keyed by equity symbol
        self.brainStockRank = {}
        # dictionary holding custom SymbolData class, keyed by equity symbol
        self.data = {}
        # dictionary holding trading side allowed, keyed by equity symbol
        self.side = {}
        # dict containing the rolling window of historical trade bars, keyed by equity symbol
        self.history = {}

        # position sizing / beta calculator
        self.portfolioOptimization = PortfolioOptimization(self.min_, self.max_, self.total_)

    def _ClosePrices(self, symbol, length):
        ''' Build a chronological close-price frame from the newest `length` bars of the symbol's rolling window
        Args:
            symbol: key into self.history
            length: number of most recent bars to use
        Return:
            single-column DataFrame of closes indexed by bar end time, or an empty DataFrame if no bars exist'''
        # the first `length` entries of the window are taken and then reversed into chronological order
        bars = pd.DataFrame(self.history[symbol], columns=[symbol]).iloc[:length][::-1]
        if bars.empty:
            return bars

        closes = bars.applymap(lambda bar: bar.Close)
        closes.index = pd.to_datetime(bars.applymap(lambda bar: bar.EndTime).values.flatten().tolist())
        return closes

    def InsightGenerator(self):
        ''' Check/recalibrate each symbol's model, predict its direction, size both sides and emit insights '''
        insights = []

        # dictionaries holding symbols and their historical data
        longData = {}
        shortData = {}

        benchmark = self.History(self.Symbol("SPY"), self.numberOfDays, Resolution.Daily)
        if benchmark.empty:
            return
        benchmark = benchmark.close.unstack("symbol")

        for symbol, symbolData in self.data.items():
            # get Brain's ML Stock Ranking data and close prices for testing models
            rankingDf = self.History(self.brainStockRank[symbol], self.numberOfDays, Resolution.Daily)
            priceSeries = self._ClosePrices(symbol, self.numberOfDays)

            # discontinue if no data available
            if rankingDf.empty or priceSeries.empty:
                continue

            rankingDf = rankingDf['rank'].unstack("symbol")

            # update models if one or more of the recalibrating criteria was/were met
            if symbolData.CheckModel(rankingDf, priceSeries, benchmark):
                # get 5 year Brain's ML Stock Ranking data, close price, benchmark's close price for training models
                rankingDfTrain = self.History(self.brainStockRank[symbol], self.trainingDataLength, Resolution.Daily)
                benchmarkTrain = self.History(self.Symbol("SPY"), self.trainingDataLength, Resolution.Daily)

                # without training data the model cannot be fitted; skip the symbol
                # this week instead of failing on the missing 'rank'/'close' columns
                if rankingDfTrain.empty or benchmarkTrain.empty:
                    continue

                rankingDfTrain = rankingDfTrain['rank'].unstack("symbol")
                benchmarkTrain = benchmarkTrain.close.unstack("symbol")
                priceSeriesTrain = self._ClosePrices(symbol, self.trainingDataLength)

                # update models
                symbolData.UpdateModel(rankingDfTrain, priceSeriesTrain, benchmarkTrain)

                # select the side (long/short/both) the model is suitable to trade for
                # NOTE(review): the original indentation was lost; side selection is assumed
                # to run only when the model is recalibrated - confirm against the source repo
                self.side[symbol] = symbolData.CheckBias(rankingDf, priceSeries, benchmark)

            # predict the coming week close price direction
            predict = symbolData.Prediction(rankingDf.iloc[-1].values.reshape(1, -1))

            # classification according to prediction
            if predict == 1 and (self.side[symbol] == "long" or self.side[symbol] == "both"):
                longData[symbol] = priceSeries
            elif predict == 0 and (self.side[symbol] == "short" or self.side[symbol] == "both"):
                shortData[symbol] = priceSeries

        # preset beta values to avoid errors
        longBeta = 1
        shortBeta = 1

        if longData:
            # get a dataframe for long symbols' historical data
            longDf = pd.concat(list(longData.values()), axis=1)
            # positional sizing for long symbols
            # NOTE(review): len() counts rows (days), not assets - confirm this is the intended guard
            longWeights = self.portfolioOptimization.CalculatePositionSize(longDf, benchmark) if len(longDf) > 1 else [1.]
            # calculate beta
            longBeta = self.portfolioOptimization.CalculateBeta(longDf, longWeights, benchmark)
        else:
            # if all equities in short list, we long benchmark SPY
            longDf = benchmark
            longWeights = [1.]

        if shortData:
            # get a dataframe for short symbols' historical data
            shortDf = pd.concat(list(shortData.values()), axis=1)
            # positional sizing for short symbols
            shortWeights = self.portfolioOptimization.CalculatePositionSize(-shortDf, -benchmark) if len(shortDf) > 1 else [1.]
            # calculate beta
            shortBeta = self.portfolioOptimization.CalculateBeta(shortDf, shortWeights, benchmark)
        else:
            # if all equities in long list, we short benchmark SPY
            shortDf = benchmark
            shortWeights = [1.]

        # use the opposite side's beta as inter-portfolio weighting as to wash out systematic risk
        for column, weight in zip(longDf.columns, longWeights):
            insights.append(Insight(column, Expiry.EndOfWeek, InsightType.Price, InsightDirection.Up,
                                    None, None, None, shortBeta*weight))

        for column, weight in zip(shortDf.columns, shortWeights):
            insights.append(Insight(column, Expiry.EndOfWeek, InsightType.Price, InsightDirection.Down,
                                    None, None, None, longBeta*weight))

        # emit insights
        self.EmitInsights(insights)

    def OnSecuritiesChanged(self, changes):
        ''' For every added security except SPY: subscribe to Brain data, create its SymbolData and warm up its bar window
        Args:
            changes: security changes object; only AddedSecurities is read'''
        for change in changes.AddedSecurities:
            # skip benchmark
            if change.Symbol == self.Symbol("SPY"):
                continue

            # Subscribe to Brain's ML stock ranking datasets (2, 3, 5 days) for the tickers
            brainStockRank2Days = self.AddData(BrainStockRanking2Day, change.Symbol).Symbol
            brainStockRank3Days = self.AddData(BrainStockRanking3Day, change.Symbol).Symbol
            brainStockRank5Days = self.AddData(BrainStockRanking5Day, change.Symbol).Symbol
            self.brainStockRank[change.Symbol] = [brainStockRank2Days, brainStockRank3Days, brainStockRank5Days]

            # holding custom SymbolData class for symbol
            self.data[change.Symbol] = SymbolData(change.Symbol, self.edge)

            # set up historical data rolling window, large enough for both the check and the training window
            windowSize = max(self.numberOfDays, self.trainingDataLength) + 1
            self.history[change.Symbol] = RollingWindow[TradeBar](windowSize)

            # warm up rolling window; an empty history frame has no entry for the symbol,
            # so skip the warm-up rather than raising KeyError on .loc
            data = self.History(change.Symbol, windowSize, Resolution.Daily)
            if not data.empty:
                for time, bar in data.loc[change.Symbol].iterrows():
                    tradeBar = TradeBar(time, change.Symbol, bar.open, bar.high, bar.low, bar.close, bar.volume)
                    self.history[change.Symbol].Add(tradeBar)

            # set up consolidator for future auto-update
            self.Consolidate(change.Symbol, Resolution.Daily, self.DailyBarHandler)

    def DailyBarHandler(self, bar):
        ''' Consolidator callback: push the new daily bar into the symbol's rolling window '''
        self.history[bar.Symbol].Add(bar)
import numpy as np
import pandas as pd
from pandas.tseries.holiday import USFederalHolidayCalendar
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix
from sklearn.svm import SVC

np.random.seed(0)


class SignalProccesor:
    ''' Uses Brain's ML Stock Ranking data and SVM to predict weekly active return direction '''

    def __init__(self):
        ''' Initialize a new instance of SignalProccesor class (stateless) '''
        pass

    @staticmethod
    def _ResampleWeekStart(frame, offset):
        ''' Take the first observation of each "W-MON" bin and shift the bin labels by `offset` '''
        # resample(loffset=...) was removed from pandas; adding the offset to the
        # resampled index is the documented replacement and does the same thing
        sampled = frame.resample("W-MON").first()
        sampled.index = sampled.index + offset
        return sampled

    def DataProcessing(self, rankingDf, priceSeries, benchmark):
        ''' Process raw Brain's ML Stock Ranking data, close price series of specific symbol and benchmark
        Args:
            rankingDf: daily historical data of Brain's ML Stock Ranking 2, 3, 5 days
            priceSeries: daily close price of specific symbol (single column expected)
            benchmark: daily close price of benchmark (single column expected)
        Returns:
            (week-start ranking data, weekly active return direction: 1. for up, 0. for no change/down)'''
        # business day offset applied to the weekly labels
        bd = pd.offsets.CustomBusinessDay(calendar=USFederalHolidayCalendar())

        # we're interested in weekly active return change on week start
        priceSeries = self._ResampleWeekStart(priceSeries, bd)
        benchmark = self._ResampleWeekStart(benchmark, bd)
        rankingDf = self._ResampleWeekStart(rankingDf, bd)

        # for training model, we should use log return by Palomar's signal processing book
        # shift it back 1 bar so each row holds the return of the FOLLOWING period
        priceSeries = np.log(priceSeries/priceSeries.shift(1)).shift(-1)
        benchmark = np.log(benchmark/benchmark.shift(1)).shift(-1)

        # reduce every index to plain dates so the three datasets can be matched
        priceSeries.index = pd.to_datetime(priceSeries.index, utc=True).date
        benchmark.index = pd.to_datetime(benchmark.index, utc=True).date
        rankingDf.index = pd.to_datetime(rankingDf.index, utc=True).date

        # keep only the dates present in rankings, price and benchmark
        df = pd.concat([rankingDf, priceSeries, benchmark], axis=1).dropna()

        # active return = symbol return (second-to-last column) - benchmark return (last column)
        activeReturn = df.iloc[:, -2].subtract(df.iloc[:, -1], axis=0)

        # change into directional return series (1 for up, 0 for no change/down);
        # NaNs were dropped above, so a single comparison covers every row
        direction = (activeReturn > 0).astype(float)

        return df.iloc[:, :-2], direction

    def _ReduceAndLabel(self, rankingDf, priceSeries, benchmark, pca):
        ''' Process the raw data and project the rankings with the fitted PCA; returns (features, labels) '''
        rankingDf, binaryReturn = self.DataProcessing(rankingDf, priceSeries, benchmark)
        reduced = pd.DataFrame(pca.transform(rankingDf.fillna(0)))
        return reduced, binaryReturn.fillna(0)

    def ModelFitting(self, rankingDf, priceSeries, benchmark):
        ''' Fitting the PCA + SVM models for specific symbol by given Brain's ML Stock Ranking data
        Args:
            rankingDf: daily historical data of Brain's ML Stock Ranking 2, 3, 5 days
            priceSeries: daily close price of specific symbol
            benchmark: daily close price of benchmark
        Returns:
            fitted PCA model & SVM classifier'''
        # week-start ranking data and weekly directional active return
        rankingDf, binaryReturn = self.DataProcessing(rankingDf, priceSeries, benchmark)

        # PCA to reduce dimension to avoid overfitting, uses MLE to estimate number of components
        pca = PCA(n_components='mle')
        # no scaling step is applied before the PCA
        reduced = pd.DataFrame(pca.fit_transform(rankingDf.fillna(0)))

        # SVM classifier with rbf kernel to predict directional weekly return
        svm = SVC(kernel='rbf')
        svm.fit(reduced, binaryReturn.fillna(0))

        return pca, svm

    def ModelPrediction(self, rankingData, pca, svm):
        ''' Making directional prediction on coming week by Brain's ML Stock Ranking data
        Args:
            rankingData: Brain's ML Stock Ranking 2, 3, 5 days of this instance (one row)
            pca: fitted PCA transform model for specific symbol
            svm: fitted SVM classifier model for specific symbol
        Returns:
            directional prediction (1 for up, 0 for down)'''
        # reduce data dimension from PCA model transformation
        reduced = pd.DataFrame(pca.transform(rankingData)).fillna(0)

        # predict() returns an array; take its first element explicitly because
        # int() on a 1-d array is deprecated in recent NumPy
        return int(svm.predict(reduced)[0])

    def ModelScore(self, rankingDf, priceSeries, benchmark, pca, svm):
        ''' Get score of the SVM model for specific symbol by current data
        Args:
            rankingDf: daily historical data of Brain's ML Stock Ranking 2, 3, 5 days
            priceSeries: daily close price of specific symbol
            benchmark: daily close price of benchmark
            pca: fitted PCA transform model for specific symbol
            svm: fitted SVM classifier model for specific symbol
        Returns:
            accuracy score of SVM classifier'''
        reduced, labels = self._ReduceAndLabel(rankingDf, priceSeries, benchmark, pca)
        return svm.score(reduced, labels)

    def ModelBias(self, rankingDf, priceSeries, benchmark, pca, svm, edge):
        ''' Get sensitivity and specificity of model to decide it should trade for long/short/both side signals
        Args:
            rankingDf: daily historical data of Brain's ML Stock Ranking 2, 3, 5 days
            priceSeries: daily close price of specific symbol
            benchmark: daily close price of benchmark
            pca: fitted PCA transform model for specific symbol
            svm: fitted SVM classifier model for specific symbol
            edge: required percentage of model's given edge, calculated by
                  correct accuracy % - wrong %, if less than that, abandon side
        Returns:
            "both", "long", "short", or None when neither side clears the threshold'''
        reduced, labels = self._ReduceAndLabel(rankingDf, priceSeries, benchmark, pca)

        # contrast observed and predicted results
        predicted = svm.predict(reduced)
        contrast = confusion_matrix(labels, predicted)

        # do not proceed if no differential prediction (only one class present)
        if contrast.shape[0] <= 1:
            return None

        # sensitivity: share of observed "up" weeks predicted up
        # specificity: share of observed "down" weeks predicted down
        sensitivity = contrast[1, 1]/sum(contrast[1, :])
        specificity = contrast[0, 0]/sum(contrast[0, :])

        # return the signal accepted side
        threshold = 0.5 + edge/2
        longOk = sensitivity > threshold
        shortOk = specificity > threshold
        if longOk and shortOk:
            return "both"
        if longOk:
            return "long"
        if shortOk:
            return "short"
        return None
import cvxopt as cvx
import numpy as np


class PortfolioOptimization:
    ''' Calculate insight weights by quadratic programming and the resulting portfolio beta. '''

    def __init__(self, min_=0., max_=1., total_=1.):
        ''' Initialize a new instance of PortfolioOptimization class
        Args:
            min_: minimum weight for individual asset weighting (0 - max)
            max_: maximum weight for individual asset weighting (min to inf)
            total_: target total weight for all assets (max to inf)'''
        # clamp the inputs so that 0 <= minWeight <= maxWeight and totalWeight >= max_
        self.minWeight = max(0, min_)
        self.maxWeight = max(max_, min_)
        self.totalWeight = max(max_, total_)

    def CalculatePositionSize(self, df, benchmark):
        ''' Calculate the positional size for imported assets
        Args:
            df: dataframe of daily close prices, m x n size matrix of m days and n assets
            benchmark: benchmark daily close price series (kept for interface compatibility;
                       NOTE(review): the optimisation never used it - the original computed
                       active returns but did not pass them to the solver; confirm intent)
        Return:
            (n, 1) size array containing position sizes for n assets'''
        # number of assets
        n = int(df.shape[1])

        # change to return series
        returns = df.pct_change()[1:]

        # inequality constraints: Gx <= h
        # stacked as [-I; I] so the first n rows read -x <= -min (i.e. x >= min)
        # and the last n rows read x <= max
        G = cvx.matrix(np.concatenate((-np.eye(n), np.eye(n)), axis=0))
        # the lower bound must be negated to match the -I rows; without the sign
        # flip a positive minimum would allow weights down to -min
        lower = -self.minWeight*np.ones((n, 1))
        # the cap is never below 1/n
        upper = max(1./n, self.maxWeight)*np.ones((n, 1))
        h = cvx.matrix(np.concatenate((lower, upper), axis=0))

        # equality constraints: Ax == b
        # we want total weights sum up to target total weights
        A = cvx.matrix(1.0, (1, n))
        # total weight must be positive; float() keeps the cvxopt matrix double-typed
        b = cvx.matrix(float(max(0.001, self.totalWeight)))

        # Quadratic programming: minimise 1/2 x'Px + q'x with P = covariance, q = -mean return
        solution = cvx.solvers.qp(cvx.matrix(returns.cov().values),
                                  -cvx.matrix(returns.mean().values),
                                  G=G, h=h, A=A, b=b)
        return np.asarray(solution['x'])

    def CalculateBeta(self, df, weights, benchmark):
        ''' Calculate beta of portfolio
        Args:
            df: dataframe of historical close price data, one column per asset
            weights: corresponding weights of each asset in portfolio (array or plain list)
            benchmark: dataframe of benchmark historical close price data, same number of rows as df
        Return:
            covariance of portfolio and benchmark returns divided by benchmark.var()
            (benchmark.var() is per-column, so this is a one-entry Series for a one-column dataframe)'''
        # get return series
        returns = df.pct_change()[1:]
        benchmarkReturns = benchmark.pct_change()[1:]

        # distribute the weight; asarray lets callers pass a plain list such as [1.]
        weightColumn = np.asarray(weights, dtype=float).reshape(-1, 1)
        portfolioReturns = np.dot(returns, weightColumn)

        # get covariance with benchmark (rows are paired by position, not by index label)
        cov = np.cov(np.concatenate((portfolioReturns, benchmarkReturns), axis=1).T)[0, 1]

        # get variance of benchmark
        var = benchmarkReturns.var()

        return cov/var