from sklearn.model_selection import train_test_split from sklearn.ensemble import RandomForestClassifier from sklearn.metrics import accuracy_score, classification_report from datetime import datetime from datetime import timedelta import pandas as pd import numpy as np class TransdimensionalTachyonCompensator(QCAlgorithm): month = 0 def Initialize(self): self.SetStartDate(2000, 1, 1) # Set Start Date # self.SetEndDate(2020, 6, 1) self.SetCash(100000) # Set Strategy Cash self.trading_symbols = [] self.UniverseSettings.Resolution = Resolution.Minute self.AddUniverse(self.CoarseSelectionFunction) # self.SetSecurityInitializer(lambda x: x.SetMarketPrice(self.GetLastKnownPrice(x))) self.lookback_period = 20 self.spy = self.AddEquity("SPY").Symbol # self.Train(self.DateRules.MonthStart(), self.TimeRules.AfterMarketOpen(self.spy, 0), self.train) self.Schedule.On(self.DateRules.MonthStart(), self.TimeRules.AfterMarketOpen(self.spy, 15), self.train) # self.Schedule.On(self.DateRules.EveryDay(), self.TimeRules.AfterMarketOpen(self.spy, 30), self.invest) def avg_true_range(self, df): ind = range(0,len(df)) indexlist = list(ind) df.index = indexlist for index, row in df.iterrows(): if index != 0: tr1 = row["high"] - row["low"] tr2 = abs(row["high"] - df.iloc[index-1]["close"]) tr3 = abs(row["low"] - df.iloc[index-1]["close"]) true_range = max(tr1, tr2, tr3) df.set_value(index,"True Range", true_range) df["Avg TR"] = df["True Range"].rolling(min_periods=14, window=14, center=False).mean() return df def CoarseSelectionFunction(self, coarse): if self.Time.month == self.month: return Universe.Unchanged self.month = self.Time.month sortedByDollarVolume = sorted(coarse, key=lambda x: x.DollarVolume, reverse=True) self.trading_symbols = [ x.Symbol for x in sortedByDollarVolume if x.HasFundamentalData ][:20] return self.trading_symbols def train(self): if not self.trading_symbols: return for symbol in self.trading_symbols: self.AddEquity(symbol, Resolution.Daily) today = self.Time price_history = self.History(self.trading_symbols, self.lookback_period, Resolution.Daily) symbol1 = [self.trading_symbols[0] for i in range(self.lookback_period)] symbol2 = [self.trading_symbols[1] for i in range(self.lookback_period)] symbol3 = [self.trading_symbols[2] for i in range(self.lookback_period)] symbol4 = [self.trading_symbols[3] for i in range(self.lookback_period)] symbol5 = [self.trading_symbols[4] for i in range(self.lookback_period)] symbol6 = [self.trading_symbols[5] for i in range(self.lookback_period)] symbol7 = [self.trading_symbols[6] for i in range(self.lookback_period)] symbol8 = [self.trading_symbols[7] for i in range(self.lookback_period)] symbol9 = [self.trading_symbols[8] for i in range(self.lookback_period)] symbol10 = [self.trading_symbols[9] for i in range(self.lookback_period)] symbol11 = [self.trading_symbols[10] for i in range(self.lookback_period)] symbol12 = [self.trading_symbols[11] for i in range(self.lookback_period)] symbol13 = [self.trading_symbols[12] for i in range(self.lookback_period)] symbol14 = [self.trading_symbols[13] for i in range(self.lookback_period)] symbol15 = [self.trading_symbols[14] for i in range(self.lookback_period)] symbol16 = [self.trading_symbols[15] for i in range(self.lookback_period)] symbol17 = [self.trading_symbols[16] for i in range(self.lookback_period)] symbol18 = [self.trading_symbols[17] for i in range(self.lookback_period)] symbol19 = [self.trading_symbols[18] for i in range(self.lookback_period)] symbol20 = [self.trading_symbols[19] for i in range(self.lookback_period)] close_list = [] open_list = [] high_list = [] low_list = [] volume_list = [] self.chandelier_long = [] self.chandelier_short = [] date_list = [ str(price_history.index[i][1]).split(" ")[0] for i in range(price_history.shape[0]) ] for i in range(price_history.shape[0]): close_list.append(price_history.close.iloc[i]) open_list.append(price_history.open.iloc[i]) high_list.append(price_history.high.iloc[i]) low_list.append(price_history.low.iloc[i]) volume_list.append(price_history.volume.iloc[i]) d = {'symbol': symbol1 + symbol2 + symbol3 + symbol4 + symbol5 + symbol6 + symbol7 + symbol8 + symbol9 \ + symbol10 + symbol11 + symbol12 + symbol13 + symbol14 + symbol15 + symbol16 + symbol17 + symbol18 + \ symbol19 + symbol20, 'datetime': date_list, 'close': close_list, 'high': high_list, 'low': low_list, 'open': open_list, 'volume': volume_list } self.price_data = pd.DataFrame(data=d) # sort the values by symbol and then date # self.price_data.sort_values(by = ['datetime'], inplace = True) # calculate the change in price self.price_data['change_in_price'] = self.price_data['close'].diff() # identify rows where the symbol changes mask = self.price_data['symbol'] != self.price_data['symbol'].shift(1) # For those rows, let's make the value null self.price_data['change_in_price'] = np.where(mask == True, np.nan, self.price_data['change_in_price']) # print the rows that have a null value, should have 20 self.price_data[self.price_data.isna().any(axis = 1)] # Calculate the 14 day RSI n = 14 # First make a copy of the data frame twice up_df, down_df = self.price_data[['symbol','change_in_price']].copy(), self.price_data[['symbol','change_in_price']].copy() # For up days, if the change is less than 0 set to 0. # up_df.loc['change_in_price'] = up_df.loc[(up_df['change_in_price'] < 0), 'change_in_price'] = 0 # For down days, if the change is greater than 0 set to 0. # down_df.loc['change_in_price'] = down_df.loc[(down_df['change_in_price'] > 0), 'change_in_price'] = 0 up_df['change_in_price'] = up_df['change_in_price'].apply(lambda x: max(x, 0)) down_df['change_in_price'] = down_df['change_in_price'].apply(lambda x: min(0, x)) # We need change in price to be absolute. down_df['change_in_price'] = down_df['change_in_price'].abs() # self.price_data["RSI"] = self.RSI("SPY", 14, MovingAverageType.Simple) # Calculate the EWMA (Exponential Weighted Moving Average), meaning older values are given less weight compared to newer values. ewma_up = up_df.groupby('symbol')['change_in_price'].transform(lambda x: x.ewm(span = n).mean()) ewma_down = down_df.groupby('symbol')['change_in_price'].transform(lambda x: x.ewm(span = n).mean()) # Calculate the Relative Strength relative_strength = ewma_up / ewma_down # Calculate the Relative Strength Index relative_strength_index = 100.0 - (100.0 / (1.0 + relative_strength)) # Add the info to the data frame. self.price_data['down_days'] = down_df['change_in_price'] self.price_data['up_days'] = up_df['change_in_price'] self.price_data['RSI'] = relative_strength_index # Calculate the Stochastic Oscillator # Make a copy of the high and low column. low_14, high_14 = self.price_data[['symbol','low']].copy(), self.price_data[['symbol','high']].copy() # Group by symbol, then apply the rolling function and grab the Min and Max. low_14 = low_14.groupby('symbol')['low'].transform(lambda x: x.rolling(window = n).min()) high_14 = high_14.groupby('symbol')['high'].transform(lambda x: x.rolling(window = n).max()) # Calculate the Stochastic Oscillator. k_percent = 100 * ((self.price_data['close'] - low_14) / (high_14 - low_14)) # Add the info to the data frame. self.price_data['low_14'] = low_14 self.price_data['high_14'] = high_14 self.price_data['k_percent'] = k_percent # Make a copy of the high and low column. low_14, high_14 = self.price_data[['symbol','low']].copy(), self.price_data[['symbol','high']].copy() # Group by symbol, then apply the rolling function and grab the Min and Max. low_14 = low_14.groupby('symbol')['low'].transform(lambda x: x.rolling(window = n).min()) high_14 = high_14.groupby('symbol')['high'].transform(lambda x: x.rolling(window = n).max()) # Calculate William %R indicator. r_percent = ((high_14 - self.price_data['close']) / (high_14 - low_14)) * - 100 # Add the info to the data frame. self.price_data['r_percent'] = r_percent # Calculate the MACD ema_26 = self.price_data.groupby('symbol')['close'].transform(lambda x: x.ewm(span = 26).mean()) ema_12 = self.price_data.groupby('symbol')['close'].transform(lambda x: x.ewm(span = 12).mean()) macd = ema_12 - ema_26 # Calculate the EMA ema_9_macd = macd.ewm(span = 9).mean() # Store the data in the data frame. self.price_data['MACD'] = macd self.price_data['MACD_EMA'] = ema_9_macd # Calculate the Price Rate of Change roc_n = 9 # Calculate the Rate of Change in the Price, and store it in the Data Frame. self.price_data['Price_Rate_Of_Change'] = self.price_data.groupby('symbol')['close'].transform(lambda x: x.pct_change(periods = roc_n)) # apply the function to each group # obv_groups = self.price_data.groupby('symbol').apply(self.obv) # self.Debug(f"show obv groups length") # self.Debug(len(obv_groups)) # obv_groups going in one by one # self.Debug(f"show obv_groups") # self.Debug(len(obv_groups)) # add to the data frame, but drop the old index, before adding it. # self.price_data['On Balance Volume'] = obv_groups.reset_index(level=0, drop=True) # self.price_data["On Balance Volume"] = obv_groups # self.Debug(f"show obv groups length") # self.Debug(len(obv_groups)) # self.Debug(f"show on balance volume") # self.Debug(self.price_data.head()) # Create a column we wish to predict ''' In this case, let's create an output column that will be 1 if the closing price at time 't' is greater than 't-1' and 0 otherwise. In other words, if the today's closing price is greater than yesterday's closing price it would be 1. ''' # Group by the `Symbol` column, then grab the `Close` column. close_groups = self.price_data.groupby('symbol')['close'] # Apply the lambda function which will return -1.0 for down, 1.0 for up and 0.0 for no change. close_groups = close_groups.transform(lambda x : np.sign(x.diff())) # add the data to the main dataframe. self.price_data['Prediction'] = close_groups # for simplicity in later sections I'm going to make a change to our prediction column. To keep this as a binary classifier I'll change flat days and consider them up days. self.price_data.loc[self.price_data['Prediction'] == 0.0] = 1.0 self.price_data = self.avg_true_range(self.price_data) # Any row that has a `NaN` value will be dropped. self.price_data = self.price_data.dropna() self.price_data.reset_index(drop = True) for i in range(len(self.price_data)): self.chandelier_long.append(self.price_data["high"][i:i+22].max() - (self.price_data["Avg TR"][i:i+22].mean() * 3)) self.chandelier_short.append(self.price_data["low"][i:i+22].max() + (self.price_data["Avg TR"][i:i+22].mean() * 3)) self.price_data["chandelier long"] = self.chandelier_long self.price_data["chandelier short"] = self.chandelier_short # Grab our X & Y Columns. # X_Cols = self.price_data[['RSI','k_percent','r_percent','Price_Rate_Of_Change','MACD','On Balance Volume']] X_Cols = self.price_data[['RSI','k_percent','r_percent','Price_Rate_Of_Change','MACD']] Y_Cols = self.price_data['Prediction'] # Split X and y into X_ X_train, X_test, y_train, y_test = train_test_split(X_Cols, Y_Cols, random_state = 0) # Create a Random Forest Classifier rand_frst_clf = RandomForestClassifier(n_estimators = 100, oob_score = True, criterion = "gini", random_state = 0) # Fit the data to the model rand_frst_clf.fit(X_train, y_train) # Make predictions y_pred = rand_frst_clf.predict(X_test) # sort the values by symbol and then date # self.price_data.sort_values(by = ['symbol','datetime'], inplace = True) # self.Debug(f"Correct Prediction (%): ") # self.Debug(accuracy_score(y_test, rand_frst_clf.predict(X_test), normalize = True) * 100.0) # self.price_data['Prediction'] = self.price_data['Prediction'].shift(periods=1) self.price_data = self.price_data.dropna() self.test_df = self.price_data[ ["symbol", "close", "datetime", "Prediction", "chandelier long", "chandelier short"]] # def invest (self): today = self.Time date = str(today).split()[0] # self.Debug(self.test_df["datetime"]) select_indices = list(np.where(self.test_df["datetime"] == date)[0]) for i in select_indices: # self.Debug(self.test_df["symbol"].iloc[i]) if self.test_df["Prediction"].iloc[i] == 1: self.SetHoldings( self.test_df["symbol"].iloc[i] , 1/len(self.trading_symbols)) if self.test_df["close"].iloc[i] < self.test_df["chandelier long"].iloc[i]: self.Liquidate(self.test_df["symbol"].iloc[i]) elif self.test_df["Prediction"].iloc[i] == -1: self.SetHoldings(self.test_df["symbol"].iloc[i] , -1.0/len(self.trading_symbols)) if self.test_df["close"].iloc[i] < self.test_df["chandelier short"].iloc[i]: self.Liquidate(self.test_df["symbol"].iloc[i]) # self.Debug(self.test_df)

Hi guys, I'm currently facing the following error:

"Runtime Error: In Scheduled Event 'MonthStart: SPY: 15 min after MarketOpen', ValueError : arrays must all be same length
at train in main.py:line 111"

The thing is, the code works fine for the first couple of iterations and only breaks after a couple of months.

Any help will be greatly appreciated, thank you!

Author