from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from datetime import datetime
from datetime import timedelta
import pandas as pd
import numpy as np

class TransdimensionalTachyonCompensator(QCAlgorithm):
month = 0
def Initialize(self):
self.SetStartDate(2000, 1, 1) # Set Start Date
# self.SetEndDate(2020, 6, 1)
self.SetCash(100000) # Set Strategy Cash
self.trading_symbols = []
self.UniverseSettings.Resolution = Resolution.Minute
self.AddUniverse(self.CoarseSelectionFunction)
# self.SetSecurityInitializer(lambda x: x.SetMarketPrice(self.GetLastKnownPrice(x)))
self.lookback_period = 20
self.spy = self.AddEquity("SPY").Symbol

# self.Train(self.DateRules.MonthStart(), self.TimeRules.AfterMarketOpen(self.spy, 0), self.train)
self.Schedule.On(self.DateRules.MonthStart(), self.TimeRules.AfterMarketOpen(self.spy, 15), self.train)
# self.Schedule.On(self.DateRules.EveryDay(), self.TimeRules.AfterMarketOpen(self.spy, 30), self.invest)

def avg_true_range(self, df):
ind = range(0,len(df))
indexlist = list(ind)
df.index = indexlist

for index, row in df.iterrows():
if index != 0:
tr1 = row["high"] - row["low"]
tr2 = abs(row["high"] - df.iloc[index-1]["close"])
tr3 = abs(row["low"] - df.iloc[index-1]["close"])

true_range = max(tr1, tr2, tr3)
df.set_value(index,"True Range", true_range)

df["Avg TR"] = df["True Range"].rolling(min_periods=14, window=14, center=False).mean()

return df

def CoarseSelectionFunction(self, coarse):
if self.Time.month == self.month:
return Universe.Unchanged
self.month = self.Time.month
sortedByDollarVolume = sorted(coarse, key=lambda x: x.DollarVolume, reverse=True)
self.trading_symbols = [ x.Symbol for x in sortedByDollarVolume if x.HasFundamentalData ][:20]
return self.trading_symbols

def train(self):
if not self.trading_symbols: return

for symbol in self.trading_symbols:
self.AddEquity(symbol, Resolution.Daily)

today = self.Time

price_history = self.History(self.trading_symbols, self.lookback_period, Resolution.Daily)

symbol1 = [self.trading_symbols[0] for i in range(self.lookback_period)]
symbol2 = [self.trading_symbols[1] for i in range(self.lookback_period)]
symbol3 = [self.trading_symbols[2] for i in range(self.lookback_period)]
symbol4 = [self.trading_symbols[3] for i in range(self.lookback_period)]
symbol5 = [self.trading_symbols[4] for i in range(self.lookback_period)]
symbol6 = [self.trading_symbols[5] for i in range(self.lookback_period)]
symbol7 = [self.trading_symbols[6] for i in range(self.lookback_period)]
symbol8 = [self.trading_symbols[7] for i in range(self.lookback_period)]
symbol9 = [self.trading_symbols[8] for i in range(self.lookback_period)]
symbol10 = [self.trading_symbols[9] for i in range(self.lookback_period)]
symbol11 = [self.trading_symbols[10] for i in range(self.lookback_period)]
symbol12 = [self.trading_symbols[11] for i in range(self.lookback_period)]
symbol13 = [self.trading_symbols[12] for i in range(self.lookback_period)]
symbol14 = [self.trading_symbols[13] for i in range(self.lookback_period)]
symbol15 = [self.trading_symbols[14] for i in range(self.lookback_period)]
symbol16 = [self.trading_symbols[15] for i in range(self.lookback_period)]
symbol17 = [self.trading_symbols[16] for i in range(self.lookback_period)]
symbol18 = [self.trading_symbols[17] for i in range(self.lookback_period)]
symbol19 = [self.trading_symbols[18] for i in range(self.lookback_period)]
symbol20 = [self.trading_symbols[19] for i in range(self.lookback_period)]

close_list = []
open_list = []
high_list = []
low_list = []
volume_list = []

self.chandelier_long = []
self.chandelier_short = []

date_list = [ str(price_history.index[i][1]).split(" ")[0] for i in range(price_history.shape[0]) ]

for i in range(price_history.shape[0]):
close_list.append(price_history.close.iloc[i])
open_list.append(price_history.open.iloc[i])
high_list.append(price_history.high.iloc[i])
low_list.append(price_history.low.iloc[i])
volume_list.append(price_history.volume.iloc[i])

d = {'symbol': symbol1 + symbol2 + symbol3 + symbol4 + symbol5 + symbol6 + symbol7 + symbol8 + symbol9 \
+ symbol10 + symbol11 + symbol12 + symbol13 + symbol14 + symbol15 + symbol16 + symbol17 + symbol18 + \
symbol19 + symbol20,
'datetime': date_list,
'close': close_list,
'high': high_list,
'low': low_list,
'open': open_list,
'volume': volume_list
}
self.price_data = pd.DataFrame(data=d)

# sort the values by symbol and then date
# self.price_data.sort_values(by = ['datetime'], inplace = True)

# calculate the change in price
self.price_data['change_in_price'] = self.price_data['close'].diff()

# identify rows where the symbol changes
mask = self.price_data['symbol'] != self.price_data['symbol'].shift(1)

# For those rows, let's make the value null
self.price_data['change_in_price'] = np.where(mask == True, np.nan, self.price_data['change_in_price'])

# print the rows that have a null value, should have 20
self.price_data[self.price_data.isna().any(axis = 1)]

# Calculate the 14 day RSI
n = 14

# First make a copy of the data frame twice
up_df, down_df = self.price_data[['symbol','change_in_price']].copy(), self.price_data[['symbol','change_in_price']].copy()

# For up days, if the change is less than 0 set to 0.
# up_df.loc['change_in_price'] = up_df.loc[(up_df['change_in_price'] < 0), 'change_in_price'] = 0

# For down days, if the change is greater than 0 set to 0.
# down_df.loc['change_in_price'] = down_df.loc[(down_df['change_in_price'] > 0), 'change_in_price'] = 0

up_df['change_in_price'] = up_df['change_in_price'].apply(lambda x: max(x, 0))

down_df['change_in_price'] = down_df['change_in_price'].apply(lambda x: min(0, x))

# We need change in price to be absolute.
down_df['change_in_price'] = down_df['change_in_price'].abs()

# self.price_data["RSI"] = self.RSI("SPY", 14, MovingAverageType.Simple)

# Calculate the EWMA (Exponential Weighted Moving Average), meaning older values are given less weight compared to newer values.
ewma_up = up_df.groupby('symbol')['change_in_price'].transform(lambda x: x.ewm(span = n).mean())
ewma_down = down_df.groupby('symbol')['change_in_price'].transform(lambda x: x.ewm(span = n).mean())

# Calculate the Relative Strength
relative_strength = ewma_up / ewma_down

# Calculate the Relative Strength Index
relative_strength_index = 100.0 - (100.0 / (1.0 + relative_strength))

# Add the info to the data frame.
self.price_data['down_days'] = down_df['change_in_price']
self.price_data['up_days'] = up_df['change_in_price']
self.price_data['RSI'] = relative_strength_index

# Calculate the Stochastic Oscillator
# Make a copy of the high and low column.
low_14, high_14 = self.price_data[['symbol','low']].copy(), self.price_data[['symbol','high']].copy()

# Group by symbol, then apply the rolling function and grab the Min and Max.
low_14 = low_14.groupby('symbol')['low'].transform(lambda x: x.rolling(window = n).min())
high_14 = high_14.groupby('symbol')['high'].transform(lambda x: x.rolling(window = n).max())

# Calculate the Stochastic Oscillator.
k_percent = 100 * ((self.price_data['close'] - low_14) / (high_14 - low_14))

# Add the info to the data frame.
self.price_data['low_14'] = low_14
self.price_data['high_14'] = high_14
self.price_data['k_percent'] = k_percent

# Make a copy of the high and low column.
low_14, high_14 = self.price_data[['symbol','low']].copy(), self.price_data[['symbol','high']].copy()

# Group by symbol, then apply the rolling function and grab the Min and Max.
low_14 = low_14.groupby('symbol')['low'].transform(lambda x: x.rolling(window = n).min())
high_14 = high_14.groupby('symbol')['high'].transform(lambda x: x.rolling(window = n).max())

# Calculate William %R indicator.
r_percent = ((high_14 - self.price_data['close']) / (high_14 - low_14)) * - 100

# Add the info to the data frame.
self.price_data['r_percent'] = r_percent

# Calculate the MACD
ema_26 = self.price_data.groupby('symbol')['close'].transform(lambda x: x.ewm(span = 26).mean())
ema_12 = self.price_data.groupby('symbol')['close'].transform(lambda x: x.ewm(span = 12).mean())
macd = ema_12 - ema_26

# Calculate the EMA
ema_9_macd = macd.ewm(span = 9).mean()

# Store the data in the data frame.
self.price_data['MACD'] = macd
self.price_data['MACD_EMA'] = ema_9_macd

# Calculate the Price Rate of Change
roc_n = 9

# Calculate the Rate of Change in the Price, and store it in the Data Frame.
self.price_data['Price_Rate_Of_Change'] = self.price_data.groupby('symbol')['close'].transform(lambda x: x.pct_change(periods = roc_n))

# apply the function to each group
# obv_groups = self.price_data.groupby('symbol').apply(self.obv)

# self.Debug(f"show obv groups length")
# self.Debug(len(obv_groups))

# obv_groups going in one by one
# self.Debug(f"show obv_groups")
# self.Debug(len(obv_groups))

# add to the data frame, but drop the old index, before adding it.
# self.price_data['On Balance Volume'] = obv_groups.reset_index(level=0, drop=True)

# self.price_data["On Balance Volume"] = obv_groups

# self.Debug(f"show obv groups length")

# self.Debug(len(obv_groups))

# self.Debug(f"show on balance volume")

# self.Debug(self.price_data.head())

# Create a column we wish to predict
'''
In this case, let's create an output column that will be 1 if the closing price at time 't' is greater than 't-1' and 0 otherwise.
In other words, if the today's closing price is greater than yesterday's closing price it would be 1.
'''

# Group by the `Symbol` column, then grab the `Close` column.
close_groups = self.price_data.groupby('symbol')['close']

# Apply the lambda function which will return -1.0 for down, 1.0 for up and 0.0 for no change.
close_groups = close_groups.transform(lambda x : np.sign(x.diff()))

# add the data to the main dataframe.
self.price_data['Prediction'] = close_groups

# for simplicity in later sections I'm going to make a change to our prediction column. To keep this as a binary classifier I'll change flat days and consider them up days.
self.price_data.loc[self.price_data['Prediction'] == 0.0] = 1.0

self.price_data = self.avg_true_range(self.price_data)

# Any row that has a `NaN` value will be dropped.
self.price_data = self.price_data.dropna()

self.price_data.reset_index(drop = True)

for i in range(len(self.price_data)):
self.chandelier_long.append(self.price_data["high"][i:i+22].max() - (self.price_data["Avg TR"][i:i+22].mean() * 3))
self.chandelier_short.append(self.price_data["low"][i:i+22].max() + (self.price_data["Avg TR"][i:i+22].mean() * 3))

self.price_data["chandelier long"] = self.chandelier_long
self.price_data["chandelier short"] = self.chandelier_short

# Grab our X & Y Columns.
# X_Cols = self.price_data[['RSI','k_percent','r_percent','Price_Rate_Of_Change','MACD','On Balance Volume']]
X_Cols = self.price_data[['RSI','k_percent','r_percent','Price_Rate_Of_Change','MACD']]

Y_Cols = self.price_data['Prediction']

# Split X and y into X_
X_train, X_test, y_train, y_test = train_test_split(X_Cols, Y_Cols, random_state = 0)

# Create a Random Forest Classifier
rand_frst_clf = RandomForestClassifier(n_estimators = 100, oob_score = True, criterion = "gini", random_state = 0)

# Fit the data to the model
rand_frst_clf.fit(X_train, y_train)

# Make predictions
y_pred = rand_frst_clf.predict(X_test)

# sort the values by symbol and then date
# self.price_data.sort_values(by = ['symbol','datetime'], inplace = True)

# self.Debug(f"Correct Prediction (%): ")
# self.Debug(accuracy_score(y_test, rand_frst_clf.predict(X_test), normalize = True) * 100.0)

# self.price_data['Prediction'] = self.price_data['Prediction'].shift(periods=1)

self.price_data = self.price_data.dropna()

self.test_df = self.price_data[ ["symbol", "close", "datetime", "Prediction", "chandelier long", "chandelier short"]]

# def invest (self):
today = self.Time

date = str(today).split()[0]

# self.Debug(self.test_df["datetime"])

select_indices = list(np.where(self.test_df["datetime"] == date)[0])

for i in select_indices:
# self.Debug(self.test_df["symbol"].iloc[i])
if self.test_df["Prediction"].iloc[i] == 1:
self.SetHoldings( self.test_df["symbol"].iloc[i] , 1/len(self.trading_symbols))

if self.test_df["close"].iloc[i] < self.test_df["chandelier long"].iloc[i]:
self.Liquidate(self.test_df["symbol"].iloc[i])

elif self.test_df["Prediction"].iloc[i] == -1:
self.SetHoldings(self.test_df["symbol"].iloc[i] , -1.0/len(self.trading_symbols))

if self.test_df["close"].iloc[i] < self.test_df["chandelier short"].iloc[i]:
self.Liquidate(self.test_df["symbol"].iloc[i])

# self.Debug(self.test_df)







Hi guys, I'm currently facing the following error:

"Runtime Error: In Scheduled Event 'MonthStart: SPY: 15 min after MarketOpen', ValueError : arrays must all be same length
at train in main.py:line 111"

The thing is, the code works fine for the first couple of iterations and only breaks after a couple of months.

Any help will be greatly appreciated, thank you!