from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from datetime import datetime
from datetime import timedelta
import pandas as pd
import numpy as np
class TransdimensionalTachyonCompensator(QCAlgorithm):
month = 0
def Initialize(self):
self.SetStartDate(2000, 1, 1) # Set Start Date
# self.SetEndDate(2020, 6, 1)
self.SetCash(100000) # Set Strategy Cash
self.trading_symbols = []
self.UniverseSettings.Resolution = Resolution.Minute
self.AddUniverse(self.CoarseSelectionFunction)
# self.SetSecurityInitializer(lambda x: x.SetMarketPrice(self.GetLastKnownPrice(x)))
self.lookback_period = 20
self.spy = self.AddEquity("SPY").Symbol
# self.Train(self.DateRules.MonthStart(), self.TimeRules.AfterMarketOpen(self.spy, 0), self.train)
self.Schedule.On(self.DateRules.MonthStart(), self.TimeRules.AfterMarketOpen(self.spy, 15), self.train)
# self.Schedule.On(self.DateRules.EveryDay(), self.TimeRules.AfterMarketOpen(self.spy, 30), self.invest)
def avg_true_range(self, df):
ind = range(0,len(df))
indexlist = list(ind)
df.index = indexlist
for index, row in df.iterrows():
if index != 0:
tr1 = row["high"] - row["low"]
tr2 = abs(row["high"] - df.iloc[index-1]["close"])
tr3 = abs(row["low"] - df.iloc[index-1]["close"])
true_range = max(tr1, tr2, tr3)
df.set_value(index,"True Range", true_range)
df["Avg TR"] = df["True Range"].rolling(min_periods=14, window=14, center=False).mean()
return df
def CoarseSelectionFunction(self, coarse):
if self.Time.month == self.month:
return Universe.Unchanged
self.month = self.Time.month
sortedByDollarVolume = sorted(coarse, key=lambda x: x.DollarVolume, reverse=True)
self.trading_symbols = [ x.Symbol for x in sortedByDollarVolume if x.HasFundamentalData ][:20]
return self.trading_symbols
def train(self):
if not self.trading_symbols: return
for symbol in self.trading_symbols:
self.AddEquity(symbol, Resolution.Daily)
today = self.Time
price_history = self.History(self.trading_symbols, self.lookback_period, Resolution.Daily)
symbol1 = [self.trading_symbols[0] for i in range(self.lookback_period)]
symbol2 = [self.trading_symbols[1] for i in range(self.lookback_period)]
symbol3 = [self.trading_symbols[2] for i in range(self.lookback_period)]
symbol4 = [self.trading_symbols[3] for i in range(self.lookback_period)]
symbol5 = [self.trading_symbols[4] for i in range(self.lookback_period)]
symbol6 = [self.trading_symbols[5] for i in range(self.lookback_period)]
symbol7 = [self.trading_symbols[6] for i in range(self.lookback_period)]
symbol8 = [self.trading_symbols[7] for i in range(self.lookback_period)]
symbol9 = [self.trading_symbols[8] for i in range(self.lookback_period)]
symbol10 = [self.trading_symbols[9] for i in range(self.lookback_period)]
symbol11 = [self.trading_symbols[10] for i in range(self.lookback_period)]
symbol12 = [self.trading_symbols[11] for i in range(self.lookback_period)]
symbol13 = [self.trading_symbols[12] for i in range(self.lookback_period)]
symbol14 = [self.trading_symbols[13] for i in range(self.lookback_period)]
symbol15 = [self.trading_symbols[14] for i in range(self.lookback_period)]
symbol16 = [self.trading_symbols[15] for i in range(self.lookback_period)]
symbol17 = [self.trading_symbols[16] for i in range(self.lookback_period)]
symbol18 = [self.trading_symbols[17] for i in range(self.lookback_period)]
symbol19 = [self.trading_symbols[18] for i in range(self.lookback_period)]
symbol20 = [self.trading_symbols[19] for i in range(self.lookback_period)]
close_list = []
open_list = []
high_list = []
low_list = []
volume_list = []
self.chandelier_long = []
self.chandelier_short = []
date_list = [ str(price_history.index[i][1]).split(" ")[0] for i in range(price_history.shape[0]) ]
for i in range(price_history.shape[0]):
close_list.append(price_history.close.iloc[i])
open_list.append(price_history.open.iloc[i])
high_list.append(price_history.high.iloc[i])
low_list.append(price_history.low.iloc[i])
volume_list.append(price_history.volume.iloc[i])
d = {'symbol': symbol1 + symbol2 + symbol3 + symbol4 + symbol5 + symbol6 + symbol7 + symbol8 + symbol9 \
+ symbol10 + symbol11 + symbol12 + symbol13 + symbol14 + symbol15 + symbol16 + symbol17 + symbol18 + \
symbol19 + symbol20,
'datetime': date_list,
'close': close_list,
'high': high_list,
'low': low_list,
'open': open_list,
'volume': volume_list
}
self.price_data = pd.DataFrame(data=d)
# sort the values by symbol and then date
# self.price_data.sort_values(by = ['datetime'], inplace = True)
# calculate the change in price
self.price_data['change_in_price'] = self.price_data['close'].diff()
# identify rows where the symbol changes
mask = self.price_data['symbol'] != self.price_data['symbol'].shift(1)
# For those rows, let's make the value null
self.price_data['change_in_price'] = np.where(mask == True, np.nan, self.price_data['change_in_price'])
# print the rows that have a null value, should have 20
self.price_data[self.price_data.isna().any(axis = 1)]
# Calculate the 14 day RSI
n = 14
# First make a copy of the data frame twice
up_df, down_df = self.price_data[['symbol','change_in_price']].copy(), self.price_data[['symbol','change_in_price']].copy()
# For up days, if the change is less than 0 set to 0.
# up_df.loc['change_in_price'] = up_df.loc[(up_df['change_in_price'] < 0), 'change_in_price'] = 0
# For down days, if the change is greater than 0 set to 0.
# down_df.loc['change_in_price'] = down_df.loc[(down_df['change_in_price'] > 0), 'change_in_price'] = 0
up_df['change_in_price'] = up_df['change_in_price'].apply(lambda x: max(x, 0))
down_df['change_in_price'] = down_df['change_in_price'].apply(lambda x: min(0, x))
# We need change in price to be absolute.
down_df['change_in_price'] = down_df['change_in_price'].abs()
# self.price_data["RSI"] = self.RSI("SPY", 14, MovingAverageType.Simple)
# Calculate the EWMA (Exponential Weighted Moving Average), meaning older values are given less weight compared to newer values.
ewma_up = up_df.groupby('symbol')['change_in_price'].transform(lambda x: x.ewm(span = n).mean())
ewma_down = down_df.groupby('symbol')['change_in_price'].transform(lambda x: x.ewm(span = n).mean())
# Calculate the Relative Strength
relative_strength = ewma_up / ewma_down
# Calculate the Relative Strength Index
relative_strength_index = 100.0 - (100.0 / (1.0 + relative_strength))
# Add the info to the data frame.
self.price_data['down_days'] = down_df['change_in_price']
self.price_data['up_days'] = up_df['change_in_price']
self.price_data['RSI'] = relative_strength_index
# Calculate the Stochastic Oscillator
# Make a copy of the high and low column.
low_14, high_14 = self.price_data[['symbol','low']].copy(), self.price_data[['symbol','high']].copy()
# Group by symbol, then apply the rolling function and grab the Min and Max.
low_14 = low_14.groupby('symbol')['low'].transform(lambda x: x.rolling(window = n).min())
high_14 = high_14.groupby('symbol')['high'].transform(lambda x: x.rolling(window = n).max())
# Calculate the Stochastic Oscillator.
k_percent = 100 * ((self.price_data['close'] - low_14) / (high_14 - low_14))
# Add the info to the data frame.
self.price_data['low_14'] = low_14
self.price_data['high_14'] = high_14
self.price_data['k_percent'] = k_percent
# Make a copy of the high and low column.
low_14, high_14 = self.price_data[['symbol','low']].copy(), self.price_data[['symbol','high']].copy()
# Group by symbol, then apply the rolling function and grab the Min and Max.
low_14 = low_14.groupby('symbol')['low'].transform(lambda x: x.rolling(window = n).min())
high_14 = high_14.groupby('symbol')['high'].transform(lambda x: x.rolling(window = n).max())
# Calculate William %R indicator.
r_percent = ((high_14 - self.price_data['close']) / (high_14 - low_14)) * - 100
# Add the info to the data frame.
self.price_data['r_percent'] = r_percent
# Calculate the MACD
ema_26 = self.price_data.groupby('symbol')['close'].transform(lambda x: x.ewm(span = 26).mean())
ema_12 = self.price_data.groupby('symbol')['close'].transform(lambda x: x.ewm(span = 12).mean())
macd = ema_12 - ema_26
# Calculate the EMA
ema_9_macd = macd.ewm(span = 9).mean()
# Store the data in the data frame.
self.price_data['MACD'] = macd
self.price_data['MACD_EMA'] = ema_9_macd
# Calculate the Price Rate of Change
roc_n = 9
# Calculate the Rate of Change in the Price, and store it in the Data Frame.
self.price_data['Price_Rate_Of_Change'] = self.price_data.groupby('symbol')['close'].transform(lambda x: x.pct_change(periods = roc_n))
# apply the function to each group
# obv_groups = self.price_data.groupby('symbol').apply(self.obv)
# self.Debug(f"show obv groups length")
# self.Debug(len(obv_groups))
# obv_groups going in one by one
# self.Debug(f"show obv_groups")
# self.Debug(len(obv_groups))
# add to the data frame, but drop the old index, before adding it.
# self.price_data['On Balance Volume'] = obv_groups.reset_index(level=0, drop=True)
# self.price_data["On Balance Volume"] = obv_groups
# self.Debug(f"show obv groups length")
# self.Debug(len(obv_groups))
# self.Debug(f"show on balance volume")
# self.Debug(self.price_data.head())
# Create a column we wish to predict
'''
In this case, let's create an output column that will be 1 if the closing price at time 't' is greater than 't-1' and 0 otherwise.
In other words, if the today's closing price is greater than yesterday's closing price it would be 1.
'''
# Group by the `Symbol` column, then grab the `Close` column.
close_groups = self.price_data.groupby('symbol')['close']
# Apply the lambda function which will return -1.0 for down, 1.0 for up and 0.0 for no change.
close_groups = close_groups.transform(lambda x : np.sign(x.diff()))
# add the data to the main dataframe.
self.price_data['Prediction'] = close_groups
# for simplicity in later sections I'm going to make a change to our prediction column. To keep this as a binary classifier I'll change flat days and consider them up days.
self.price_data.loc[self.price_data['Prediction'] == 0.0] = 1.0
self.price_data = self.avg_true_range(self.price_data)
# Any row that has a `NaN` value will be dropped.
self.price_data = self.price_data.dropna()
self.price_data.reset_index(drop = True)
for i in range(len(self.price_data)):
self.chandelier_long.append(self.price_data["high"][i:i+22].max() - (self.price_data["Avg TR"][i:i+22].mean() * 3))
self.chandelier_short.append(self.price_data["low"][i:i+22].max() + (self.price_data["Avg TR"][i:i+22].mean() * 3))
self.price_data["chandelier long"] = self.chandelier_long
self.price_data["chandelier short"] = self.chandelier_short
# Grab our X & Y Columns.
# X_Cols = self.price_data[['RSI','k_percent','r_percent','Price_Rate_Of_Change','MACD','On Balance Volume']]
X_Cols = self.price_data[['RSI','k_percent','r_percent','Price_Rate_Of_Change','MACD']]
Y_Cols = self.price_data['Prediction']
# Split X and y into X_
X_train, X_test, y_train, y_test = train_test_split(X_Cols, Y_Cols, random_state = 0)
# Create a Random Forest Classifier
rand_frst_clf = RandomForestClassifier(n_estimators = 100, oob_score = True, criterion = "gini", random_state = 0)
# Fit the data to the model
rand_frst_clf.fit(X_train, y_train)
# Make predictions
y_pred = rand_frst_clf.predict(X_test)
# sort the values by symbol and then date
# self.price_data.sort_values(by = ['symbol','datetime'], inplace = True)
# self.Debug(f"Correct Prediction (%): ")
# self.Debug(accuracy_score(y_test, rand_frst_clf.predict(X_test), normalize = True) * 100.0)
# self.price_data['Prediction'] = self.price_data['Prediction'].shift(periods=1)
self.price_data = self.price_data.dropna()
self.test_df = self.price_data[ ["symbol", "close", "datetime", "Prediction", "chandelier long", "chandelier short"]]
# def invest (self):
today = self.Time
date = str(today).split()[0]
# self.Debug(self.test_df["datetime"])
select_indices = list(np.where(self.test_df["datetime"] == date)[0])
for i in select_indices:
# self.Debug(self.test_df["symbol"].iloc[i])
if self.test_df["Prediction"].iloc[i] == 1:
self.SetHoldings( self.test_df["symbol"].iloc[i] , 1/len(self.trading_symbols))
if self.test_df["close"].iloc[i] < self.test_df["chandelier long"].iloc[i]:
self.Liquidate(self.test_df["symbol"].iloc[i])
elif self.test_df["Prediction"].iloc[i] == -1:
self.SetHoldings(self.test_df["symbol"].iloc[i] , -1.0/len(self.trading_symbols))
if self.test_df["close"].iloc[i] < self.test_df["chandelier short"].iloc[i]:
self.Liquidate(self.test_df["symbol"].iloc[i])
# self.Debug(self.test_df)
Hi guys, I'm currently facing the following error:
"Runtime Error: In Scheduled Event 'MonthStart: SPY: 15 min after MarketOpen', ValueError : arrays must all be same length
at train in main.py:line 111"
The thing is, the code works fine for the first couple of iterations and only breaks after a couple of months.
Any help will be greatly appreciated, thank you!