Hi all, I am currently trying to improve my LSTM model via hyperparameter optimisation (random search). The goal of the model is to predict whether the next day's closing price of the SPY ETF (S&P 500 ETF) will be higher or lower than the current close.
Unfortunately, during training in the Research.ipynb environment, the connection to the server keeps dropping.
Is there a bug in my code, or do I need to upgrade to a paid research node to optimise this LSTM model further?
What is your opinion about my model?
I am very grateful for any feedback.
Thank you very much for your help and opinion!
Florian
import pandas as pd
import numpy as np
from datetime import datetime
from keras.layers import LSTM, Dense, Dropout, BatchNormalization
from keras.models import Sequential
from keras.optimizers import Adam
# Note: this wrapper was removed from newer Keras/TF releases; there it lives in
# the scikeras package instead (scikeras.wrappers.KerasClassifier)
from keras.wrappers.scikit_learn import KerasClassifier
from keras.callbacks import EarlyStopping, History
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import precision_score, f1_score, accuracy_score
from sklearn.model_selection import RandomizedSearchCV
from matplotlib import pyplot as plt
def create_model(learning_rate=0.0005, lstm1_units=200, lstm2_units=150, lstm3_units=100, dropout_rate=0.5):
    model = Sequential()
    # Three stacked LSTM layers; input is a 60-step window with 2 features (Close, SMA10)
    model.add(LSTM(units=lstm1_units, return_sequences=True, input_shape=(features_set.shape[1], 2)))
    model.add(Dropout(dropout_rate))
    model.add(BatchNormalization())
    model.add(LSTM(units=lstm2_units, return_sequences=True))
    model.add(Dropout(dropout_rate))
    model.add(BatchNormalization())
    model.add(LSTM(units=lstm3_units))
    model.add(Dropout(dropout_rate))
    model.add(BatchNormalization())
    # Sigmoid output for the binary up/down classification
    model.add(Dense(units=1, activation='sigmoid'))
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return model
qb = QuantBook()
asset = "SPY"
qb.AddEquity(asset, Resolution.Minute)
history = qb.History(qb.Securities.Keys, datetime(2019, 1, 1), datetime(2021, 12, 31), Resolution.Daily)
close_price = history['close'].unstack(level=0)
close_price['sma10'] = close_price[asset].rolling(window=10).mean()
# Drop the first 9 rows with NaN values in the SMA10 column
close_price = close_price.dropna()
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(close_price)
df = pd.DataFrame(scaled_data, index=close_price.index, columns=['Close', 'SMA10'])
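One thing I am not sure about: fit_transform over the full history bakes the test period's min/max into the training features. A leak-free variant I considered (illustrative names, not a drop-in replacement, since the rest of the notebook builds windows from the single df):
# Fit the scaler on the training rows only, then reuse it for the held-out rows
split = int(close_price.shape[0] * 0.8)
scaler_train = MinMaxScaler(feature_range=(0, 1))
train_scaled = scaler_train.fit_transform(close_price.iloc[:split])
test_scaled = scaler_train.transform(close_price.iloc[split:])
df_train = pd.DataFrame(train_scaled, index=close_price.index[:split], columns=['Close', 'SMA10'])
df_test = pd.DataFrame(test_scaled, index=close_price.index[split:], columns=['Close', 'SMA10'])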
# Next day's values: used below to build the up/down labels
output = df.shift(-1).iloc[:-1]
splitter = int(df.shape[0] * 0.8)
X_train = df.iloc[:splitter]
X_test = df.iloc[splitter:]
y_train = output.iloc[:splitter]
y_test = output.iloc[splitter:]
features_set = []
labels = []
# Build 60-day rolling windows; label is 1 if the next day's close is above the current close
for i in range(60, X_train.shape[0]):
    features_set.append(X_train.iloc[i - 60:i].values)
    labels.append(int(y_train.iloc[i, 0] > X_train.iloc[i, 0]))
features_set, labels = np.array(features_set), np.array(labels)
features_set = np.reshape(features_set, (features_set.shape[0], features_set.shape[1], 2))
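One detail I want to double-check in that loop: the window X_train.iloc[i - 60:i] ends at day i - 1, while the label compares day i + 1 against day i (output is df shifted by -1), so there is a one-day gap between the last observed day and the predicted move. A toy example with a 2-day window and made-up prices shows the alignment:
# Toy alignment check (window length 2 instead of 60, made-up prices)
toy = pd.DataFrame({'Close': [1.0, 2.0, 3.0, 4.0, 5.0]})
toy_out = toy.shift(-1).iloc[:-1]     # next-day closes
i = 3
window = toy.iloc[i - 2:i]            # rows 1-2: [2.0, 3.0]
label = int(toy_out.iloc[i, 0] > toy.iloc[i, 0])  # day 4 (5.0) vs day 3 (4.0) -> 1
print(window.values.ravel(), label)   # [2. 3.] 1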
# Hyperparameter optimization
model = KerasClassifier(build_fn=create_model, epochs=30, batch_size=64, verbose=1)
param_grid = {
    'learning_rate': [0.001, 0.0005, 0.0001],
    'lstm1_units': [100, 150, 200],
    'lstm2_units': [100, 150, 200],
    'lstm3_units': [50, 75, 100],
    'dropout_rate': [0.3, 0.5, 0.7],
}
random_search = RandomizedSearchCV(estimator=model, param_distributions=param_grid, n_iter=10, cv=3, verbose=2, random_state=42)
random_search_result = random_search.fit(features_set, labels)
print("Best: %f using %s" % (random_search_result.best_score_, random_search_result.best_params_))
best_params = random_search_result.best_params_
best_model = create_model(**best_params)
callback = EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)
history_callback = History()
best_model.fit(features_set, labels, epochs=30, batch_size=64, callbacks=[callback, history_callback], validation_split=0.2)
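Regarding the disconnects: the search runs 10 candidates x 3 folds x 30 epochs, plus this final 30-epoch fit, so maybe I am simply exceeding the free research node's limits. A lighter configuration I might test first (illustrative numbers):
# Lighter search to check whether resource limits cause the disconnect
light_model = KerasClassifier(build_fn=create_model, epochs=10, batch_size=64, verbose=0)
light_search = RandomizedSearchCV(estimator=light_model, param_distributions=param_grid,
                                  n_iter=4, cv=2, verbose=1, random_state=42)
light_search.fit(features_set, labels)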
test_features = []
for i in range(60, X_test.shape[0] - 1):
    test_features.append(X_test.iloc[i - 60:i].values)
test_features = np.array(test_features)
test_features = np.reshape(test_features, (test_features.shape[0], test_features.shape[1], 2))
predictions = best_model.predict(test_features)
predictions = (predictions > 0.5).astype(int).flatten()
# Use the same index range as the test windows above so labels line up with predictions
actual = np.array([int(y_test.iloc[i, 0] > X_test.iloc[i, 0]) for i in range(60, X_test.shape[0] - 1)])
precision = precision_score(actual, predictions)
f1 = f1_score(actual, predictions)
accuracy = accuracy_score(actual, predictions)
print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"Precision: {precision * 100:.2f}%")
print(f"F1 Score: {f1 * 100:.2f}%")
# Plot training and validation accuracy
plt.plot(history_callback.history['accuracy'], label='Training Accuracy')
plt.plot(history_callback.history['val_accuracy'], label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.title('Training and Validation Accuracy')
plt.show()
# Plot training and validation loss
plt.plot(history_callback.history['loss'], label='Training Loss')
plt.plot(history_callback.history['val_loss'], label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.title('Training and Validation Loss')
plt.show()
Nico Xenox
Hey Florian Wuelfing,
yes, you will have to upgrade your research node. I let it run and got the following message:
You can upgrade your research notebook here
Best,
Nico