import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas.plotting import lag_plot
from pandas import datetime
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from math import sqrt
class UpgradedRedOrangeAnguilline(QCAlgorithm):
    """Hourly single-ticker strategy driven by a one-step-ahead ARIMA forecast.

    On every bar the algorithm records the latest open/close, grid-searches
    ARIMA orders on the log of the open-price history, forecasts the next
    value with the best order, and trades when the current log open lies
    outside the band ``forecast +/- rmse``.
    """

    def Initialize(self):
        """Configure the backtest start, cash, data feed and ARIMA grid."""
        self.SetStartDate(2021, 11, 1)  # Set Start Date
        self.SetCash(100000)  # Set Strategy Cash
        self.ticker = "SPY"
        self.AddEquity(self.ticker, Resolution.Hour)
        # Candidate ARIMA orders: p and q in {1, 2}, d fixed at 1.
        self.p_values = range(1, 3)
        self.q_values = self.p_values
        self.d_values = range(1, 2)
        # Raw price history; grows by one entry per processed bar.
        self.prices = {"open": [], "close": []}

    def OnData(self, data):
        """Record the newest bar, refit the model and rebalance.

        The forecast and the RMSE are both expressed in log-price units, so
        the current open is compared in log space as well. When no ARIMA
        order can be fitted (typically because the history is still too
        short) the bar is recorded and no trade is made.

        Args:
            data: The slice delivered by the engine. Prices are read from
                ``self.Securities`` instead, so it is not inspected here.
        """
        security = self.Securities[self.ticker]
        current_open = security.Open
        if current_open <= 0:
            # The log of a non-positive price is undefined and would poison
            # the whole series with -inf/NaN; skip such bars entirely.
            return

        self.prices["open"].append(current_open)
        self.prices["close"].append(security.Close)
        series = np.log(np.asarray(self.prices["open"], dtype=float))

        self.Debug("Calculating the order and rmse")
        # The grid search is expensive: run it once and unpack the result.
        result = self.evaluate_models(
            series, self.p_values, self.d_values, self.q_values
        )
        if result is None:
            return
        order, rmse = result

        # `order` must be passed by keyword: in current statsmodels the
        # second positional parameter of ARIMA is `exog`, not `order`.
        # A single fit is enough because the series does not change while
        # this bar is being processed.
        fitted = ARIMA(series, order=order).fit()
        forecast = self._next_value(fitted)

        log_open = np.log(current_open)
        if self.Portfolio[self.ticker].Invested:
            if log_open < forecast - rmse:
                self.Liquidate(self.ticker)
        elif log_open > forecast + rmse:
            self.SetHoldings(self.ticker, 0.5)

    @staticmethod
    def _next_value(fitted_model):
        """Return the one-step-ahead forecast of *fitted_model* as a float.

        ``forecast()[0]`` is a scalar in some statsmodels versions and a
        one-element array in others; flattening first handles both.
        """
        return float(np.ravel(fitted_model.forecast()[0])[0])

    def evaluate_arima_model(self, X, arima_order):
        """Walk-forward validate one ARIMA order on *X*.

        The first 70% of *X* seeds the history. Each remaining observation
        is forecast one step ahead by a model refit on everything seen so
        far, after which the true observation is appended to the history.

        Args:
            X: Sequence of observations ordered in time.
            arima_order: The ``(p, d, q)`` order to evaluate.

        Returns:
            A ``(rmse, aic)`` tuple: the out-of-sample root mean squared
            error and the AIC of the last fitted model.

        Raises:
            ValueError: If *X* is too short to leave a hold-out set.
            Exception: Whatever the ARIMA fit or forecast raises.
        """
        # prepare training dataset
        train_size = int(len(X) * 0.7)
        train, test = X[:train_size], X[train_size:]
        if len(test) == 0:
            raise ValueError("not enough observations to hold out a test set")

        history = list(train)
        predictions = []
        model_fit = None
        # make predictions, refitting on the growing history each step
        for observed in test:
            model_fit = ARIMA(history, order=arima_order).fit()
            predictions.append(self._next_value(model_fit))
            history.append(observed)

        # calculate out of sample error
        rmse = sqrt(mean_squared_error(test, predictions))
        return rmse, model_fit.aic

    # evaluate combinations of p, d and q values for an ARIMA model
    def evaluate_models(self, dataset, p_values, d_values, q_values):
        """Grid-search ARIMA orders and return the one with the lowest RMSE.

        Args:
            dataset: NumPy array of observations ordered in time.
            p_values: Iterable of candidate autoregressive orders.
            d_values: Iterable of candidate differencing orders.
            q_values: Iterable of candidate moving-average orders.

        Returns:
            ``[order, rmse]`` for the best-scoring order, or ``None`` when
            no candidate could be fitted.
        """
        dataset = dataset.astype('float32')
        best_score, best_cfg = float("inf"), None
        for p in p_values:
            for d in d_values:
                for q in q_values:
                    order = (p, d, q)
                    try:
                        rmse, _aic = self.evaluate_arima_model(dataset, order)
                    except Exception:
                        # Some orders cannot be fitted on short or
                        # degenerate histories; try the next candidate.
                        continue
                    # A NaN rmse compares False here and is skipped.
                    if rmse < best_score:
                        best_score, best_cfg = rmse, order
        if best_cfg is None:
            return None
        return [best_cfg, best_score]