# Overall Statistics
#Several types of oil can be used (Brent, WTI, Dubai etc.) without big differences in results. The source paper for this anomaly uses Arab Light crude oil. Monthly oil
#returns are used in the regression equation as an independent variable and equity returns are used as a dependent variable. The model is re-estimated every month and
#observations of the last month are added. The investor determines whether the expected stock market return in a specific month (based on regression results and
#conditional on the oil price change in the previous month) is higher or lower than the risk-free rate. The investor is fully invested in the market portfolio if the expected
#return is higher (bull market); he invests in cash if the expected return is lower (bear market).

from datetime import datetime
import numpy as np
import pandas as pd
from collections import deque
import statsmodels.formula.api as sm

class Crude_Oil_Predicts_Equity_Returns(QCAlgorithm):
    """Time the equity market using the previous month's crude-oil return.

    At every month start the trailing oil return is recorded and an OLS
    regression ``spx ~ oil`` is re-estimated on all complete monthly pairs
    collected so far. Each oil return is recorded at the start of a month and
    the equity return at the end of that same month, so row ``i`` pairs the
    oil move *preceding* month ``i`` with the equity return *during* month
    ``i``. The fitted model is evaluated at the newest oil return; the
    algorithm holds the equity future when the predicted return is positive
    and liquidates otherwise.

    Missing observations are stored as NaN rather than skipped, so both
    series stay index-aligned; ``dropna()`` removes them before fitting.
    """

    def Initialize(self):
        """Configure the backtest, subscribe to the data and schedule the monthly hooks."""
        self.SetStartDate(2005, 1, 1)
        self.SetEndDate(2019, 8, 1)
        self.SetCash(100000)
        self.data = {}

        # Number of daily bars used to compute one "monthly" return.
        self.return_period = 21
        # Number of oil observations to accumulate before the first regression.
        self.min_observations = 12

        self.spx = self.AddData(QuandlFutures, 'CHRIS/CME_ES1', Resolution.Daily).Symbol
        self.oil = self.AddData(QuandlFutures, 'CHRIS/CME_CL1', Resolution.Daily).Symbol

        # Monthly returns history (expanding window, one entry per month).
        self.data[self.spx] = deque()
        self.data[self.oil] = deque()

        self.Schedule.On(self.DateRules.MonthStart(self.spx), self.TimeRules.AfterMarketOpen(self.spx), self.MonthStart)
        self.Schedule.On(self.DateRules.MonthEnd(self.spx), self.TimeRules.BeforeMarketClose(self.spx), self.MonthEnd)

    def MonthStart(self):
        """Record the latest oil return, refit the regression and rebalance."""
        oil_ret = self._trailing_return(self.oil)
        self.data[self.oil].append(oil_ret)

        if len(self.data[self.oil]) < self.min_observations:
            return

        # The oil series is one entry longer than the SPX series at this
        # point; concat aligns on the positional index and dropna() removes
        # the unmatched newest oil row along with any NaN placeholders.
        data_frame = pd.concat(
            [
                pd.Series(list(self.data[self.spx]), dtype=float),
                pd.Series(list(self.data[self.oil]), dtype=float),
            ],
            axis=1,
        ).dropna()
        data_frame.columns = ['spx', 'oil']

        # Without a fresh oil return there is no signal, and a line cannot be
        # fitted through fewer than two points: leave the position unchanged.
        if np.isnan(oil_ret) or len(data_frame) < 2:
            return

        # Simple linear regression: Y = alpha + beta * X, where
        #   Y     = expected SPX return (dependent variable),
        #   X     = latest oil return (independent variable),
        #   alpha = intercept, beta = slope.
        model = sm.ols(formula='spx~oil', data=data_frame).fit()

        # Look coefficients up by label; positional access on the
        # label-indexed params Series is deprecated in pandas.
        alpha = model.params['Intercept']
        beta = model.params['oil']
        expected_return = alpha + (beta * oil_ret)

        if expected_return > 0:
            if not self.Portfolio[self.spx].Invested:
                self.SetHoldings(self.spx, 1)
        else:
            self.Liquidate()

    def MonthEnd(self):
        """Record the equity return realised over the month that is ending."""
        self.data[self.spx].append(self._trailing_return(self.spx))

    def _trailing_return(self, symbol):
        """Return the trailing ``return_period``-bar return of *symbol*, or NaN if no data."""
        history = self.History(symbol, self.return_period, Resolution.Daily)
        if history.empty or 'settle' not in history.columns:
            return np.nan
        return self.Return(history['settle'])

    def Return(self, history):
        """Compute the simple return between the first and last price.

        Args:
            history: One-dimensional sequence of prices, oldest first.

        Returns:
            ``(last - first) / first``, or NaN when fewer than two prices are
            supplied, the first price is zero, or the result is not finite.
        """
        prices = np.asarray(history, dtype=float)
        if prices.size < 2:
            return np.nan

        first = prices[0]
        last = prices[-1]
        if first == 0:
            return np.nan

        result = (last - first) / first
        return result if np.isfinite(result) else np.nan

class QuandlFutures(PythonQuandl):
    """Custom data type passed to ``AddData`` for the Quandl futures subscriptions.

    The only customisation is setting ``ValueColumnName`` to ``"Settle"``.
    """
    def __init__(self):
        # NOTE(review): presumably selects the "Settle" column as the value
        # of each data point — confirm against the PythonQuandl base class.
        self.ValueColumnName = "Settle"