Overall Statistics
Total Orders
100
Average Win
6.94%
Average Loss
-3.82%
Compounding Annual Return
8.668%
Drawdown
44.500%
Expectancy
0.712
Start Equity
100000
End Equity
499879.01
Net Profit
399.879%
Sharpe Ratio
0.324
Sortino Ratio
0.341
Probabilistic Sharpe Ratio
0.181%
Loss Rate
39%
Win Rate
61%
Profit-Loss Ratio
1.82
Alpha
0.039
Beta
0.167
Annual Standard Deviation
0.153
Annual Variance
0.023
Information Ratio
-0.055
Tracking Error
0.201
Treynor Ratio
0.297
Total Fees
$466.07
Estimated Strategy Capacity
$820000000.00
Lowest Capacity Asset
SPY R735QTJ8XC9X
Portfolio Turnover
1.27%
# region imports
from AlgorithmImports import *
from dateutil.relativedelta import relativedelta
from pandas.tseries.offsets import BDay
# endregion

class LastDateHandler():
    """Process-wide registry of the latest data point date per custom-data symbol.

    The custom data Reader methods write into this mapping; the algorithm reads
    it to detect when a remote feed has stopped updating.
    """

    # symbol -> date of the most recent observation seen so far
    _last_update_date: Dict[Symbol, datetime.date] = {}

    @staticmethod
    def get_last_update_date() -> Dict[Symbol, datetime.date]:
        """Return the shared symbol -> last-update-date mapping (live reference)."""
        return LastDateHandler._last_update_date

# Source: https://trends.google.com/trends/explore?date=all&geo=US&q=S%26P%20500&hl=sk
class GoogleSearchVolume(PythonData):
    """Monthly Google Trends search-volume series loaded from a remote CSV.

    Rows look like '<YYYY-MM>;<volume>'. Each point is time-stamped one month
    plus one business day after the observation month, so the algorithm only
    sees a value once it would realistically have been published (no look-ahead).
    """

    def GetSource(self, config: SubscriptionDataConfig, date: datetime, isLiveMode: bool) -> SubscriptionDataSource:
        return SubscriptionDataSource(f'data.quantpedia.com/backtesting_data/google_search/{config.Symbol.Value}.csv', SubscriptionTransportMedium.RemoteFile, FileFormat.Csv)

    def Reader(self, config: SubscriptionDataConfig, line: str, date: datetime, isLiveMode: bool) -> BaseData:
        data = GoogleSearchVolume()
        data.Symbol = config.Symbol

        # Skip headers and blank lines; the emptiness check guards against an
        # IndexError on line[0] when the feed contains a trailing empty row.
        if not line or not line[0].isdigit():
            return None
        split: List[str] = line.split(';')

        # Shift the timestamp past the observation month (see class docstring).
        data.Time = datetime.strptime(split[0], "%Y-%m") + relativedelta(months=1) + BDay(1)
        data.Value = float(split[1])

        # Record the newest point per symbol so the algorithm can detect a
        # feed that has stopped updating.
        if config.Symbol not in LastDateHandler._last_update_date:
            LastDateHandler._last_update_date[config.Symbol] = datetime(1,1,1).date()
        if data.Time.date() > LastDateHandler._last_update_date[config.Symbol]:
            LastDateHandler._last_update_date[config.Symbol] = data.Time.date()

        return data

class UNRATE(PythonData):
    """U.S. civilian unemployment rate (FRED UNRATE) loaded from a remote CSV.

    Rows look like '<YYYY-MM-DD>;<rate>'. Each point is time-stamped one month
    plus ten days after the observation date to approximate the government's
    publication lag, so the backtest never trades on unreleased data.
    """

    def GetSource(self, config: SubscriptionDataConfig, date: datetime, isLiveMode: bool) -> SubscriptionDataSource:
        return SubscriptionDataSource(f'data.quantpedia.com/backtesting_data/economic/{config.Symbol.Value}.csv', SubscriptionTransportMedium.RemoteFile, FileFormat.Csv)

    def Reader(self, config: SubscriptionDataConfig, line: str, date: datetime, isLiveMode: bool) -> BaseData:
        data = UNRATE()
        data.Symbol = config.Symbol

        # Skip headers and blank lines; the emptiness check guards against an
        # IndexError on line[0] when the feed contains a trailing empty row.
        if not line or not line[0].isdigit():
            return None
        split: List[str] = line.split(';')

        # FRED marks missing observations with '.'.
        if split[1] == '.':
            return None
        # Parse the CSV file's columns into the custom data class, shifting the
        # timestamp to approximate the release date (see class docstring).
        data.Time = datetime.strptime(split[0], "%Y-%m-%d") + relativedelta(months=1) + timedelta(days=10)
        data.Value = float(split[1])

        # Record the newest point per symbol so the algorithm can detect a
        # feed that has stopped updating.
        if config.Symbol not in LastDateHandler._last_update_date:
            LastDateHandler._last_update_date[config.Symbol] = datetime(1,1,1).date()
        if data.Time.date() > LastDateHandler._last_update_date[config.Symbol]:
            LastDateHandler._last_update_date[config.Symbol] = data.Time.date()

        return data

class CustomFeeModel(FeeModel):
    """Flat fee model charging 0.5 bps of traded value, denominated in USD."""

    def GetOrderFee(self, parameters: OrderFeeParameters) -> OrderFee:
        traded_value: float = parameters.Security.Price * parameters.Order.AbsoluteQuantity
        return OrderFee(CashAmount(traded_value * 0.00005, "USD"))
# https://quantpedia.com/strategies/google-trends-unemployment-market-timing-strategy/
# 
# This strategy's investment universe is centered on one instrument: the S&P 500 index (ETF SPY or CFD).
# (Monthly and seasonally adjusted U.S. civilian unemployment rate data were obtained from the Federal Reserve Bank of St. Louis. Google Search query volumes 
# are from Google Trends.)
# General Summary: The strategy is based on macroeconomic signals (like unemployment rates, which we selected as our reported version, which gave the most 
# significant increase in out-of-sample forecasting accuracy) derived from Google Trends data. The methodology involves predicting the change in the unemployment 
# rate for the upcoming month. So, this investment strategy is based on unemployment predictions using Google Trends data: Consider a variant of the Google 
# Trends strategy utilizing the search volume of the term “laid off.” The strategy involves acting on the predicted month-over-month change in U.S. unemployment 
# rates (∆UNEMt).
# Model and Variable Selection: the UNEMt (UNEMt is predicted response variable) forecasting models are based on a linear regression formulation (Eq. 2) with 
# an additional independent variable Xt-1, the contemporaneous search volumes. So it includes a lagged autoregressive component and one lag of the exogenous 
# variables (Xt-1 i ), which in this study will be the Google Search volumes for a particular search term or category i = laid off.
# Strategy Execution: Investment decisions are made 15 trading days before the government data release. They are based on prediction models that include one 
# autoregressive lag (UNEMt-1) and one lag of monthly Google search volumes (Xt-1 laid off) as explanatory variables for UNEMt. The buy and sell rules are 
# as follows:
# If the expected change in the unemployment rate is negative, indicating a decrease, the strategy buys the S&P 500 on close.
# If the expected change is positive, a second-order criterion is applied:
# buy if the rate of increase is slowing compared to the previous period, and
# short sell if the growth rate is accelerating
# Weighting & Rebalancing: The strategy involves rebalancing positions monthly, aligned with the timing of unemployment data releases. Only one asset is traded, 
# so the whole position size is taken from the allocated portfolio.

# region imports
from AlgorithmImports import *
import data_tools
from dateutil.relativedelta import relativedelta
import statsmodels.api as sm
from typing import List, Dict
# endregion

class GoogleTrendsUnemploymentMarketTimingStrategy(QCAlgorithm):
    """Times the S&P 500 (SPY) with a forecast of the U.S. unemployment rate.

    The forecast is an OLS regression of the unemployment rate on its own
    one-month lag and the one-month-lagged Google search volume for the term
    'laid off'. A predicted decrease in unemployment triggers a long position;
    a predicted increase applies a second-order rule: long if the growth rate
    is decelerating, short if it is accelerating. Rebalanced monthly when new
    search-volume data arrives.
    """

    def initialize(self) -> None:
        self.set_start_date(2006, 1, 1)
        self.set_cash(100_000)

        # Regression window in months, and approximate trading days per month
        # (used only to size the warm-up period).
        period: int = 36
        month_period: int = 21

        # Source: https://trends.google.com/trends/explore?date=all&geo=US&q=laid%20off&hl=en-GB
        self._GSV: Symbol = self.add_data(data_tools.GoogleSearchVolume, 'LAID_OFF', Resolution.DAILY).symbol
        # Source: https://fred.stlouisfed.org/series/UNRATE
        self._UNRATE: Symbol = self.add_data(data_tools.UNRATE, 'UNRATE', Resolution.DAILY).symbol

        # Rolling history of the last `period` monthly observations per series.
        self._data: Dict[Symbol, RollingWindow] = {symbol: RollingWindow[float](period) for symbol in [self._GSV, self._UNRATE]}

        self._traded_asset: Symbol = self.add_equity('SPY', Resolution.DAILY).symbol

        self.set_warm_up(timedelta(days=period * month_period), Resolution.DAILY)
        self.settings.minimum_order_margin_portfolio_percentage = 0
        self.settings.daily_precise_end_time = False

    def on_data(self, slice: Slice) -> None:
        # Exit everything if either custom feed has stopped updating (the
        # current date is past the last observation the feed ever delivered).
        if any(self.securities[x].get_last_data() and self.time.date() > data_tools.LastDateHandler.get_last_update_date()[x] for x in [self._GSV, self._UNRATE]):
            self.log('Data stopped coming for custom data.')
            self.liquidate()
            return

        # Save data for regression.
        for symbol, regression_data in self._data.items():
            if slice.contains_key(symbol) and slice[symbol]:
                regression_data.add(slice[symbol].price)

        if self.is_warming_up:
            return

        if not all(regression_data.is_ready for regression_data in self._data.values()):
            return

        # Rebalance when google search volume data arrives.
        if slice.contains_key(self._GSV) and slice[self._GSV]:
            # Series are stored newest-first in the rolling windows, so reverse
            # to chronological order; y drops the first observation to align
            # with the one-month-lagged regressors in x.
            y: np.ndarray = np.array(list(self._data[self._UNRATE])[::-1])[1:]
            x: np.ndarray = np.array([pd.Series(list(self._data[self._UNRATE])[::-1]).shift(1).dropna().values, np.array(list(self._data[self._GSV])[::-1])[:-1]])

            model = self.multiple_linear_regression(x, y)
            # Forecast next month's rate from the latest rate and latest
            # search volume (constant added to match the fitted design matrix).
            predict_y: float = model.predict(sm.add_constant(np.append(y[-1], x[1][-1]).reshape(1,2), has_constant='add'))[0]

            # Predicted drop in unemployment -> long; 0 defers to the
            # second-order rule below. (Original mixed `1`/`False` under a
            # `bool` annotation; 0 is falsy so behavior is unchanged.)
            trade_direction: int = 1 if predict_y < y[-1] else 0

            if not trade_direction:
                # Long if unemployment growth is decelerating vs the previous
                # period, short if it is accelerating.
                lastperiod_diff: np.ndarray = np.diff(np.append(y[-2:], predict_y))
                trade_direction = 1 if lastperiod_diff[-1] - lastperiod_diff[0] < 0 else -1

            self.set_holdings(self._traded_asset, trade_direction, True)

    def multiple_linear_regression(self, x: np.ndarray, y: np.ndarray):
        """Fit y on the regressors in x (one row per variable) via OLS with intercept.

        Returns the fitted statsmodels results object (supports .predict()).
        """
        exog: np.ndarray = sm.add_constant(np.array(x).T)
        return sm.OLS(endog=y, exog=exog).fit()