# Overall Statistics
import numpy as np
from scipy.optimize import minimize

# Module-level list of equity ticker strings.
# NOTE(review): the name suggests a snapshot of S&P 100 constituents; the list is static,
# so tickers may be out of date — confirm against the intended universe before use.
sp100_stocks = ['AAPL','MSFT','AMZN','FB','BRKB','GOOGL','GOOG','JPM','JNJ','V','PG','XOM','UNH','BAC','MA','T','DIS','INTC','HD','VZ','MRK','PFE','CVX','KO','CMCSA','CSCO','PEP','WFC','C','BA','ADBE','WMT','CRM','MCD','MDT','BMY','ABT','NVDA','NFLX','AMGN','PM','PYPL','TMO','COST','ABBV','ACN','HON','NKE','UNP','UTX','NEE','IBM','TXN','AVGO','LLY','ORCL','LIN','SBUX','AMT','LMT','GE','MMM','DHR','QCOM','CVS','MO','LOW','FIS','AXP','BKNG','UPS','GILD','CHTR','CAT','MDLZ','GS','USB','CI','ANTM','BDX','TJX','ADP','TFC','CME','SPGI','COP','INTU','ISRG','CB','SO','D','FISV','PNC','DUK','SYK','ZTS','MS','RTN','AGN','BLK']

def MonthDiff(d1, d2):
    """Return the signed number of calendar months between d2 and d1.

    Only the ``year`` and ``month`` attributes are used; the day of month is
    ignored. The result is positive when d1 falls in a later month than d2.
    """
    years_apart = d1.year - d2.year
    months_apart = d1.month - d2.month
    return years_apart * 12 + months_apart

def Return(values):
    """Return the simple return from the first to the last element of values.

    Computed as (last - first) / first. Raises IndexError on an empty
    sequence and ZeroDivisionError when the first element is a plain zero.
    """
    first, last = values[0], values[-1]
    return (last - first) / first
def Volatility(values):
    """Return the population standard deviation of period-over-period returns.

    values is converted to a NumPy array; each return is
    (values[i + 1] - values[i]) / values[i], and np.std is applied with its
    default settings.
    """
    series = np.asarray(values)
    previous, current = series[:-1], series[1:]
    return np.std((current - previous) / previous)

# Custom fee model
class CustomFeeModel(FeeModel):
    """Fee model that charges 0.00005 times the order's notional value, in USD."""

    def GetOrderFee(self, parameters):
        """Return the OrderFee for the order described by parameters.

        The notional is the security's current price multiplied by the
        order's absolute quantity; the fee is that notional times 0.00005.
        """
        notional = parameters.Security.Price * parameters.Order.AbsoluteQuantity
        return OrderFee(CashAmount(notional * 0.00005, "USD"))

# Quandl free data
class QuandlFutures(PythonQuandl):
    """Quandl data type configured with "settle" as its value column name."""
    def __init__(self):
        # NOTE(review): presumably the PythonQuandl base reads this attribute to pick
        # which CSV column becomes the data point's value — confirm against the base class.
        self.ValueColumnName = "settle"

# Quandl short interest data.
class QuandlFINRA_ShortVolume(PythonQuandl):
    """Quandl data type configured with 'SHORTVOLUME' as its value column name."""
    def __init__(self):
        # NOTE(review): presumably the PythonQuandl base reads this attribute to pick
        # which column becomes the data point's value — confirm against the base class.
        self.ValueColumnName = 'SHORTVOLUME'    # 'TOTALVOLUME' is also accessible

# Quantpedia data
# NOTE: IMPORTANT: Data order must be ascending (datewise)
class QuantpediaFutures(PythonData):
    """Custom data type that reads semicolon-separated futures files from Quantpedia.

    Each accepted row has the form ``dd.mm.YYYY;<settle>``. Rows that are empty
    or do not start with a digit (e.g. a header) are skipped.
    """

    def GetSource(self, config, date, isLiveMode):
        """Return the remote CSV source for the subscribed symbol."""
        url = "data.quantpedia.com/backtesting_data/futures/{0}.csv".format(config.Symbol.Value)
        return SubscriptionDataSource(url, SubscriptionTransportMedium.RemoteFile, FileFormat.Csv)

    def Reader(self, config, line, date, isLiveMode):
        """Parse one line of the file into a QuantpediaFutures data point.

        Returns None for lines that should be skipped (empty, or not starting
        with a digit). Raises ValueError/IndexError if an accepted line does
        not contain a parseable date and settle value.
        """
        # Guard against empty lines before indexing; line[0] would raise IndexError.
        if not line or not line[0].isdigit():
            return None

        fields = line.split(';')

        data = QuantpediaFutures()
        data.Symbol = config.Symbol
        # NOTE(review): the timestamp is shifted one day past the row's date, presumably
        # so the settle value only becomes visible after that day has ended — confirm.
        data.Time = datetime.strptime(fields[0], "%d.%m.%Y") + timedelta(days=1)

        # Parse the settle once and expose it both as a named field and as the value.
        settle = float(fields[1])
        data['settle'] = settle
        data.Value = settle

        return data
# https://quantpedia.com/strategies/pairs-trading-with-country-etfs/
# The investment universe consists of 22 international ETFs. A normalized cumulative total return index is created for each ETF (dividends 
# included), and the starting price during the formation period is set to $1 (price normalization). The selection of pairs is made after
# a 120 day formation period. Pair’s distance for all ETF pairs is calculated as the sum of squared deviations between two normalized 
# price series. The top 5 pairs with the smallest distance are used in the subsequent 20 day trading period. The strategy is monitored
# daily, and trade is opened when the divergence between the pairs exceeds 0.5x the historical standard deviation. Investors go long 
# on the undervalued ETF and short on the overvalued ETF. The trade is exited if a pair converges or after 20 days (if the pair does 
# not converge within the next 20 business days). Pairs are weighted equally, and the portfolio is rebalanced on a daily basis.

import numpy as np
from collections import deque
import itertools as it
from fk_tools import CustomFeeModel

class PairsTradingwithCountryETFs(QCAlgorithm):
    """Distance-based pairs trading over a fixed list of country ETFs.

    A rolling window of the last ``self.period`` daily prices is kept per
    symbol. Periodically, all symbol pairs with full windows are ranked by the
    sum of squared differences of their normalized price series, and the
    closest pairs are kept. Every day, each kept pair is traded long/short when
    its price spread is more than 0.5 standard deviations from its mean, and
    closed when the spread is back inside that band.

    NOTE(review): the strategy description speaks of the top 5 pairs, while the
    code keeps the 20 closest pairs — confirm which is intended.
    """

    def Initialize(self):
        """Subscribe to the ETFs and seed the rolling price windows from history."""
        self.SetStartDate(2000, 1, 1)

        self.symbols = [
            "EWA",  # iShares MSCI Australia Index ETF
            "EWO",  # iShares MSCI Austria Investable Mkt Index ETF
            "EWK",  # iShares MSCI Belgium Investable Market Index ETF
            "EWZ",  # iShares MSCI Brazil Index ETF
            "EWC",  # iShares MSCI Canada Index ETF
            "FXI",  # iShares China Large-Cap ETF
            "EWQ",  # iShares MSCI France Index ETF
            "EWG",  # iShares MSCI Germany ETF
            "EWH",  # iShares MSCI Hong Kong Index ETF
            "EWI",  # iShares MSCI Italy Index ETF
            "EWJ",  # iShares MSCI Japan Index ETF
            "EWM",  # iShares MSCI Malaysia Index ETF
            "EWW",  # iShares MSCI Mexico Inv. Mt. Idx
            "EWN",  # iShares MSCI Netherlands Index ETF
            "EWS",  # iShares MSCI Singapore Index ETF
            "EZA",  # iShares MSCI South Africa Index ETF
            "EWY",  # iShares MSCI South Korea ETF
            "EWP",  # iShares MSCI Spain Index ETF
            "EWD",  # iShares MSCI Sweden Index ETF
            "EWL",  # iShares MSCI Switzerland Index ETF
            "EWT",  # iShares MSCI Taiwan Index ETF
            "THD",  # iShares MSCI Thailand Index ETF
            "EWU",  # iShares MSCI United Kingdom Index ETF
            "SPY",  # SPDR S&P 500 ETF
        ]

        # Length (in daily bars) of the formation window kept per symbol.
        self.period = 120
        # ticker -> deque of the last `self.period` prices.
        self.history_price = {}

        for symbol in self.symbols:
            data = self.AddEquity(symbol, Resolution.Daily)
            symbol_obj = data.Symbol
            if symbol not in self.history_price:
                history = self.History(symbol_obj, self.period, Resolution.Daily)
                # Only symbols with a complete window are tracked; OnData updates
                # existing entries only, so symbols skipped here are never traded.
                if len(history) == self.period and 'close' in history:
                    closes = list(history['close'])
                    self.history_price[symbol] = deque(closes, maxlen=self.period)

        self.sorted_pairs = None
        self.symbol_pairs = list(it.combinations(self.symbols, 2))
        # Starts at 20 so that pairs are selected on the first OnData call.
        self.days = 20

    def OnData(self, data):
        """Update price windows, re-select pairs when due, and trade the kept pairs."""
        # Update the price series everyday
        for symbol in self.symbols:
            if symbol not in self.history_price or not self.Securities.ContainsKey(symbol):
                continue
            window = self.history_price[symbol]
            price = self.Securities[symbol].Price
            if price != 0:
                window.append(price)
            elif window:
                # Append latest price as a next one in case there's 0 as price.
                window.append(window[-1])

        # Start of trading period.
        if self.days == 20:
            distances = {}
            for pair in self.symbol_pairs:
                window_a = self.history_price.get(pair[0])
                window_b = self.history_price.get(pair[1])
                if window_a is None or window_b is None:
                    continue
                # Only rank pairs whose windows are both complete.
                if len(window_a) == self.period and len(window_b) == self.period:
                    distances[pair] = self.Distance(window_a, window_b)

            if distances:
                ranked = sorted(distances.items(), key=lambda item: item[1])[:20]
                self.sorted_pairs = [pair for pair, _ in ranked]

            # The counter restarts at 1 and is incremented right below.
            self.days = 1

        self.days += 1

        if self.sorted_pairs is None:
            return

        # Equal weight per pair; invariant across the loop below.
        weight = 1 / len(self.sorted_pairs)

        for first, second in self.sorted_pairs:
            # Calculate the spread of two price series.
            spread = np.array(self.history_price[first]) - np.array(self.history_price[second])
            # Statistics exclude the latest observation, which is the one being tested.
            mean = np.mean(spread[:-1])
            std = np.std(spread[:-1])

            # Long-short position is opened when pair prices have diverged by 0.5 standard deviations.
            # Rebalance / open new positions.
            if spread[-1] > mean + 0.5 * std:
                self.SetHoldings(first, -weight)
                self.SetHoldings(second, weight)
            elif spread[-1] < mean - 0.5 * std:
                self.SetHoldings(first, weight)
                self.SetHoldings(second, -weight)
            # The position is closed when prices revert back.
            elif self.Portfolio[first].Invested and self.Portfolio[second].Invested:
                self.Liquidate(first)
                self.Liquidate(second)

    def Distance(self, price_a, price_b):
        """Return the sum of squared deviations between two normalized price series.

        Each series is divided by its own first element before differencing.
        Both series must be non-empty and of equal length.
        """
        norm_a = np.array(price_a) / price_a[0]
        norm_b = np.array(price_b) / price_b[0]
        return np.sum((norm_a - norm_b) ** 2)