# Overall Statistics
from clr import AddReference

from System import *
from QuantConnect import *
from QuantConnect.Algorithm import *
from QuantConnect.Indicators import *

import pandas as pd
import numpy as np
from math import ceil, floor
import scipy.stats as stats
import sklearn.mixture as mix
from datetime import datetime, timedelta
import decimal as d
import time

ALPHA = 0.95 # for sampling confidence intervals
# make_gmm() reads the two names below as default arguments at import time,
# but the file never defined them, so importing the module raised NameError.
# NOTE(review): the intended values cannot be recovered from this file; the
# ones below are placeholders -- confirm them before relying on the results.
N_COMPONENTS = 2    # number of mixture components fitted by make_gmm()
RANDOM_STATE = 777  # fixed seed so repeated GMM fits are reproducible

def make_gmm(n_components=N_COMPONENTS, max_iter=150, random_state=RANDOM_STATE):
    """fn: create gmm object

    Params:
        n_components: number of mixture components to fit.
        max_iter: iteration cap handed to the estimator.
        random_state: seed handed to the estimator.

    Returns:
        An unfitted `sklearn.mixture.GaussianMixture` instance.
    """
    # The keyword list was left unterminated (a SyntaxError) and `max_iter` /
    # `random_state` were accepted but never used; forward every argument the
    # signature exposes.
    # NOTE(review): if further estimator options were intended here they are
    # not visible in this file -- confirm against the author's settings.
    model_kwds = dict(n_components=n_components,
                      max_iter=max_iter,
                      random_state=random_state)

    gmm = mix.GaussianMixture(**model_kwds)
    return gmm
def make_returns(df):
    """fn: convert a price frame into log returns

    Each value is divided by the value one row earlier, the natural log of
    that ratio is taken, and rows containing NaN (the first row always does,
    because it has no predecessor) are dropped.
    """
    previous_px = df.shift(1)
    log_ratio = np.log(df / previous_px)
    return log_ratio.dropna()
class TradingWithGMM(QCAlgorithm):
    '''Long-only strategy driven by a Gaussian mixture model of hourly log returns.

    On the scheduled days `run_main_algo` fits a GMM (via `make_gmm`) to each
    non-invested symbol's retained hourly open-price returns, derives a
    confidence interval from the parameters of the most recent mixture state
    and tags the latest return as 'too_high', 'too_low' or 'hit'.  Symbols
    tagged 'too_high' are bought by `send_orders` with market-on-open orders
    sized at `BET_SIZE`, and `check_liquidate` closes positions whose order is
    at least `_holding_period` days old.
    '''

    # Number of most-recent rows of hourly history retained per symbol.
    _HIST_ROWS = int(252*6.5)

    def Initialize(self):
        '''Initialise the data and resolution required, as well as the cash and start-end dates for your algorithm. All algorithms must be initialized.'''

        # The day used to be written `01`: an octal literal on Python 2 and a
        # SyntaxError on Python 3. Plain `1` is the same date on both.
        self.SetStartDate(2008, 9, 1)    #Set Start Date
        self.SetEndDate(2015, 12, 31)    #Set End Date
        self.SetCash(100000)             #Set Strategy Cash
        # Find more symbols here: http://quantconnect.com/data
        ## make universe (also sets self.symbols, which is used below)
        self._universe = self.make_universe()
        # tickets returned by MarketOnOpenOrder, appended in send_orders
        self.openMarketOnOpenOrders = []

        self._longs = False        # becomes an ndarray of tickers in run_main_algo
        self._shorts = False       # stays False while shorts are disabled
        self._holding_period = 21  # days; wrapped in timedelta by check_liquidate
        self.BET_SIZE = 0.05       # target passed to CalculateOrderQuantity

        # track RAM
        self.splotName = 'Strategy Info'
        sPlot = Chart(self.splotName)
        sPlot.AddSeries(Series('RAM',  SeriesType.Line, 0))
        sPlot.AddSeries(Series('Time',  SeriesType.Line, 1))
        # The chart was built but never registered, so the series layout
        # declared above was discarded.
        self.AddChart(sPlot)
        self.time_to_run_main_algo = 0
        ## run algorithm
        # make buy list
        self.Schedule.On(self.DateRules.Every(DayOfWeek.Monday, DayOfWeek.Friday), self.TimeRules.AfterMarketOpen("SPY", 10), Action(self.run_main_algo))
        # send orders
        self.Schedule.On(self.DateRules.Every(DayOfWeek.Monday, DayOfWeek.Friday), self.TimeRules.AfterMarketOpen("SPY", 30), Action(self.send_orders))
        # check trade dates and liquidate if date condition
        self.Schedule.On(self.DateRules.Every(DayOfWeek.Monday, DayOfWeek.Friday), self.TimeRules.AfterMarketOpen("SPY", 35), Action(self.check_liquidate))
        # plot RAM
        self.Schedule.On(self.DateRules.EveryDay(), self.TimeRules.AfterMarketOpen("SPY", 40), Action(self.CHART_RAM))
        # reduce use of History by retaining what we can. Uses more ram, but might address bottleneck?
        self.hist = {}
        for sym in self.symbols:
            self.hist[sym] = self._hourly_opens(sym, 252*6.5)

    def _hourly_opens(self, symbol, days):
        """fn: request hourly history for one symbol and return its open prices

        `days` is handed to `timedelta` as its first (days) argument.  The
        result is the "open" column unstacked on index level 0 and cast to
        float32.
        """
        history = self.History([symbol], timedelta(days), Resolution.Hour)
        return history["open"].unstack(level=0).astype(np.float32)

    def make_universe(self):
        """fn: place in initialize to add custom list of equities to
                `self.securities` object
        """
        self.symbols = ["SPY", "QQQ", "DIA"] # "TLT", "GLD", "EFA", "EEM", "SLV"]
        for sym in self.symbols:
            self.AddEquity(sym, Resolution.Minute)
        return True

    def check_liquidate(self):
        """fn: to check if todays date matches exit date and liquidate"""
        self.Log('\n'+'-'*77+'\n[{}] checking liquidation status...'.format(self.UtcTime))
        orders = self.Transactions.GetOrders(None)
        if not orders:
            return
        # current time is gt_eq order time + holding period
        crit1 = lambda order: self.UtcTime >= (order.Time + timedelta(self._holding_period))
        # order time is within today - holding period window
        crit2 = lambda order: order.Time >= (self.UtcTime - timedelta(self._holding_period + 7)) # 7 day overlap
        for order in orders:
            # logical `and` (short-circuiting) rather than bitwise `&`
            if not (crit1(order) and crit2(order)):
                continue
            if not self.Portfolio[order.Symbol].Invested:
                continue
            fmt_args = (self.UtcTime, order.Symbol, order.Time, self.UtcTime - order.Time)
            self.Log('[{}] liquidating... {}, order date: {}, time delta: {}'.format(*fmt_args))
            # The position was only logged as liquidated, never closed.
            self.Liquidate(order.Symbol)

    def compute(self, sym):
        """fn: fit the GMM for one security and classify its latest return

        Params:
            sym: a value from `self.Securities.Values`; its ticker
                (`sym.Symbol.Value`) must already be a key of `self.hist`.

        Returns:
            tuple (symbol, low_ci, high_ci, current_return, result_tag) where
            result_tag is 'too_high', 'too_low' or 'hit'.
        """
        ticker = sym.Symbol.Value
        # get the recent history, can prob reduce the excess more in a few ways
        new_hist = self._hourly_opens(sym.Symbol, 5*6.5)
        # Push the new rows, drop duplicate timestamps, sort by datetime and
        # keep only the newest rows so the cache does not grow indefinitely.
        # pd.concat / .iloc replace DataFrame.append / .ix, which recent
        # pandas releases no longer provide; the old statement also ended in a
        # dangling backslash that swallowed the next line.
        combined = pd.concat([self.hist[ticker], new_hist])
        combined = combined[~combined.index.duplicated(keep='first')]
        self.hist[ticker] = combined.sort_index().iloc[-self._HIST_ROWS:]
        train_px = self.hist[ticker]
        train_df = make_returns(train_px)
        # Series.reshape is gone from pandas; reshape the underlying ndarray
        # into the single-feature column the estimator is fitted on.
        tmp_x = train_df[ticker].values.reshape(-1, 1)
        ### fit GMM ###
        gmm = make_gmm().fit(tmp_x)
        hidden_states = gmm.predict(tmp_x)
        ### get last state estimate ###
        last_state = hidden_states[-1]
        last_mean = gmm.means_[last_state]
        last_var = np.diag(gmm.covariances_[last_state])

        ### sample from distribution using last state parameters ###
        # must choose number of samples
        n_samples = 1000
        rvs = stats.norm.rvs(loc=last_mean, scale=np.sqrt(last_var), size=n_samples)
        # Confidence level is passed positionally: SciPy renamed the `alpha`
        # keyword to `confidence`, and positional use works with either name.
        low_ci, high_ci = stats.norm.interval(ALPHA, loc=np.mean(rvs), scale=np.std(rvs))
        ## get current return ##
        tmp_ret = np.log(float(self.Securities[sym.Symbol].Price) / train_px[ticker].iloc[-1])
        if tmp_ret > high_ci:
            result_tag = 'too_high'
        elif tmp_ret < low_ci:
            result_tag = 'too_low'
        else:
            result_tag = 'hit'
        # (symbol, low ci, high ci, current return, result_tag)
        return (ticker, low_ci, high_ci, tmp_ret, result_tag)

    def run_main_algo(self):
        """fn: run main algorithm"""
        start_time = time.time()
        self.Log('\n'+'-'*77+'\n[{}] Begin main algorithm computation...'.format(self.UtcTime))
        self._algo_data = False
        self._longs = False
        self._shorts = False
        # only compute for securities we are not already invested in
        tmp_data_list = [self.compute(asset) for asset in self.Securities.Values if not self.Portfolio[asset.Symbol].Invested]
        if tmp_data_list:
            cols = ['symbol', 'low_ci', 'high_ci', 'current_return', 'result_tag']
            df = pd.DataFrame(tmp_data_list, columns=cols)
            self.Log('[{}] algo data:\n\t{}'.format(self.UtcTime, df))
            self._longs = np.asarray(df.loc[df['result_tag'] == 'too_high', 'symbol'].unique())
            # shorts are disabled: self._shorts is left as False
            self.Log('\n'+'-'*77+'\n[{0}] longs: {1}\n[{0}] shorts: {2}'.format(self.UtcTime, self._longs, self._shorts))
        else:
            # Nothing was computed because every security is already held.
            self.Log('[{}] already fully invested, no computations run, exiting...'.format(self.UtcTime))
        self.time_to_run_main_algo = time.time() - start_time
        self.Plot(self.splotName,'Time', self.time_to_run_main_algo)

    def send_orders(self):
        """fn: send orders"""
        self.Log('\n'+'-'*77+'\n[{}] checking buy sell arrays to send orders...'.format(self.UtcTime))
        # Shorts need a negative target; the short branch used +BET_SIZE,
        # which would have bought the symbol instead of selling it.
        self._submit_orders(self._shorts, -self.BET_SIZE, 'short')
        self._submit_orders(self._longs, self.BET_SIZE, 'long')

    def _submit_orders(self, symbols, target, side):
        """fn: place market-on-open orders for every non-invested symbol

        Params:
            symbols: ndarray of tickers; any other value (the attributes are
                reset to False by run_main_algo) means there is nothing to do.
            target: value passed to `CalculateOrderQuantity` for each symbol.
            side: 'long' or 'short', used only in the log messages.
        """
        if not isinstance(symbols, np.ndarray):
            return
        if not symbols.size:
            self.Log('[{}] no {}s listed, no orders sent...'.format(self.UtcTime, side))
            return
        for sym in symbols:
            if self.Portfolio[sym].Invested:
                continue
            self.Log('[{}] sending {} order for {}...'.format(self.UtcTime, side, sym))
            newTicket = self.MarketOnOpenOrder(sym, self.CalculateOrderQuantity(sym, target))
            # keep the ticket instead of discarding it
            self.openMarketOnOpenOrders.append(newTicket)

    def OnData(self, data):
        '''OnData event is the primary entry point for your algorithm.
        Each new data point will be pumped in here.'''
        # All work is done by the scheduled events set up in Initialize.
        pass

    def CHART_RAM(self):
        """fn: plot the value of OS.ApplicationMemoryUsed divided by 1024"""
        # Once a day or something reasonable to prevent spam
        self.Plot(self.splotName,'RAM', OS.ApplicationMemoryUsed/1024.)