# Overall Statistics
import numpy as np
import math
import statsmodels.api as smapi
import statsmodels as sm
from sklearn import linear_model
import pandas as pd
from sklearn import decomposition
from sklearn.decomposition import PCA
from sklearn import svm
from sklearn.preprocessing import StandardScaler

### <summary>
### Algorithm built on the basic template skeleton: it sets the date range and cash,
### subscribes to crypto and forex pairs, and trades long/short on PCA-factor predictions.
### </summary>
class BasicTemplateCryptoAlgorithm(QCAlgorithm):
    '''PCA-factor long/short algorithm over a basket of crypto and forex pairs.

    On each eligible bar the algorithm pulls a rolling window of open prices,
    converts them to standardized returns, extracts principal components, and
    regresses every asset's return on those components with a Lasso model.
    The assets with the largest positive predictions are bought and the ones
    with the most negative predictions are sold short.
    '''

    def Initialize(self):
        '''Initialise the data and resolution required, as well as the cash and start-end dates for your algorithm. All algorithms must be initialized.'''

        self.SetStartDate(2017, 1, 1)  # Set Start Date
        self.SetEndDate(2018, 1, 1)    # Set End Date
        self.SetCash(10000)            # Set Strategy Cash

        # Find more symbols here: http://quantconnect.com/data
        self.crypto_tickers = ["BTCUSD", "LTCUSD", "ETHUSD", "LTCBTC", "ETHBTC"]
        # NOTE(review): several of these pairs are quoted in the non-standard
        # direction (e.g. "JPYUSD" rather than "USDJPY") -- confirm that the
        # data provider actually serves them, otherwise they yield no history.
        self.forex_tickers = ["EURUSD", "JPYUSD", "GBPUSD", "AUDUSD", "CADUSD",
                              "CHFUSD", "CNYUSD", "SEKUSD", "NZDUSD"]
        for ticker in self.crypto_tickers:
            self.AddCrypto(ticker, Resolution.Daily)
        for ticker in self.forex_tickers:
            self.AddForex(ticker, Resolution.Daily)
        # Single ordered list reused for every history request.
        self.tickers = self.crypto_tickers + self.forex_tickers

        # Strategy parameters
        self.lookback = 30          # number of bars requested from History
        self.n_components = 5       # starting number of principal components
        self.longnum = 3            # how many assets to hold long
        self.shortnum = 3           # how many assets to hold short
        self.highvarthres = 0.80    # upper bound on cumulative explained variance
        self.lowvarthres = 0.70     # lower bound on cumulative explained variance

        # History returns a multi-index pandas DataFrame. This initial request
        # is kept for callers that read self.pxhistory; OnData refreshes it.
        self.pxhistory = self.History(self.tickers, self.lookback)

        #sets brokerage model
        #self.SetBrokerageModel(BrokerageName.InteractiveBrokers, AccountType.Cash)

    def OnData(self, data):
        '''OnData event is the primary entry point for your algorithm. Each new data point will be pumped in here.

        Arguments:
            data: Slice object keyed by symbol containing the stock data
        '''
        # Orders are only placed when the bar time falls on the hour, so skip
        # all of the model fitting otherwise.
        if self.Time.minute != 0:
            return

        # Refresh the rolling window; reusing the frame fetched in Initialize
        # would feed the same stale prices into the model on every bar.
        self.pxhistory = self.History(self.tickers, self.lookback)

        returns = self._standardized_returns(self.pxhistory)
        if returns is None:
            return

        pca = self._fit_pca(returns)
        pred_ret = self._predict_returns(pca, returns)
        self._rebalance(pred_ret)

    def _standardized_returns(self, history):
        '''Return z-scored open-to-open returns (rows: time, columns: symbol), or None if there is too little data.'''
        # Good practice to check whether the dataframe is empty
        if history is None or history.empty:
            return None

        # Filter for open prices and pivot the symbol index level into columns.
        price_history = history["open"].unstack('symbol')
        returns = price_history.pct_change()
        # A zero price would produce +/-inf, which StandardScaler rejects.
        returns = returns.replace([np.inf, -np.inf], np.nan).dropna()

        # Need at least two rows to train on plus the final row to predict from.
        if returns.shape[0] < 3 or returns.shape[1] == 0:
            return None

        scaled = StandardScaler().fit_transform(returns)
        return pd.DataFrame(data=scaled, columns=returns.columns, index=returns.index)

    def _fit_pca(self, returns):
        '''Fit a whitened PCA whose component count keeps explained variance near the configured band.'''
        # One full fit gives the cumulative explained variance for every
        # possible component count, so the search below needs no refitting.
        full_pca = PCA(whiten=True).fit(returns)
        cum_var = np.cumsum(full_pca.explained_variance_ratio_)
        max_components = len(cum_var)

        count = min(max(self.n_components, 1), max_components)
        # Too much variance explained: drop components, but never below one.
        while count > 1 and cum_var[count - 1] > self.highvarthres:
            count -= 1
        # Too little variance explained: add components up to what the data allows.
        while count < max_components and cum_var[count - 1] < self.lowvarthres:
            count += 1

        return PCA(n_components=count, whiten=True).fit(returns)

    def _predict_returns(self, pca, returns):
        '''Regress each asset's return on the PCA factors and predict it from the latest factor row.'''
        factors = pd.DataFrame(pca.transform(returns))
        train_factors = factors.iloc[:-1, :]
        lastday = factors.iloc[[-1], :]      # keep as a one-row frame for predict()
        train_returns = returns.iloc[:-1, :]

        pred_ret = pd.Series(index=returns.columns, dtype=float)
        for stock in returns.columns:
            model = linear_model.Lasso(alpha=0.1)
            model.fit(train_factors, train_returns[stock])
            # predict() returns a length-1 array; store the scalar.
            pred_ret.loc[stock] = model.predict(lastday)[0]
        return pred_ret

    def _rebalance(self, pred_ret):
        '''Set holdings from the predictions: long the top names, short the bottom names, flat elsewhere.'''
        # Rank once instead of on every loop iteration.
        long_candidates = set(pred_ret.nlargest(self.longnum).index)
        short_candidates = set(pred_ret.nsmallest(self.shortnum).index)

        for stock in pred_ret.index:
            in_long = stock in long_candidates
            in_short = stock in short_candidates
            if not in_long and not in_short:
                self.SetHoldings(stock, 0)
            elif in_long and pred_ret[stock] > 0:
                self.SetHoldings(stock, 0.33)
            elif in_short and pred_ret[stock] < 0:
                self.SetHoldings(stock, -0.33)
            # Otherwise the asset is ranked but its prediction has the wrong
            # sign; the existing position is deliberately left untouched.