| Overall Statistics | |
| --- | --- |
| Total Trades | 377 |
| Average Win | 1.05% |
| Average Loss | -0.75% |
| Compounding Annual Return | 20596.233% |
| Drawdown | 34.100% |
| Expectancy | 0.320 |
| Net Profit | 57.287% |
| Sharpe Ratio | 5.178 |
| Loss Rate | 45% |
| Win Rate | 55% |
| Profit-Loss Ratio | 1.40 |
| Alpha | 8.167 |
| Beta | -323.793 |
| Annual Standard Deviation | 0.741 |
| Annual Variance | 0.55 |
| Information Ratio | 5.16 |
| Tracking Error | 0.741 |
| Treynor Ratio | -0.012 |
| Total Fees | $0.00 |
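As a sanity check on the table: the expectancy is consistent with the win/loss statistics, since expectancy = win rate × profit-loss ratio - loss rate = 0.55 × 1.40 - 0.45 = 0.32.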
# Derek M Tishler - 2017
# https://tishlercapital.com/
# Based on the MNIST TensorFlow Softmax Classification Example:
# https://www.tensorflow.org/get_started/mnist/beginners
# https://www.tensorflow.org/get_started/mnist/pros
# Extended from single-class to multi-class probabilities using:
# "Multi-label image classification with Inception net" - Radek Bartyzal
# https://towardsdatascience.com/multi-label-image-classification-with-inception-net-cbb2ee538e30
# In case you missed it, there is a slightly simpler single-asset TensorFlow example located here (use the second post in the thread):
# https://www.quantconnect.com/forum/discussion/2880/machine-learning---tensorflow-basic-example/p1/comment-8880
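# For intuition on the multi-label extension (a hypothetical sketch, not part of the strategy):
# softmax normalizes the outputs to sum to 1, forcing the classes to compete for a single
# "winner", while an independent sigmoid per class lets any subset of assets fire at once:
#   logits  = np.array([2.0, 1.0, -2.0])
#   softmax = np.exp(logits) / np.exp(logits).sum()  # [0.72, 0.27, 0.01], sums to 1
#   sigmoid = 1.0 / (1.0 + np.exp(-logits))          # [0.88, 0.73, 0.12], each independent
# Rounding the sigmoid outputs gives [1, 1, 0]: the first two assets are both flagged as buys.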
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import decimal as d
from datetime import timedelta, datetime
from sklearn.model_selection import train_test_split
seed = 1
random.seed(seed)
np.random.seed(seed)
tf.set_random_seed(seed)
class BasicTemplateAlgorithm(QCAlgorithm):
    def Initialize(self):
        # Set up the backtest
        self.SetStartDate(2017,12,10)  # Set Start Date
        self.SetEndDate(2018,1,9)      # Set End Date
        self.SetCash(1000)             # Set Strategy Cash
        # We think (step through minute data) faster than we act (daily forecast signal/rebalance) for better execution estimation.
        self.resolution = Resolution.Minute
        self.SetBrokerageModel(BrokerageName.GDAX, AccountType.Cash)
        # Current top market caps: a simple universe with lots of bias, but easy to work with data-wise.
        self.portfolioIDs = ["ETHUSD", "LTCUSD", "BTCUSD"]
        self.ETH = "ETHUSD"
        self.LTC = "LTCUSD"
        self.BTC = "BTCUSD"
        self.portfolio = [
            self.AddCrypto(self.ETH, Resolution.Minute),
            self.AddCrypto(self.LTC, Resolution.Minute),
            self.AddCrypto(self.BTC, Resolution.Minute)
        ]
        self.priceETH = self.Securities["ETHUSD"].Price
        self.priceLTC = self.Securities["LTCUSD"].Price
        self.priceBTC = self.Securities["BTCUSD"].Price
        # Init the TensorFlow model object and pass our portfolio symbols so we know the number of classes in the output layer.
        self.model = Model(symbols=self.portfolioIDs)
        # Custom charting for model performance
        sPlot = Chart('Strategy Equity')
        sPlot.AddSeries(Series('Model_Accuracy', SeriesType.Line, 2))
        sPlot.AddSeries(Series('Train_Model_Accuracy', SeriesType.Line, 2))
        sPlot.AddSeries(Series('Test_Model_Accuracy', SeriesType.Line, 2))
        sPlot.AddSeries(Series('Loss', SeriesType.Line, 3))
        sPlot.AddSeries(Series('Train_Model_Cross_Entropy_x100', SeriesType.Line, 3))
        sPlot.AddSeries(Series('Test_Model_Cross_Entropy_x100', SeriesType.Line, 3))
        self.AddChart(sPlot)
        # Our one big history call, done only once to save time
        self.model.hist_data = self.History(["ETHUSD", "LTCUSD", "BTCUSD"], self.model.warmup_count, Resolution.Minute).astype(np.float32)
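        # History returns a pandas DataFrame multi-indexed by (symbol, time); the model later
        # slices out one asset at a time via hist_data.loc[asset].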
        # Flag so we know when to start gathering history in OnData or Rebalance
        self.do_once = True
        # Prevent order spam by tracking the current weight target and comparing against new targets
        self.target = np.zeros((len(self.portfolioIDs),))
        # We forecast and trade on open-to-open price changes on a daily time scale, but the schedule below fires every 120 minutes rather than once each morning.
        self.Schedule.On(self.DateRules.EveryDay(),
                         self.TimeRules.Every(timedelta(minutes=120)),
                         Action(self.Rebalance))
    def SetHoldings(self, symbol, ratio):
        security = self.Securities[symbol]
        if not security.IsTradable:
            self.Debug("{} is not tradable.".format(symbol))
            return  # fail passively
        # Weights arrive as numpy floats; go through a fixed-point string so Decimal gets an exact 2-decimal value.
        ratio = format(ratio, '.2f')
        price, quantity = security.Price, security.Holdings.Quantity
        # Keep 3% cash (for the limit order, rounding errors, and safety)
        keep = .03
        usablePortfolioValue = self.Portfolio.TotalPortfolioValue #* d.Decimal(1 - keep).quantize(d.Decimal('1.00'))
        self.Log(symbol)
        # +0.1% limit order
        # (to make sure it executes quickly and without much loss)
        # (if you set the limit large, it will act like a market order)
        limit = 1.001
        desiredQuantity = usablePortfolioValue * d.Decimal(ratio) / price
        # Keep the order quantity numeric so the sign checks below work; round to 4 decimal places.
        orderQuantity = (desiredQuantity - quantity).quantize(d.Decimal('1.0000'))
        # The limit needs to be inverted when selling; quantize after multiplying so the 0.1% offset survives the rounding.
        limitPrice = (price * d.Decimal(limit if orderQuantity >= 0 else 1/limit)).quantize(d.Decimal('1.00'))
        self.Log("Limit Order: {} coins @ ${} per coin".format(orderQuantity, limitPrice))
        if orderQuantity != 0:
            self.LimitOrder(symbol, orderQuantity, limitPrice)
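    # Worked example for the limit math (hypothetical numbers): buying at a $100.00 price with
    # limit = 1.001 posts the order at $100.10, slightly above market so it fills quickly;
    # selling uses 1/limit instead, posting at $99.90, slightly below market for the same reason.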
    def Rebalance(self):
        # Update the current price dictionary, keyed by the asset string, not the Symbol object (had some issues there)
        for asset in self.portfolioIDs:
            asset = str(asset)
            self.model.current_price[asset] = float(self.Securities[asset].Price)
            self.Plot("Asset Price", asset, float(self.Securities[asset].Price))
        # Accrue history over time vs making huge, slow history calls each step.
        if not self.do_once:
            new_hist = self.History(self.portfolioIDs, 1, Resolution.Minute).astype(np.float32)
            # Append the newest bar per asset, then keep only the most recent warmup_count bars per asset (the symbol level of the History index)
            self.model.hist_data = self.model.hist_data.append(new_hist).groupby(level=0).tail(self.model.warmup_count)
            self.Log(str([str(asset) for asset in self.portfolioIDs]))
        else:
            self.do_once = False
        # Prepare our data now that it has been updated
        self.model.preprocessing(self)
        # Perform a number of training steps with the new data
        self.model.train(self)
        # Using the latest input feature set, get the assets predicted to make the desired profit by the next open
        self.weights = self.model.predict(self)
        # Some charting of model metrics
        self.Checkpoint()
        # A little ugly, but let's keep our weight info in the log
        #self.Log(str([str(asset) for asset in self.portfolioIDs])+'\n'+str(self.weights))
        self.Log(str(self.weights))
        # In case of a repeated forecast, skip the rebalance to reduce fees/orders
        if np.any(self.weights != self.target):
            # Track our current target to allow for the above filter
            self.target = self.weights
            # Loop through each asset and assign the relative weight for simple rebalancing.
            for asset, weight in zip(self.portfolioIDs, self.weights):
                # Please note the weights are already adjusted to yield a leverage of 1 (see the predict function)
                self.SetHoldings(asset, weight)
    def Checkpoint(self):
        # Some custom charts to better see model performance over time (and to see if our training is even progressing)
        self.Plot("Strategy Equity", 'Train_Model_Accuracy', 100.*self.model.train_accuracy)
        self.Plot("Strategy Equity", 'Test_Model_Accuracy', 100.*self.model.test_accuracy)
        self.Plot("Strategy Equity", 'Train_Model_Cross_Entropy_x100', 100.*self.model.train_ce)
        self.Plot("Strategy Equity", 'Test_Model_Cross_Entropy_x100', 100.*self.model.test_ce)
class Model():
    def __init__(self, symbols):
        # List of strings: the portfolio symbols
        self.symbols = symbols
        # Number of input samples for training (we will lose 1 to label alignment)
        self.eval_lookback = 252*4 + 1
        # The past n open-to-open price changes per asset, assembled into the full feature set per input sample
        self.n_features_per_asset = 1000
        # The input window holds each asset's price data
        self.n_features = self.n_features_per_asset * len(self.symbols)
        # Each asset now has a probability to determine if it is relevant to the input 'image'
        self.n_classes = len(self.symbols)
        # How much historical data do we need?
        self.warmup_count = self.eval_lookback + self.n_features
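        # Concretely, with the defaults above: eval_lookback = 252*4 + 1 = 1009 samples and
        # n_features = 1000 * 3 = 3000 inputs per sample, so warmup_count = 1009 + 3000 = 4009 minute bars per asset.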
        # Define our tensorflow model/network
        self.network_setup()
        # A dict used to contain every asset's current open price at rebalance time.
        self.current_price = {}
    def network_setup(self):
        # The TensorFlow tutorial does a great job (with illustrations), so most comments are left out here: https://www.tensorflow.org/get_started/mnist/beginners
        self.sess = tf.InteractiveSession()
        # Our feed dicts pipe data into these tensors on runs/evals: the input layer and the correct labels.
        self.x = tf.placeholder(tf.float32, shape=[None, self.n_features])
        self.y_ = tf.placeholder(tf.float32, shape=[None, self.n_classes])
        # The brain of our network: the weights and biases. Nice and simple for a linear network.
        #self.W = tf.Variable(tf.zeros([self.n_features, self.n_classes]))
        #self.b = tf.Variable(tf.zeros([self.n_classes]))
        def weight_variable(shape):
            initial = tf.truncated_normal(shape, stddev=0.1)
            return tf.Variable(initial)
        def bias_variable(shape):
            initial = tf.constant(0.1, shape=shape)
            return tf.Variable(initial)
        self.W = weight_variable([self.n_features, self.n_classes])
        self.b = bias_variable([self.n_classes])
        # The actual model is a painfully simple linear layer
        self.y = tf.matmul(self.x, self.W) + self.b
        # Output layer: using a sigmoid instead of softmax prevents normalization across all probabilities and retrieves a per-class probability instead, source:
        # https://towardsdatascience.com/multi-label-image-classification-with-inception-net-cbb2ee538e30
        self.y_pred = tf.nn.sigmoid(self.y)
        self.cross_entropy = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.y_, logits=self.y)) #tf.nn.softmax_cross_entropy_with_logits(labels=self.y_, logits=self.y))
        # For fun we use AdamOptimizer instead of the basic vanilla GradientDescentOptimizer.
        self.train_step = tf.train.AdamOptimizer(1e-3).minimize(self.cross_entropy)
        # Metric ops, adjusted for multi-class/multi-label as per the multi-label tutorial.
        self.correct_prediction = tf.equal(tf.round(self.y_pred), self.y_)
        self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32))
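        # Worked example (hypothetical numbers): for one sample with y_pred = [0.8, 0.4, 0.6] and
        # labels y_ = [1, 0, 0], tf.round(y_pred) gives [1, 0, 1], the elementwise comparison is
        # [True, True, False], and this sample contributes 2/3 ~ 0.667 to the mean accuracy.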
        # This is done later than in the TensorFlow tutorial because of AdamOptimizer usage, which needs its own vars to be init'ed
        self.sess.run(tf.global_variables_initializer())
    def preprocessing(self, algo_context):
        # Input features:
        # We use a sliding window of past changes in open prices per asset to act as our input "image".
        # By no means a good way to discover alpha...
        all_data = {}
        for asset in self.symbols:
            asset = str(asset)
            all_data[asset] = np.append(self.hist_data.loc[asset].open.values.flatten().astype(np.float32), self.current_price[asset])
        features = []
        labels = []
        for i in range(self.n_features_per_asset+1, len(next(iter(all_data.values())))-1):
            temp_feat = []
            for asset in self.symbols:
                asset = str(asset)
                temp_feat.append( np.diff(all_data[asset][i-self.n_features_per_asset-1:i])/all_data[asset][i-self.n_features_per_asset-1:i-1] )
            features.append( np.array(temp_feat).flatten() )
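            # Worked example of one asset's window (hypothetical prices): opens of [100., 110., 99.]
            # give np.diff = [10., -11.], and dividing by the leading prices [100., 110.] yields
            # fractional changes of [0.10, -0.10].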
            # Get the open-to-open change for the next day per asset. Use percent change to better compare assets.
            temp_lab = []
            for asset in self.symbols:
                asset = str(asset)
                temp_lab.append( 100.*(all_data[asset][i+1]-all_data[asset][i])/all_data[asset][i] )
            temp_lab = np.array(temp_lab)
            # For multi-class labels:
            # we set 1 for any "image" (sliding price data) with the relevant "labels" (an asset that moved far enough into the green to be favorable)
            ml = np.zeros_like(temp_lab, dtype=np.float32)
            ml[np.where(temp_lab > 0.001)[0]] = 1.0  # multi-label classification ground-truth vector based on money-making assets
            labels.append( ml )
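            # Worked example (hypothetical numbers): next-open percent changes of [0.5, -0.2, 0.0005]
            # exceed the 0.001 threshold only for the first asset, so ml = [1., 0., 0.].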
        features = np.array(features)
        labels = np.array(labels)
        # Here we would use train_test_split so we can better evaluate the model.
        # But due to the nature of our sliding window and the use of overlapping input features,
        # this may still be useless and lead to rapid overfitting.
        # Try to discover other ways to create and manage your dataset.
        #self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(features, labels, test_size=0.2, random_state=seed)
        # Chronological test/train split: unfortunate to lose the most recent data from training, but the test set must never be seen by the train set.
        split_len = int(len(labels)*0.05)
        self.X_train = features[:-split_len]
        self.X_test = features[-split_len:]
        self.y_train = labels[:-split_len]
        self.y_test = labels[-split_len:]
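        # For example, with 3000 samples this holds out the most recent int(3000*0.05) = 150 windows
        # for testing; note that consecutive windows overlap heavily, so even a chronological split
        # can leak information across the boundary.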
    def train(self, algo_context):
        # Perform training step(s) and check accuracy. Training on the full set every rebalance is
        # really lame; measure out-of-sample data for good info about test/validation accuracy,
        # and see the mini-batch sketch after the loop below.
        for _ in range(100):
            # batch = np.random.permutation(np.arange(len(self.X_train)))[:100]
            self.train_step.run(session=self.sess, feed_dict={self.x: self.X_train, self.y_: self.y_train})
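        # A minimal mini-batch variant (a sketch built on the commented-out `batch` line above):
        #   for _ in range(100):
        #       batch = np.random.permutation(np.arange(len(self.X_train)))[:100]
        #       self.train_step.run(session=self.sess,
        #                           feed_dict={self.x: self.X_train[batch], self.y_: self.y_train[batch]})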
        # Collect some metrics for charting
        self.train_accuracy = self.accuracy.eval(session=self.sess, feed_dict={self.x: self.X_train, self.y_: self.y_train})
        self.test_accuracy = self.accuracy.eval(session=self.sess, feed_dict={self.x: self.X_test, self.y_: self.y_test})
        self.train_ce = self.cross_entropy.eval(session=self.sess, feed_dict={self.x: self.X_train, self.y_: self.y_train})
        self.test_ce = self.cross_entropy.eval(session=self.sess, feed_dict={self.x: self.X_test, self.y_: self.y_test})
        #print("\nTrain Accuracy: %0.5f %0.5f"%(self.train_accuracy,self.test_accuracy)) # commented out to reduce log
    def predict(self, algo_context):
        # Perform inference
        #pred_feat = np.append(self.hist_data.open.values.flatten().astype(np.float32), self.current_price)[-self.n_features-1:]
        all_data = {}
        temp_feat = []
        for asset in self.symbols:
            asset = str(asset)
            all_data[asset] = np.append(self.hist_data.loc[asset].open.values.flatten().astype(np.float32), self.current_price[asset])[-self.n_features-1:]
            temp_feat.append( np.diff(all_data[asset][-self.n_features_per_asset-1:])/all_data[asset][-self.n_features_per_asset-1:-1] )
        pred_feat = np.array(temp_feat).flatten()
        #pred_feat = 100.*np.diff(all_data)/all_data[:-1]
        pred_proba = self.y_pred.eval(session=self.sess, feed_dict={self.x: [pred_feat]})
        #print("Forecast Probabilities: %s"%str(pred_proba[0])) # commented out to reduce log
        self.current_forecast = pred_proba[0]
        # Cash or long only; additionally ensure no NaNs to prevent a crash (NOT IDEAL, CAN FLAG BUYS)
        classified = np.clip(np.nan_to_num(np.round(pred_proba[0])), 0., 1.)
        # Each asset with a 1.0 gets purchased; keep the leverage at 1.0 by normalizing the weights.
        if np.sum(classified) != 0.:
            classified /= np.sum(classified)
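        # Worked example (hypothetical forecast): rounded probabilities of [1., 0., 1.] normalize to
        # weights of [0.5, 0., 0.5], half the portfolio in each flagged asset; an all-zero forecast
        # leaves every weight at zero, which moves the portfolio to cash at the next rebalance.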
        return classified