| Overall Statistics | |
| --- | --- |
| Total Trades | 377 |
| Average Win | 1.05% |
| Average Loss | -0.75% |
| Compounding Annual Return | 20596.233% |
| Drawdown | 34.100% |
| Expectancy | 0.320 |
| Net Profit | 57.287% |
| Sharpe Ratio | 5.178 |
| Loss Rate | 45% |
| Win Rate | 55% |
| Profit-Loss Ratio | 1.40 |
| Alpha | 8.167 |
| Beta | -323.793 |
| Annual Standard Deviation | 0.741 |
| Annual Variance | 0.55 |
| Information Ratio | 5.16 |
| Tracking Error | 0.741 |
| Treynor Ratio | -0.012 |
| Total Fees | $0.00 |
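As a sanity check on the table: the expectancy is consistent with the win/loss statistics, since expectancy = win rate × profit-loss ratio - loss rate = 0.55 × 1.40 - 0.45 = 0.32.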
# Derek M Tishler - 2017
# https://tishlercapital.com/
# Based on the MNIST TensorFlow Softmax Classification Example:
# https://www.tensorflow.org/get_started/mnist/beginners
# https://www.tensorflow.org/get_started/mnist/pros
# Extended from single-class to multi-class probabilities using:
# "Multi-label image classification with Inception net" - Radek Bartyzal
# https://towardsdatascience.com/multi-label-image-classification-with-inception-net-cbb2ee538e30
# In case you missed it, there is a slightly simpler single-asset TensorFlow example located here (use the second post in the thread):
# https://www.quantconnect.com/forum/discussion/2880/machine-learning---tensorflow-basic-example/p1/comment-8880
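# For intuition on the multi-label extension (a hypothetical sketch, not part of the strategy):
# softmax normalizes the outputs to sum to 1, forcing the classes to compete for a single
# "winner", while an independent sigmoid per class lets any subset of assets fire at once:
#   logits  = np.array([2.0, 1.0, -2.0])
#   softmax = np.exp(logits) / np.exp(logits).sum()  # [0.72, 0.27, 0.01], sums to 1
#   sigmoid = 1.0 / (1.0 + np.exp(-logits))          # [0.88, 0.73, 0.12], each independent
# Rounding the sigmoid outputs gives [1, 1, 0]: the first two assets are both flagged as buys.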
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import decimal as d
from datetime import timedelta, datetime
from sklearn.model_selection import train_test_split
seed = 1
random.seed(seed)
np.random.seed(seed)
tf.set_random_seed(seed)
class BasicTemplateAlgorithm(QCAlgorithm):
    def Initialize(self):
        # Set up the backtest
        self.SetStartDate(2017,12,10)  # Set Start Date
        self.SetEndDate(2018,1,9)      # Set End Date
        self.SetCash(1000)             # Set Strategy Cash
        # We think (step through minute data) faster than we act (daily forecast signal/rebalance) for better execution estimation.
        self.resolution = Resolution.Minute
        self.SetBrokerageModel(BrokerageName.GDAX, AccountType.Cash)
        # Current top market caps: a simple universe with lots of bias, but easy to work with data-wise.
        self.portfolioIDs = ["ETHUSD", "LTCUSD", "BTCUSD"]
        self.ETH = "ETHUSD"
        self.LTC = "LTCUSD"
        self.BTC = "BTCUSD"
        self.portfolio = [
            self.AddCrypto(self.ETH, Resolution.Minute),
            self.AddCrypto(self.LTC, Resolution.Minute),
            self.AddCrypto(self.BTC, Resolution.Minute)
        ]
        self.priceETH = self.Securities["ETHUSD"].Price
        self.priceLTC = self.Securities["LTCUSD"].Price
        self.priceBTC = self.Securities["BTCUSD"].Price
        # Init the TensorFlow model object and pass our portfolio symbols so we know the number of classes in the output layer.
        self.model = Model(symbols=self.portfolioIDs)
        # Custom charting for model performance
        sPlot = Chart('Strategy Equity')
        sPlot.AddSeries(Series('Model_Accuracy', SeriesType.Line, 2))
        sPlot.AddSeries(Series('Train_Model_Accuracy', SeriesType.Line, 2))
        sPlot.AddSeries(Series('Test_Model_Accuracy', SeriesType.Line, 2))
        sPlot.AddSeries(Series('Loss', SeriesType.Line, 3))
        sPlot.AddSeries(Series('Train_Model_Cross_Entropy_x100', SeriesType.Line, 3))
        sPlot.AddSeries(Series('Test_Model_Cross_Entropy_x100', SeriesType.Line, 3))
        self.AddChart(sPlot)
        # Our one big history call, done only once to save time
        self.model.hist_data = self.History(["ETHUSD", "LTCUSD", "BTCUSD"], self.model.warmup_count, Resolution.Minute).astype(np.float32)
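        # History returns a pandas DataFrame multi-indexed by (symbol, time); the model later
        # slices out one asset at a time via hist_data.loc[asset].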
        # Flag so we know when to start gathering history in OnData or Rebalance
        self.do_once = True
        # Prevent order spam by tracking the current weight target and comparing against new targets
        self.target = np.zeros((len(self.portfolioIDs),))
        # We forecast and trade on open-to-open price changes on a daily time scale, but the schedule below fires every 120 minutes rather than once each morning.
        self.Schedule.On(self.DateRules.EveryDay(),
                         self.TimeRules.Every(timedelta(minutes=120)),
                         Action(self.Rebalance))
    def SetHoldings(self, symbol, ratio):
        security = self.Securities[symbol]
        if not security.IsTradable:
            self.Debug("{} is not tradable.".format(symbol))
            return  # fail passively
        # Weights arrive as numpy floats; go through a fixed-point string so Decimal gets an exact 2-decimal value.
        ratio = format(ratio, '.2f')
        price, quantity = security.Price, security.Holdings.Quantity
        # Keep 3% cash (for the limit order, rounding errors, and safety)
        keep = .03
        usablePortfolioValue = self.Portfolio.TotalPortfolioValue #* d.Decimal(1 - keep).quantize(d.Decimal('1.00'))
        self.Log(symbol)
        # +0.1% limit order
        # (to make sure it executes quickly and without much loss)
        # (if you set the limit large, it will act like a market order)
        limit = 1.001
        desiredQuantity = usablePortfolioValue * d.Decimal(ratio) / price
        # Keep the order quantity numeric so the sign checks below work; round to 4 decimal places.
        orderQuantity = (desiredQuantity - quantity).quantize(d.Decimal('1.0000'))
        # The limit needs to be inverted when selling; quantize after multiplying so the 0.1% offset survives the rounding.
        limitPrice = (price * d.Decimal(limit if orderQuantity >= 0 else 1/limit)).quantize(d.Decimal('1.00'))
        self.Log("Limit Order: {} coins @ ${} per coin".format(orderQuantity, limitPrice))
        if orderQuantity != 0:
            self.LimitOrder(symbol, orderQuantity, limitPrice)
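    # Worked example for the limit math (hypothetical numbers): buying at a $100.00 price with
    # limit = 1.001 posts the order at $100.10, slightly above market so it fills quickly;
    # selling uses 1/limit instead, posting at $99.90, slightly below market for the same reason.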
    def Rebalance(self):
        # Update the current price dictionary, keyed by the asset string, not the Symbol object (had some issues there)
        for asset in self.portfolioIDs:
            asset = str(asset)
            self.model.current_price[asset] = float(self.Securities[asset].Price)
            self.Plot("Asset Price", asset, float(self.Securities[asset].Price))
        # Accrue history over time vs making huge, slow history calls each step.
        if not self.do_once:
            new_hist = self.History(self.portfolioIDs, 1, Resolution.Minute).astype(np.float32)
            # Append the newest bar per asset, then keep only the most recent warmup_count bars per asset (the symbol level of the History index)
            self.model.hist_data = self.model.hist_data.append(new_hist).groupby(level=0).tail(self.model.warmup_count)
            self.Log(str([str(asset) for asset in self.portfolioIDs]))
        else:
            self.do_once = False
        # Prepare our data now that it has been updated
        self.model.preprocessing(self)
        # Perform a number of training steps with the new data
        self.model.train(self)
        # Using the latest input feature set, get the assets predicted to make the desired profit by the next open
        self.weights = self.model.predict(self)
        # Some charting of model metrics
        self.Checkpoint()
        # A little ugly, but let's keep our weight info in the log
        #self.Log(str([str(asset) for asset in self.portfolioIDs])+'\n'+str(self.weights))
        self.Log(str(self.weights))
        # In case of a repeated forecast, skip the rebalance to reduce fees/orders
        if np.any(self.weights != self.target):
            # Track our current target to allow for the above filter
            self.target = self.weights
            # Loop through each asset and assign the relative weight for simple rebalancing.
            for asset, weight in zip(self.portfolioIDs, self.weights):
                # Please note the weights are already adjusted to yield a leverage of 1 (see the predict function)
                self.SetHoldings(asset, weight)
    def Checkpoint(self):
        # Some custom charts to better see model performance over time (and to see if our training is even progressing)
        self.Plot("Strategy Equity", 'Train_Model_Accuracy', 100.*self.model.train_accuracy)
        self.Plot("Strategy Equity", 'Test_Model_Accuracy', 100.*self.model.test_accuracy)
        self.Plot("Strategy Equity", 'Train_Model_Cross_Entropy_x100', 100.*self.model.train_ce)
        self.Plot("Strategy Equity", 'Test_Model_Cross_Entropy_x100', 100.*self.model.test_ce)
class Model():
    def __init__(self, symbols):
        # List of strings: the portfolio symbols
        self.symbols = symbols
        # Number of input samples for training (we will lose 1 to label alignment)
        self.eval_lookback = 252*4 + 1
        # The past n open-to-open price changes per asset, assembled into the full feature set per input sample
        self.n_features_per_asset = 1000
        # The input window holds each asset's price data
        self.n_features = self.n_features_per_asset * len(self.symbols)
        # Each asset now has a probability to determine if it is relevant to the input 'image'
        self.n_classes = len(self.symbols)
        # How much historical data do we need?
        self.warmup_count = self.eval_lookback + self.n_features
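        # Concretely, with the defaults above: eval_lookback = 252*4 + 1 = 1009 samples and
        # n_features = 1000 * 3 = 3000 inputs per sample, so warmup_count = 1009 + 3000 = 4009 minute bars per asset.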
        # Define our tensorflow model/network
        self.network_setup()
        # A dict used to contain every asset's current open price at rebalance time.
        self.current_price = {}
    def network_setup(self):
        # The TensorFlow tutorial does a great job (with illustrations), so most comments are left out here: https://www.tensorflow.org/get_started/mnist/beginners
        self.sess = tf.InteractiveSession()
        # Our feed dicts pipe data into these tensors on runs/evals: the input layer and the correct labels.
        self.x = tf.placeholder(tf.float32, shape=[None, self.n_features])
        self.y_ = tf.placeholder(tf.float32, shape=[None, self.n_classes])
        # The brain of our network: the weights and biases. Nice and simple for a linear network.
        #self.W = tf.Variable(tf.zeros([self.n_features, self.n_classes]))
        #self.b = tf.Variable(tf.zeros([self.n_classes]))
        def weight_variable(shape):
            initial = tf.truncated_normal(shape, stddev=0.1)
            return tf.Variable(initial)
        def bias_variable(shape):
            initial = tf.constant(0.1, shape=shape)
            return tf.Variable(initial)
        self.W = weight_variable([self.n_features, self.n_classes])
        self.b = bias_variable([self.n_classes])
        # The actual model is a painfully simple linear layer
        self.y = tf.matmul(self.x, self.W) + self.b
        # Output layer: using a sigmoid instead of softmax prevents normalization across all probabilities and retrieves a per-class probability instead, source:
        # https://towardsdatascience.com/multi-label-image-classification-with-inception-net-cbb2ee538e30
        self.y_pred = tf.nn.sigmoid(self.y)
        self.cross_entropy = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.y_, logits=self.y)) #tf.nn.softmax_cross_entropy_with_logits(labels=self.y_, logits=self.y))
        # For fun we use AdamOptimizer instead of the basic vanilla GradientDescentOptimizer.
        self.train_step = tf.train.AdamOptimizer(1e-3).minimize(self.cross_entropy)
        # Metric ops, adjusted for multi-class/multi-label as per the multi-label tutorial.
        self.correct_prediction = tf.equal(tf.round(self.y_pred), self.y_)
        self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32))
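        # Worked example (hypothetical numbers): for one sample with y_pred = [0.8, 0.4, 0.6] and
        # labels y_ = [1, 0, 0], tf.round(y_pred) gives [1, 0, 1], the elementwise comparison is
        # [True, True, False], and this sample contributes 2/3 ~ 0.667 to the mean accuracy.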
        # This is done later than in the TensorFlow tutorial because of AdamOptimizer usage, which needs its own vars to be init'ed
        self.sess.run(tf.global_variables_initializer())
    def preprocessing(self, algo_context):
        # Input features:
        # We use a sliding window of past changes in open prices per asset to act as our input "image".
        # By no means a good way to discover alpha...
        all_data = {}
        for asset in self.symbols:
            asset = str(asset)
            all_data[asset] = np.append(self.hist_data.loc[asset].open.values.flatten().astype(np.float32), self.current_price[asset])
        features = []
        labels = []
        for i in range(self.n_features_per_asset+1, len(next(iter(all_data.values())))-1):
            temp_feat = []
            for asset in self.symbols:
                asset = str(asset)
                temp_feat.append( np.diff(all_data[asset][i-self.n_features_per_asset-1:i])/all_data[asset][i-self.n_features_per_asset-1:i-1] )
            features.append( np.array(temp_feat).flatten() )
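            # Worked example of one asset's window (hypothetical prices): opens of [100., 110., 99.]
            # give np.diff = [10., -11.], and dividing by the leading prices [100., 110.] yields
            # fractional changes of [0.10, -0.10].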
            # Get the open-to-open change for the next day per asset. Use percent change to better compare assets.
            temp_lab = []
            for asset in self.symbols:
                asset = str(asset)
                temp_lab.append( 100.*(all_data[asset][i+1]-all_data[asset][i])/all_data[asset][i] )
            temp_lab = np.array(temp_lab)
            # For multi-class labels:
            # we set 1 for any "image" (sliding price data) with the relevant "labels" (an asset that moved far enough into the green to be favorable)
            ml = np.zeros_like(temp_lab, dtype=np.float32)
            ml[np.where(temp_lab > 0.001)[0]] = 1.0  # multi-label classification ground-truth vector based on money-making assets
            labels.append( ml )
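            # Worked example (hypothetical numbers): next-open percent changes of [0.5, -0.2, 0.0005]
            # exceed the 0.001 threshold only for the first asset, so ml = [1., 0., 0.].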
        features = np.array(features)
        labels = np.array(labels)
        # Here we would use train_test_split so we can better evaluate the model.
        # But due to the nature of our sliding window and the use of overlapping input features,
        # this may still be useless and lead to rapid overfitting.
        # Try to discover other ways to create and manage your dataset.
        #self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(features, labels, test_size=0.2, random_state=seed)
        # Chronological test/train split: unfortunate to lose the most recent data from training, but the test set must never be seen by the train set.
        split_len = int(len(labels)*0.05)
        self.X_train = features[:-split_len]
        self.X_test = features[-split_len:]
        self.y_train = labels[:-split_len]
        self.y_test = labels[-split_len:]
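        # For example, with 3000 samples this holds out the most recent int(3000*0.05) = 150 windows
        # for testing; note that consecutive windows overlap heavily, so even a chronological split
        # can leak information across the boundary.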
    def train(self, algo_context):
        # Perform training step(s) and check accuracy. Training on the full set every rebalance is
        # really lame; measure out-of-sample data for good info about test/validation accuracy,
        # and see the mini-batch sketch after the loop below.
        for _ in range(100):
            # batch = np.random.permutation(np.arange(len(self.X_train)))[:100]
            self.train_step.run(session=self.sess, feed_dict={self.x: self.X_train, self.y_: self.y_train})
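        # A minimal mini-batch variant (a sketch built on the commented-out `batch` line above):
        #   for _ in range(100):
        #       batch = np.random.permutation(np.arange(len(self.X_train)))[:100]
        #       self.train_step.run(session=self.sess,
        #                           feed_dict={self.x: self.X_train[batch], self.y_: self.y_train[batch]})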
        # Collect some metrics for charting
        self.train_accuracy = self.accuracy.eval(session=self.sess, feed_dict={self.x: self.X_train, self.y_: self.y_train})
        self.test_accuracy = self.accuracy.eval(session=self.sess, feed_dict={self.x: self.X_test, self.y_: self.y_test})
        self.train_ce = self.cross_entropy.eval(session=self.sess, feed_dict={self.x: self.X_train, self.y_: self.y_train})
        self.test_ce = self.cross_entropy.eval(session=self.sess, feed_dict={self.x: self.X_test, self.y_: self.y_test})
        #print("\nTrain Accuracy: %0.5f %0.5f"%(self.train_accuracy,self.test_accuracy)) # commented out to reduce log
    def predict(self, algo_context):
        # Perform inference
        #pred_feat = np.append(self.hist_data.open.values.flatten().astype(np.float32), self.current_price)[-self.n_features-1:]
        all_data = {}
        temp_feat = []
        for asset in self.symbols:
            asset = str(asset)
            all_data[asset] = np.append(self.hist_data.loc[asset].open.values.flatten().astype(np.float32), self.current_price[asset])[-self.n_features-1:]
            temp_feat.append( np.diff(all_data[asset][-self.n_features_per_asset-1:])/all_data[asset][-self.n_features_per_asset-1:-1] )
        pred_feat = np.array(temp_feat).flatten()
        #pred_feat = 100.*np.diff(all_data)/all_data[:-1]
        pred_proba = self.y_pred.eval(session=self.sess, feed_dict={self.x: [pred_feat]})
        #print("Forecast Probabilities: %s"%str(pred_proba[0])) # commented out to reduce log
        self.current_forecast = pred_proba[0]
        # Cash or long only; additionally ensure no NaNs to prevent a crash (NOT IDEAL, CAN FLAG BUYS)
        classified = np.clip(np.nan_to_num(np.round(pred_proba[0])), 0., 1.)
        # Each asset with a 1.0 gets purchased; keep the leverage at 1.0 by normalizing the weights.
        if np.sum(classified) != 0.:
            classified /= np.sum(classified)
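        # Worked example (hypothetical forecast): rounded probabilities of [1., 0., 1.] normalize to
        # weights of [0.5, 0., 0.5], half the portfolio in each flagged asset; an all-zero forecast
        # leaves every weight at zero, which moves the portfolio to cash at the next rebalance.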
        return classified