| Overall Statistics | |
| --- | --- |
| Total Trades | 1484 |
| Average Win | 0.23% |
| Average Loss | -0.18% |
| Compounding Annual Return | -2.605% |
| Drawdown | 10.000% |
| Expectancy | -0.038 |
| Net Profit | -5.464% |
| Sharpe Ratio | -0.292 |
| Loss Rate | 57% |
| Win Rate | 43% |
| Profit-Loss Ratio | 1.23 |
| Alpha | -0.026 |
| Beta | 0.021 |
| Annual Standard Deviation | 0.079 |
| Annual Variance | 0.006 |
| Information Ratio | -0.74 |
| Tracking Error | 0.194 |
| Treynor Ratio | -1.101 |
| Total Fees | $6111.40 |
# QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals.
# Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import clr
clr.AddReference("System")
clr.AddReference("QuantConnect.Algorithm")
clr.AddReference("QuantConnect.Indicators")
clr.AddReference("QuantConnect.Common")
from System import *
from QuantConnect import *
from QuantConnect.Algorithm import *
from QuantConnect.Indicators import *
import decimal as d
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.linear_model import LogisticRegression
from CNN_classifier import CNNClassifier
def calcSignal(data,algo):
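    # Compute candidate intraday signals from OHLCV history. Several signals
    # (Dual Thrust breakout, RSI, price momentum, Williams %R) are evaluated,
    # but only the channel-momentum signal is returned and traded.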
r=data.p.diff().fillna(0)
s_brk=dual_thr(data,15,0.25)
s_rsi=rsi(r,15,0.2)
s_mom=px_mom(r,15,300)
s_chmom=chan_mom(data,60,5)
s_wr=williamsr(data,15,0.3)
# conv. net
# clf=algo.fitted_models[-1]
vdp=data.v*r
n1=2**np.arange(0,8)
n2=2**np.arange(1,8)
X=madiff(vdp,n1,n2)
#X=X.reshape((1,X.shape[0],X.shape[1]))
#X=np.concatenate((X[:,0].T,X[:,1].T)).reshape((1,X.shape[1]*algo.window))
# yhat=clf.predict(X)
return s_chmom
def dual_thr(data,n,gamma):
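    # Dual Thrust breakout: the band half-width is gamma times the wider of
    # (max high - min close) and (max close - min low) over the last n bars;
    # the signal is how far the last price sits outside the band around the open.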
rg1=data.h.iloc[-n:].max()-data.p.iloc[-n:].min()
rg2=data.p.iloc[-n:].max()-data.l.iloc[-n:].min()
rg=max(rg1,rg2)
p=data.p.values[-1]
o=data.o.values[-1]
return max(p-(o+gamma*rg),0) + min(p-(o-gamma*rg),0)
def williamsr(data,n,thr=0.2):
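    # Williams %R re-centered to [-0.5, 0.5]: positive when price is near the
    # n-bar low, negative near the n-bar high (the thresholded variant is
    # commented out below, which is why thr is currently unused).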
h=data.h.iloc[-n:].max()
ratio=(h-data.p.iloc[-1])/(h-data.l.iloc[-n:].min())
return ratio-0.5 #-np.int8(ratio>1.0-thr)+np.int8(ratio<thr)
def rsi(ts,n,thr):
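    # RSI-style oscillator: sum(r) / (2 * sum(|r|)) lies in [-0.5, 0.5];
    # magnitudes inside the +/- thr dead zone are clipped to zero.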
s=ts.rolling(window=n,min_periods=n).sum()
a=np.abs(ts).rolling(window=n,min_periods=n).sum()
rsi=(0.5*(s+a)/a-0.5).fillna(0).values
return max(np.abs(rsi[-1])-thr,0)*np.sign(rsi[-1])
def px_mom(ts,n1,n2):
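    # Price momentum: fast mean return minus slow mean return, normalized by
    # the rolling mean absolute value of the fast component.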
s1=ts.rolling(window=n1,min_periods=n1).mean().fillna(0)
v1=np.abs(s1).rolling(window=n2,min_periods=1).mean()
s2=ts.rolling(window=n2,min_periods=n2).mean().fillna(0)
mom=((s1-s2)/v1).fillna(0).values
return mom[-1]
def chan_mom(data,n,n_s):
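    # Channel momentum: drift of recent highs and lows relative to the n-bar
    # channel range, gated so it only fires when volume is more than one
    # standard deviation above its n-bar mean.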
h=data.h.values[-n:]
l=data.l.values[-n:]
v=data.v.values[-n:]
rg=h.max()-l.min()
mh=(h[-n_s:].mean()-h[:n_s].mean())/rg
ml=(l[-n_s:].mean()-l[:n_s].mean())/rg
vz=(v[-n_s:].mean()-v.mean())/v.std()
return np.sign(mh+ml)*np.int8(vz>1)
def build_acc(r,n_f):
    # Build candidate features (EMA levels and EMA differences, scaled by a
    # rolling std) and score each against the next-period return.
    cand=[]
    N=len(n_f)
    for i1 in range(N-2):
        m1=r.ewm(span=n_f[i1],min_periods=n_f[-1]).mean()
        s=m1.rolling(window=n_f[-1],min_periods=n_f[-1]).std()
        cand.append((m1/s).fillna(0))
        for i2 in range(i1+1,N):
            m2=r.ewm(span=n_f[i2],min_periods=n_f[-1]).mean()
            cand.append(((m1-m2)/s).fillna(0))
    # target column: next-period return
    cand.append(r.shift(-1).fillna(0).values)
    ds=np.column_stack(cand)
    T=len(ds)
    ds_train=ds[T//4:]
    ds_test =ds[:T//4]
    n_c=ds.shape[1]-1
    metrics_train=np.zeros(n_c)
    metrics_test=np.zeros(n_c)
    for n in range(n_c):
        metrics_train[n]=cosine_similarity(ds_train[:,n].reshape(1,-1),ds_train[:,-1].reshape(1,-1))[0,0]
        # penalize turnover: similarity divided by the feature's total variation
        metrics_test[n]=metrics_train[n]/np.abs(np.diff(ds_train[:,n])).sum()
    del ds
    return metrics_train,metrics_test
def prepareDataForCNN(df,params):
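    # Build 3-D (samples, window, features) tensors of rebased price/volume
    # for the 1-D CNN; labels are one-hot classes 0/1/2 for down/flat/up moves
    # over the next pred_window bars.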
train_test_split=params.get('train_test_split',0.75)
ds=df.values
N=len(ds)
window=params['window']
pred_window=params['pred_window']
threshold=params['threshold']
    data=np.ones(ds.shape)
    data[1:,0]=ds[1:,0]/ds[0,0]  # prices rebased to the first bar
    data[1:,1]=ds[1:,1]/ds[0,1]  # volumes rebased to the first bar
X = np.atleast_3d(np.array([data[start:start + window] for start in range(0, N - window)]))
r = np.ones(N)
r[:-pred_window]=data[pred_window:,0]/data[:-pred_window,0]
y=np.ones((N,1))
y[r>1.0+threshold]=2
y[r<1.0-threshold]=0
onehot_encoder = OneHotEncoder(sparse=False)
Y = onehot_encoder.fit_transform(y[window:])
k=int((N-window)*train_test_split)
X_train=X[:k]
Y_train=Y[:k]
X_test =X[k:]
Y_test =Y[k:]
return (X_train,Y_train,X_test,Y_test)
def prepareDataForRFC(df,params):
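    # Flatten each window of rebased price/volume changes into a single
    # feature row; labels are -1/0/1 from the pred_window-ahead return sum
    # compared against +/- threshold.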
train_test_split=params.get('train_test_split',0.75)
pred_window=params['pred_window']
r=df['p'].diff().fillna(0)
R=r.rolling(window=pred_window,min_periods=pred_window).sum().fillna(0)
R=R.shift(-pred_window)
ds=df.values
N=len(ds)
window=params['window']
threshold=params['threshold']
    data=np.ones(ds.shape)
    data[1:,0]=ds[1:,0]/ds[0,0]-1.0  # prices as changes vs the first bar
    data[1:,1]=ds[1:,1]/ds[0,1]-1.0  # volumes as changes vs the first bar
X=pd.DataFrame([np.concatenate((data[start:start + window,0].T,data[start:start + window,1].T)) for start in range(0, N - window)]).values
y=np.zeros((N,1))
y[R>threshold]=1
y[R<-threshold]=-1
#onehot_encoder = OneHotEncoder(sparse=False)
Y = y[window:] #onehot_encoder.fit_transform(y[window:])
k=int((N-window)*train_test_split)
X_train=X[:k]
Y_train=Y[:k].reshape((k,))
X_test =X[k:]
Y_test =Y[k:].reshape((N-window-k,))
return (X_train,Y_train,X_test,Y_test)
def prepareVdpDataForRFC(df,params):
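    # Same -1/0/1 labeling as prepareDataForRFC, but the features are the
    # moving-average differences of volume-weighted returns from madiff.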
train_test_split=params.get('train_test_split',0.75)
pred_window=params['pred_window']
threshold=params['threshold']
N=df.shape[0]
r=(df['p'].diff()/df['p'].shift()).fillna(0)
R=r.rolling(window=pred_window,min_periods=pred_window).sum().fillna(0)
R=R.shift(-pred_window)
vdp=df.v*r
n1=2**np.arange(0,8)
n2=2**np.arange(1,8)
window=n2[-1]
X=madiff(vdp,n1,n2)
y=np.zeros((N,1))
y[R>threshold]=1
y[R<-threshold]=-1
Y = y[window-1:]
k=int((N-window+1)*train_test_split)
X_train=X[:k]
Y_train=Y[:k].reshape((k,))
X_test =X[k:]
Y_test =Y[k:].reshape((N-window+1-k,))
return (X_train,Y_train,X_test,Y_test)
def madiff(ts,n1,n2):
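    # Moving-average difference features: for each fast window n_s and each
    # longer window n_l, (fast MA - slow MA) scaled by a rolling std of the
    # fast MA; one column per (n_s, n_l) pair.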
window=n2[-1]
mm=[]
cols=[]
for n_s in n1:
m1=ts.rolling(window=n_s,min_periods=n_s).mean()
s=m1.rolling(window=n2[-1],min_periods=n2[-1]).std()
        for n_l in n2[int(np.log2(n_s))+1:]:
m2=ts.rolling(window=n_l,min_periods=n_l).mean()
mm.append((m1-m2)/s)
cols.append('ts'+str(n_s)+'x'+str(n_l))
    return pd.DataFrame(np.column_stack(mm),columns=cols).fillna(0).values[window-1:]
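# Example (matching the feature construction in calcSignal and
# prepareVdpDataForRFC): madiff(vdp, 2**np.arange(0,8), 2**np.arange(1,8))
# yields one row per bar from the 128th bar onward and one column per MA pair.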
class MovingAverageCrossAlgorithm(QCAlgorithm):
def Initialize(self):
        '''Initialise the data and resolution required, as well as the cash and start-end dates for your algorithm. All algorithms must be initialized.'''
        self.SetStartDate(2011, 1, 2)    #Set Start Date
self.SetEndDate(2016, 12, 23) #Set End Date
self.SetCash(100000) #Set Strategy Cash
self.symbols = ["SPY"]#,"FXI","EWJ","EWG","EWQ","EQI","EWZ"]
for s in self.symbols:
self.AddEquity(s,Resolution.Minute)
#self.previous = None
self.n_fit=2000
self.SetWarmup(self.n_fit)
self.n_f=[1,2,4,8,16,32,64] #,128,256,512]
self.window=128
self.pred_window=30
self.threshold=0.0001
self.conv_layers=[{'kernel_size':4,'num_filters':4},{'kernel_size':4,'num_filters':4}]
self.dropout=0.5
self.pooling='avg'
self.target_dim=3
self.calc_len=self.window
#self.Schedule.On(self.DateRules.MonthStart("SPY"),self.TimeRules.BeforeMarketClose("SPY",5),Action(self.modelFit))
        # start at 11:50 (140 minutes after the open) and go in 15 min increments till 15:35
for i in range(16):
self.Schedule.On(self.DateRules.EveryDay("SPY"),self.TimeRules.AfterMarketOpen("SPY",140+i*15),Action(self.runAndTrade))
#self.Schedule.On(self.DateRules.EveryDay("SPY"),self.TimeRules.BeforeMarketClose("SPY",10),Action(self.runAndTrade))
self.fitted_models=[]
self.signal_values=[]
def runAndTrade(self):
        # wait for the warm-up history to fully initialize
if self.IsWarmingUp:
return
# only once per day
#if self.previous is not None and self.previous.date() == self.Time.date():
# return
for s in self.symbols:
history = self.History(s,self.calc_len,Resolution.Minute)
data=[]
index=[]
            for bar in history:
                data.append([float(bar.Open),float(bar.High),float(bar.Low),float(bar.Close),float(bar.Volume)])
                index.append(bar.Time)
df=pd.DataFrame(data,columns=['o','h','l','p','v'],index=pd.Series(index))
if len(df)==self.calc_len:
if len(self.fitted_models)==0:
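                    # modelFit is not scheduled above, so fitted_models stays
                    # empty and this branch always trades the raw signal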
signal=calcSignal(df,self)
self.signal_values.append(signal)
self.SetHoldings(s,signal/self.Securities.Count)
def modelFit(self):
        # wait for the warm-up history to fully initialize
        if self.IsWarmingUp:
            return
        self.Debug("Running modelFit on "+str(self.Time.date()))
# only once per day
#if self.previous is not None and self.previous.date() == self.Time.date():
# return
for s in self.symbols:
history = self.History(s,self.n_fit,Resolution.Minute)
data=[]
index=[]
            for bar in history:
                index.append(bar.Time)
                data.append([float(bar.Close),float(bar.Volume)])
                #data.append(float(bar.Close))
                #data.append([float(bar.Open),float(bar.High),float(bar.Low),float(bar.Close),float(bar.Volume)])
df=pd.DataFrame(data,columns=['p','v'],index=pd.Series(index))
#self.Debug("Loaded "+str(len(df))+" rows")
if len(data)==self.n_fit:
'''
input_shape=(self.window,df.shape[1])
clf=CNNClassifier(input_shape,self.conv_layers,self.dropout,self.pooling,self.target_dim)
self.Debug(str(clf.model.summary()))
(X_train,Y_train,X_test,Y_test) = prepareDataForCNN(df,{'window':self.window,'pred_window':self.pred_window,'threshold':self.threshold})
clf.fit(X_train,Y_train,validation_data=(X_test,Y_test),sample_weight=None)
self.Debug(clf.model.evaluate(X_test,Y_test))
yhat=np.argmax(clf.predict_proba(X_test),axis=1)-1
y_test=np.argmax(Y_test,axis=1)-1
'''
(X_train,Y_train,X_test,Y_test) = prepareVdpDataForRFC(df,{'window':self.window,'pred_window':self.pred_window,'threshold':self.threshold})
#self.Debug("Y_train: "+str(pd.Series(Y_train).value_counts()))
#self.Debug("Y_test: "+str(pd.Series(Y_test).value_counts()))
#self.Debug("X_train shape: "+str(X_train.shape))
#self.Debug("X_test shape: "+str(X_test.shape))
#clf=RFC(n_estimators=100)
clf=LogisticRegression(C=0.1)
clf.fit(X_train,Y_train)
yhat=clf.predict(X_test)
                cm=confusion_matrix(Y_test,yhat,labels=[-1,0,1])  # pin the 3x3 layout even if a class is absent
self.Debug("Short acc: "+str(1.0*cm[0,0]/cm[0,:].sum()))
self.Debug("Long acc: " +str(1.0*cm[2,2]/cm[2,:].sum()))
                self.fitted_models.append(clf)

# CNN_classifier.py
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.preprocessing import OneHotEncoder
from keras.losses import cosine_proximity
from keras.models import Sequential
from keras.layers import LSTM,Dense,Activation,Flatten
from keras.layers.convolutional import Conv1D, AveragePooling1D, MaxPooling1D
from keras.layers.advanced_activations import LeakyReLU, ELU
from keras.layers.core import Dropout
from keras.layers.normalization import BatchNormalization
from keras import optimizers
from keras import initializers
from keras import regularizers
from keras.callbacks import ReduceLROnPlateau
from keras import backend as K
class CNNClassifier(BaseEstimator, ClassifierMixin):
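    # Sklearn-style (BaseEstimator/ClassifierMixin) wrapper around a small
    # 1-D convolutional network: stacked conv/pool/batch-norm/dropout blocks
    # feeding a 3-way softmax.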
def __init__(self,input_shape,conv_layers,dropout,pooling,target_dim):
self.input_shape=input_shape
self.conv_layers=conv_layers
self.dropout=dropout
if pooling=='max':
self.pooling=MaxPooling1D
else:
self.pooling=AveragePooling1D
self.n_classes_=target_dim
# build the model
self._build_model()
    def fit(self,X,Y,validation_data=None,sample_weight=None):
        """
        Y must be one-hot encoded (see prepareDataForCNN)
        """
        self.history = self.model.fit(X,Y,
                        epochs = 50,
                        batch_size = 256,
                        verbose=1,
                        validation_data=validation_data,
                        sample_weight=sample_weight,
                        shuffle=True,
                        callbacks=self.callbacks)
        return self  # sklearn convention: fit returns the estimator
def predict_proba(self,X):
return self.model.predict(X)
def _build_model(self):
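        # conv -> pooling -> batch-norm -> dropout blocks as configured in
        # conv_layers, flattened into a dense 3-way softmax; Nadam optimizer
        # with ReduceLROnPlateau on validation loss.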
layer_params=self.conv_layers[0]
model = Sequential()
model.add(Conv1D(input_shape=self.input_shape,
filters=layer_params['num_filters'],
kernel_size=layer_params['kernel_size'],
activity_regularizer=regularizers.l2(0.001),
activation=layer_params.get('activation','selu')))
model.add(self.pooling())
model.add(BatchNormalization())
model.add(Dropout(self.dropout))
for n in range(1,len(self.conv_layers)):
layer_params=self.conv_layers[n]
model.add(Conv1D(filters=layer_params['num_filters'],
kernel_size=layer_params['kernel_size'],
activity_regularizer=regularizers.l2(0.001),
activation=layer_params.get('activation','selu')))
model.add(self.pooling())
model.add(BatchNormalization())
model.add(Dropout(self.dropout))
model.add(Flatten()) # need to flatten the 3d convolution arrays in order to feed them into Dense layer
model.add(Dense(3,activation='softmax'))
opt=optimizers.Nadam(lr=0.001)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.8, patience=5, min_lr=0.000001, verbose=1)
model.compile(optimizer=opt,loss='categorical_crossentropy', metrics=['accuracy'])
self.model=model
        self.callbacks=[reduce_lr]
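
# Example usage (a minimal sketch mirroring the commented-out CNN path in
# modelFit; shapes assume the algorithm's window=128 and two input columns):
#   input_shape = (128, 2)
#   clf = CNNClassifier(input_shape,
#                       conv_layers=[{'kernel_size':4,'num_filters':4},
#                                    {'kernel_size':4,'num_filters':4}],
#                       dropout=0.5, pooling='avg', target_dim=3)
#   X_train,Y_train,X_test,Y_test = prepareDataForCNN(df,{'window':128,'pred_window':30,'threshold':0.0001})
#   clf.fit(X_train,Y_train,validation_data=(X_test,Y_test))
#   yhat = np.argmax(clf.predict_proba(X_test),axis=1)-1  # map classes back to -1/0/1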