Hi everyone, I'm quite new to QuantConnect, and I would appreciate some advice on a data collection problem. I am collecting BTC futures data and saving it to a DataFrame so that I can build machine learning models that generate trading signals. The algorithm works well when I add only the BTC future, but when I also add TLT and SPY, the DataFrame becomes empty. Does anyone know what the problem is? I would appreciate any recommendations.
The code is attached for your reference. I have commented out the following two lines; if you run the code as is, you can see that it works well when only the BTC future is added.
#self.AddEquity("SPY", Resolution.Minute)
#self.AddEquity("TLT", Resolution.Minute)
Thank you!
import clr
clr.AddReference("System")
clr.AddReference("QuantConnect.Algorithm")
clr.AddReference("QuantConnect.Common")
from System import *
from QuantConnect import *
from QuantConnect.Algorithm import *
import datetime
from datetime import timedelta
import numpy as np
from sklearn import preprocessing
from sklearn.linear_model import Ridge, Lasso
import pandas as pd
from math import floor
class ScikitLearnLinearRegressionAlgorithm(QCAlgorithm):
    """Fit a ridge regression on order-book imbalance features of a BTC future.

    The algorithm tracks the BTC futures contract with the highest open
    interest, periodically rebuilds a feature table from historical quote
    bars, and fits a ``Ridge`` model that predicts the next bar's
    open-to-close return. Quantiles of the in-sample predictions are stored
    as long/short signal thresholds.
    """

    # Regression inputs, in the column order the scaler and model are fit on.
    FEATURE_COLUMNS = [
        "VOI_SP", "VOI_SP_lag1", "VOI_SP_lag2", "VOI_SP_lag3", "VOI_SP_lag4",
        "VOI_SP_lag5", "OIR_SP", "OIR_SP_lag1", "OIR_SP_lag2", "OIR_SP_lag3",
        "OIR_SP_lag4", "OIR_SP_lag5",
    ]

    def Initialize(self):
        """Set the backtest window, model parameters, subscriptions and schedules."""
        self.SetStartDate(2018, 1, 4)  # Set Start Date
        self.SetEndDate(2018, 3, 4)  # Set End Date
        self.SetCash(1000000)  # Set Strategy Cash
        #self.Settings.FreePortfolioValuePercentage = 0.30

        self.timestamp = 60*24  # resampled bar length in minutes (1 day)
        self.lookback = 30*24*60  # 30 days, 1 month
        self.testing = 25*self.timestamp  # testing period table
        self.long_quantile = 0.5
        self.short_quantile = 0.5
        self.close_quantile = 0
        self.alpha = 0.1  # regularisation strength passed to Ridge

        self.BTC = self.AddFuture(Futures.Currencies.BTC, Resolution.Minute)
        # Additional subscriptions may be enabled; data_construction skips
        # history slices that carry no quote bar for the tracked contract.
        #self.AddEquity("SPY", Resolution.Minute)
        #self.AddEquity("TLT", Resolution.Minute)
        #self.SetBenchmark("BTCUSD")
        self.BTC.SetFilter(lambda x: x.FrontMonth())

        # Set by OnData once a futures chain has been received; None until then.
        self.contract_symbol = None

        self.Schedule.On(self.DateRules.MonthEnd(), self.TimeRules.At(23, 59), self.Regression)
        self.er_rebuild_model = 0  # 1 when the last model build failed
        self.run = 0  # 1 once a model has been fitted successfully
        self.Schedule.On(self.DateRules.EveryDay(), self.TimeRules.At(23, 59), self.handle_error)

    def OnData(self, slice):
        """Track the contract with the highest open interest in each futures chain."""
        for chain in slice.FutureChains:
            contracts_list = [contract for contract in chain.Value]
            if not contracts_list:
                # An empty chain has nothing to select; keep the previous symbol.
                continue
            ideal_contract = sorted(contracts_list, key=lambda k: k.OpenInterest, reverse=True)[0]
            self.contract_symbol = ideal_contract.Symbol

    def data_construction(self, look_back_period):
        """Build the feature table for the tracked contract from history.

        Args:
            look_back_period: Value forwarded to ``self.History`` together
                with ``Resolution.Minute`` (presumably the number of minute
                bars to request -- confirm against the History API).

        Returns:
            A ``pandas.DataFrame`` indexed by bar time containing quote
            columns, the ``Return`` target and the VOI/OIR features with
            their five lags. Rows with missing or infinite values are
            removed.

        Raises:
            ValueError: If no contract has been selected yet, or if the
                history contains no quote bars for the selected contract.
        """
        symbol = self.contract_symbol
        if symbol is None:
            raise ValueError("No futures contract has been selected yet")

        slices = self.History(look_back_period, Resolution.Minute)
        timestamps = []
        bidclose = []
        bidsize = []
        askclose = []
        asksize = []
        openprice = []
        close = []
        for s in slices:
            # With several subscriptions, a slice may hold data for other
            # securities only; indexing QuoteBars blindly would raise.
            if not s.QuoteBars.ContainsKey(symbol):
                continue
            bar = s.QuoteBars[symbol]
            if bar.Bid is None or bar.Ask is None:
                # One-sided quote: bid/ask closes are unavailable.
                continue
            timestamps.append(s.Time)
            bidclose.append(bar.Bid.Close)
            bidsize.append(int(bar.LastBidSize))
            askclose.append(bar.Ask.Close)
            asksize.append(int(bar.LastAskSize))
            openprice.append(bar.Open)
            close.append(bar.Close)

        if not timestamps:
            raise ValueError("History returned no quote bars for " + str(symbol))

        df = pd.DataFrame(
            {"bidclose": bidclose, "bidsize": bidsize, "askclose": askclose,
             "asksize": asksize, "open": openprice, "close": close},
            index=timestamps)

        if self.timestamp != 1:
            # Resample the minute data. All aggregates are taken from the
            # same minute-level resampler; aggregating an already resampled
            # frame would yield the last size / last open instead of the
            # per-period average size and the first open.
            bars = df.resample(str(self.timestamp) + "min")
            resampled = bars.last()
            resampled[["bidsize", "asksize"]] = bars[["bidsize", "asksize"]].sum() / self.timestamp
            resampled["open"] = bars["open"].first()
            df = resampled

        df['bidpricechange_lag'] = df['bidclose'] - df['bidclose'].shift(1)
        df['askpricechange_lag'] = df['askclose'] - df['askclose'].shift(1)
        df['bidsizediff_lag'] = df['bidsize'] - df['bidsize'].shift(1)
        df['asksizediff_lag'] = df['asksize'] - df['asksize'].shift(1)
        df = df.dropna(axis=0)

        # Bid side: price up -> full bid size, price down -> 0,
        # unchanged -> change in bid size.
        bid_change = df['bidpricechange_lag']
        df['deltaVolumeBid'] = np.where(
            bid_change > 0, df['bidsize'],
            np.where(bid_change < 0, 0, df['bidsizediff_lag']))

        # Ask side mirrors the bid side: price up -> 0, price down -> full
        # ask size, unchanged -> change in ask size.
        ask_change = df['askpricechange_lag']
        df['deltaVolumeAsk'] = np.where(
            ask_change > 0, 0,
            np.where(ask_change < 0, df['asksize'], df['asksizediff_lag']))

        # Target: open-to-close return of the NEXT bar.
        df['Return'] = (df['close'].shift(-1) / df['open'].shift(-1)) - 1
        df['VOI'] = df['deltaVolumeBid'] - df['deltaVolumeAsk']
        df['OIR'] = (df['bidsize'] - df['asksize']) / (df['bidsize'] + df['asksize'])
        df['SP'] = df['askclose'] - df['bidclose']
        df['VOI_SP'] = df['VOI'] / df['SP']
        df['OIR_SP'] = df['OIR'] / df['SP']
        for lag in range(1, 6):
            df['VOI_SP_lag' + str(lag)] = df['VOI_SP'].shift(lag)
        for lag in range(1, 6):
            df['OIR_SP_lag' + str(lag)] = df['OIR_SP'].shift(lag)

        # A zero spread or zero total size yields +/-inf, which dropna would
        # keep and the scaler would reject; treat those rows as missing.
        df = df.replace([np.inf, -np.inf], np.nan)
        df = df.dropna(axis=0)
        return df

    def Regression(self):
        """Rebuild the feature table, refit the model and refresh thresholds.

        On success the scaler, model and signal thresholds are stored on the
        instance, ``run`` is set to 1 and ``er_rebuild_model`` to 0. On any
        failure ``er_rebuild_model`` is set to 1 and the cause is logged, so
        that ``handle_error`` retries the build.
        """
        try:
            df = self.data_construction(self.lookback)
            X = df[self.FEATURE_COLUMNS]
            scaler = preprocessing.StandardScaler().fit(X)
            X_scaled = scaler.transform(X)
            Model = Ridge(alpha=self.alpha).fit(X_scaled, df["Return"])
            df['yhat'] = Model.predict(X_scaled)

            # Publish state only after fitting succeeded, so that the scaler
            # and the model always belong to the same build.
            self.scaler = scaler
            self.long = df['yhat'].quantile(self.long_quantile)
            self.closelong = df['yhat'].quantile(self.long_quantile - self.close_quantile)
            self.short = df['yhat'].quantile(self.short_quantile)
            self.closeshort = df['yhat'].quantile(self.short_quantile + self.close_quantile)
            self.MLmodel = Model
            self.run = 1
            self.Debug("long signal " + str(self.long))
            self.Debug("short signal " + str(self.short))
            # Debug is given strings, as in the calls above.
            self.Debug(str(self.MLmodel))
            self.Debug(str(self.Time))
            self.er_rebuild_model = 0
        except Exception as err:
            self.er_rebuild_model = 1
            self.Debug("Model need to be rebuilt in the upcoming day " + str(self.Time))
            # Report the cause instead of swallowing it silently.
            self.Debug("Model build failed: " + repr(err))

    def handle_error(self):
        """Retry the model build if the previous attempt failed."""
        if self.er_rebuild_model == 1:
            self.Regression()
Derek Melchin
Hi Tin Yat Chau,
To resolve the issue, we pass `self.contract_symbol` to the History method. Therefore, we should replace lines 69-78 above with
slices = self.History(self.contract_symbol, look_back_period, Resolution.Minute)
if slices.empty:
    return
expiry = list(slices.index.levels[0])[0]
history = slices.loc[expiry].loc[self.contract_symbol]
df = history[['bidclose', 'bidsize', 'askclose', 'asksize', 'open', 'close']]
See the attached backtest for reference.
Best,
Derek Melchin
The material on this website is provided for informational purposes only and does not constitute an offer to sell, a solicitation to buy, or a recommendation or endorsement for any security or strategy, nor does it constitute an offer to provide investment advisory services by QuantConnect. In addition, the material offers no opinion with respect to the suitability of any security or specific investment. QuantConnect makes no guarantees as to the accuracy or completeness of the views expressed in the website. The views are subject to change, and may have become unreliable for various reasons, including changes in market conditions or economic circumstances. All investments involve risk, including loss of principal. You should consult with an investment professional before making any investment decisions.
Tin Yat Chau
Thanks Derek!
I have tried passing `self.contract_symbol` to the History method, but the DataFrame is always empty when I pass the symbol. If you look at the logs, the model information is never printed because the DataFrame is empty.
There are several overloads of the History method. I believe the one I am currently using is the only one that returns QuoteBar objects. The current version works well if I add only the BTC future; the problems appear only when I add other securities. Let me know if you have any ideas. Thank you!
Derek Melchin
Hi Tin Yat Chau,
We recommend reviewing the algorithm above. It passes `self.contract_symbol` to the History method and the resulting DataFrame is not empty. Note the algorithm above also subscribes to "SPY" and "TLT". The following lines just need to be removed from the `Regression` method to train and predict with the model:
self.Quit(df.head().to_string())
return
Best,
Derek Melchin
The material on this website is provided for informational purposes only and does not constitute an offer to sell, a solicitation to buy, or a recommendation or endorsement for any security or strategy, nor does it constitute an offer to provide investment advisory services by QuantConnect. In addition, the material offers no opinion with respect to the suitability of any security or specific investment. QuantConnect makes no guarantees as to the accuracy or completeness of the views expressed in the website. The views are subject to change, and may have become unreliable for various reasons, including changes in market conditions or economic circumstances. All investments involve risk, including loss of principal. You should consult with an investment professional before making any investment decisions.
Tin Yat Chau
The material on this website is provided for informational purposes only and does not constitute an offer to sell, a solicitation to buy, or a recommendation or endorsement for any security or strategy, nor does it constitute an offer to provide investment advisory services by QuantConnect. In addition, the material offers no opinion with respect to the suitability of any security or specific investment. QuantConnect makes no guarantees as to the accuracy or completeness of the views expressed in the website. The views are subject to change, and may have become unreliable for various reasons, including changes in market conditions or economic circumstances. All investments involve risk, including loss of principal. You should consult with an investment professional before making any investment decisions.
To unlock posting to the community forums please complete at least 30% of Boot Camp.
You can continue your Boot Camp training progress from the terminal. We hope to see you in the community soon!