I am trying to gather some fundamental data on earnings dates from a large dataset that I found on Kaggle. There are >5k symbols and >90k earnings events over 5 years.  I plan to sort the earnings dates based on their fundamental info prior to the earnings release, so I am trying to get the data only for the earnings days.  Since QuantBook.GetFundamental() only returns daily data, I thought requesting every day would be a bit overkill.

The code below pulls several Morningstar selectors for each earnings date and saves them to a list.  However, I get random "Your kernel has died and will be restarted" errors, which is why I started saving to the ObjectStore every 500 dates.  Sometimes it runs for 300 dates or so; sometimes it runs for 3000 dates.  I am about halfway through now, having saved about 45k dates with the fundamental data.  I have had to restart the notebook maybe 100 times at this point.

This is not a large dataset: only about 4MB so far, and about 10MB when all is said and done.  What is causing the kernel to crash?  Is there anything I am doing incorrectly?

 



QuantConnect Logo

from clr import AddReference
AddReference("System")
AddReference('System.Memory')
#AddReference("QuantConnect.Common")
from System import *
#from QuantConnect import *
#from QuantConnect.Data.Market import TradeBar, QuoteBar
from datetime import datetime, timedelta
import pandas as pd
import cloudpickle, os
import pickle
import numpy

# Research-notebook handle used for every ObjectStore / fundamental-data call.
qb = QuantBook()

days = 3  # must be odd days only
date_start, date_end = datetime(2015, 1, 1), datetime(2020, 12, 31)

# Load the pickled QuantQuote frame, keep only its distinct symbols, then
# drop the frame so it does not stay resident for the rest of the session.
quantQuote = pickle.loads(bytes(qb.ObjectStore.ReadBytes('quantQuote')))
qqSymbols = quantQuote['symbol'].unique()
print(len(qqSymbols))
quantQuote.head()
del quantQuote

19019

# Load the pickled earnings table and move its index into a regular column.
earnings = pickle.loads(bytes(qb.ObjectStore.ReadBytes('earnings'))).reset_index()
# Distinct symbols in the raw table (computed BEFORE any filtering).
allSymbols = earnings.symbol.unique()

print('number of symbols in earnings data, ' + str(len(allSymbols)))

# Keep only symbols present in the QuantQuote list, then only events strictly
# inside (date_start, date_end).
earnings = earnings.query('symbol in @qqSymbols')
mask = (earnings['edate'] > date_start) & (earnings['edate'] < date_end)
earnings = earnings.loc[mask]
del mask
del qqSymbols

# Count the symbols on the filtered frame. The previous version printed
# len(allSymbols) here, which was taken before filtering and therefore always
# repeated the first number. allSymbols itself is left untouched.
print('number of symbols after filtering on QuantQuote smybols, ' + str(len(earnings['symbol'].unique())))
print('number of earnings events, ' + str(len(earnings)))
earnings.head()

number of symbols in earnings data, 5282
number of symbols after filtering on QuantQuote smybols, 5282
number of earnings events, 90572

index symbol edate qtr eps_est eps release_time
23 23 A 2015-02-17 Jan-15 0.41 0.41 post
24 24 A 2015-05-18 Apr-15 0.39 0.38 post
25 25 A 2015-08-17 Jul-15 0.41 0.44 post
26 26 A 2015-11-16 Oct-15 0.47 0.50 post
27 27 A 2016-02-16 Jan-16 0.43 0.46 post

def ShowData():
    """Print the file size of every ObjectStore entry, followed by the total."""
    qb = QuantBook()
    store = qb.ObjectStore
    # The key is taken from each enumerated entry's string form: the text
    # before the first comma, minus its leading character.
    names = [str(entry).split(',')[0][1:] for entry in store.GetEnumerator()]
    byte_counts = [os.path.getsize(store.GetFilePath(name)) for name in names]
    for name, n_bytes in zip(names, byte_counts):
        print(f"{name} - {n_bytes/1e6:.1f} MB")
    print(f"Total size: {sum(byte_counts)/1e6:.1f} MB")


ShowData()

earnings_fund_data - 8.5 MB
quantQuote - 0.8 MB
EarningsData_1501_3000_sym - 1.1 MB
EarningsData_0_1500_sym - 1.7 MB
EarningsData_4501_end_sym - 0.4 MB
earnings - 6.6 MB
EarningsData_3001_4500_sym - 1.6 MB
Total size: 20.6 MB

# Reduce the earnings table to the (symbol, edate) pairs to be fetched,
# renumbered 0..n-1, and release the full table.
edates = earnings[['symbol', 'edate']].reset_index(drop=True)
del earnings

def GetFundFromSymbol(s, d, codes):
    """Fetch one value per fundamental selector for a symbol on a single date.

    Args:
        s: Symbol passed to ``qb.GetFundamental``.
        d: Date used as both the start and the end of each request.
        codes: Sequence of selector strings, queried one at a time in order.

    Returns:
        A list ``[s, d, v_0, ..., v_{n-1}]`` whose length is always
        ``2 + len(codes)``. Lookups stop at the first selector that fails;
        that selector and every later one are reported as NaN.
    """
    # NOTE(review): the file does ``from System import *``, which may rebind
    # the bare name ``Exception`` to the .NET type; going through ``builtins``
    # guarantees the Python base class is the one being caught.
    import builtins

    row = [s, d]
    try:
        for code in codes:
            frame = qb.GetFundamental(s, code, d, d)
            # First row of the first column of the returned frame.
            row.append(frame[frame.columns[0]][0])
    except builtins.Exception:
        # Best effort: a failed lookup must not abort the whole run. Unlike a
        # bare ``except:`` this lets KeyboardInterrupt through, so the loop
        # can still be stopped by interrupting the kernel.
        # Pad only the slots that are still missing; appending len(codes)
        # NaNs after a partial success would make the row too long.
        row.extend([float('nan')] * (2 + len(codes) - len(row)))
    return row

%%time
import time
# NOTE(review): the file does ``from System import *``, which may rebind the
# bare name ``Exception``; ``builtins.Exception`` is unambiguous.
import builtins

# Morningstar selectors fetched for every (symbol, earnings date) pair; their
# order is also the column order of the final DataFrame.
codes = ["ValuationRatios.PERatio",
         "ValuationRatios.BookValueYield",
         "ValuationRatios.RatioPE5YearAverage",
         "ValuationRatios.NormalizedPERatio",
         "ValuationRatios.ForwardROE",
         "ValuationRatios.ForwardROA",
         "ValuationRatios.TotalAssetPerShare",
         "EarningReports.BasicAverageShares.ThreeMonths",
         "EarningReports.BasicEPS.TwelveMonths"]
qb = QuantBook()

# Resume support: if a checkpoint exists, remember its last (symbol, date) row
# and skip forward in ``edates`` until that row has been passed.
try:
    data = pickle.loads(bytes(qb.ObjectStore.ReadBytes('earnings_fund_data')))
    last = data[-1]
    lastSym = last[0]
    lastDate = last[1]
    start = False
except builtins.Exception:
    # No checkpoint, an unreadable one, or an empty one: begin from row 0.
    data = []
    start = True

total = len(edates)
# Iterating the two columns directly avoids building a Series per row, which
# is what ``iterrows`` does.
for i, s, d in zip(edates.index, edates['symbol'], edates['edate']):
    if not start:
        # Still inside the already-saved prefix; fetching restarts on the row
        # AFTER the one that matches the last checkpointed entry.
        if s == lastSym and d == lastDate:
            start = True
            print('starting on iter=' + str(i))
        continue

    data.append(GetFundFromSymbol(s, d, codes))
    if i % 100 == 0:
        print(f"{i}, {100 * i / total:.4f} %, " + datetime.now().strftime("%m/%d/%Y, %H:%M:%S"))
    if i % 500 == 0:
        # Checkpoint the full list so a kernel restart loses at most 500 rows.
        qb.ObjectStore.SaveBytes('earnings_fund_data', bytearray(cloudpickle.dumps(data)))
        # NOTE(review): re-creating QuantBook at every checkpoint is kept from
        # the original; confirm it is needed, since repeated instantiation may
        # itself add memory pressure.
        qb = QuantBook()
        print('Saved data, total items=' + str(len(data)))
        time.sleep(1)

if not start:
    # The checkpoint's last row never appeared in ``edates``, so every row was
    # skipped. Say so instead of reporting a silent "Completed".
    print('WARNING: last checkpointed row not found in edates; no new rows were fetched')

print('Completed\n')
qb.ObjectStore.SaveBytes('earnings_fund_data', bytearray(cloudpickle.dumps(data)))
print('Saved data, total items=' + str(len(data)))

# 'symbol' (previously misspelled 'symol') matches the column name in edates.
cols = ['symbol', 'edate']
cols.extend(codes)
# Rows may be longer than len(cols): when a fetch failed part-way, a full set
# of NaNs was appended after the values already collected. Trimming to
# len(cols) yields exactly the collected values followed by NaN padding, and
# keeps the constructor from rejecting the data over a column-count mismatch.
df = pd.DataFrame([row[:len(cols)] for row in data], columns=cols)
df.head()
qb.ObjectStore.SaveBytes('earnings_fund_df', bytearray(cloudpickle.dumps(df)))

starting on iter=42500
42600, 47.0344 %, 12/03/2020, 22:09:17
42700, 47.1448 %, 12/03/2020, 22:09:28
42800, 47.2552 %, 12/03/2020, 22:09:38