I am trying to gather some fundamental data on earnings dates from a large dataset that I found on Kaggle. There are >5k symbols and >90k earnings events over 5 years.  I plan to sort the earnings dates based on their fundamental info prior to the earnings release, so I am trying to get the data just on the earnings days.  Since QuantBook.GetFundamental() only returns daily data, I thought pulling every day would be a bit overkill.

The code below pulls several Morningstar selectors for each earnings date and saves them to a list.  However, I randomly get "Your kernel has died and will be restarted" errors.  That is why I started saving to the ObjectStore every 500 dates.  Sometimes it runs for 300 dates or so; sometimes it runs for 3000 dates.  I am about halfway through now, having saved about 45k dates with the fundamental data.  I have had to restart the notebook maybe 100 times at this point.

This is not a large dataset: only about 4MB so far, and about 10MB when all is said and done.  What is causing the kernel to crash?  Is there anything I am doing incorrectly?

 

QuantConnect Logo from clr import AddReference AddReference("System") AddReference('System.Memory') #AddReference("QuantConnect.Common") from System import * #from QuantConnect import * #from QuantConnect.Data.Market import TradeBar, QuoteBar from datetime import datetime, timedelta import pandas as pd import cloudpickle, os import pickle import numpy qb = QuantBook() days = 3 #must be odd days only date_start = datetime(2015,1,1) date_end = datetime(2020,12,31) quantQuote = pickle.loads(bytes(qb.ObjectStore.ReadBytes('quantQuote'))) qqSymbols = quantQuote.symbol.unique() print(len(qqSymbols)) quantQuote.head() del quantQuote 19019 earnings = pickle.loads(bytes(qb.ObjectStore.ReadBytes('earnings'))).reset_index() allSymbols = earnings.symbol.unique() print('number of symbols in earnings data, ' + str(len(allSymbols))) earnings = earnings.query('symbol in @qqSymbols') mask = (earnings['edate'] > date_start) & (earnings['edate'] < date_end) earnings = earnings.loc[mask] del mask del qqSymbols print('number of symbols after filtering on QuantQuote smybols, ' + str(len(allSymbols))) print('number of earnings events, ' + str(len(earnings))) earnings.head() number of symbols in earnings data, 5282 number of symbols after filtering on QuantQuote smybols, 5282 number of earnings events, 90572 index symbol edate qtr eps_est eps release_time 23 23 A 2015-02-17 Jan-15 0.41 0.41 post 24 24 A 2015-05-18 Apr-15 0.39 0.38 post 25 25 A 2015-08-17 Jul-15 0.41 0.44 post 26 26 A 2015-11-16 Oct-15 0.47 0.50 post 27 27 A 2016-02-16 Jan-16 0.43 0.46 post def ShowData(): qb = QuantBook() keys = [str(j).split(',')[0][1:] for _, j in enumerate(qb.ObjectStore.GetEnumerator())] sizes = [os.path.getsize(qb.ObjectStore.GetFilePath(key)) for key in keys] for i in keys: print(f"{i} - {os.path.getsize(qb.ObjectStore.GetFilePath(i))/1e6:.1f} MB") print(f"Total size: {sum(sizes)/1e6:.1f} MB") ShowData() earnings_fund_data - 8.5 MB quantQuote - 0.8 MB EarningsData_1501_3000_sym - 1.1 MB 
EarningsData_0_1500_sym - 1.7 MB EarningsData_4501_end_sym - 0.4 MB earnings - 6.6 MB EarningsData_3001_4500_sym - 1.6 MB Total size: 20.6 MB edates = earnings.copy(deep=True) edates = edates[['symbol','edate']].reset_index() edates = edates.drop(['index'],axis=1) del earnings def GetFundFromSymbol(s,d,codes): l = [s,d] try: for i in range(len(codes)): f = qb.GetFundamental(s, codes[i],d,d) v = f[f.columns[0]][0] l.append(v) return l except: l.extend(numpy.repeat(float('NaN'),len(codes))) return l %%time import time codes = ["ValuationRatios.PERatio", "ValuationRatios.BookValueYield", "ValuationRatios.RatioPE5YearAverage", "ValuationRatios.NormalizedPERatio", "ValuationRatios.ForwardROE", "ValuationRatios.ForwardROA", "ValuationRatios.TotalAssetPerShare", "EarningReports.BasicAverageShares.ThreeMonths", "EarningReports.BasicEPS.TwelveMonths"] qb = QuantBook() try: data = pickle.loads(bytes(qb.ObjectStore.ReadBytes('earnings_fund_data'))) last = data[len(data)-1] lastSym = last[0] lastDate = last[1] start = False except: data = [] start = True for i,row in edates.iterrows(): s = row['symbol'] d = row['edate'] if start==True: l = GetFundFromSymbol(s,d,codes) data.append(l) if (i % 100) == 0: print(str(i) + ', ' + "{:.4f} %".format(100*i/len(edates)) + ', ' + datetime.now().strftime("%m/%d/%Y, %H:%M:%S")) if (i % 500) == 0: qb.ObjectStore.SaveBytes('earnings_fund_data', bytearray(cloudpickle.dumps(data))) qb = QuantBook() print('Saved data, total items=' + str(len(data))) time.sleep(1) else: if (s==lastSym) & (d==lastDate): start = True print('starting on iter=' + str(i)) print('Completed\n') qb.ObjectStore.SaveBytes('earnings_fund_data', bytearray(cloudpickle.dumps(data))) print('Saved data, total items=' + str(len(data))) cols = ['symol','edate'] cols.extend(codes) df = pd.DataFrame(data, columns = cols) df.head() qb.ObjectStore.SaveBytes('earnings_fund_df', bytearray(cloudpickle.dumps(df))) starting on iter=42500 42600, 47.0344 %, 12/03/2020, 22:09:17 42700, 
47.1448 %, 12/03/2020, 22:09:28 42800, 47.2552 %, 12/03/2020, 22:09:38

 

Author