| Overall Statistics |
|
Total Trades 2820 Average Win 0.74% Average Loss -0.74% Compounding Annual Return 25.261% Drawdown 33.500% Expectancy 0.253 Net Profit 852.706% Sharpe Ratio 1.059 Loss Rate 37% Win Rate 63% Profit-Loss Ratio 0.99 Alpha 0.074 Beta 1.192 Annual Standard Deviation 0.228 Annual Variance 0.052 Information Ratio 0.67 Tracking Error 0.151 Treynor Ratio 0.203 Total Fees $10767.64 |
import math
import numpy as np
import pandas as pd
import statsmodels.api as sm
from datetime import date, datetime, timedelta
from scipy import stats
class ExpectedIdiosyncraticSkewness(QCAlgorithm):
'''Step 1. Calculating Fama-French daily regression residuals
Step 2. Using daily residuals to calculate historical monthly moments
Step 3. Run regression of historical monthly moments to estimate regression coefficients
Step 4. Using historical monthly moments and estimated coefficients to calculate expected skewness
Step 5. Sorting symbols by skewness and long the ones with lowest skewness
Note: Fama-French factors data are only available up to 06/28/2019.
So, backtest is implemented up to end of June, 2019. And, live trading is not feasible for current version.
Reference:
[1] "Expected Idiosyncratic Skewness" by Boyer, Mitton and Vorkink, Rev Financ Stud, June 2009
URL: https://academic.oup.com/rfs/article-abstract/23/1/169/1578688?redirectedFrom=PDF
[2] https://dr.library.brocku.ca/bitstream/handle/10464/6426/Brock_Cao_Xu_2015.pdf
[3] Fama-French official data: https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html
'''
def Initialize(self):
self.SetStartDate(2009, 7, 1) # Set Start Date
self.SetEndDate(2019, 7, 1) # Set End Date
self.SetCash(100000) # Set Strategy Cash
self.AddEquity("SPY", Resolution.Daily) # Used to check trading days
self.__number_of_coarse_symbol = 200 # Set the number of coarse symbol to be further filtered by expected skewness
self.symbol_weight = pd.DataFrame() # DataFrame to save desired weights with symbols as index
self.month = 0 # Track current calendar month
self.next_trading_day = 0 # Track next trading day
self._get_fama_french_factors() # Download Fama French factors data as a dataframe
self.UniverseSettings.Resolution = Resolution.Daily # Subscribe daily data for selected symbols in universe
self.AddUniverse(self.CoarseSelectionAndSkewnessSorting) # Coarse Selection and Skewness Sorting
def CoarseSelectionAndSkewnessSorting(self, coarse):
'''Coarse selection to get an initial fixed universe for the skewness sorting trade logic.
Then, select the symbols to trade monthly based on skewness sorting.
'''
# if not last trading day at month end, return the unchanged universe
self.month = (self.Time - timedelta(days = 1)).month
self.next_trading_day = self.Securities["SPY"].Exchange.Hours.GetNextMarketOpen(self.Time, False)
if self.month == self.next_trading_day.month:
return Universe.Unchanged
self.Debug(f"Month end rebalance at: {self.Time}")
### Run the coarse selection to narrow down the universe
# Sort descendingly by daily dollar volume
sorted_by_volume = sorted(coarse, key = lambda x: x.DollarVolume, reverse = True)
fine = [ x.Symbol for x in sorted_by_volume[:self.__number_of_coarse_symbol] ]
### Select symbols to trade based on expected skewness at each month end
# Estimate expected idiosyncratic skewness
fine_and_skew = self.CalculateExpectedSkewness(fine)
# Select the lowest quintile and calculate desired weights
self.symbol_weight = pd.DataFrame(index = fine_and_skew.loc[:math.ceil(self.__number_of_coarse_symbol * 0.05)]['symbol'])
self.symbol_weight.loc[:,'weight'] = np.ones([len(self.symbol_weight), 1]) / len(self.symbol_weight)
self.Debug(f"Selected symbols to trade >>\n {list(self.symbol_weight.index)}\n")
return [self.Symbol(x) for x in self.symbol_weight.index] + [self.Symbol("SPY")]
def OnSecuritiesChanged(self, changes):
'''Liquidate symbols that are removed from the dynamic universe
'''
for security in changes.RemovedSecurities:
if security.Invested:
self.Liquidate(security.Symbol)
def OnData(self, data):
'''Rebalance at month end. Determine weights. Place orders.
'''
# if not last trading day at month end, return
if self.month == self.next_trading_day.month: return
# Placing orders
for symbol, row in self.symbol_weight.iterrows():
self.SetHoldings(symbol, row['weight'])
def CalculateExpectedSkewness(self, fine):
'''Calculate expected skewness using historical moments and estimated regression coefficients
'''
### Get predictors
# Get historical returns for two months
monthEnd_this = self.Time
monthEnd_lag_1 = (self.Time - timedelta(days = 1)).replace(day = 1)
monthEnd_lag_2 = (monthEnd_lag_1 - timedelta(days = 1)).replace(day = 1) # First day of last trading month
history = self.History(fine, monthEnd_lag_2 - timedelta(days = 1), monthEnd_this, Resolution.Daily) # Get one more day for price data
history = history["close"].unstack(level = 0)
daily_returns = (np.log(history) - np.log(history.shift(1)))[1:] # Drop the first day
# Merge Fama-French factors to daily returns based on dates available in return series
daily_returns['time'] = daily_returns.index
daily_returns = daily_returns.merge(self.fama_french_factors_per_day, left_on = 'time', right_on = 'time')
daily_returns_this = daily_returns[daily_returns['time'] > monthEnd_lag_1]
daily_returns_last = daily_returns[daily_returns['time'] <= monthEnd_lag_1]
daily_returns_dict = {monthEnd_this: daily_returns_this, monthEnd_lag_1: daily_returns_last}
# For each stock and each month, run fama-french time-series regression and calculate historical moments
column_list = list(daily_returns.columns)
predictor_list = []
for month, returns in daily_returns_dict.items():
for symbol in fine:
if str(symbol) not in column_list:
# self.Debug(f"Symbol not in return series >> {symbol}")
predictor_list.append([str(symbol), month, np.nan, np.nan])
continue
Y = (returns[str(symbol)] - returns['RF']).values
X = returns[['Mkt_RF', 'SMB', 'HML']].values
X = sm.add_constant(X)
results = sm.OLS(Y, X).fit()
hist_skew, hist_vol = stats.skew(results.resid), stats.tstd(results.resid) # Use daily residual to calculate monthly moments
predictor_list.append([str(symbol), month, hist_skew, hist_vol])
predictor = pd.DataFrame(predictor_list, columns = ['symbol', 'time', 'skew', 'vol'])
### Estimate coefficients by regressing current skewness on historical moments
Y = predictor[predictor['time'] == monthEnd_this]['skew'].values
X = predictor[predictor['time'] == monthEnd_lag_1][['skew', 'vol']].values
X = sm.add_constant(X)
results = sm.OLS(Y, X, missing = 'drop').fit()
coef = results.params
### Calculate expected skewness
predictor_t = predictor[predictor['time'] == monthEnd_this][['skew', 'vol']].values
ones = np.ones([len(predictor_t), 1])
predictor_t = np.append(ones, predictor_t, 1)
exp_skew = np.inner(predictor_t, coef)
skew_df = predictor[predictor['time'] == monthEnd_this][['symbol']].reset_index(drop = True)
skew_df.loc[:,'skew'] = exp_skew
skew_df = skew_df.sort_values(by = ['skew']).reset_index(drop = True)
return skew_df
def _get_fama_french_factors(self):
'''Download fama-french factors data from Github cloud and read it as a DataFrame.
Data is originally from Prof French's official homepage. I unzip the data folder and upload to Github cloud.
'''
tmp_list = []
data_str = self.Download("https://raw.githubusercontent.com/xinweimetrics/Tutorials/master/04%20Strategy%20Library/231%20Expected%20Idiosyncratic%20Skewness/F-F_Research_Data_Factors_daily.CSV")
data_lines = data_str.splitlines()
data_lines = data_lines[5:-2] # drop the first 5 and last 2 lines which are not data that we need
for line in data_lines:
data = line.split(',')
# add one day to match QC behavior: daily data is timestamped at UTC 00:00 the next day from the actual day that produced the data
tmp_list.append([datetime.strptime(data[0], "%Y%m%d") + timedelta(days = 1)] + [float(i) for i in data[1:]])
self.fama_french_factors_per_day = pd.DataFrame(tmp_list, columns = ['time', 'Mkt_RF', 'SMB', 'HML', 'RF'])
self.Debug(f"Downloading Fama-French data succeed! :: DataFrame shape >> {self.fama_french_factors_per_day.shape}")import math
import numpy as np
import pandas as pd
import statsmodels.api as sm
from datetime import date, datetime, timedelta
from scipy import stats
class ExpectedIdiosyncraticSkewness(QCAlgorithm):
'''Step 1. Calculating Fama-French daily regression residuals
Step 2. Using daily residuals to calculate historical monthly moments
Step 3. Run regression of historical monthly moments to estimate regression coefficients
Step 4. Using historical monthly moments and estimated coefficients to calculate expected skewness
Step 5. Sorting symbols by skewness and long the ones with lowest skewness
Note: Fama-French factors data are only available up to 06/28/2019.
So, backtest is implemented up to end of June, 2019. And, live trading is not feasible for current version.
Reference:
[1] https://academic.oup.com/rfs/article-abstract/23/1/169/1578688?redirectedFrom=PDF
[2] https://dr.library.brocku.ca/bitstream/handle/10464/6426/Brock_Cao_Xu_2015.pdf
[3] Fama-French official data: https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html
'''
def Initialize(self):
self.SetStartDate(2009, 7, 1) # Set Start Date
self.SetEndDate(2019, 7, 1) # Set End Date
self.SetCash(100000) # Set Strategy Cash
self.AddEquity("SPY", Resolution.Daily) # Used to check trading days
self.__number_of_coarse_symbol = 200 # Set the number of coarse symbol to be further filtered by expected skewness
self.predictor_list = [] # List to save monthly predictors for each symbol
self.initial_selection = True # Control initial selection runs only once
self._get_fama_french_factors() # Download Fama French factors data as a dataframe
self.UniverseSettings.Resolution = Resolution.Daily
self.AddUniverse(self.CoarseSelectionAndSkewnessSorting)
def CoarseSelectionAndSkewnessSorting(self, coarse):
'''Coarse selection to get an initial fixed universe for the skewness sorting trade logic.
Then, select the symbols to trade monthly based on skewness sorting.
'''
### Run the coarse universe selection only once at the beginning of strategy
if self.initial_selection:
# Select symbols with fundamental data
coarse_with_fundamental = [x for x in coarse if x.HasFundamentalData]
# Sort descendingly by daily dollar volume
sorted_by_volume = sorted(coarse_with_fundamental, key = lambda x: x.DollarVolume, reverse = True)
self.fine = [ x.Symbol for x in sorted_by_volume[:self.__number_of_coarse_symbol] ]
self.initial_selection = False
### Select symbols to trade based on expected skewness at each month end
# if not last trading day at month end, return the unchanged universe
self.month = (self.Time - timedelta(days = 1)).month
next_trading_day = self.Securities["SPY"].Exchange.Hours.GetNextMarketOpen(self.Time, False)
if self.month == next_trading_day.month:
return Universe.Unchanged
self.Debug(f"Month end rebalance at: {self.Time}")
# Estimate expected idiosyncratic skewness
skewness = self.ExpectedSkewness()
# Sort symbols by skewness
skewness_sorted = skewness.sort_values(by = ['skew']).reset_index(drop = True)
# Select the lowest quintile
self.low_skew = skewness_sorted.loc[:math.ceil(self.__number_of_coarse_symbol * 0.05), 'symbol']
self.Debug(f"Selected symbols to trade >>\n {self.low_skew}\n")
return [self.Symbol(x) for x in self.low_skew]
def OnData(self, data):
'''Rebalance at month end. Determine weights. Place orders.
'''
# if not last trading day at month end, return
next_trading_day = self.Securities["SPY"].Exchange.Hours.GetNextMarketOpen(self.Time, False)
self.month = (self.Time - timedelta(days = 1)).month
if self.month == next_trading_day.month: return
# Determine weights
weights = self.PortfolioWeights()
# Place orders
self.Liquidate()
for symbol in self.low_skew:
weight_i = weights[symbol]
self.SetHoldings(symbol, weight_i)
def ExpectedSkewness(self):
'''Calculate expected skewness using historical moments and estimated regression coefficients
'''
### Get predictors
self._get_predictors()
### Estimate coefficients by regressing current skewness on historical moments
if len(self.predictor['time'].unique()) == 1:
coef = [0, 1, 0]
else:
this_month = self.predictor['time'].iloc[-1]
last_month = self.predictor['time'].unique()[-2]
Y = self.predictor[self.predictor['time'] == this_month]['skew'].values
X = self.predictor[self.predictor['time'] == last_month][['skew', 'vol']].values
X = sm.add_constant(X)
results = sm.OLS(Y, X, missing = 'drop').fit()
coef = results.params
### Calculate expected skewness
this_month = self.predictor['time'].iloc[-1]
data_t = self.predictor[self.predictor['time'] == this_month][['skew', 'vol']].values
ones = np.ones([len(data_t), 1])
data_t = np.append(ones, data_t, 1)
exp_skew = np.inner(data_t, coef)
skew_df = self.predictor[self.predictor['time'] == this_month][['symbol']].reset_index(drop = True)
skew_df.loc[:,'skew'] = exp_skew
return skew_df
def PortfolioWeights(self):
'''Construct equal-weighted portfolio'''
weights = {}
for symbol in self.low_skew:
weights[symbol] = 1 / len(self.low_skew)
return weights
def _get_predictors(self):
'''Run Fama-French time-series regression to get residuals.
Then, use residuals to calculate historical moments.
'''
### Get historical returns for current month
end_day = self.Time
start_day = start_day = (self.Time - timedelta(days = 1)).replace(day = 1)
history = self.History(self.fine, start_day - timedelta(days = 1), end_day, Resolution.Daily) # Get one more day for price data
history = history.close.unstack(level = 0)
daily_returns = (np.log(history) - np.log(history.shift(1)))[1:].dropna() # Drop the first day
daily_returns['time'] = daily_returns.index
### Merge FF factors to returns dataframe based on dates available in return series
daily_returns = daily_returns.merge(self.fama_french_factors_per_day, left_on = 'time', right_on = 'time')
### Run fama-french time-series regression and calculate historical moments
column_list = list(daily_returns.columns)
for symbol in self.fine:
if str(symbol) not in column_list:
self.Debug(f"Symbol not in return series >> {symbol}")
self.predictor_list.append([str(symbol), self.Time, np.nan, np.nan])
continue
Y = (daily_returns[str(symbol)] - daily_returns['RF']).values
X = daily_returns[['Mkt_RF', 'SMB', 'HML']].values
X = sm.add_constant(X)
results = sm.OLS(Y, X).fit()
hist_skew, hist_vol = stats.skew(results.resid), stats.tstd(results.resid) # Use daily residual to calculate monthly moments
self.predictor_list.append([str(symbol), self.Time, hist_skew, hist_vol])
self.predictor = pd.DataFrame(self.predictor_list, columns = ['symbol', 'time', 'skew', 'vol'])
def _get_fama_french_factors(self):
'''Download fama-french factors data from Github cloud and read it as a DataFrame.
Data is originally from Prof French's official homepage. I unzip the data folder and upload to Github cloud.
'''
tmp_list = []
data_str = self.Download("https://raw.githubusercontent.com/xinweimetrics/Tutorials/master/04%20Strategy%20Library/231%20Expected%20Idiosyncratic%20Skewness/F-F_Research_Data_Factors_daily.CSV")
data_lines = data_str.splitlines()
data_lines = data_lines[5:-2] # drop the first 5 and last 2 lines which are not data that we need
for line in data_lines:
data = line.split(',')
# add one day to match QC behavior: daily data is timestamped at UTC 00:00 the next day from the actual day that produced the data
tmp_list.append([datetime.strptime(data[0], "%Y%m%d") + timedelta(days = 1)] + [float(i) for i in data[1:]])
self.fama_french_factors_per_day = pd.DataFrame(tmp_list, columns = ['time', 'Mkt_RF', 'SMB', 'HML', 'RF'])
self.Debug(f"Downloading Fama-French data succeed! :: DataFrame shape >> {self.fama_french_factors_per_day.shape}")# Your New Python File
# Your New Python File