# Overall Statistics
# https://quantpedia.com/strategies/skewness-effect-in-commodities/
#
# The investment universe consists of 27 futures contracts on commodities. Each month, the investor calculates the skewness (3rd moment of returns)
# from daily returns over the past 12 months for all futures. Commodities are then sorted into quintiles, and the investor goes
# long the quintile containing the commodities with the 20% lowest total skewness and short the quintile containing the commodities with the 20% highest
# total skewness (over a ranking period of 12 months). The resultant portfolio is equally weighted and rebalanced each month.

import numpy as np
from scipy.stats import skew

class Skewness_Effect(QCAlgorithm):
    """Long/short commodity-futures portfolio ranked by return skewness.

    At the start of every month the skewness of each contract's daily
    returns over the last ``lookup_period`` observations is computed.
    The fifth of contracts with the lowest skewness is bought, the fifth
    with the highest skewness is sold short, and all positions are
    equally weighted.
    """

    def Initialize(self):
        """Set the backtest window, subscribe to data and schedule rebalancing."""
        self.SetStartDate(1991, 1, 1)
        self.SetEndDate(2019, 1, 1)
        self.SetCash(100000)
        
        self.symbols = ["CME_S1",   # Soybean Futures, Continuous Contract
                        "CME_W1",   # Wheat Futures, Continuous Contract
                        "CME_SM1",  # Soybean Meal Futures, Continuous Contract
                        "CME_BO1",  # Soybean Oil Futures, Continuous Contract
                        "CME_C1",   # Corn Futures, Continuous Contract
                        "CME_O1",   # Oats Futures, Continuous Contract
                        "CME_LC1",  # Live Cattle Futures, Continuous Contract
                        "CME_FC1",  # Feeder Cattle Futures, Continuous Contract
                        "CME_LN1",  # Lean Hog Futures, Continuous Contract
                        "CME_GC1",  # Gold Futures, Continuous Contract
                        "CME_SI1",  # Silver Futures, Continuous Contract
                        "CME_PL1",  # Platinum Futures, Continuous Contract
                        "CME_CL1",  # Crude Oil Futures, Continuous Contract
                        "CME_HG1",  # Copper Futures, Continuous Contract
                        "CME_NG1",  # Natural Gas (Henry Hub) Physical Futures, Continuous Contract
                        "CME_PA1",  # Palladium Futures, Continuous Contract 
                        
                        "ICE_CC1",  # Cocoa Futures, Continuous Contract 
                        "ICE_CT1",  # Cotton No. 2 Futures, Continuous Contract
                        "ICE_KC1",  # Coffee C Futures, Continuous Contract
                        "ICE_O1",   # Heating Oil Futures, Continuous Contract
                        "ICE_OJ1",  # Orange Juice Futures, Continuous Contract
                        "ICE_SB1"   # Sugar No. 11 Futures, Continuous Contract
                        ]
        
        # Ranking window in daily bars: 12 months x 21 bars (presumably ~21 trading days per month).
        self.lookup_period = 12*21
        self.SetWarmup(2*self.lookup_period)
        
        # True -> Quantpedia data
        # False -> Quandl free data
        self.use_quantpedia_data = True
        
        if not self.use_quantpedia_data:
            self.symbols = ['CHRIS/' + x for x in self.symbols]

        # Both feeds are custom data types defined further down in this file.
        data_type = QuantpediaFutures if self.use_quantpedia_data else QuandlFutures
        for symbol in self.symbols:
            self.AddData(data_type, symbol, Resolution.Daily)
        
        self.Schedule.On(self.DateRules.MonthStart(self.symbols[0]), self.TimeRules.AfterMarketOpen(self.symbols[0]), self.Rebalance)
    
    def Rebalance(self):
        """Rank the contracts by skewness and rebuild the long/short portfolio.

        Contracts without a complete ``lookup_period`` history, or whose
        skewness is not a finite number, are left out of the ranking. When
        fewer than five contracts can be ranked no quintile can be formed,
        so the portfolio is liquidated and left flat until the next run.
        """
        if self.IsWarmingUp: return
    
        # Skewness calculation
        skewness_data = {}
        for symbol in self.symbols:
            # NOTE: There's no need to exclude last day from history anymore, since we download data the right way -> with no Look-Ahead Bias.
            hist = self.History([symbol], 2*self.lookup_period, Resolution.Daily)
            if 'settle' not in hist:
                continue

            prices = np.array(hist['settle'][-self.lookup_period:])
            returns = (prices[1:]-prices[:-1])/prices[:-1]
            if len(returns) != self.lookup_period-1:
                continue

            # scipy.stats.skew with default arguments returns the biased
            # Fisher-Pearson coefficient (third central moment / std**3).
            skewness = skew(returns)

            # A constant or zero price series yields NaN/inf; such values
            # cannot be ordered reliably, so the contract is not ranked.
            if np.isfinite(skewness):
                skewness_data[symbol] = skewness
                
        # Skewness sorting (highest skewness first)
        sorted_by_skewness = sorted(skewness_data.items(), key = lambda x: x[1], reverse = True)
        quintile = len(sorted_by_skewness) // 5
        
        # Trade execution
        self.Liquidate()

        # With quintile == 0 the slice [-0:] would select every contract and
        # the weight 1/(2*quintile) would raise ZeroDivisionError.
        if quintile == 0:
            return

        top_symbols = [x[0] for x in sorted_by_skewness[:quintile]]
        low_symbols = [x[0] for x in sorted_by_skewness[-quintile:]]
        
        # Half of the capital goes to each leg, split equally inside the leg.
        weight = 1/(2*quintile)
        for symbol in low_symbols:
            self.SetHoldings(symbol, weight)
        for symbol in top_symbols:
            self.SetHoldings(symbol, -weight)

# Quantpedia data
class QuantpediaFutures(PythonData):
    """Custom data type that reads settle prices from Quantpedia CSV files.

    Each usable line has the form ``<dd.mm.YYYY>;<settle>``. Lines that do
    not start with a digit (e.g. a header) or cannot be parsed are skipped.
    """

    def GetSource(self, config, date, isLiveMode):
        """Return the remote CSV source for the symbol in ``config``."""
        url = "https://quantpedia.com/backtesting_data/futures/{0}.csv".format(config.Symbol)
        return SubscriptionDataSource(url, SubscriptionTransportMedium.RemoteFile, FileFormat.Csv)

    def Reader(self, config, line, date, isLiveMode):
        """Parse one CSV line into a data point, or return None to skip it."""
        # Empty lines and lines that do not begin with a date are not data rows.
        if not line or not line[0].isdigit():
            return None

        split = line.split(';')
        try:
            # The timestamp is moved one day past the date in the file,
            # presumably so the settle price only becomes visible after the
            # day it refers to (no look-ahead) -- TODO confirm.
            timestamp = datetime.strptime(split[0], "%d.%m.%Y") + timedelta(days=1)
            settle = float(split[1])
        except (IndexError, ValueError):
            # Missing column, malformed date or non-numeric price.
            return None

        data = QuantpediaFutures()
        data.Symbol = config.Symbol
        data.Time = timestamp
        data['settle'] = settle
        data.Value = settle
        return data

# Quandl free data
class QuandlFutures(PythonQuandl):
    """Custom data type for the Quandl feed, subscribed when ``use_quantpedia_data`` is False."""

    def __init__(self):
        # 'settle' is the same column name that Rebalance reads from the history frame.
        # Presumably this tells the base class which column supplies the value -- verify
        # against the PythonQuandl documentation.
        self.ValueColumnName = "settle"