Backtest

Overall Statistics
Total Orders 4370 Average Win 0.10% Average Loss -0.10% Compounding Annual Return -0.006% Drawdown 18.600% Expectancy 0.001 Start Equity 100000 End Equity 99942.38 Net Profit -0.058% Sharpe Ratio -0.755 Sortino Ratio -0.634 Probabilistic Sharpe Ratio 0.007% Loss Rate 49% Win Rate 51% Profit-Loss Ratio 0.98 Alpha -0.023 Beta -0.001 Annual Standard Deviation 0.03 Annual Variance 0.001 Information Ratio -0.61 Tracking Error 0.153 Treynor Ratio 38.968 Total Fees $156.65 Estimated Strategy Capacity $0 Lowest Capacity Asset 600132.ChineseStocks 2S Portfolio Turnover 0.80%

#region imports
from AlgorithmImports import *
import bz2
import pickle
import base64
import numpy as np
from typing import List, Dict, OrderedDict
#endregion

def initialize_QP_custom_data(algo: QCAlgorithm, leverage: int, top_count: int, period: int) -> Dict:
    """Subscribe to the Quantpedia Chinese large-cap universe and create per-symbol state.

    Downloads the ticker list, subscribes the first ``top_count`` tickers as
    ``ChineseStocks`` custom data at daily resolution, attaches ``CustomFeeModel``
    and the requested leverage to each subscription, and creates one ``SymbolData``
    per subscribed symbol.

    Args:
        algo: Algorithm instance used to download the ticker file and add subscriptions.
        leverage: Leverage set on every subscribed security.
        top_count: Number of tickers to subscribe, taken from the top of the file
            after its first (header) line.
        period: Window length handed to each ``SymbolData``.

    Returns:
        Mapping from the subscribed symbol to its ``SymbolData``.
    """
    QP_data: Dict[Symbol, SymbolData] = {}
    ticker_file_str: str = algo.download('data.quantpedia.com/backtesting_data/equity/chinese_stocks/large_cap_500.csv')

    # splitlines() handles both '\r\n' and '\n' endings; splitting on '\r\n' only
    # would leave an LF-terminated file as one giant line and yield no tickers.
    ticker_lines: List[str] = ticker_file_str.splitlines()

    # The first line is skipped as a header *before* truncating, so the header does
    # not consume one of the ``top_count`` slots. Blank lines are ignored so no
    # empty ticker is ever subscribed.
    candidates = (ticker_line.split(',')[0].strip() for ticker_line in ticker_lines[1:])
    tickers: List[str] = [ticker for ticker in candidates if ticker][:max(top_count, 0)]

    for t in tickers:
        # price data subscription
        data: Security = algo.add_data(ChineseStocks, t, Resolution.DAILY)
        data.set_fee_model(CustomFeeModel())
        data.set_leverage(leverage)

        QP_data[data.symbol] = SymbolData(period)

    return QP_data

class SymbolData:
    """Rolling store of one symbol's recent prices plus a return-percentile helper."""

    def __init__(
        self,
        period: int,
    ) -> None:
        """Create an empty price window.

        Args:
            period: Capacity passed to the underlying ``RollingWindow``.
        """
        self._prices: RollingWindow = RollingWindow[float](period)

    def _update_price(self, price: float) -> None:
        """Add ``price`` to the window."""
        self._prices.add(price)

    def _is_ready(self) -> bool:
        """Return the window's ``is_ready`` flag."""
        return self._prices.is_ready

    def _get_percentile(self, percentile: float) -> np.float64:
        """Return a percentile of the period-over-period returns of the stored prices.

        Args:
            percentile: Forwarded unchanged to ``np.percentile``, i.e. expressed on
                numpy's 0-100 scale (95 means the 95th percentile).

        Returns:
            The requested percentile of the simple returns.
        """
        # The stored sequence is reversed before differencing so that returns are
        # computed in the opposite order to how the window iterates.
        stored = list(self._prices)
        reversed_prices = pd.Series(stored[::-1])
        # The first pct_change entry has no predecessor (NaN) and is dropped.
        simple_returns: np.ndarray = reversed_prices.pct_change().values[1:]
        return np.percentile(simple_returns, percentile)

# Chinese stock price/volume data.
# NOTE: IMPORTANT: Data order must be ascending (datewise)
class ChineseStocks(PythonData):
    """Custom data type exposing Quantpedia's Chinese stock price file to the algorithm.

    Every subscription reads the same remote file; ``Reader`` picks out the entries
    for the subscribed ticker and also records, per symbol, the latest date for
    which that ticker had data.
    """

    # Class-level registry shared by all instances: symbol -> most recent date on
    # which the symbol appeared in the decoded file. Filled in by Reader.
    _last_update_date:Dict[Symbol, datetime.date] = {}

    @staticmethod
    def get_last_update_date() -> Dict[Symbol, datetime.date]:
       """Return the shared symbol -> last-data-date registry (the dict itself, not a copy)."""
       return ChineseStocks._last_update_date

    def GetSource(self, config: SubscriptionDataConfig, date: datetime, isLiveMode: bool) -> SubscriptionDataSource:
        """Return the remote file to read, declared as an unfolding collection.

        The arguments are not used: the same source is returned for every call.
        """
        return SubscriptionDataSource("data.quantpedia.com/backtesting_data/equity/chinese_stocks/large_cap_300_close_open_size.dat", SubscriptionTransportMedium.REMOTE_FILE, FileFormat.UNFOLDING_COLLECTION)

    def Reader(self, config:SubscriptionDataConfig, line: str, date: datetime, isLiveMode: bool) -> BaseDataCollection:
        """Decode one line of the source file into a collection of data points.

        The line is base64-decoded, bz2-decompressed and unpickled into a list of
        samples. Each sample is read through its 'date' key ('%Y-%m-%d' string) and
        its 'stocks' key (mapping looked up by ``config.symbol.value``). One
        ``ChineseStocks`` object is produced per sample, whether or not the
        subscribed ticker is present in it.

        Args:
            config: Subscription config; supplies the symbol attached to every object.
            line: Encoded payload to decode.
            date: Not used.
            isLiveMode: Not used.

        Returns:
            A ``BaseDataCollection`` holding all produced objects, stamped with the
            ``EndTime`` of the last one.
        """
        # Per-stock fields as listed by the original author:
        #   closePrice     = daily close price
        #   turnoverVol    = daily share volume
        #   marketValue    = market cap
        #   negMarketValue = shares outstanding * closePrice
        #
        # Further values can be derived from them:
        #   shares outstanding = negMarketValue / closePrice
        #   turnoverValue      = closePrice * turnoverVol
        #   turnoverRatio      = turnoverValue / negMarketValue
        #
        # NOTE(review): the code below also reads an 'openPrice' key, which the list
        # above does not mention.

        objects:list[ChineseStocks] = []

        # NOTE(review): pickle.loads runs on data downloaded from a remote host;
        # unpickling can execute arbitrary code, so this relies on the source being trusted.
        base64_bytes = line.encode('ascii')
        data_to_decompress = base64.b64decode(base64_bytes)
        decompressed_data = bz2.decompress(data_to_decompress)
        data:list[dict] = pickle.loads(decompressed_data)

        for index, sample in enumerate(data):
            custom_data: ChineseStocks = ChineseStocks()
            custom_data.symbol = config.symbol

            curr_date: datetime = datetime.strptime(sample['date'], '%Y-%m-%d')# + timedelta(days=1)
            custom_data.Time = curr_date
            custom_data.EndTime = curr_date + timedelta(days=1)

            # NOTE(review): Time is assigned a second time, after EndTime. If EndTime
            # shares storage with Time on this base type (presumably the case for
            # LEAN's BaseData -- confirm), this line moves the timestamp back to
            # curr_date, so it must not be removed as a plain duplicate.
            custom_data.Time = curr_date
            if config.symbol.value in sample['stocks']:
                # Ticker present in this sample: keep the raw record and expose the
                # open price as the data point's value.
                custom_data['price_data'] = sample['stocks'][config.symbol.value]
                custom_data.value = float(sample['stocks'][config.symbol.value]['openPrice'])
                custom_data.close = float(sample['stocks'][config.symbol.value]['closePrice'])
                custom_data.open = float(sample['stocks'][config.symbol.value]['openPrice'])

                # store last date of the symbol
                if config.symbol not in ChineseStocks._last_update_date:
                    ChineseStocks._last_update_date[config.symbol] = datetime(1,1,1).date()
                if custom_data.Time.date() > ChineseStocks._last_update_date[config.symbol]:
                    ChineseStocks._last_update_date[config.symbol] = custom_data.Time.date()
            else:
                # Ticker absent from this sample: emit a placeholder with value 0.
                custom_data['price_data'] = {}
                custom_data.Value = 0

            objects.append(custom_data)

        # objects[-1] assumes the decoded list is non-empty.
        return BaseDataCollection(objects[-1].EndTime, config.symbol, objects)

# Custom fee model
class CustomFeeModel(FeeModel):
    """Fee model charging a fixed fraction of the traded notional."""

    def GetOrderFee(self, parameters):
        """Return the fee for an order: price * absolute quantity * 0.00005, in USD.

        Args:
            parameters: Object providing ``security.price`` and
                ``order.absolute_quantity``.
        """
        security_price = parameters.security.price
        traded_quantity = parameters.order.absolute_quantity
        notional = security_price * traded_quantity
        return OrderFee(CashAmount(notional * 0.00005, "USD"))

# https://quantpedia.com/strategies/righ-tail-vs-left-tail-stock-picking-strategy-in-china
# 
# The investment universe for this strategy consists of stocks listed on the Chinese stock market (China A-share common stocks traded on the Shanghai Stock 
# Exchange (SHSE) and the Shenzhen Stock Exchange (SZSE)).
# (All data used in this paper are sourced from the China Stock Market & Accounting Research Database (CSMAR). The risk-free interest rate for calculating 
# excess returns is based on the one-year China Treasury bond rate, applicable to both daily and monthly frequencies. Monthly excess returns on the market 
# (MKT), size (SMB), value (HML), momentum (MOM), profitability (RMW), and investment (CMA) factors, as defined by Fama and French (2018), are also obtained 
# from CSMAR.)
# Fundamental Recapitulation: Individual instruments are selected based on their Value at Risk (VaR) measures, explicitly focusing on the right-tail bonus 
# (VaR95) and left-tail risk (VaR5). Stocks are chosen for long or short positions depending on their quintile rankings in these measures, as described in 
# the research paper. The universe is mainly focused on stocks that attract significant retail investor attention, as these are more likely to exhibit the 
# right-tail reversal phenomenon. This approach aims to exploit the mispricing caused by retail investors' lottery-like preferences.
# Computational Process: The trading rules involve calculating the VaR95 and VaR5 for each stock in the investment universe. The methodology consists of 
# sorting stocks monthly into quintiles based on their VaR95 values to identify the right-tail bonus. Within each VaR95 quintile, stocks are further sorted 
# into quintiles based on their VaR5 values to assess left-tail risk.
# Investment Strategy: Perform the investment strategy based on dependent double sorts of VaR95 (RT) and VaR5 (LT): quintile portfolios are formed every 
# month based on right-tail bonus VaR95 first, and then additional quintile portfolios are formed based on left-tail risk VaR5 within each VaR95 quintile.
# The investment strategy longs stocks with the lowest right-tail bonus and highest left-tail risk, and
# shorts stocks with the highest right-tail bonus and lowest left-tail risk, i.e., RT1LT5-RT5LT1.
# Rebalancing & Weighting: The strategy involves rebalancing the portfolio monthly to adjust for changes in VaR95 and VaR5 values and maintain the desired 
# exposure. To maintain a market-neutral stance, the quintile sorting process determines the number of positions, with equal capital allocated to each long 
# and short position.
# 
# QC Implementation changes:
#   - QP large cap Chinese custom data are used as trading universe (data available from 2015 to 2022).

# region imports
from AlgorithmImports import *
import data_tools
from typing import List, Dict
# endregion

class RighTailVsLeftTailStockPickingStrategyInChina(QCAlgorithm):
    """Monthly long/short strategy ranking Chinese large caps by return-tail percentiles.

    On every data slice the latest non-zero price of each non-excluded stock is
    recorded. On the first processed slice of each calendar month, stocks whose
    price window is full are ranked by a right-tail and a left-tail percentile of
    their returns, and a long/short book is built from the extreme quintiles.
    """

    def initialize(self) -> None:
        """Set the backtest start and cash, strategy parameters and the data universe."""
        self.set_start_date(2015, 1, 1)
        self.set_cash(100_000)

        # Prices of these tickers are never recorded, so they never enter the ranking.
        self._excluded_tickers: List[str] = ['601398', '601939', '600941', '300750', '601288']

        self._quantile: int = 5
        # Right-tail percentile on the 0-100 scale: the value is handed to
        # SymbolData._get_percentile, which forwards it to np.percentile. The left
        # tail uses 100 - this value. Passing a fraction (.95 / .05) would request
        # the 0.95th and 0.05th percentiles, i.e. two nearly identical measures of
        # the worst return in the window.
        self._percentile: float = 95.
        top_count: int = 300
        leverage: int = 5
        # 12 * 21 stored prices per symbol (presumably 12 months of ~21 trading days).
        period: int = 12 * 21

        self._data: Dict[Symbol, data_tools.SymbolData] = data_tools.initialize_QP_custom_data(
            self,
            leverage,
            top_count,
            period,
        )

        self.settings.minimum_order_margin_portfolio_percentage = 0.
        self.settings.daily_precise_end_time = False
        # Month of the last rebalance attempt; -1 forces one on the first slice.
        self._current_month: int = -1

    def on_data(self, slice: Slice) -> None:
        """Record prices and, once per calendar month, rebalance the long/short book.

        Args:
            slice: Data delivered for the current time step.
        """
        # Check if custom data is still coming in.
        price_last_update_date: Dict[Symbol, datetime.date] = data_tools.ChineseStocks.get_last_update_date()
        if any(
            self.securities[x].get_last_data() and self.time.date() >= price_last_update_date[x]
            for x in price_last_update_date
        ):
            if self.portfolio.invested:
                self.log('QP chinese custom data stopped coming.')
                self.liquidate()
            return

        # Store daily price data.
        for symbol, symbol_data in self._data.items():
            if symbol.value in self._excluded_tickers:
                continue
            if slice.contains_key(symbol) and slice[symbol]:
                price: float = slice[symbol].value
                # A zero value marks a placeholder point without real price data.
                if price != 0:
                    symbol_data._update_price(price)

        # Monthly rebalance.
        if self.time.month == self._current_month:
            return
        self._current_month = self.time.month

        # Calculate Value at Risk (VaR) measures: (right tail, left tail) per symbol.
        right_tail_percentile: float = self._percentile
        left_tail_percentile: float = 100 - self._percentile
        performance: Dict[Symbol, tuple[Any, Any]] = {
            symbol: (
                symbol_data._get_percentile(right_tail_percentile),
                symbol_data._get_percentile(left_tail_percentile),
            )
            for symbol, symbol_data in self._data.items()
            if symbol_data._is_ready()
        }

        # Fewer symbols than quantiles would give a bucket size of 0, and the
        # [-0:] slices below would then select the whole list.
        if len(performance) < self._quantile:
            self.log('Not enough data to further sorting.')
            return

        # Sort and divide.
        # NOTE(review): the strategy description above the class asks for a dependent
        # double sort (left-tail quintiles formed inside each right-tail quintile).
        # The code ranks the two measures independently and concatenates the extreme
        # buckets; left unchanged here -- confirm which is intended.
        RT_sorted: List[tuple[Symbol, tuple[Any, Any]]] = sorted(performance.items(), key=lambda x: x[1][0])
        RT_quantile: int = len(RT_sorted) // self._quantile
        RT_low: List[Symbol] = [x[0] for x in RT_sorted][:RT_quantile]
        RT_high: List[Symbol] = [x[0] for x in RT_sorted][-RT_quantile:]

        LT_sorted: List[tuple[Symbol, tuple[Any, Any]]] = sorted(performance.items(), key=lambda x: x[1][1])
        LT_quantile: int = len(LT_sorted) // self._quantile
        LT_low: List[Symbol] = [x[0] for x in LT_sorted][:LT_quantile]
        LT_high: List[Symbol] = [x[0] for x in LT_sorted][-LT_quantile:]

        # Trade execution.
        # Each leg spreads the full portfolio value equally over its entries; a symbol
        # listed twice in a leg, or in both legs, has its targets summed.
        portions: Dict[Symbol, float] = {}
        for i, portfolio in enumerate([RT_low + LT_high, RT_high + LT_low]):
            # i == 0 is the long leg (+1), i == 1 the short leg (-1).
            signed_allocation: float = ((-1) ** i) * (self.portfolio.total_portfolio_value / len(portfolio))
            for symbol in portfolio:
                if slice.contains_key(symbol) and slice[symbol]:
                    if symbol not in portions:
                        portions[symbol] = 0
                    portions[symbol] += signed_allocation

        # Close positions that are no longer part of the target book.
        invested: List[Symbol] = [x.key for x in self.portfolio if x.value.invested]
        for symbol in invested:
            if symbol not in portions:
                self.liquidate(symbol)

        # Move every target symbol from its current holding to the target quantity.
        for symbol, portion in portions.items():
            if slice.contains_key(symbol) and slice[symbol]:
                price = slice[symbol].price
                if price != 0:
                    # Truncate toward zero: floor division would round negative
                    # (short) targets away from zero and oversize them by a share.
                    quantity: int = int(portion / price)
                    self.market_order(symbol, quantity - self.portfolio[symbol].quantity)