Overall Statistics
Total Orders
5396
Average Win
0.37%
Average Loss
-0.35%
Compounding Annual Return
-3.690%
Drawdown
58.800%
Expectancy
-0.035
Start Equity
100000
End Equity
67757.61
Net Profit
-32.242%
Sharpe Ratio
-0.266
Sortino Ratio
-0.259
Probabilistic Sharpe Ratio
0.001%
Loss Rate
53%
Win Rate
47%
Profit-Loss Ratio
1.05
Alpha
-0.036
Beta
-0.031
Annual Standard Deviation
0.145
Annual Variance
0.021
Information Ratio
-0.516
Tracking Error
0.212
Treynor Ratio
1.259
Total Fees
$971.11
Estimated Strategy Capacity
$0
Lowest Capacity Asset
600460.ChineseStocks 2S
Portfolio Turnover
5.88%
#region imports
from AlgorithmImports import *
import bz2
import pickle
import base64
import numpy as np
from typing import List, Dict, OrderedDict
#endregion

def initialize_QP_custom_data(algo: QCAlgorithm, leverage: int, top_count: int, period: int) -> Dict:
    """Subscribe to the Quantpedia Chinese large-cap universe and build per-symbol state.

    Downloads the ticker list, subscribes the first ``top_count`` tickers as
    ``ChineseStocks`` custom data at daily resolution, attaches the custom fee
    model and the requested leverage to each subscription, and creates one
    ``SymbolData`` per subscribed symbol.

    Args:
        algo: Algorithm used to download the ticker file and add subscriptions.
        leverage: Leverage set on every subscribed security.
        top_count: Number of tickers to subscribe. The first row of the file is
            skipped and does not count toward this number.
        period: Length passed to each ``SymbolData`` (its rolling volume window).

    Returns:
        Mapping from each subscribed symbol to its ``SymbolData``.
    """
    QP_data: Dict[Symbol, SymbolData] = {}
    ticker_file_str: str = algo.download('data.quantpedia.com/backtesting_data/equity/chinese_stocks/large_cap_500.csv')

    # splitlines() accepts both '\r\n' and '\n' endings and never yields a
    # trailing empty entry. The first row is skipped before the top_count cut
    # so that exactly top_count tickers are taken; blank rows are ignored.
    ticker_lines: List[str] = [line for line in ticker_file_str.splitlines()[1:] if line.strip()]
    tickers: List[str] = [ticker_line.split(',')[0] for ticker_line in ticker_lines[:top_count]]

    for t in tickers:
        # price data subscription
        data: Security = algo.add_data(ChineseStocks, t, Resolution.DAILY)
        data.set_fee_model(CustomFeeModel())
        data.set_leverage(leverage)
        stock_symbol: Symbol = data.symbol

        QP_data[stock_symbol] = SymbolData(period)

    return QP_data

class SymbolData():
    """Per-symbol store of daily observations and the metrics derived from them.

    The list attributes are accumulators that `_reset` empties; the rolling
    volume window and the two "current" snapshot values are kept across resets.
    """

    def __init__(
        self,
        period: int
    ) -> None:
        # Rolling window holding the last `period` volume values.
        self._daily_volume: RollingWindow = RollingWindow[float](period)

        # Most recent snapshot values.
        self._current_marketcap: float = 0
        self._current_shares_outstanding: float = 0

        # Accumulators emptied by _reset().
        self._prices: List[float] = []
        self._daily_trading_volume: List[float] = []
        self._daily_shares: List[float] = []
        self._daily_AVOL: List[float] = []
        self._daily_TOVR: List[float] = []

    def _update_daily_data(self, price: float, volume: float, daily_shares: float, shares_outstanding: float, marketcap: float) -> None:
        """Record one daily observation and, once ready, the daily AVOL and TOVR."""
        # Snapshot values first, then the accumulators and the rolling window.
        self._current_marketcap = marketcap
        self._current_shares_outstanding = shares_outstanding
        self._daily_volume.add(volume)
        self._prices.append(price)
        self._daily_shares.append(daily_shares)
        self._daily_trading_volume.append(volume)

        if not self._is_ready():
            return

        # Daily abnormal volume: today's volume over the window average.
        window_average = np.mean(list(self._daily_volume))
        self._daily_AVOL.append(float(volume / window_average))
        # Daily turnover: shares traded over shares outstanding.
        self._daily_TOVR.append(daily_shares / shares_outstanding)

    def _get_data(self) -> tuple[float, float]:
        """Return (AVOL, TOVR) computed over the accumulated observations."""
        accumulated_volume = sum(self._daily_trading_volume)
        window_average = np.mean(list(self._daily_volume))
        abnormal_volume: float = float(accumulated_volume / window_average)

        accumulated_shares = sum(self._daily_shares)
        turnover: float = accumulated_shares / self._current_shares_outstanding
        return abnormal_volume, turnover

    def _get_std(self) -> tuple[float, float]:
        """Return the standard deviations of the daily AVOL and daily TOVR series."""
        avol_std = float(np.std(self._daily_AVOL))
        tovr_std = float(np.std(self._daily_TOVR))
        return avol_std, tovr_std

    def _distance_data_is_ready(self) -> bool:
        """Tell whether both daily series hold at least one value."""
        return bool(self._daily_AVOL) and bool(self._daily_TOVR)

    def _is_ready(self) -> bool:
        """Tell whether the window is full, both snapshots are non-zero and a price exists."""
        if not self._daily_volume.is_ready:
            return False
        snapshots_set = self._current_shares_outstanding != 0 and self._current_marketcap != 0
        return snapshots_set and len(self._prices) != 0

    def _reset(self) -> None:
        """Empty every accumulator list; the window and snapshots are left untouched."""
        for series in (
            self._prices,
            self._daily_trading_volume,
            self._daily_shares,
            self._daily_AVOL,
            self._daily_TOVR,
        ):
            series.clear()

    def _get_performance(self) -> float:
        """Return the simple return from the first to the last accumulated price."""
        latest_price = self._prices[-1]
        first_price = self._prices[0]
        return latest_price / first_price - 1

# Chinese stock price/volume data.
# NOTE: IMPORTANT: Data order must be ascending (datewise)
class ChineseStocks(PythonData):
    """Custom data type carrying daily price/volume/size data for one Chinese stock.

    The whole history is read from a single remote file; `Reader` turns each
    line of that file into a collection with one object per dated sample.
    """

    # Latest sample date seen per symbol, shared by all instances (class-level dict).
    _last_update_date:Dict[Symbol, datetime.date] = {}

    @staticmethod
    def get_last_update_date() -> Dict[Symbol, datetime.date]:
       """Return the shared mapping of symbol to its latest sample date."""
       return ChineseStocks._last_update_date

    def GetSource(self, config:SubscriptionDataConfig, date:datetime, isLiveMode:bool) -> SubscriptionDataSource:
        """Return the remote-file source, read as an unfolding collection.

        The same URL is returned for every call; `config`, `date` and
        `isLiveMode` are not used.
        """
        return SubscriptionDataSource("data.quantpedia.com/backtesting_data/equity/chinese_stocks/large_cap_500_price_volume_shares_size.dat", SubscriptionTransportMedium.REMOTE_FILE, FileFormat.UNFOLDING_COLLECTION)
   
    def Reader(self, config:SubscriptionDataConfig, line:str, date:datetime, isLiveMode:bool) -> BaseData:
        """Decode one line of the source file into a collection of daily objects.

        The line is base64-decoded, bz2-decompressed and unpickled into a list
        of dicts. Each dict is read through its 'date' key ('%Y-%m-%d' string)
        and its 'stocks' key (mapping of ticker to that day's fields).

        Returns:
            A BaseDataCollection holding one ChineseStocks object per sample,
            stamped with the EndTime of the last object.
        """
        # Columns: ['closePrice', 'turnoverVol', 'negMarketValue', 'marketValue']

        # closePrice = daily close price
        # turnoverVol = daily share volume
        # marketValue = market cap
        # negMarketValue = shares outstd * closePrice 
        #
        # more can be calculated based of it:
        #
        # shares outstd = negMarketValue / closePrice
        # turnoverValue = closePrice * turnoverVol
        # turnoverRatio = turnoverValue / negMarketValue

        objects:list[ChineseStocks] = []

        # base64 text -> bz2 bytes -> pickled list of daily samples.
        # NOTE(review): pickle.loads executes arbitrary code embedded in the
        # payload, so this is only safe while the remote file is fully trusted.
        base64_bytes = line.encode('ascii')
        data_to_decompress = base64.b64decode(base64_bytes)
        decompressed_data = bz2.decompress(data_to_decompress)
        data:list[dict] = pickle.loads(decompressed_data)

        # `index` is not used inside the loop.
        for index, sample in enumerate(data):
            custom_data:ChineseStocks = ChineseStocks()
            custom_data.symbol = config.symbol
            
            # The sample covers [date, date + 1 day).
            curr_date: datetime = datetime.strptime(sample['date'], '%Y-%m-%d')
            custom_data.Time = curr_date
            custom_data.EndTime = curr_date + timedelta(days=1)

            # NOTE(review): Time is assigned a second time after EndTime. This may be
            # deliberate if setting EndTime also moves Time on this data type --
            # confirm against the LEAN PythonData implementation before removing.
            custom_data.Time = curr_date
            if config.symbol.value in sample['stocks']:
                # Keep the raw field dict and expose the main fields as attributes.
                custom_data['price_data'] = sample['stocks'][config.symbol.value]
                custom_data.Value = float(sample['stocks'][config.symbol.value]['closePrice'])
                custom_data.Close = float(sample['stocks'][config.symbol.value]['closePrice'])
                custom_data.TurnoverVol = float(sample['stocks'][config.symbol.value]['turnoverVol'])
                custom_data.MarketCap = float(sample['stocks'][config.symbol.value]['marketValue'])

                # store last date of the symbol
                if config.symbol not in ChineseStocks._last_update_date:
                    ChineseStocks._last_update_date[config.symbol] = datetime(1,1,1).date()
                if custom_data.Time.date() > ChineseStocks._last_update_date[config.symbol]:
                    ChineseStocks._last_update_date[config.symbol] = custom_data.Time.date()
            else:
                # Ticker absent from this sample: emit a placeholder with no fields and Value 0.
                custom_data['price_data'] = {}
                custom_data.Value = 0

            objects.append(custom_data)

        # objects[-1] assumes the decoded list holds at least one sample.
        return BaseDataCollection(objects[-1].EndTime, config.symbol, objects)

# Custom fee model
class CustomFeeModel(FeeModel):
    """Fee model charging a fixed fraction of each order's traded value."""

    def GetOrderFee(self, parameters):
        """Return the order fee: security price x absolute quantity x 0.00005, in USD."""
        unit_price = parameters.Security.Price
        quantity = parameters.Order.AbsoluteQuantity
        traded_value = unit_price * quantity
        return OrderFee(CashAmount(traded_value * 0.00005, "USD"))
# https://quantpedia.com/strategies/similar-attention-stocks-effect-in-chinese-stocks
# 
# The sample stocks contain all A-shares listed on the main boards of the Shanghai Stock Exchange (SSE), Shenzhen Stock Exchange (SZSE), and the Growth 
# Enterprise Market (GEM). ST shares are excluded.
# (All A-share stock trading data, financial statement data, and CSRC historic industry classification documents are sourced from CSMAR. The Baidu search 
# volume index (SVI), available only after 2011, is directly obtained from the Chinese Research Data Services Platform (CNRDS).)
# Fundamental Recapitulation: Individual instruments are selected based on their similar-attention returns (SA_Rtn), which are calculated using abnormal 
# trading volume, turnover rate, and monthly variability. The top 10% of stocks with the nearest Euclidean distances on these dimensions are considered 
# similar-attention peers. As the research indicates, the methodology assumes that SA_Rtn can negatively predict future returns. No additional technical 
# indicators are used; the focus is solely on the SA_Rtn metric derived from the similar-attention peer group.
# Computational Process: At the end of each month, each stock's similar-attention peers are defined as the 10% of other stocks with the smallest Euclidean 
# distance to the focal stock on four dimensions: abnormal trading volume, turnover rate, and the monthly variability of each. SA_Rtn is the arithmetic 
# average of the monthly returns of the similar-attention stocks; the calculation follows equations (1) and (2) in section 2.1 "Similar-attention Stocks" (page 11).
# Then, portfolio sorting using the cross-stock reversal effect was performed on SA_Rtn: All sample stocks are sorted into deciles according to their SA_Rtn.
# Strategy Execution (details from 3.1 Univariate Sorting and Investor Clientele): The strategy involves constructing a long-short portfolio by buying stocks 
# in the lowest decile (those with the lowest SA_Rtn) and selling stocks in the highest decile (those with the highest SA_Rtn).
# Rebalancing & Weighting: All portfolios are held for one month and rebalanced monthly (at the end of each month). Stocks within each decile group can be 
# weighted equally (selected by us in this report) or by market capitalization (value-weighted).
# 
# QC Implementation changes:
#   - QP large cap Chinese custom data are used as trading universe (data available from 2015 to 2022).
#   - Traded portfolio is weighted equally.

# region imports
from AlgorithmImports import *
import data_tools
# endregion

class SimilarAttentionStocksEffectInChineseStocks(QCAlgorithm):
    """Monthly long-short strategy sorting stocks on similar-attention returns.

    Every day the algorithm stores price, traded value and share volume for the
    tracked custom-data symbols. On the first `on_data` call of a new calendar
    month it computes, for each stock, the Euclidean distance to every other
    stock on four dimensions (AVOL, TOVR and the standard deviation of each),
    averages the accumulated performance of the nearest peers together with the
    stock itself, then goes long the lowest and short the highest quantile of
    that average.
    """

    def initialize(self) -> None:
        """Set the backtest parameters and subscribe to the custom-data universe."""
        self.set_start_date(2015, 1, 1)
        self.set_cash(100_000)

        # Applied to the ordered symbol list; only symbols inside this slice are tracked.
        self._excluded_slice: slice = slice(5, -15)

        self._value_weighted_flag: bool = False
        self._quantile: int = 10
        top_count: int = 300
        leverage: int = 5
        period: int = 252

        self._data: Dict[Symbol, data_tools.SymbolData] = data_tools.initialize_QP_custom_data(self, leverage, top_count, period)

        self.settings.minimum_order_margin_portfolio_percentage = 0.
        self.settings.daily_precise_end_time = False
        self._current_month: int = -1

    def _reset_monthly_data(self) -> None:
        """Clear the monthly accumulators of every symbol."""
        for symbol_data in self._data.values():
            symbol_data._reset()

    def on_data(self, slice: Slice) -> None:
        """Store the day's observations and rebalance when a new month starts.

        Args:
            slice: Data slice for the current time step.
        """
        # Check if custom data is still coming in.
        price_last_update_date: Dict[Symbol, datetime.date] = data_tools.ChineseStocks.get_last_update_date()
        if any(self.securities[x].get_last_data() and self.time.date() >= price_last_update_date[x] for x in price_last_update_date):
            if self.portfolio.invested:
                self.log('QP China custom data stopped coming.')
                self.liquidate()
            return

        # Store daily price data.
        # The tracked subset does not change inside the loop, so build it once.
        tracked_symbols: List[Symbol] = list(self._data.keys())[self._excluded_slice]
        for symbol, symbol_data in self._data.items():
            if symbol.value not in tracked_symbols:
                continue
            if slice.contains_key(symbol) and slice[symbol]:
                price_data: Dict[str, float] = slice[symbol].price_data
                price: float = slice[symbol].value
                marketcap: float = price_data.get('marketValue', 0)
                daily_shares: float = price_data.get('turnoverVol', 0)
                # Skip incomplete observations; they would also cause divisions by zero below.
                if any(x == 0 for x in [marketcap, daily_shares, price]):
                    continue

                shares_outstanding: float = marketcap / price
                volume: float = daily_shares * price

                symbol_data._update_daily_data(price, volume, daily_shares, shares_outstanding, marketcap)

        # Monthly rebalance.
        if self.time.month == self._current_month:
            return
        self._current_month = self.time.month

        # Compute each stock's four distance dimensions once per rebalance:
        # (AVOL, TOVR, std of daily AVOL, std of daily TOVR).
        features: Dict[Symbol, tuple] = {}
        for symbol, symbol_data in self._data.items():
            if symbol_data._distance_data_is_ready():
                features[symbol] = symbol_data._get_data() + symbol_data._get_std()

        similar_stock_performance: Dict[Symbol, float] = {}

        # Calculate euclidean distances between stocks.
        for base_symbol, base_features in features.items():
            current_distances: List[tuple[Any, Any]] = []
            for compared_symbol, compared_features in features.items():
                # Skip the focal stock itself. The comparison must use
                # base_symbol, not a variable left over from the daily loop.
                if compared_symbol == base_symbol:
                    continue

                if not self._data[compared_symbol]._is_ready():
                    continue

                distance: float = np.sqrt(
                    sum((base_value - compared_value) ** 2 for base_value, compared_value in zip(base_features, compared_features))
                )

                current_distances.append((compared_symbol, distance))

            # Smallest distance.
            if len(current_distances) >= self._quantile:
                sorted_distances: List[tuple[Any, Any]] = sorted(current_distances, key=lambda x: x[1])
                quantile: int = len(sorted_distances) // self._quantile
                similar_stocks: List[Symbol] = [x[0] for x in sorted_distances][:quantile]

                # The focal stock's own performance is averaged in with its peers (counted once).
                similar_stock_performance[base_symbol] = np.mean([self._data[symbol]._get_performance() for symbol in similar_stocks + [base_symbol]])

        if len(similar_stock_performance) < self._quantile:
            self.log('Not enough stocks to further sorting.')
            self.liquidate()
            # Clear the accumulators here as well, so the next rebalance only
            # sees one month of data instead of everything since the last reset.
            self._reset_monthly_data()
            return

        # Sort and divide.
        sorted_performance: List[tuple[Any, Any]] = sorted(similar_stock_performance.items(), key=lambda x: x[1])
        quantile = len(sorted_performance) // self._quantile
        long: List[Symbol] = [x[0] for x in sorted_performance][:quantile]
        short: List[Symbol] = [x[0] for x in sorted_performance][-quantile:]

        # Trade execution.
        # (-1) ** i gives +1 for the long leg (i == 0) and -1 for the short leg (i == 1).
        weight: Dict[Symbol, float] = {}
        targets: List[PortfolioTarget] = []
        if self._value_weighted_flag:
            for i, portfolio in enumerate([long, short]):
                mc_sum: float = sum([self._data[symbol]._current_marketcap for symbol in portfolio])
                for symbol in portfolio:
                    weight[symbol] = ((-1)**i) * self._data[symbol]._current_marketcap / mc_sum

            targets = [PortfolioTarget(symbol, w) for symbol, w in weight.items() if slice.contains_key(symbol) and slice[symbol]]
        else:
            for i, portfolio in enumerate([long, short]):
                for symbol in portfolio:
                    if slice.contains_key(symbol) and slice[symbol]:
                        targets.append(PortfolioTarget(symbol, ((-1) ** i) / len(portfolio)))

        self.set_holdings(targets, True)
        # Reset monthly data.
        self._reset_monthly_data()