# Backtest
#
# Overall Statistics
# region imports
from AlgorithmImports import *
# endregion


class LazyPricesStrategy(QCAlgorithm):
    """Quarterly long-short book ranked by filing-language similarity ("Lazy Prices").

    Every new Brain language-metrics filing is compared with the same symbol's
    previous filing. The standardized distance between the two metric vectors is
    mapped to a similarity score in (0, 1], where a higher score means the filing
    changed less. At each rebalance the scored, liquid mid/small-cap names are
    ranked by that score: the top quantile is held long and the bottom quantile
    short, with equal weights inside each leg.
    """

    def initialize(self) -> None:
        """Set the backtest window and parameters, create state, and register both universes."""
        self.set_start_date(2015, 1, 1)
        self.set_end_date(2026, 6, 1)
        self.set_cash(1_000_000)
        self.settings.seed_initial_prices = True
        self.universe_settings.resolution = Resolution.DAILY
        self._min_price = 5
        self._pool_size = 1000
        self._quantile = 0.2
        # Mid- and small-cap style boxes: the Lazy Prices anomaly's habitat, away from mega-caps.
        self._style_boxes = [
            StyleBox.MID_VALUE, StyleBox.MID_CORE, StyleBox.MID_GROWTH,
            StyleBox.SMALL_VALUE, StyleBox.SMALL_CORE, StyleBox.SMALL_GROWTH
        ]
        # Kept on the instance rather than as class-level dict literals, so two
        # instances of this class can never share or overwrite each other's scores.
        # Latest metric vector seen per symbol.
        self._vector_by_symbol: Dict[Symbol, np.ndarray] = {}
        # Latest similarity score per symbol (only set from a symbol's second filing onward).
        self._similarity_by_symbol: Dict[Symbol, float] = {}
        # Running count, sum and sum of squares of the 10 metrics across all accepted filings.
        self._metric_n = 0
        self._metric_sum = np.zeros(10)
        self._metric_sumsq = np.zeros(10)
        # The fundamental universe picks the tradeable book; the Brain universe only feeds scores.
        self._universe = self.add_universe(self._fundamental_filter)
        self.add_universe(BrainCompanyFilingLanguageMetricsUniverseAll, self._update_signals)

    def on_warmup_finished(self) -> None:
        """Schedule the quarterly rebalance and trigger one for the current day."""
        # Daily data fills at the prior close, so anchor the quarterly rebalance to 8 AM.
        time_rule = self.time_rules.at(8, 0)
        self.schedule.on(self.date_rules.quarter_start("SPY"), time_rule, self._rebalance)
        if self.live_mode:
            # Live deployments rebalance right away instead of waiting for a scheduled slot.
            self._rebalance()
        else:
            self.schedule.on(self.date_rules.today, time_rule, self._rebalance)

    def _fundamental_filter(self, fundamental: List[Fundamental]) -> List[Symbol]:
        """Return the symbols of the most liquid eligible names.

        A name is eligible when it has fundamental data, already has a similarity
        score, trades above ``_min_price`` and sits in one of ``_style_boxes``.
        At most ``_pool_size`` symbols are returned, in ascending dollar-volume order.
        """
        # Keep scored, tradeable mid/small-cap names; the most liquid form the cross-section.
        eligible = [
            f for f in fundamental
            if f.has_fundamental_data and f.symbol in self._similarity_by_symbol and
            f.price > self._min_price and f.asset_classification.style_box in self._style_boxes
        ]
        return [f.symbol for f in sorted(eligible, key=lambda f: f.dollar_volume)[-self._pool_size:]]

    def _update_signals(self, filings: List[BrainCompanyFilingLanguageMetricsUniverseAll]) -> List[Symbol]:
        """Fold new filings into the per-symbol vectors, running statistics and similarity scores.

        Filings without a report section, or with any of the ten metrics missing,
        are ignored. Always returns an empty list, so this universe itself selects
        no symbols.
        """
        for filing in filings:
            report = filing.report_sentiment
            if report is None:
                continue
            fields = [
                report.sentiment, report.uncertainty, report.litigious, report.constraining,
                report.interesting, report.readability, report.lexical_richness,
                report.lexical_density, report.specific_density, report.mean_sentence_length
            ]
            if any(f is None for f in fields):
                continue
            vector = np.array([float(f) for f in fields])
            previous = self._vector_by_symbol.get(filing.symbol)
            # An identical vector is the same filing carried forward, so skip it before scoring.
            if previous is not None and np.array_equal(previous, vector):
                continue
            self._vector_by_symbol[filing.symbol] = vector
            # The running statistics include the current filing before it is scored.
            self._metric_n += 1
            self._metric_sum += vector
            self._metric_sumsq += vector * vector
            # The first filing for a symbol has no prior to compare, so store it and score the next.
            if previous is None:
                continue
            # Standardize each metric by its own scale so big-magnitude fields can't dominate.
            mean = self._metric_sum / self._metric_n
            # Floor the variance so a (near-)constant metric never divides by zero.
            std = np.sqrt(np.maximum(self._metric_sumsq / self._metric_n - mean * mean, 1e-12))
            self._similarity_by_symbol[filing.symbol] = 1.0 / (1.0 + float(np.linalg.norm((vector - previous) / std)))
        return []

    def _rebalance(self) -> None:
        """Rank the selected names by similarity and set the long and short legs.

        Only selected symbols with a similarity score and a non-zero price are
        ranked. Each leg holds ``int(count * _quantile)`` names at a weight of
        ``0.5 / n`` each; nothing is traded when that count is below one.
        """
        # NOTE(review): `selected` may still be unset if no universe selection has run yet
        # (e.g. the immediate live-mode call from on_warmup_finished) - confirm against LEAN.
        members = self._universe.selected or []
        selected = [s for s in members if s in self._similarity_by_symbol and self.securities[s].price]
        n = int(len(selected) * self._quantile)
        if n < 1:
            return
        # Long the top similarity quintile (lazy), short the bottom quintile (biggest rewriters).
        ranked = sorted(selected, key=self._similarity_by_symbol.get)
        weight = 0.5 / n
        targets = [PortfolioTarget(s, -weight) for s in ranked[:n]] + [PortfolioTarget(s, weight) for s in ranked[-n:]]
        self.set_holdings(targets, True)