# Overall Statistics
#
# | Statistic                    | Value           |
# |------------------------------|-----------------|
# | Total Orders                 | 115             |
# | Average Win                  | 0.45%           |
# | Average Loss                 | -0.41%          |
# | Compounding Annual Return    | 4.614%          |
# | Drawdown                     | 16.100%         |
# | Expectancy                   | 0.259           |
# | Start Equity                 | 1000000         |
# | End Equity                   | 1094129.23      |
# | Net Profit                   | 9.413%          |
# | Sharpe Ratio                 | 0.008           |
# | Sortino Ratio                | 0.01            |
# | Probabilistic Sharpe Ratio   | 14.899%         |
# | Loss Rate                    | 40%             |
# | Win Rate                     | 60%             |
# | Profit-Loss Ratio            | 1.11            |
# | Alpha                        | -0.001          |
# | Beta                         | -0.203          |
# | Annual Standard Deviation    | 0.091           |
# | Annual Variance              | 0.008           |
# | Information Ratio            | 0.054           |
# | Tracking Error               | 0.213           |
# | Treynor Ratio                | -0.004          |
# | Total Fees                   | $293.48         |
# | Estimated Strategy Capacity  | $0              |
# | Lowest Capacity Asset        | NB R735QTJ8XC9X |
# | Portfolio Turnover           | 0.47%           |
# | Drawdown Recovery            | 101             |
from AlgorithmImports import *
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
class LazyPricesStrategy(QCAlgorithm):
    """Monthly long/short equity strategy ranked by 10-K text similarity.

    For every stock in a liquid large-cap universe the algorithm subscribes
    to SEC 10-K reports, compares each new filing's text with the previously
    seen filing for the same ticker (TF-IDF cosine similarity), and once a
    month goes long the highest-similarity names and short the
    lowest-similarity names in equal weights.
    """

    def initialize(self):
        """Set the backtest window, parameters, state, universe and schedule."""
        self.set_start_date(2022, 1, 1)
        self.set_end_date(2024, 1, 1)
        self.set_cash(1_000_000)
        # Warm-up lets filings that arrive before the start date populate
        # prev_filing_text / sim_scores (on_data labels them "WARMUP").
        self.set_warm_up(timedelta(days=400))

        self.spy = self.add_equity("SPY", Resolution.DAILY).symbol
        self.set_benchmark(self.spy)

        # ── Parameters ──────────────────────────────────────────────────────
        self.n_long_short = 5            # names per side of the book
        self.max_signal_age = 400        # days a similarity score stays usable
        self.min_sim_threshold = 0.15    # scores below this are ignored
        self._universe_size = 50         # stocks kept by _select_universe
        self._max_subscriptions = 50     # cap on concurrent 10-K subscriptions

        # ── State ───────────────────────────────────────────────────────────
        self.prev_filing_text = {}       # ticker -> text of last seen filing
        self.sim_scores = {}             # ticker -> (score, time received)
        self.sec_sym_to_ticker = {}      # 10-K data symbol -> equity ticker
        self._subscribed_tickers = set() # tickers with a live 10-K subscription

        # ── Universe ────────────────────────────────────────────────────────
        self.universe_settings.resolution = Resolution.DAILY
        self.add_universe(self._select_universe)

        # ── Monthly rebalance ───────────────────────────────────────────────
        self.schedule.on(
            self.date_rules.month_start(self.spy),
            self.time_rules.after_market_open(self.spy, 30),
            self._rebalance,
        )

    # ─────────────────────────────────────────────────────────────────────────
    # Universe: most liquid US large-cap equities
    # ─────────────────────────────────────────────────────────────────────────
    def _select_universe(self, fundamental):
        """Return the symbols of the top stocks by dollar volume.

        Only entries with fundamental data, dollar volume above 1e8 and
        market cap above 5e9 are considered; the ``_universe_size`` largest
        by dollar volume are kept.
        """
        eligible = [
            f for f in fundamental
            if f.has_fundamental_data
            and f.dollar_volume > 1e8
            and f.market_cap > 5e9
        ]
        by_liquidity = sorted(
            eligible, key=lambda f: f.dollar_volume, reverse=True
        )
        return [f.symbol for f in by_liquidity[:self._universe_size]]

    # ─────────────────────────────────────────────────────────────────────────
    # Subscribe / unsubscribe SEC 10-K data as stocks enter / leave universe
    # ─────────────────────────────────────────────────────────────────────────
    def on_securities_changed(self, changes):
        """Keep 10-K subscriptions in step with universe membership.

        Removals are handled before additions so that subscription slots
        freed in this change set can be reused by the stocks entering in the
        same change set.
        """
        for security in changes.removed_securities:
            if security.type != SecurityType.EQUITY:
                continue
            ticker = security.symbol.value
            dataset_sym = next(
                (s for s, t in self.sec_sym_to_ticker.items() if t == ticker),
                None,
            )
            if dataset_sym is not None:
                self.remove_security(dataset_sym)
                del self.sec_sym_to_ticker[dataset_sym]
            # Free the slot; otherwise the cap fills up permanently and a
            # ticker that re-enters the universe is never re-subscribed.
            self._subscribed_tickers.discard(ticker)

        for security in changes.added_securities:
            if security.type != SecurityType.EQUITY:
                continue
            equity_sym = security.symbol
            # The benchmark is added manually and is never traded on a
            # signal, so it should not take a 10-K subscription slot.
            if equity_sym == self.spy:
                continue
            ticker = equity_sym.value
            if ticker in self._subscribed_tickers:
                continue
            if len(self._subscribed_tickers) >= self._max_subscriptions:
                continue
            sec_sym = self.add_data(
                SECReport10K, equity_sym, Resolution.DAILY
            ).symbol
            self.sec_sym_to_ticker[sec_sym] = ticker
            self._subscribed_tickers.add(ticker)

    # ─────────────────────────────────────────────────────────────────────────
    # Process 10-K filings
    # ─────────────────────────────────────────────────────────────────────────
    def on_data(self, data):
        """Score each incoming 10-K against the previous filing for its ticker.

        The first filing seen for a ticker only seeds ``prev_filing_text``;
        later filings produce a similarity score stored in ``sim_scores``
        together with the algorithm time at which the filing was received.
        """
        for report in data.get(SECReport10K).values():
            ticker = report.symbol.underlying.value
            text = self._extract_text(report)
            if not text:
                continue

            previous = self.prev_filing_text.get(ticker)
            if previous is not None:
                score = self._cosine_sim(previous, text)
                if score is not None:
                    self.sim_scores[ticker] = (score, self.time)
                    label = "WARMUP" if self.is_warming_up else "LIVE"
                    self.debug(f"{self.time.date()} | {ticker} | {label} | sim={score:.4f}")

            # Always roll the stored text forward, even if scoring failed.
            self.prev_filing_text[ticker] = text

    # ─────────────────────────────────────────────────────────────────────────
    # Extract Text
    # ─────────────────────────────────────────────────────────────────────────
    def _extract_text(self, report):
        """Return the joined document text of a report, or ``None``.

        Documents whose text is missing or 50 characters or shorter are
        dropped. ``None`` is returned when the joined text is 200 characters
        or shorter, or when reading the report raises.
        """
        try:
            docs = report.report.documents
            parts = [doc.text for doc in docs if doc.text and len(doc.text) > 50]
            text = " ".join(parts)
            return text if len(text) > 200 else None
        except Exception as e:
            # Best effort: a malformed report must not stop the algorithm.
            self.debug(f"Extract error: {e}")
            return None

    # ─────────────────────────────────────────────────────────────────────────
    # TF-IDF cosine similarity
    # ─────────────────────────────────────────────────────────────────────────
    def _cosine_sim(self, text_a, text_b):
        """Return the TF-IDF cosine similarity of two texts, or ``None``.

        The vectorizer is fitted on just the two texts, limited to 5,000
        features, with English stop words removed and sublinear term
        frequency. ``None`` is returned if vectorizing or comparing raises.
        """
        try:
            vec = TfidfVectorizer(
                max_features=5_000,
                stop_words="english",
                sublinear_tf=True,
            )
            mat = vec.fit_transform([text_a, text_b])
            return float(cosine_similarity(mat[0:1], mat[1:2])[0][0])
        except Exception as e:
            # Best effort: a scoring failure just leaves the ticker unscored.
            self.debug(f"Similarity error: {e}")
            return None

    # ─────────────────────────────────────────────────────────────────────────
    # Monthly rebalance: long high-similarity, short low-similarity
    # ─────────────────────────────────────────────────────────────────────────
    def _rebalance(self):
        """Rebuild the long/short book from the current similarity scores.

        Does nothing during warm-up, or when fewer than
        ``2 * n_long_short`` usable signals exist. Otherwise liquidates
        holdings outside the new selection and sets equal weights totalling
        +0.5 on the long side and -0.5 on the short side.
        """
        if self.is_warming_up:
            return

        signals = {}
        seen_tickers = set()
        for symbol, security in self.active_securities.items():
            if security.type != SecurityType.EQUITY:
                continue
            ticker = symbol.value
            if ticker in seen_tickers:
                continue
            entry = self.sim_scores.get(ticker)
            if entry is None:
                continue
            score, filing_date = entry
            if score < self.min_sim_threshold:
                self.debug(f"{self.time.date()} | {ticker} | score {score:.4f} below threshold, skipping")
                continue
            # Ignore scores older than max_signal_age days.
            if (self.time - filing_date).days <= self.max_signal_age:
                signals[symbol] = score
                seen_tickers.add(ticker)

        # Need enough names that the long and short legs cannot overlap.
        min_needed = 2 * self.n_long_short
        if len(signals) < min_needed:
            self.debug(f"{self.time.date()} | {len(signals)} signals < {min_needed}. Skipping.")
            return

        ranked = sorted(signals, key=signals.get)  # ascending similarity
        short_syms = ranked[:self.n_long_short]
        long_syms = ranked[-self.n_long_short:]

        active_set = set(long_syms + short_syms)
        for sym, holding in self.portfolio.items():
            if holding.invested and sym not in active_set:
                self.liquidate(sym)

        w = 0.5 / self.n_long_short
        targets = (
            [PortfolioTarget(s, w) for s in long_syms] +
            [PortfolioTarget(s, -w) for s in short_syms]
        )
        self.set_holdings(targets)

        self.debug(
            f"{self.time.date()} | Signals={len(signals)} | "
            f"Long={[s.value for s in long_syms]} | "
            f"Short={[s.value for s in short_syms]}"
        )
        self.plot("Strategy", "Signal Count", len(signals))

    def on_end_of_algorithm(self):
        """Log the final portfolio value and every stored similarity score."""
        self.debug(f"Final Value : ${self.portfolio.total_portfolio_value:,.2f}")
        self.debug(f"Scored tickers : {len(self.sim_scores)}")
        self.debug(f"Stored filings : {len(self.prev_filing_text)}")
        # Listed from lowest to highest similarity.
        for ticker, (score, filed_at) in sorted(
            self.sim_scores.items(), key=lambda x: x[1][0]
        ):
            self.debug(f" {ticker}: sim={score:.4f} (filed {filed_at.date()})")