| Overall Statistics | |
| --- | --- |
| Total Orders | 14 |
| Average Win | 9.04% |
| Average Loss | -9.56% |
| Compounding Annual Return | 14.613% |
| Drawdown | 35.800% |
| Expectancy | -0.027 |
| Start Equity | 100000 |
| End Equity | 114570.54 |
| Net Profit | 14.571% |
| Sharpe Ratio | 0.495 |
| Sortino Ratio | 0.547 |
| Probabilistic Sharpe Ratio | 25.745% |
| Loss Rate | 50% |
| Win Rate | 50% |
| Profit-Loss Ratio | 0.95 |
| Alpha | 0.08 |
| Beta | -1.626 |
| Annual Standard Deviation | 0.574 |
| Annual Variance | 0.329 |
| Information Ratio | 0.578 |
| Tracking Error | 0.708 |
| Treynor Ratio | -0.175 |
| Total Fees | $59.62 |
| Estimated Strategy Capacity | $260000000.00 |
| Lowest Capacity Asset | SQQQ UK280CGTCB51 |
| Portfolio Turnover | 2.99% |
# region imports
from AlgorithmImports import *
import tensorflow as tf
from transformers import TFBertForSequenceClassification, BertTokenizer, set_seed
from pathlib import Path
from datasets import Dataset
import pytz
import torch
# endregion
class FinbertBaseModelAlgorithm(QCAlgorithm):
def initialize(self):
self.set_start_date(2022, 1, 1)
self.set_end_date(2023, 1, 1)
self.set_cash(100_000)
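        # SPY anchors the market-hour schedules used for selection and trading.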
spy = Symbol.create("SPY", SecurityType.EQUITY, Market.USA)
self.universe_settings.resolution = Resolution.DAILY
self.universe_settings.schedule.on(self.date_rules.month_start(spy))
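        # Each month, select the single most volatile asset (by standard deviation of
        # daily returns over the trailing year) among the 10 US equities with the
        # greatest dollar volume.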
self._universe = self.add_universe(
lambda fundamental: [
self.history(
[f.symbol for f in sorted(fundamental, key=lambda f: f.dollar_volume)[-10:]],
timedelta(365), Resolution.DAILY
)['close'].unstack(0).pct_change().iloc[1:].std().idxmax()
]
)
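        # Seed the transformers library so fine-tuning results are reproducible.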
set_seed(1, True)
self._last_rebalance_time = datetime.min
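        # Trade at midnight, one trading day after the start of each month.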
self.schedule.on(
self.date_rules.month_start(spy, 1),
self.time_rules.midnight,
self._trade
)
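        # Warm up for 30 days so history requests are populated before the first trade.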
self.set_warm_up(timedelta(30))
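        # Load the FinBERT tokenizer once; the pre-trained model itself is re-loaded
        # and fine-tuned on each rebalance.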
self._model_name = "ProsusAI/finbert"
self._tokenizer = BertTokenizer.from_pretrained(self._model_name)
def on_warmup_finished(self):
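        # Place the first trade as soon as warm-up completes.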
self._trade()
def on_securities_changed(self, changes):
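        # Keep the Tiingo news subscriptions in sync with the universe.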
for security in changes.removed_securities:
self.remove_security(security.dataset_symbol)
for security in changes.added_securities:
security.dataset_symbol = self.add_data(
TiingoNews, security.symbol
).symbol
def _trade(self):
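        # Only rebalance after warm-up and at most once every 14 days.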
if (self.is_warming_up or
self.time - self._last_rebalance_time < timedelta(14)):
return
# Get the target security.
security = self.securities[list(self._universe.selected)[0]]
        # Get samples to fine-tune the model.
samples = pd.DataFrame(columns=['text', 'label'])
news_history = self.history(security.dataset_symbol, 30, Resolution.DAILY)
if news_history.empty:
return
news_history = news_history.loc[security.dataset_symbol]['description']
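        # Get second-resolution prices to measure the asset's move between
        # consecutive news releases.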
asset_history = self.history(
security.symbol, timedelta(30), Resolution.SECOND
).loc[security.symbol]['close']
for i in range(len(news_history.index)-1):
# Get factor (article description).
factor = news_history.iloc[i]
if not factor:
continue
            # Get the label: the asset's return from this news release to the next.
release_time = self._convert_to_eastern(news_history.index[i])
next_release_time = self._convert_to_eastern(news_history.index[i+1])
reaction_period = asset_history[
(asset_history.index > release_time) &
(asset_history.index < next_release_time + timedelta(seconds=1))
]
if reaction_period.empty:
continue
label = (
(reaction_period.iloc[-1] - reaction_period.iloc[0])
/ reaction_period.iloc[0]
)
# Save the training sample.
samples.loc[len(samples), :] = [factor, label]
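        # Keep only the 100 most recent samples and require at least 10 to trade.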
samples = samples.iloc[-100:]
if samples.shape[0] < 10:
self.liquidate()
return
# Classify the market reaction into positive/negative/neutral.
        # 75% of the most negative labels => class 0 (negative)
        # 75% of the most positive labels => class 2 (positive)
        # Remaining labels => class 1 (neutral)
sorted_samples = samples.sort_values(by='label', ascending=False).reset_index(drop=True)
percent_signed = 0.75
positive_cutoff = (
int(percent_signed
* len(sorted_samples[sorted_samples.label > 0]))
)
negative_cutoff = (
len(sorted_samples)
- int(percent_signed * len(sorted_samples[sorted_samples.label < 0]))
)
sorted_samples.loc[list(range(negative_cutoff, len(sorted_samples))), 'label'] = 0
sorted_samples.loc[list(range(positive_cutoff, negative_cutoff)), 'label'] = 1
sorted_samples.loc[list(range(0, positive_cutoff)), 'label'] = 2
# Load the pre-trained model.
model = TFBertForSequenceClassification.from_pretrained(
self._model_name, num_labels=3, from_pt=True
)
# Compile the model.
model.compile(
optimizer=tf.keras.optimizers.Adam(learning_rate=3e-5),
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
)
# Create the training dataset.
dataset = Dataset.from_pandas(sorted_samples)
dataset = dataset.map(
lambda sample: self._tokenizer(
sample['text'], padding='max_length', truncation=True
)
)
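        # Convert the tokenized dataset into a batched tf.data.Dataset for training.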
dataset = model.prepare_tf_dataset(
dataset, shuffle=True, tokenizer=self._tokenizer
)
# Train the model.
model.fit(dataset, epochs=2)
# Prepare the input sentences.
inputs = self._tokenizer(
list(samples['text'].values), padding=True, truncation=True,
return_tensors='tf'
)
# Get the model outputs.
outputs = model(**inputs)
# Apply softmax to the outputs to get probabilities.
scores = tf.nn.softmax(outputs.logits, axis=-1).numpy()
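        # Samples are in chronological order, so the exponential weights below
        # emphasize the most recent articles.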
scores = self._aggregate_sentiment_scores(scores)
self.plot("Sentiment Probability", "Negative", scores[0])
self.plot("Sentiment Probability", "Neutral", scores[1])
self.plot("Sentiment Probability", "Positive", scores[2])
# Rebalance.
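        # Go long when the aggregate positive probability beats the negative one;
        # otherwise take a small short position.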
weight = 1 if scores[2] > scores[0] else -0.25
self.set_holdings(security.symbol, weight, True)
self._last_rebalance_time = self.time
def _convert_to_eastern(self, dt):
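        # Normalize a (presumably UTC) timestamp to naive US/Eastern time so it can
        # be compared against the price history index.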
return dt.astimezone(pytz.timezone('US/Eastern')).replace(tzinfo=None)
def _aggregate_sentiment_scores(self, sentiment_scores):
n = sentiment_scores.shape[0]
# Generate exponentially increasing weights
weights = np.exp(np.linspace(0, 1, n))
# Normalize weights to sum to 1
weights /= weights.sum()
# Apply weights to sentiment scores
weighted_scores = sentiment_scores * weights[:, np.newaxis]
# Aggregate weighted scores by summing them
aggregated_scores = weighted_scores.sum(axis=0)
return aggregated_scores