ExtractAlpha
Estimize
Introduction
The Estimize dataset by ExtractAlpha provides crowdsourced estimates of company financials, including EPS and revenue. The data covers EPS and revenue estimates for over 2,800 US-listed Equities. The data starts in January 2011 and is updated on a daily frequency. The data is sparse, so it doesn't have new updates every day. This dataset is crowdsourced from a community of 100,000+ contributors via the data provider’s web platform.
This dataset depends on the US Equity Security Master dataset because the US Equity Security Master dataset contains information on splits, dividends, and symbol changes.
For more information about the Estimize dataset, including CLI commands and pricing, see the dataset listing.
About the Provider
ExtractAlpha was founded by Vinesh Jha in 2013 with the goal of providing alternative data for investors. ExtractAlpha's rigorously researched data sets and quantitative stock selection models leverage unique sources and analytical techniques, allowing users to gain an investment edge.
Getting Started
The following snippet demonstrates how to request data from the Estimize dataset:
# Subscribe to the underlying Equity, then request each Estimize dataset with the Equity Symbol.
self.aapl = self.add_equity("AAPL", Resolution.DAILY).symbol
# Save the dataset Symbols so the data can be accessed later in the algorithm.
self.estimize_consensus_symbol = self.add_data(EstimizeConsensus, self.aapl).symbol
self.estimize_estimate_symbol = self.add_data(EstimizeEstimate, self.aapl).symbol
self.estimize_release_symbol = self.add_data(EstimizeRelease, self.aapl).symbol
// Subscribe to the underlying Equity, then request each Estimize dataset with the Equity Symbol.
var equity = AddEquity("AAPL", Resolution.Daily);
_symbol = equity.Symbol;
// Save the dataset Symbols so the data can be accessed later in the algorithm.
var consensus = AddData<EstimizeConsensus>(_symbol);
_estimizeConsensusSymbol = consensus.Symbol;
var estimate = AddData<EstimizeEstimate>(_symbol);
_estimizeEstimateSymbol = estimate.Symbol;
var release = AddData<EstimizeRelease>(_symbol);
_estimizeReleaseSymbol = release.Symbol;
Requesting Data
To add Estimize data to your algorithm, call the AddData (C#) or add_data (Python) method. Save a reference to the dataset Symbol so you can access the data later in your algorithm.
class ExtractAlphaEstimizeDataAlgorithm(QCAlgorithm):
def initialize(self) -> None:
self.set_start_date(2019, 1, 1)
self.set_end_date(2020, 6, 1)
self.set_cash(100000)
self.aapl = self.add_equity("AAPL", Resolution.DAILY).symbol
self.estimize_consensus_symbol = self.add_data(EstimizeConsensus, self.aapl).symbol
self.estimize_estimate_symbol = self.add_data(EstimizeEstimate, self.aapl).symbol
self.estimize_release_symbol = self.add_data(EstimizeRelease, self.aapl).symbol public class ExtractAlphaEstimizeDataAlgorithm : QCAlgorithm
{
private Symbol _symbol, _estimizeConsensusSymbol, _estimizeEstimateSymbol, _estimizeReleaseSymbol;
public override void Initialize()
{
SetStartDate(2019, 1, 1);
SetEndDate(2020, 6, 1);
SetCash(100000);
_symbol = AddEquity("AAPL", Resolution.Daily).Symbol;
_estimizeConsensusSymbol = AddData<EstimizeConsensus>(_symbol).Symbol;
_estimizeEstimateSymbol = AddData<EstimizeEstimate>(_symbol).Symbol;
_estimizeReleaseSymbol = AddData<EstimizeRelease>(_symbol).Symbol;
}
}
Accessing Data
To get the current Estimize data, index the current Slice with the dataset Symbol. Slice objects deliver unique events to your algorithm as they happen, but the Slice may not contain data for your dataset at every time step. To avoid issues, check if the Slice contains the data you want before you index it.
def on_data(self, slice: Slice) -> None:
if slice.contains_key(self.estimize_consensus_symbol):
data_point = slice[self.estimize_consensus_symbol]
self.log(f"{self.estimize_consensus_symbol} mean at {slice.time}: {data_point.mean}")
if slice.contains_key(self.estimize_estimate_symbol):
data_point = slice[self.estimize_estimate_symbol]
self.log(f"{self.estimize_estimate_symbol} EPS at {slice.time}: {data_point.eps}")
if slice.contains_key(self.estimize_release_symbol):
data_point = slice[self.estimize_release_symbol]
self.log(f"{self.estimize_release_symbol} EPS at {slice.time}: {data_point.eps}") public override void OnData(Slice slice)
{
if (slice.ContainsKey(_estimizeConsensusSymbol))
{
var dataPoint = slice[_estimizeConsensusSymbol];
Log($"{_estimizeConsensusSymbol} mean at {slice.Time}: {dataPoint.Mean}");
}
if (slice.ContainsKey(_estimizeEstimateSymbol))
{
var dataPoint = slice[_estimizeEstimateSymbol];
Log($"{_estimizeEstimateSymbol} EPS at {slice.Time}: {dataPoint.Eps}");
}
if (slice.ContainsKey(_estimizeReleaseSymbol))
{
var dataPoint = slice[_estimizeReleaseSymbol];
Log($"{_estimizeReleaseSymbol} EPS at {slice.Time}: {dataPoint.Eps}");
}
}
To iterate through all of the dataset objects in the current Slice, call the Get (C#) or get (Python) method.
def on_data(self, slice: Slice) -> None:
    # Iterate through every data point of each Estimize dataset type in the current Slice.
    for dataset_symbol, data_point in slice.get(EstimizeConsensus).items():
        # Consensus data points expose the mean estimate through the `mean` attribute.
        self.log(f"{dataset_symbol} mean at {slice.time}: {data_point.mean}")
    for dataset_symbol, data_point in slice.get(EstimizeEstimate).items():
        self.log(f"{dataset_symbol} EPS at {slice.time}: {data_point.eps}")
    for dataset_symbol, data_point in slice.get(EstimizeRelease).items():
        self.log(f"{dataset_symbol} EPS at {slice.time}: {data_point.eps}")
/// <summary>
/// Logs every Estimize data point of each dataset type in the current Slice.
/// </summary>
/// <param name="slice">The current Slice of data.</param>
public override void OnData(Slice slice)
{
    foreach (var kvp in slice.Get<EstimizeConsensus>())
    {
        var datasetSymbol = kvp.Key;
        var dataPoint = kvp.Value;
        // Consensus data points expose the mean estimate through the Mean property.
        Log($"{datasetSymbol} mean at {slice.Time}: {dataPoint.Mean}");
    }
    foreach (var kvp in slice.Get<EstimizeEstimate>())
    {
        var datasetSymbol = kvp.Key;
        var dataPoint = kvp.Value;
        Log($"{datasetSymbol} EPS at {slice.Time}: {dataPoint.Eps}");
    }
    foreach (var kvp in slice.Get<EstimizeRelease>())
    {
        var datasetSymbol = kvp.Key;
        var dataPoint = kvp.Value;
        Log($"{datasetSymbol} EPS at {slice.Time}: {dataPoint.Eps}");
    }
}
Historical Data
To get historical Estimize data, call the History (C#) or history (Python) method with the dataset Symbol. If there is no data in the period you request, the history result is empty.
# DataFrames
consensus_history_df = self.history(self.estimize_consensus_symbol, 100, Resolution.DAILY)
estimate_history_df = self.history(self.estimize_estimate_symbol, 100, Resolution.DAILY)
release_history_df = self.history(self.estimize_release_symbol, 100, Resolution.DAILY)
history_df = self.history([
self.estimize_consensus_symbol,
self.estimize_estimate_symbol,
self.estimize_release_symbol], 100, Resolution.DAILY)
# Dataset objects
consensus_history_bars = self.history[EstimizeConsensus](self.estimize_consensus_symbol, 100, Resolution.DAILY)
estimate_history_bars = self.history[EstimizeEstimate](self.estimize_estimate_symbol, 100, Resolution.DAILY)
release_history_bars = self.history[EstimizeRelease](self.estimize_release_symbol, 100, Resolution.DAILY) // Dataset objects
var concensusHistory = History<EstimizeConsensus>(_estimizeConsensusSymbol, 100, Resolution.Daily);
var estimateHistory = History<EstimizeEstimate>(_estimizeEstimateSymbol, 100, Resolution.Daily);
var releaseHistory = History<EstimizeRelease>(_estimizeReleaseSymbol, 100, Resolution.Daily);
// Slice objects
var history = History(new[]{_estimizeConsensusSymbol,
_estimizeEstimateSymbol,
_estimizeReleaseSymbol}, 10, Resolution.Daily);
For more information about historical data, see History Requests.
Remove Subscriptions
To remove a subscription, call the RemoveSecurity (C#) or remove_security (Python) method.
# Remove each dataset subscription through the dataset Symbol saved when it was added.
self.remove_security(self.estimize_consensus_symbol)
self.remove_security(self.estimize_estimate_symbol)
self.remove_security(self.estimize_release_symbol)
// Remove each dataset subscription through the dataset Symbol saved when it was added.
RemoveSecurity(_estimizeConsensusSymbol);
RemoveSecurity(_estimizeEstimateSymbol);
RemoveSecurity(_estimizeReleaseSymbol);
If you subscribe to Estimize data for assets in a dynamic universe, remove the dataset subscription when the asset leaves your universe. To view a common design pattern, see Track Security Changes.
Example Applications
The Estimize dataset enables you to estimate a company's financial data more accurately, which you can use as a source of alpha. Examples include the following use cases:
- Fundamental estimates for ML regression/classification models
- Arbitrage/sentiment trading on market “surprises”, where results deviate from ordinary expectations, based on the more accurate expectations from the dataset
- Using industry-specific KPIs to predict the returns of individual sectors
Classic Algorithm Example
The following example algorithm creates a dynamic universe of the 250 most liquid US Equities. Each morning, the algorithm forms an equal-weighted portfolio with the companies that have a new EPS estimate.
from AlgorithmImports import *
class ExtractAlphaEstimizeAlgorithm(QCAlgorithm):
    """Holds an equal-weighted portfolio of the liquid US Equities that have new Estimize Consensus data."""

    def initialize(self) -> None:
        self.set_start_date(2024, 9, 1)
        self.set_end_date(2024, 12, 31)
        self.set_cash(100000)
        # Seed every new asset with its last known price to avoid trading errors.
        seeder = FuncSecuritySeeder(self.get_last_known_prices)
        self.set_security_initializer(
            BrokerageModelSecurityInitializer(self.brokerage_model, seeder)
        )
        # The dataset is sparse, so cache the latest Consensus data point of each asset.
        self._consensus_by_symbol = {}
        # Trade a universe of liquid US Equities.
        self.add_universe(self._select_assets)
        # Schedule the daily rebalance relative to the market open of SPY.
        spy = Symbol.create('SPY', SecurityType.EQUITY, Market.USA)
        every_day = self.date_rules.every_day(spy)
        after_open = self.time_rules.after_market_open(spy, 1)
        self.schedule.on(every_day, after_open, self._rebalance)

    def _select_assets(self, fundamentals: List[Fundamental]) -> List[Symbol]:
        # Keep the non-penny stocks that have fundamental data, then pick the
        # 250 with the highest dollar volume: their prices are more stable
        # (lower risk) and the high market activity gives more informed insights.
        candidates = [f for f in fundamentals if f.has_fundamental_data and f.price > 4]
        candidates.sort(key=lambda f: f.dollar_volume)
        return [f.symbol for f in candidates[-250:]]

    def on_data(self, slice: Slice) -> None:
        # Cache the latest Consensus data under the Symbol of the underlying Equity.
        consensus = slice.get(EstimizeConsensus)
        if consensus:
            for dataset_symbol, data_point in consensus.items():
                self._consensus_by_symbol[dataset_symbol.underlying] = data_point

    def _rebalance(self):
        # Nothing to do unless Consensus data arrived since the previous rebalance.
        if not self._consensus_by_symbol:
            return
        # Form an equal-weighted portfolio of the assets that have new
        # Consensus data and a non-zero price.
        tradable = [s for s in self._consensus_by_symbol if self.securities[s].price]
        targets = [PortfolioTarget(s, 1/len(tradable)) for s in tradable]
        self.set_holdings(targets, True)
        # Start collecting Consensus data for the next rebalance.
        self._consensus_by_symbol.clear()

    def on_securities_changed(self, changes: SecurityChanges) -> None:
        for added in changes.added_securities:
            # Subscribe to the Consensus data that generates the trading signal
            # and keep the dataset Symbol on the security object.
            added.consensus = self.add_data(EstimizeConsensus, added.symbol).symbol
            # Example of a historical data request for the new subscription.
            history = self.history(added.consensus, 100, Resolution.DAILY)
        for removed in changes.removed_securities:
            # Drop the Consensus subscription when the asset leaves the universe.
            self.remove_security(removed.consensus)
/// <summary>
/// Holds an equal-weighted portfolio of the liquid US Equities that have new Estimize Consensus data.
/// </summary>
public class ExtractAlphaEstimizeAlgorithm : QCAlgorithm
{
    // Add a dictionary to hold the latest Consensus data since this dataset is sparse.
    private Dictionary<Symbol, EstimizeConsensus> _consensusBySymbol = new();

    /// <summary>
    /// Sets the backtest period and cash, adds the universe, and schedules the daily rebalance.
    /// </summary>
    public override void Initialize()
    {
        SetStartDate(2024, 9, 1);
        SetEndDate(2024, 12, 31);
        SetCash(100000);
        // Seed securities with their last known prices to avoid stale-price trade errors
        SetSecurityInitializer(new BrokerageModelSecurityInitializer(
            BrokerageModel,
            new FuncSecuritySeeder(GetLastKnownPrices)
        ));
        // Add a universe of liquid US Equities.
        AddUniverse(SelectAssets);
        // Add a Scheduled Event to rebalance the portfolio each day.
        var spy = QuantConnect.Symbol.Create("SPY", SecurityType.Equity, Market.USA);
        Schedule.On(
            DateRules.EveryDay(spy),
            TimeRules.AfterMarketOpen(spy, 1),
            Rebalance
        );
    }

    /// <summary>
    /// Selects the 250 non-penny stocks with the highest dollar volume.
    /// </summary>
    /// <param name="fundamentals">The fundamental data of the candidate assets.</param>
    /// <returns>The Symbols of the selected assets.</returns>
    private IEnumerable<Symbol> SelectAssets(IEnumerable<Fundamental> fundamentals)
    {
        // Select the non-penny stocks with the highest dollar volume,
        // since they have more stable price (lower risk) and more
        // informed insights from high market activities
        return fundamentals
            .Where(x => x.HasFundamentalData && x.Price > 4)
            .OrderBy(x => x.DollarVolume)
            .TakeLast(250)
            .Select(x => x.Symbol);
    }

    /// <summary>
    /// Caches the Consensus data of the current Slice under the Symbol of each underlying Equity.
    /// </summary>
    /// <param name="slice">The current Slice of data.</param>
    public override void OnData(Slice slice)
    {
        // Record the latest Consensus data.
        var consensus = slice.Get<EstimizeConsensus>();
        if (consensus.IsNullOrEmpty()) return;
        foreach (var kvp in consensus)
        {
            _consensusBySymbol[kvp.Key.Underlying] = kvp.Value;
        }
    }

    /// <summary>
    /// Rebalances into the assets that received Consensus data since the previous rebalance.
    /// </summary>
    private void Rebalance()
    {
        if (_consensusBySymbol.Count == 0) return;
        // If there has been new Consensus data released since the previous
        // market open, form an equal-weighted portfolio of all the assets
        // that have new Consensus data.
        // Materialize the filter once: a deferred query would be enumerated
        // again for every target when the weight reads the number of symbols.
        var symbols = _consensusBySymbol.Keys
            .Where(symbol => Securities[symbol].Price > 0)
            .ToList();
        // The division stays inside the projection so that an empty list
        // produces no targets instead of dividing by zero.
        var targets = symbols
            .Select(symbol => new PortfolioTarget(symbol, 1m / symbols.Count))
            .ToList();
        SetHoldings(targets, true);
        _consensusBySymbol.Clear();
    }

    /// <summary>
    /// Adds the Consensus subscription of assets that enter the universe and removes it when they leave.
    /// </summary>
    /// <param name="changes">The securities added to and removed from the universe.</param>
    public override void OnSecuritiesChanged(SecurityChanges changes)
    {
        foreach (dynamic security in changes.AddedSecurities)
        {
            // Requesting data for trading signal generation
            security.Consensus = AddData<EstimizeConsensus>(security.Symbol).Symbol;
            // Historical data
            var history = History<EstimizeConsensus>(security.Consensus, 100, Resolution.Daily);
        }
        foreach (dynamic security in changes.RemovedSecurities)
        {
            // Remove the Consensus data for this asset when it leaves the universe.
            RemoveSecurity(security.Consensus);
        }
    }
}
Framework Algorithm Example
The following example algorithm creates a dynamic universe of the 250 most liquid US Equities. Each morning, the algorithm emits insights for the companies that have a new EPS estimate.
from AlgorithmImports import *
class ExtractAlphaEstimizeFrameworkAlgorithm(QCAlgorithm):
    """Framework algorithm that trades liquid US Equities on Estimize Consensus data."""

    def initialize(self) -> None:
        self.set_start_date(2024, 9, 1)
        self.set_end_date(2024, 12, 31)
        self.set_cash(100000)
        # Universe of liquid US Equities.
        universe_model = LiquidEquitiesUniverseSelectionModel()
        self.add_universe_selection(universe_model)
        # Custom alpha model that emits signals according to the Estimize data.
        alpha_model = ExtractAlphaEstimizeAlphaModel()
        self.add_alpha(alpha_model)
        # Weight the positions equally to spread the capital concentration risk.
        construction_model = EqualWeightingPortfolioConstructionModel()
        self.set_portfolio_construction(construction_model)
        execution_model = ImmediateExecutionModel()
        self.set_execution(execution_model)
class LiquidEquitiesUniverseSelectionModel(FundamentalUniverseSelectionModel):
    """Universe of the 250 non-penny stocks with the highest dollar volume."""

    def select(self, algorithm: QCAlgorithm, fundamentals: List[Fundamental]) -> List[Symbol]:
        # Keep the non-penny stocks that have fundamental data, then pick the
        # 250 with the highest dollar volume: their prices are more stable
        # (lower risk) and the high market activity gives more informed insights.
        candidates = [f for f in fundamentals if f.has_fundamental_data and f.price > 4]
        candidates.sort(key=lambda f: f.dollar_volume)
        return [f.symbol for f in candidates[-250:]]
class ExtractAlphaEstimizeAlphaModel(AlphaModel):
    """Emits 30-day up insights for the assets that have new Estimize Consensus data."""

    def __init__(self) -> None:
        # Keep the state on the instance: a dictionary declared at class level
        # is shared, and mutated, by every instance of the model.
        # Add a dictionary to hold the latest Consensus data since this dataset is sparse.
        self._consensus_by_symbol = {}
        # Day of the month of the last signal generation; -1 until the first one.
        self._day = -1

    def update(self, algorithm: QCAlgorithm, slice: Slice) -> List[Insight]:
        # Record the latest Consensus data.
        consensus = slice.get(EstimizeConsensus)
        if consensus:
            for dataset_symbol, value in consensus.items():
                self._consensus_by_symbol[dataset_symbol.underlying] = value
        # Wait for the market open to rebalance
        if self._day == algorithm.time.day or not slice.bars:
            return []
        self._day = algorithm.time.day
        # If there has been new Consensus data released since the previous
        # market open, emit insights for all the assets that have new Consensus data.
        if not self._consensus_by_symbol:
            return []
        symbols = [
            symbol for symbol in self._consensus_by_symbol.keys()
            if algorithm.securities[symbol].price
        ]
        self._consensus_by_symbol.clear()
        return [Insight.price(symbol, timedelta(30), InsightDirection.UP) for symbol in symbols]

    def on_securities_changed(self, algorithm: QCAlgorithm, changes: SecurityChanges) -> None:
        for security in changes.added_securities:
            # Requesting data for trading signal generation
            security.consensus = algorithm.add_data(EstimizeConsensus, security.symbol).symbol
            # Historical data
            history = algorithm.history(security.consensus, 100, Resolution.DAILY)
        for security in changes.removed_securities:
            # Remove the Consensus data for this asset when it leaves the universe.
            algorithm.remove_security(security.consensus)
/// <summary>
/// Framework algorithm that trades liquid US Equities on Estimize Consensus data.
/// </summary>
public class ExtractAlphaEstimizeFrameworkAlgorithm : QCAlgorithm
{
    /// <summary>
    /// Sets the backtest period and cash, then wires up the framework models.
    /// </summary>
    public override void Initialize()
    {
        SetStartDate(2024, 9, 1);
        SetEndDate(2024, 12, 31);
        SetCash(100000);

        // Universe of liquid US Equities.
        var universeModel = new LiquidEquitiesUniverseSelectionModel();
        AddUniverseSelection(universeModel);

        // Custom alpha model that emits signals according to the Estimize data.
        var alphaModel = new ExtractAlphaEstimizeAlphaModel();
        AddAlpha(alphaModel);

        // Weight the positions equally to spread the capital concentration risk.
        var constructionModel = new EqualWeightingPortfolioConstructionModel();
        SetPortfolioConstruction(constructionModel);

        var executionModel = new ImmediateExecutionModel();
        SetExecution(executionModel);
    }
}
/// <summary>
/// Universe of the 250 non-penny stocks with the highest dollar volume.
/// </summary>
public class LiquidEquitiesUniverseSelectionModel : FundamentalUniverseSelectionModel
{
    /// <summary>
    /// Selects the assets of the universe from the fundamental data.
    /// </summary>
    /// <param name="algorithm">The algorithm instance.</param>
    /// <param name="fundamentals">The fundamental data of the candidate assets.</param>
    /// <returns>The Symbols of the selected assets.</returns>
    public override IEnumerable<Symbol> Select(QCAlgorithm algorithm, IEnumerable<Fundamental> fundamentals)
    {
        // Keep the non-penny stocks that have fundamental data: their prices are
        // more stable (lower risk).
        var nonPennyStocks = fundamentals.Where(f => f.HasFundamentalData && f.Price > 4);
        // Pick the 250 with the highest dollar volume: the high market activity
        // gives more informed insights.
        var mostLiquid = nonPennyStocks.OrderBy(f => f.DollarVolume).TakeLast(250);
        return mostLiquid.Select(f => f.Symbol);
    }
}
/// <summary>
/// Emits 30-day up insights for the assets that have new Estimize Consensus data.
/// </summary>
public class ExtractAlphaEstimizeAlphaModel: AlphaModel
{
    // The dataset is sparse, so cache the latest Consensus data point of each asset.
    private Dictionary<Symbol, EstimizeConsensus> _consensusBySymbol = new();
    // Day of the month of the last signal generation; -1 until the first one.
    private int _day = -1;

    /// <summary>
    /// Caches the Consensus data of the Slice and, once per day, emits insights for the cached assets.
    /// </summary>
    /// <param name="algorithm">The algorithm instance.</param>
    /// <param name="slice">The current Slice of data.</param>
    /// <returns>The new insights; empty when there is nothing to emit.</returns>
    public override IEnumerable<Insight> Update(QCAlgorithm algorithm, Slice slice)
    {
        // Cache the latest Consensus data under the Symbol of the underlying Equity.
        var consensus = slice.Get<EstimizeConsensus>();
        if (!consensus.IsNullOrEmpty())
        {
            foreach (var entry in consensus)
            {
                var underlying = entry.Key.Underlying;
                _consensusBySymbol[underlying] = entry.Value;
            }
        }

        // Emit at most once per day, and only when the Slice carries price bars.
        if (_day == algorithm.Time.Day || slice.Bars.Count == 0)
        {
            return new List<Insight>();
        }
        _day = algorithm.Time.Day;

        // Nothing to emit unless Consensus data arrived since the previous emission.
        if (_consensusBySymbol.Count == 0)
        {
            return new List<Insight>();
        }

        // Build the insights before the cache is cleared, for the assets with a non-zero price.
        var insights = _consensusBySymbol.Keys
            .Where(symbol => algorithm.Securities[symbol].Price != 0)
            .Select(symbol => Insight.Price(symbol, TimeSpan.FromDays(30), InsightDirection.Up))
            .ToList();
        _consensusBySymbol.Clear();
        return insights;
    }

    /// <summary>
    /// Adds the Consensus subscription of assets that enter the universe and removes it when they leave.
    /// </summary>
    /// <param name="algorithm">The algorithm instance.</param>
    /// <param name="changes">The securities added to and removed from the universe.</param>
    public override void OnSecuritiesChanged(QCAlgorithm algorithm, SecurityChanges changes)
    {
        foreach (dynamic added in changes.AddedSecurities)
        {
            // Subscribe to the Consensus data that generates the trading signal
            // and keep the dataset Symbol on the security object.
            added.Consensus = algorithm.AddData<EstimizeConsensus>(added.Symbol).Symbol;
            // Example of a historical data request for the new subscription.
            var history = algorithm.History<EstimizeConsensus>(added.Consensus, 100, Resolution.Daily);
        }
        foreach (dynamic removed in changes.RemovedSecurities)
        {
            // Drop the Consensus subscription when the asset leaves the universe.
            algorithm.RemoveSecurity(removed.Consensus);
        }
    }
}
Data Point Attributes
The Estimize dataset provides EstimizeConsensus, EstimizeEstimate, and EstimizeRelease objects.
EstimizeConsensus Attributes
EstimizeConsensus objects have the following attributes:
EstimizeEstimate Attributes
EstimizeEstimate objects have the following attributes:
EstimizeRelease Attributes
EstimizeRelease objects have the following attributes: