Back

Pairs Copula - Forex - Duplicate entries error

Hey,

I'm trying to alter one of the algo's from the strategy library from equities to forex. I've only changed the symbols lines, and I'm getting the error: 

Runtime Error: ValueError : Index contains duplicate entries, cannot reshape
at PairSelection in main.py:line 140
at _get_historical_returns in main.py:line 185
:: history = history.close.unstack(level=0)
ValueError : Index contains duplicate entries, cannot reshape

Any ideas on the error? Here is the code.

import numpy as np
from scipy import stats
from statsmodels.distributions.empirical_distribution import ECDF
from scipy.stats import kendalltau, pearsonr, spearmanr
from scipy.optimize import minimize
from scipy.integrate import quad
import sys
from collections import deque


class CopulaPairsTradingAlgorithm(QCAlgorithm):

def Initialize(self):
'''Initialize algorithm and add universe'''

self.SetStartDate(2010, 1, 1)
self.SetEndDate(2019, 9, 1)
self.SetCash(100000)

self.numdays = 1000 # length of formation period which determine the copula we use
self.lookbackdays = 250 # length of history data in trading period
self.cap_CL = 0.95 # cap confidence level
self.floor_CL = 0.05 # floor confidence level
self.weight_v = 0.5 # desired holding weight of asset v in the portfolio, adjusted to avoid insufficient buying power
self.coef = 0 # to be calculated: requested ratio of quantity_u / quantity_v
self.window = {} # stores historical price used to calculate trading day's stock return

self.day = 0 # keep track of current day for daily rebalance
self.month = 0 # keep track of current month for monthly recalculation of optimal trading pair
self.pair = [] # stores the selected trading pair

# Select optimal trading pair into the universe
self.UniverseSettings.Resolution = Resolution.Daily
self.AddUniverse('PairUniverse', self.PairSelection)


def OnData(self, slice):
'''Main event handler. Implement trading logic.'''

self.SetSignal(slice) # only executed at first day of each month

# Daily rebalance
if self.Time.day == self.day:
return

long, short = self.pair[0], self.pair[1]

# Update current price to trading pair's historical price series
for kvp in self.Securities:
symbol = kvp.Key
if symbol in self.pair:
price = kvp.Value.Price
self.window[symbol].append(price)

if len(self.window[long]) < 2 or len(self.window[short]) < 2:
return

# Compute the mispricing indices for u and v by using estimated copula
MI_u_v, MI_v_u = self._misprice_index()

# Placing orders: if long is relatively underpriced, buy the pair
if MI_u_v < self.floor_CL and MI_v_u > self.cap_CL:

self.SetHoldings(short, -self.weight_v, False, f'Coef: {self.coef}')
self.SetHoldings(long, self.weight_v * self.coef * self.Portfolio[long].Price / self.Portfolio[short].Price)

# Placing orders: if short is relatively underpriced, sell the pair
elif MI_u_v > self.cap_CL and MI_v_u < self.floor_CL:

self.SetHoldings(short, self.weight_v, False, f'Coef: {self.coef}')
self.SetHoldings(long, -self.weight_v * self.coef * self.Portfolio[long].Price / self.Portfolio[short].Price)

self.day = self.Time.day


def SetSignal(self, slice):
'''Computes the mispricing indices to generate the trading signals.
It's called on first day of each month'''

if self.Time.month == self.month:
return

## Compute the best copula

# Pull historical log returns used to determine copula
logreturns = self._get_historical_returns(self.pair, self.numdays)
x, y = logreturns[str(self.pair[0])], logreturns[str(self.pair[1])]

# Convert the two returns series to two uniform values u and v using the empirical distribution functions
ecdf_x, ecdf_y = ECDF(x), ECDF(y)
u, v = [ecdf_x(a) for a in x], [ecdf_y(a) for a in y]

# Compute the Akaike Information Criterion (AIC) for different copulas and choose copula with minimum AIC
tau = kendalltau(x, y)[0] # estimate Kendall'rank correlation
AIC ={} # generate a dict with key being the copula family, value = [theta, AIC]

for i in ['clayton', 'frank', 'gumbel']:
param = self._parameter(i, tau)
lpdf = [self._lpdf_copula(i, param, x, y) for (x, y) in zip(u, v)]
# Replace nan with zero and inf with finite numbers in lpdf list
lpdf = np.nan_to_num(lpdf)
loglikelihood = sum(lpdf)
AIC[i] = [param, -2 * loglikelihood + 2]

# Choose the copula with the minimum AIC
self.copula = min(AIC.items(), key = lambda x: x[1][1])[0]

## Compute the signals

# Generate the log return series of the selected trading pair
logreturns = logreturns.tail(self.lookbackdays)
x, y = logreturns[str(self.pair[0])], logreturns[str(self.pair[1])]

# Estimate Kendall'rank correlation
tau = kendalltau(x, y)[0]

# Estimate the copula parameter: theta
self.theta = self._parameter(self.copula, tau)

# Simulate the empirical distribution function for returns of selected trading pair
self.ecdf_x, self.ecdf_y = ECDF(x), ECDF(y)

# Run linear regression over the two history return series and return the desired trading size ratio
self.coef = stats.linregress(x,y).slope

self.month = self.Time.month


def PairSelection(self, date):
'''Selects the pair of stocks with the maximum Kendall tau value.
It's called on first day of each month'''

if date.month == self.month:
return Universe.Unchanged

symbols = [ Symbol.Create(x, SecurityType.Forex, Market.Oanda)
for x in ["EURUSD","GBPUSD","USDCAD", "AUDUSD", "NZDUSD"] ]

logreturns = self._get_historical_returns(symbols, self.lookbackdays)

tau = 0
for i in range(0, len(symbols), 2):

x = logreturns[str(symbols[i])]
y = logreturns[str(symbols[i+1])]

# Estimate Kendall rank correlation for each pair
tau_ = kendalltau(x, y)[0]

if tau > tau_:
continue

tau = tau_
self.pair = symbols[i:i+2]

return [x.Value for x in self.pair]


def OnSecuritiesChanged(self, changes):
'''Warms up the historical price for the newly selected pair.
It's called when current security universe changes'''

for security in changes.RemovedSecurities:
symbol = security.Symbol
self.window.pop(symbol)
if security.Invested:
self.Liquidate(symbol, "Removed from Universe")

for security in changes.AddedSecurities:
self.window[security.Symbol] = deque(maxlen = 2)

# Get historical prices
history = self.History(list(self.window.keys()), 2, Resolution.Daily)
history = history.close.unstack(level=0)
for symbol in self.window:
self.window[symbol].append(history[str(symbol)][0])


def _get_historical_returns(self, symbols, period):
'''Get historical returns for a given set of symbols and a given period
'''

history = self.History(symbols, period, Resolution.Daily)
history = history.close.unstack(level=0)
return (np.log(history) - np.log(history.shift(1))).dropna()


def _parameter(self, family, tau):
''' Estimate the parameters for three kinds of Archimedean copulas
according to association between Archimedean copulas and the Kendall rank correlation measure
'''

if family == 'clayton':
return 2 * tau / (1 - tau)

elif family == 'frank':

'''
debye = quad(integrand, sys.float_info.epsilon, theta)[0]/theta is first order Debye function
frank_fun is the squared difference
Minimize the frank_fun would give the parameter theta for the frank copula
'''

integrand = lambda t: t / (np.exp(t) - 1) # generate the integrand
frank_fun = lambda theta: ((tau - 1) / 4.0 - (quad(integrand, sys.float_info.epsilon, theta)[0] / theta - 1) / theta) ** 2

return minimize(frank_fun, 4, method='BFGS', tol=1e-5).x

elif family == 'gumbel':
return 1 / (1 - tau)


def _lpdf_copula(self, family, theta, u, v):
'''Estimate the log probability density function of three kinds of Archimedean copulas
'''

if family == 'clayton':
pdf = (theta + 1) * ((u ** (-theta) + v ** (-theta) - 1) ** (-2 - 1 / theta)) * (u ** (-theta - 1) * v ** (-theta - 1))

elif family == 'frank':
num = -theta * (np.exp(-theta) - 1) * (np.exp(-theta * (u + v)))
denom = ((np.exp(-theta * u) - 1) * (np.exp(-theta * v) - 1) + (np.exp(-theta) - 1)) ** 2
pdf = num / denom

elif family == 'gumbel':
A = (-np.log(u)) ** theta + (-np.log(v)) ** theta
c = np.exp(-A ** (1 / theta))
pdf = c * (u * v) ** (-1) * (A ** (-2 + 2 / theta)) * ((np.log(u) * np.log(v)) ** (theta - 1)) * (1 + (theta - 1) * A ** (-1 / theta))

return np.log(pdf)


def _misprice_index(self):
'''Calculate mispricing index for every day in the trading period by using estimated copula
Mispricing indices are the conditional probability P(U < u | V = v) and P(V < v | U = u)'''

return_x = np.log(self.window[self.pair[0]][-1] / self.window[self.pair[0]][-2])
return_y = np.log(self.window[self.pair[1]][-1] / self.window[self.pair[1]][-2])

# Convert the two returns to uniform values u and v using the empirical distribution functions
u = self.ecdf_x(return_x)
v = self.ecdf_y(return_y)

if self.copula == 'clayton':
MI_u_v = v ** (-self.theta - 1) * (u ** (-self.theta) + v ** (-self.theta) - 1) ** (-1 / self.theta - 1) # P(U<u|V=v)
MI_v_u = u ** (-self.theta - 1) * (u ** (-self.theta) + v ** (-self.theta) - 1) ** (-1 / self.theta - 1) # P(V<v|U=u)

elif self.copula == 'frank':
A = (np.exp(-self.theta * u) - 1) * (np.exp(-self.theta * v) - 1) + (np.exp(-self.theta * v) - 1)
B = (np.exp(-self.theta * u) - 1) * (np.exp(-self.theta * v) - 1) + (np.exp(-self.theta * u) - 1)
C = (np.exp(-self.theta * u) - 1) * (np.exp(-self.theta * v) - 1) + (np.exp(-self.theta) - 1)
MI_u_v = B / C
MI_v_u = A / C

elif self.copula == 'gumbel':
A = (-np.log(u)) ** self.theta + (-np.log(v)) ** self.theta
C_uv = np.exp(-A ** (1 / self.theta)) # C_uv is gumbel copula function C(u,v)
MI_u_v = C_uv * (A ** ((1 - self.theta) / self.theta)) * (-np.log(v)) ** (self.theta - 1) * (1.0 / v)
MI_v_u = C_uv * (A ** ((1 - self.theta) / self.theta)) * (-np.log(u)) ** (self.theta - 1) * (1.0 / u)

return MI_u_v, MI_v_u

 

Update Backtest







0

The material on this website is provided for informational purposes only and does not constitute an offer to sell, a solicitation to buy, or a recommendation or endorsement for any security or strategy, nor does it constitute an offer to provide investment advisory services by QuantConnect. In addition, the material offers no opinion with respect to the suitability of any security or specific investment. QuantConnect makes no guarantees as to the accuracy or completeness of the views expressed in the website. The views are subject to change, and may have become unreliable for various reasons, including changes in market conditions or economic circumstances. All investments involve risk, including loss of principal. You should consult with an investment professional before making any investment decisions.


Hi Michael,

This appears to be a LEAN issue. In the attached backtest logs and research notebook, we see the History call returns a duplicate entry for 2011-11-04.

EURUSD 8G 2011-11-03 20:00:00 1.382260
2011-11-04 20:00:00 1.378930
2011-11-04 20:00:00 1.378930
2011-11-06 20:00:00 1.378800

We've created a GitHub Issue to have this resolved. Track its progress here.

In the meantime, a workaround would be to remove the duplicate index values. This can be accomplished with

history = history[~history.index.duplicated(keep='first')]

Best,
Derek Melchin

1

The material on this website is provided for informational purposes only and does not constitute an offer to sell, a solicitation to buy, or a recommendation or endorsement for any security or strategy, nor does it constitute an offer to provide investment advisory services by QuantConnect. In addition, the material offers no opinion with respect to the suitability of any security or specific investment. QuantConnect makes no guarantees as to the accuracy or completeness of the views expressed in the website. The views are subject to change, and may have become unreliable for various reasons, including changes in market conditions or economic circumstances. All investments involve risk, including loss of principal. You should consult with an investment professional before making any investment decisions.


Hi Michael,

To fix this, we need to change our AddUniverse method to

self.AddUniverse(self.PairSelection)

This will allow us to return a list of Symbol objects instead of string Symbols.

We then need to change the PairSelection method to

def PairSelection(self, _):
if self.Time.month == self.month:
return Universe.Unchanged
...
return self.pair

This will construct the list of Symbol objects.

Finally, the code in the thread post above should be changed to

for security in changes.AddedSecurities:
symbol=security.Symbol
self.window[symbol] = deque(maxlen = 2)

#Get historical prices
history = self.History(symbol, 2, Resolution.Daily)

if not history.empty:
for _, close in history.loc[symbol].close.iteritems():
self.window[symbol].appendleft(close)


Note how the code now only calls the History method with a single Symbol now.

See the attached backtest and logs for reference. The DataFrame returned from the History method is no longer empty.

Best,
Derek Melchin

1

The material on this website is provided for informational purposes only and does not constitute an offer to sell, a solicitation to buy, or a recommendation or endorsement for any security or strategy, nor does it constitute an offer to provide investment advisory services by QuantConnect. In addition, the material offers no opinion with respect to the suitability of any security or specific investment. QuantConnect makes no guarantees as to the accuracy or completeness of the views expressed in the website. The views are subject to change, and may have become unreliable for various reasons, including changes in market conditions or economic circumstances. All investments involve risk, including loss of principal. You should consult with an investment professional before making any investment decisions.


Update Backtest





0

The material on this website is provided for informational purposes only and does not constitute an offer to sell, a solicitation to buy, or a recommendation or endorsement for any security or strategy, nor does it constitute an offer to provide investment advisory services by QuantConnect. In addition, the material offers no opinion with respect to the suitability of any security or specific investment. QuantConnect makes no guarantees as to the accuracy or completeness of the views expressed in the website. The views are subject to change, and may have become unreliable for various reasons, including changes in market conditions or economic circumstances. All investments involve risk, including loss of principal. You should consult with an investment professional before making any investment decisions.


Loading...

This discussion is closed