# Overall Statistics
# (c) 2021 Ostirion.net
# This code is licensed under MIT license (see LICENSE for details)


import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import adfuller
from scipy.stats import entropy


def compute_weights(d: float,
                    size: int) -> pd.DataFrame:
    '''
    Build the weight vector used for fractional differentiation.

    Starting from w_0 = 1, every further weight follows the recurrence
    w_k = -w_(k-1) / k * (d - k + 1). The weights are returned in reverse
    order, so the last row always holds w_0 = 1.
    Args:
        d (float): Fractional differentiation value.
        size (int): Length of the data series; one weight is produced per
                    point, and w_0 is always present.
    Returns:
        pd.DataFrame: Single-column dataframe with the weights, w_0 last.
    '''

    weights = [1.0]
    for lag in range(1, size):
        previous = weights[-1]
        weights.append(-previous / lag * (d - lag + 1))

    # Flip so that the weight applied to the most recent point comes last.
    newest_last = np.array(weights)[::-1]
    return pd.DataFrame(newest_last.reshape(-1, 1))


def standard_frac_diff(df: pd.DataFrame,
                       d: float,
                       thres: float=.01) -> pd.DataFrame:
    '''
    Compute the d fractional difference of the series with an expanding
    window of weights.

    The value stored for a row is the dot product of the most recent weights
    with every observation located before that row; the row's own
    observation is not part of the sum.
    Args:
        df (pd.DataFrame): Dataframe with series to be differentiated in a single
                           column. With several columns, later columns
                           overwrite the values of earlier ones.
        d (float): Order of differentiation.
        thres (float): threshold value to drop non-significant weights. The
                       number of leading rows dropped equals the number of
                       normalised cumulative absolute weights above it.
    Returns:
        pd.DataFrame: Dataframe with a single 'Frac_diff' column, indexed by
                      the labels of the rows that received a value. Rows
                      whose original observation is not finite are left out.
    '''

    weights = compute_weights(d, len(df)).to_numpy()
    cum_weights = np.cumsum(np.abs(weights[:, 0]))
    cum_weights /= cum_weights[-1]
    skip = int((cum_weights > thres).sum())
    # Position 0 has no earlier observation to weigh, so never start below 1.
    start = max(skip, 1)

    # Keyed by row position inside df, so the final index is built only from
    # rows that were actually computed (skipped rows no longer break it).
    results = {}
    for name in df.columns:
        column = df[name]
        filled = column.ffill()
        # Positions of df that still hold data after the forward fill; only
        # leading gaps remain missing at this point.
        kept = np.flatnonzero(filled.notna().to_numpy())
        series_f = filled.to_numpy()[kept]
        raw = column.to_numpy()
        for idx in range(start, len(series_f)):
            pos = kept[idx]
            if not np.isfinite(raw[pos]):
                continue
            results[pos] = np.dot(weights[-idx:].T, series_f[:idx])[0]

    positions = np.array(list(results), dtype=np.intp)
    return pd.DataFrame({'Frac_diff': list(results.values())},
                        index=df.index.take(positions),
                        dtype=float)


def compute_weights_fixed_window(d: float,
                                 threshold: float=1e-5) -> pd.DataFrame:
    '''
    Compute the weights of individual data points
    for fractional differentiation with fixed window.

    Weights follow w_k = -w_(k-1) / k * (d - k + 1) starting at w_0 = 1 and
    generation stops at the first weight whose absolute value is below the
    threshold (that weight is not kept). The result is in reverse order, so
    the last row holds w_0 = 1.

    NOTE: the loop only ends once a weight falls below the threshold. For
    d = -1 every weight equals 1, so a threshold of 1 or less never stops it.
    Args:
        d (float): Fractional differentiation value.
        threshold (float): Minimum weight to calculate.
    Returns:
        pd.DataFrame: Dataframe containing the weights for each point.
    Raises:
        ValueError: If threshold is not a positive number or d is not finite;
                    in both cases the weight generation would never stop.
    '''

    # `not threshold > 0` also rejects NaN, which would compare False forever.
    if not threshold > 0:
        raise ValueError("Threshold must be a positive number.")
    if not np.isfinite(d):
        raise ValueError("Differentiation order d must be finite.")

    w = [1.0]
    k = 1
    while True:
        v = -w[-1]/k*(d-k+1)
        if abs(v) < threshold:
            break
        w.append(v)
        k += 1

    w = np.array(w[::-1]).reshape(-1, 1)
    return pd.DataFrame(w)


def fixed_window_fracc_diff(df: pd.DataFrame,
                            d: float,
                            threshold: float=1e-5) -> pd.DataFrame:
    '''
    Compute the d fractional difference of the series with
    a fixed width window. It falls back to standard fractional
    differentiation when there are more weights than data points.

    The value stored for a row is the dot product of the weights with the
    window of observations that ends just before that row; the row's own
    observation is not part of the sum.

    Args:
        df (pd.DataFrame): Dataframe with series to be differentiated in a single
                           column. With several columns, later columns
                           overwrite the values of earlier ones.
        d (float): Order of differentiation.
        threshold (float): threshold value to drop non-significant weights.
                           It is also forwarded as `thres` when falling back
                           to standard_frac_diff.
    Returns:
        pd.DataFrame: Dataframe with a single 'Frac_diff' column, indexed by
                      the labels of the rows that received a value. Rows
                      whose original observation is not finite are left out.
    '''

    weights = compute_weights_fixed_window(d, threshold).to_numpy()
    width = len(weights)
    # The same full weight vector is applied to every window.
    weight_row = weights.T

    # Keyed by row position inside df, so the final index is built only from
    # rows that were actually computed (skipped rows no longer break it).
    results = {}
    for name in df.columns:
        column = df[name]
        filled = column.ffill()
        # Positions of df that still hold data after the forward fill; only
        # leading gaps remain missing at this point.
        kept = np.flatnonzero(filled.notna().to_numpy())
        series_f = filled.to_numpy()[kept]

        if width > len(series_f):
            return standard_frac_diff(df, d, threshold)

        raw = column.to_numpy()
        for idx in range(width, len(series_f)):
            pos = kept[idx]
            if not np.isfinite(raw[pos]):
                continue
            results[pos] = np.dot(weight_row, series_f[idx-width:idx])[0]

    positions = np.array(list(results), dtype=np.intp)
    return pd.DataFrame({'Frac_diff': list(results.values())},
                        index=df.index.take(positions),
                        dtype=float)


def find_stat_series(df: pd.DataFrame,
                     threshold: float=0.0001,
                     diffs: np.linspace=np.linspace(0.05, 0.95, 19),
                     p_value: float=0.05) -> pd.DataFrame:
    '''
    Return the first fractionally differentiated series whose ADF test
    p-value is below the given limit.

    Candidate orders are tried in the order they appear in diffs; a candidate
    equal to zero is ignored. The time series must be a single column
    dataframe.
    Args:
        df (pd.DataFrame): Dataframe with series to be differentiated.
        threshold (float): threshold value to drop non-significant weights.
        diffs (np.linspace): Space for candidate d values.
        p_value (float): ADF test p-value limit for rejection of null
                         hypothesis.
    Returns:
        pd.DataFrame: Differentiated series for the first accepted candidate,
                      with its column renamed to 'd=<value>'. None is
                      returned when no candidate passes the test.
    '''

    candidate_orders = (order for order in diffs if not order == 0)
    for order in candidate_orders:
        differenced = fixed_window_fracc_diff(df, order, threshold)
        adf_output = adfuller(differenced,
                              maxlag=1,
                              regression='c',
                              autolag=None)
        # Element 1 of the adfuller result is the p-value of the test.
        adf_p_value = adf_output[1]
        if not adf_p_value < p_value:
            continue
        differenced.columns = ['d='+str(order)]
        return differenced

    return None

def compute_vol(df: pd.DataFrame,
                span: int=100) -> pd.DataFrame:
    '''
    Compute period volatility of returns as exponentially weighted
    moving standard deviation.

    Missing prices are forward-filled on a copy before returns are computed;
    the dataframe passed in is not modified.
    Args:
        df (pd.DataFrame): Dataframe with price series in a single column.
        span (int): Span for exponential weighting.
    Returns:
        pd.DataFrame: Dataframe containing volatility estimates.
    '''
    # ffill() returns a new object, so the caller's data stays untouched.
    filled = df.ffill()
    returns = filled.pct_change()
    return returns.ewm(span=span).std()


def triple_barrier_labels(
    df: pd.DataFrame,
    t: int,
    upper: float=None,
    lower: float=None,
    devs: float=2.5,
    join: bool=False,
    span: int=100) -> pd.DataFrame:
    '''
    Compute the triple barrier label for a price time series.

    For every labelled row, the cumulative sum of the t percentage returns
    starting at that row is compared with an upper and a lower return limit.
    The label is 1 when the upper limit is reached first, -1 when the lower
    limit is reached first and 0 when neither is reached inside the window.
    Rows whose window or limits are not finite, and the last t + 1 rows,
    are labelled NaN. The input dataframe is not modified.

    NOTE(review): the window is returns.iloc[idx:idx+t], so it starts with
    the return realised at the labelled row itself - confirm this is the
    intended look-ahead window.
    Args:
        df (pd.DataFrame): Dataframe with price series in a single column.
        t (int): Future periods to obtain the label for.
        upper (float): Returns for upper limit.
        lower (float): Returns for lower limit.
        devs (float): Standard deviations to set the upper and lower return
                      limits to when no limits passed.
        join (bool): Return a join of the input dataframe and the labels.
        span (int): Span for exponential weighting.
    Returns:
        pd.DataFrame: Dataframe containing labels and optionally (join=True)
                      the forward-filled input values.
    Raises:
        ValueError: If t is lower than 1 or lower is a positive value.
    '''
    # Incorrect time delta:
    if t < 1:
        raise ValueError("Look ahead time invalid, t<1.")
    # Lower limit must be negative:
    if lower is not None and lower > 0:
        raise ValueError("Lower limit must be a negative value.")

    # Work on a forward-filled copy so the caller's data is never modified.
    prices = df.ffill()
    returns = prices.pct_change()
    n_rows = len(prices)

    # The volatility of a row only depends on that row and earlier ones, so a
    # single pass over the whole series yields the same per-row values as
    # recomputing it for every window.
    vol_values = None
    if upper is None or lower is None:
        vol_values = np.asarray(compute_vol(prices, span), dtype=float)

    # Object dtype keeps integer labels side by side with NaN placeholders.
    label_values = np.full(n_rows, np.nan, dtype=object)

    for idx in range(0, n_rows-1-t):
        window = returns.iloc[idx:idx+t]
        path = np.asarray(window.cumsum(), dtype=float)
        path = path.reshape(len(window), -1)

        if not np.all(np.isfinite(path)):
            continue

        u = vol_values[idx]*devs if upper is None else upper
        l = -vol_values[idx]*devs if lower is None else lower

        if not (np.all(np.isfinite(u)) and np.all(np.isfinite(l))):
            continue

        up_steps = np.flatnonzero((path >= u).any(axis=1))
        down_steps = np.flatnonzero((path <= l).any(axis=1))

        # The barrier touched first decides the label; the upper barrier
        # wins when both are touched at the same step.
        if up_steps.size and (not down_steps.size
                              or up_steps[0] <= down_steps[0]):
            label_values[idx] = 1
        elif down_steps.size:
            label_values[idx] = -1
        else:
            label_values[idx] = 0

    labels = pd.DataFrame({'Label': label_values}, index=prices.index)

    if join:
        return prices.join(labels)

    return labels


def get_entropic_labels(df: pd.DataFrame,
               side: str = 'max',
               future_space: np.ndarray = np.linspace(2,90,40, dtype=int),
               tbl_settings: dict = None) -> pd.DataFrame:
    '''
    Compute the series of triple barrier labels for a price series that
    results in the maximum or minimum entropy for label distribution.

    Labels are generated once per window length in future_space, the entropy
    of each normalised label frequency table is computed, and the labels of
    the first window with the highest (or lowest) entropy are returned.

    Args:
        df (pd.Dataframe): Dataframe with price series in a single column.
        side (str): 'max' or 'min' to select maximum or minimum entropies.
                    'min' entropy may not result in usable data.
        future_space (np.ndarray): Space of future windows to analyze.
        tbl_settings (dict): Dictionary with settings for triple_barrier_labels
                             function. None (the default) means no extra
                             settings.

    Returns:
        pd.DataFrame: Dataframe with the selected entropy distribution of
                      labels; its column is named 't_delta=<window>'.

    Raises:
        ValueError: If side is not 'max' or 'min', or future_space is empty.
    '''

    if side not in ['max', 'min']:
        raise ValueError("Side must be 'max' or 'min'.")

    settings = {} if tbl_settings is None else tbl_settings

    # Labels for every candidate look-ahead window:
    labels_by_window = {}
    for window in future_space:
        labels_by_window[window] = triple_barrier_labels(df, window, **settings)

    if not labels_by_window:
        raise ValueError("future_space must contain at least one window.")

    # Entropy of the normalised label frequencies of each window:
    entropies = {}
    for window, labels in labels_by_window.items():
        frequencies = labels.squeeze().value_counts(normalize=True)
        entropies[window] = entropy(frequencies)

    # max/min with a key return the first window reaching the extreme value.
    select = max if side == 'max' else min
    t = select(entropies, key=entropies.get)

    e_labels = labels_by_window[t]
    e_labels.columns = ['t_delta='+str(t)]
    return e_labels
'''
MIT License

Copyright (c) 2021 Ostirion.net

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
'''

class EmptyAlgoToShareNotebooks(QCAlgorithm):
    '''
    Minimal algorithm: it configures a start date and a cash amount and
    takes no action when data arrives.
    '''

    def Initialize(self):
        '''Set the start date (2020-12-01) and the strategy cash (1).'''
        start_year, start_month, start_day = 2020, 12, 1
        self.SetStartDate(start_year, start_month, start_day)
        self.SetCash(1)

    def OnData(self, data):
        '''Ignore the incoming data; nothing is done with it.'''
        return None
# (c) 2021 Ostirion.net
# This code is licensed under MIT license (see LICENSE for details)

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import base64
import matplotlib.image as image
import matplotlib.gridspec as gridspec


# Small Ostirion Logo as PNG string:
SMALL_LOGO = "iVBORw0KGgoAAAANSUhEUgAAADAAAAAqCAYAAAD1T9h6AAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAAEnQAABJ0Ad5mH3gAAAK6SURBVGhD7ZnPavpAEMdXDx4EqYiIqO3JFxAVwb8HvVjPfQGfwENBKHjTS8GXaC0iePOmqEfFfy8gFIroTURBqKJNTX5j0WY2Was1v4If+Mpk4szsaHbJJipCCLfRn+UkDVitVnJ7e0uur68FGQwGYjQa4eymiEpFFosFmU6nZDQakX6/TwaDASkWi/CN4+AbOFjxeJyr1+vcKcjn81wsFkPrMAh1UvXy8gJlf4dMJoPWlRDqFCmVSkGJ8xAIBNBxIEKdeyqXy5D2vNzd3aHj+SbU+aVsNgvplMHpdKLj2kpyFbJYLGQ4HMIRO7PZTIibz+fCCsRzdXVF7Ha7YB/KNgcNUVdb9Xo9+B3keXh44HQ6HZrnuzweD1coFCBSnvv7ezQPCHUKYqHb7aKxLNJqtZBFmuVyicaDUKcwgVjAYg8Rf42zgMXyUm8+UBwOB1h0np+fwfo5m38QLGlo46E2YDabwaLDWlyO1WoFFh2NRgPWPtQG1us1WHRYCrOgVlOH8cW/q0iMfKQEcsvbOTiqgf+BSwNKc2lAaS4NKM2lAaW5NKA0lwaUhtoAy53mx8cHWMpBbYBlP6DX68E6DpZ9BW3PQG1gPB6DRSccDoN1HLTd1i6TyQQsMaKNMi/+YSsLJpMJjWdVOp2GTNJgsSDUKYiVRCKBxsspl8tBBmlarRYaz0vyyVylUiGRSASO5Hl7eyONRoO0223hEnx/fxcWA36y63Q64Umfy+UiwWDwoPnjdrtJp9OBIzGirnalNLVaDR3XjlDnl0KhEKQ6P7PZDB3TN6HOPfn9fkh5PqrVKjoWRKgTValUgvS/x2bucF6vF61PEeqUVDKZ5F5fX6HkaXh8fORubm7QelKSXIVYiEajxOfzCeKf/9tsNjiDw7+p5N9S8itVs9kkT09PcOZnHN2A0vzxu1FCPgGAb5goqktPowAAAABJRU5ErkJggg=="
# Raw bytes of the logo, decoded from the base64 string at import time.
imgdata = base64.b64decode(SMALL_LOGO)
# Path of the logo image file; plot_df() reads the logo from it.
filename = 'small_logo.png'

# The snippet below would write the logo bytes to `filename`. It is disabled
# by being wrapped in a string literal (note also the '#' inside "o#pen").
# Remove the quotes and that '#' to use notebook plots:
'''with o#pen(filename, 'wb') as f:
    f.write(imgdata)'''


def plot_df(df, color='blue', size=(16, 7), legend='Close Price', y_label='Price in USD', title=None, kind='line'):
    '''
    Plot a dataframe on a dark background with the logo and site name.

    The logo is read from the module-level `filename`, so that image file
    must exist when the function is called.
    Args:
        df: Data to plot; its `plot` method is used.
        color: Color passed to the plot call.
        size: Figure size stored in matplotlib's rcParams.
        legend: Currently unused; kept so existing calls keep working.
        y_label: Label for the y axis.
        title: Title of the plot.
        kind: Plot kind passed to the plot call.
    '''
    logo = image.imread(filename)

    plt.style.use('dark_background')
    plt.rcParams["figure.figsize"] = size

    ax = df.plot(kind=kind, color=color)
    ax.figure.figimage(logo, 0, 0, alpha=1.0, zorder=1)

    plt.title(title)
    plt.ylabel(y_label)
    plt.text(0.01, 0.01, 'www.ostirion.net', fontsize=15, transform=ax.transAxes)

    # Half as many legend columns as plotted columns, but never fewer than
    # one: a single-column frame used to request zero legend columns.
    n_columns = len(getattr(df, 'columns', ()))
    plt.legend(ncol=max(1, n_columns // 2))

    plt.xticks(rotation=45)
    plt.show()
    
def plot_corr_hm(df, title='Title', size=(16, 7), annot = True):
    '''
    Show the correlation matrix of df as a heatmap on a dark background.

    The upper triangle (diagonal included) is masked out.
    Args:
        df: Dataframe whose column correlations are plotted.
        title: Title of the heatmap.
        size: Figure size stored in matplotlib's rcParams.
        annot: Whether the cells are annotated with their values.
    '''
    correlations = df.corr()

    plt.style.use('dark_background')
    plt.rcParams["figure.figsize"] = size

    hidden_cells = np.triu(np.ones_like(correlations, dtype=bool))
    palette = sns.color_palette("RdBu")
    axes = sns.heatmap(
        correlations,
        mask=hidden_cells,
        vmax=.3,
        center=0,
        cmap=palette,
        annot=annot,
        square=True,
        linewidths=0,
        cbar_kws={"shrink": .5},
        fmt='g',
    )
    axes.set_title(title)

    # Horizontal row labels, vertical column labels.
    plt.setp(axes.get_yticklabels(), rotation=0)
    plt.setp(axes.get_xticklabels(), rotation=90)
    plt.show()

def plot_cm(df, title='Title', size=(16,7)):
    '''
    Draw df as an annotated heatmap with 'Predicted' and 'True' axis labels.

    The figure is drawn but not shown by this function.
    Args:
        df: Matrix-like data passed to the heatmap.
        title: Title of the heatmap.
        size: Figure size stored in matplotlib's rcParams.
    '''
    plt.style.use('dark_background')
    plt.rcParams["figure.figsize"] = size

    palette = sns.color_palette("Blues")
    axes = sns.heatmap(
        df,
        cmap=palette,
        annot=True,
        linewidths=0,
        cbar_kws={"shrink": .5},
        fmt='g',
    )
    axes.set_title(title)

    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.setp(axes.get_xticklabels(), rotation=0)

def plot_hm(df, title='Title', size=(16, 7), annot = True, x_rot=90):
    '''
    Show df as a heatmap on a dark background.

    Args:
        df: Matrix-like data passed to the heatmap.
        title: Title of the heatmap.
        size: Figure size stored in matplotlib's rcParams.
        annot: Whether the cells are annotated with their values.
        x_rot: Rotation applied to the x tick labels.
    '''
    plt.style.use('dark_background')
    plt.rcParams["figure.figsize"] = size

    palette = sns.color_palette("RdBu")
    axes = sns.heatmap(
        df,
        vmax=.3,
        center=0,
        cmap=palette,
        annot=annot,
        square=True,
        linewidths=0,
        cbar_kws={"shrink": .5},
        fmt='g',
    )
    axes.set_title(title)

    # Row labels stay horizontal; column labels use the requested rotation.
    plt.setp(axes.get_yticklabels(), rotation=0)
    plt.setp(axes.get_xticklabels(), rotation=x_rot)
    plt.show()