# region imports
from AlgorithmImports import *
# endregion

# config.py: global strategy settings (consumed by main.py below)
general_setting = {
    
    "If_manual_choose_replication": True, # If we manually choose hedge, then we will choose replication method based on replicate_method, Or compare different methods based on some metrix
    "replicate_method": "lasso", # "lasso" , "elasticNet", "groupSelection"
    "opt_method": 'Momentum', # "markowitz", "StockRanker", "Momentum"
    "mom_window": 7, # Calculate the window of previous n days.
    "mom_diff_weight" : 0.005,
    "rebalance_interval": "week", # biweek; week; ....


    "Manual_Hedge": True, # If we manually choose hedge, then we will notice if_hedge button, Or we can use other methods to choose if hedge or not
    "If_Hedge": False, 
    "hedge_size": 0.8,

    # set minimum weight selection
    "if_preselect": True, 
    "weight_threshold": 0.002, 

    "pct_num_group": 0.05, 
    "if_manual_set_number_lasso": True,
    "lasso_alpha": 0.3,
    "num_lasso": 10, # or it can set by calculate the min tracking error
    "if_manual_set_number_elasticNet": True,
    "elasticNet_alpha": 0.1,
    "num_elasticNet": 10, # or it can set by calculate the min tracking error
    "elasticNet_l1_ratio": 0.5,


    # Take-profit / stop-loss mechanism not yet implemented
    "Take_Profit_pct": 0.1,
    "Stop_Loss_pct": 0.05,
    
    # Regime recognition
    "regime_method": "GMM", # SMA150, SMA200, VIX, GMM 
    "regime_interval": "week",
    "regime_lookback_window": 90, 

    # To be constructed
    "Index": "SPY",
    "stockranker_feature":['roc'],




}
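
# Example: general_setting["replicate_method"] selects the replication model
# ("lasso" here); main.py reads these values in initialize().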
# region imports
from AlgorithmImports import *
# endregion

import json
import math
import os
import random
from datetime import datetime, timedelta

import numpy as np
import pandas as pd

import cvxopt as opt
from cvxopt import blas, solvers

import matplotlib as mpl
from matplotlib import cm, pyplot as plt
import seaborn as sns

import scipy
from scipy import stats
from scipy.stats import pearsonr, linregress

import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.stattools import durbin_watson

import sklearn
from sklearn.linear_model import ElasticNet, Lasso, Ridge, LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import PolynomialFeatures

import joblib
import lightgbm as lgb
from hmmlearn.hmm import GaussianHMM

from config import general_setting

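# A minimal illustrative sketch (not wired into the strategy): the ex-post
# tracking error computed inline in replicate_portfolio() below is equivalent to
#   TE = sqrt( sum((r_index - r_replica)^2) / (N - 1) )
# over daily percentage returns.
def _tracking_error_sketch(index_prices: pd.Series, replica_prices: pd.Series) -> float:
    '''Tracking error between an index price series and a replica price series.'''
    diff = (index_prices.pct_change() - replica_prices.pct_change()).dropna()
    return float(np.sqrt((diff ** 2).sum() / (len(diff) - 1)))
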
class EnhancedIndexing(QCAlgorithm):
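    '''
    SP500 enhanced-indexing algorithm: replicate the index with a sparse stock
    basket (lasso / elasticNet / groupSelection), weight the basket (markowitz /
    StockRanker / Momentum), recognize the market regime (SMA150 / SMA200 / VIX /
    GMM), and optionally hedge the portfolio with SPY puts.
    '''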

    def initialize(self):
        self.set_start_date(2024, 6, 1)
        # self.set_end_date(2016, 1, 10)
        self.set_cash(100000)

        self.spy = self.add_equity('SPY', resolution = Resolution.MINUTE)
        self.spy_sma150 = self.sma("SPY", 150)
        self.spy_sma200 = self.sma("SPY", 200)
        option = self.AddOption('SPY',Resolution.MINUTE)
        self.option_symbol = option.Symbol
        option.SetFilter(lambda universe: universe.include_weeklys())
        self.contract = None
        self.set_benchmark("SPY")
        self.contract_size = None
        self.strike = None
        self.has_contract = False
        self.curr_spy = None
        # self.set_warm_up(100)

        # Access to General Setting
        self.if_manual_rep = general_setting["If_manual_choose_replication"]
        self.replicate_method = general_setting['replicate_method']
        self.opt_method = general_setting["opt_method"]
        self.rebalance_interval = general_setting["rebalance_interval"]

        self.manual_Hedge = general_setting["Manual_Hedge"]
        if self.manual_Hedge:
            self.if_hedge = general_setting["If_Hedge"] 
        self.hedge_size = general_setting['hedge_size']


        self.pct_num_group = general_setting["pct_num_group"]
        self.lasso_alpha = general_setting['lasso_alpha']
        self.if_manual_set_number_lasso = general_setting["if_manual_set_number_lasso"]
        self.num_lasso = general_setting["num_lasso"]
        self.elasticNet_alpha = general_setting['elasticNet_alpha']
        self.elasticNet_l1_ratio = general_setting['elasticNet_l1_ratio']
        self.if_manual_set_number_elasticNet = general_setting["if_manual_set_number_elasticNet"]
        self.num_elasticNet = general_setting["num_elasticNet"]

        self.regime_interval = general_setting["regime_interval"]
        self.regime_method = general_setting["regime_method"]
        self.regime_lookback_window = general_setting["regime_lookback_window"]
        

        self.if_preselect = general_setting['if_preselect']
        self.weight_threshold = general_setting['weight_threshold']

        self.mom_window = general_setting['mom_window']

        # Add universe
        _universe = self.add_universe(self.universe.etf("SPY"))
        history = self.History(_universe, 360, Resolution.Daily)
        history = history.droplevel('symbol', axis=0)
        self.universe_dict = {}
        self.ticker_list = []
        for date, fundamentals in history.items():
            ls = []
            for fundamental in fundamentals:
                symbol = fundamental.symbol
                weight = fundamental.weight
                row = {'ticker': symbol.value, 'symbol': symbol, 'weight': weight}
                if self.if_preselect:
                    if weight > self.weight_threshold:
                        self.ticker_list.append(symbol.value)
                        self.universe_dict[symbol.value] = self.add_equity(symbol.value, resolution = Resolution.DAILY)
                        ls.append(row)
                    else:
                        continue

                else:
                    self.ticker_list.append(symbol.value)
                    self.universe_dict[symbol.value] = self.add_equity(symbol.value, resolution = Resolution.DAILY)
                    ls.append(row)
            break
        
        self.init_df = pd.DataFrame(ls)
        self.init_df['weight_adj'] = self.init_df['weight']/self.init_df['weight'].sum()
        self.weight_universe = self.init_df.set_index('ticker')['weight_adj'].to_dict()

        self.rebalance_signal = False

        self.selected_stocks  = []
        self.portfolio_weights = {}

        self.latent_states_sequence = None
        self.best_status_sequence = None

        # Add Options

        # option = self.AddOption('SPX',Resolution.Daily)
        # self.option_symbol = option.Symbol

        # option.SetFilter(timedelta(0), timedelta(150))

        # Initialize optimized weights once at startup


        self.Schedule.On(
                self.DateRules.today,
                self.TimeRules.now,
                self.rebalance_weights,
            )
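
        # The DateRules.today / TimeRules.now schedule above fires once immediately,
        # so an initial set of weights exists before the first periodic rebalance.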
        

        # Every Month rebalance
        if self.rebalance_interval == 'month':
            self.Schedule.On(
                self.DateRules.month_start(self.spy.symbol, 0), # Month Start or Month end or add days offset (daysOffset = 0), or dynamically adjust based on weekdays, earnings, FOMC ...
                self.TimeRules.after_market_open(self.spy.symbol, 10), # Before market or after market
                self.rebalance_weights,
            )

        # Every week rebalance
        if self.rebalance_interval == 'week':
            self.Schedule.On(
                self.DateRules.week_start(self.spy.symbol, 0), 
                # self.date_rules.every(DayOfWeek.THURSDAY), 
                self.TimeRules.after_market_open(self.spy.symbol, 10),# Before market or after market
                self.rebalance_weights,
            )

        if not self.manual_Hedge:

            self.Schedule.On(
                self.DateRules.today,
                self.TimeRules.now,
                self.regime_recognization,
            )

            # Every Month recognize regimes
            if self.rebalance_interval == 'month':
                self.Schedule.On(
                    self.DateRules.month_start(self.spy.symbol, 0), # Month Start or Month end or add days offset (daysOffset = 0), or dynamically adjust based on weekdays, earnings, FOMC ...
                    self.TimeRules.after_market_open(self.spy.symbol, 5), # Before market or after market
                    self.regime_recognization,
                )

            # Every week recognize regime
            if self.rebalance_interval == 'week':
                self.Schedule.On(
                    self.DateRules.week_start(self.spy.symbol, 0),
                    # self.date_rules.every(DayOfWeek.THURSDAY),
                    self.TimeRules.after_market_open(self.spy.symbol, 5), # Before market or after market
                    self.regime_recognization,
                )

        # Manually set event date function

        # date_rule = FuncDateRule(
        #     name="10th_day_of_the_month", 
        #     get_dates_function=lambda start, end: [
        #         datetime(year, month, 10) 
        #         for year in range(start.year, end.year) for month in range(1,12)
        #     ]
        # ) 

        # Every 2 week rebalance
        # if self.rebalance_interval == 'biweek':
        #     self.Schedule.On(
        #         self.DateRules.week_start(self.spy.symbol,daysOffset= 0), # Month Start or Month end or add days offset, or dynamically adjust based on weekdays, earnings, FOMC ...
        #         self.TimeRules.after_market_open(self.spy.symbol, 10),
        #         self.rebalance_weights,
        #     )

        # Cancel schedule
        # self.schedule.remove(scheduled_event)

    def regime_recognization(self):
        '''
        3 states: bullish, bearish, flat

        Methods:
            machine-learning classification (GMM/HMM)
            VIX level
            moving-average indicators (SMA150 / SMA200)
        '''
        random.seed(10)

        if self.regime_method == 'SMA200':
            if self.spy_sma200.current.value > self.spy.Close :
                self.if_hedge = True
                return
            else:
                self.if_hedge = False
                return

        if self.regime_method == 'SMA150':
            if self.spy_sma150.current.value > self.spy.Close :
                self.if_hedge = True
                return 
            else:
                self.if_hedge = False
                return 

        if self.regime_method == 'GMM':
            data_use = self.history(self.spy.symbol, timedelta(self.regime_lookback_window), Resolution.DAILY).droplevel('symbol', axis=0)
            data_use = data_use.reset_index()
            close = data_use['close']
            high = data_use['high'][5:]
            low = data_use['low'][5:]

            logreturn=(np.log(np.array(close[1:]))-np.log(np.array(close[:-1])))[4:]
            logreturn5=(np.log(np.array(close[5:]))-np.log(np.array(close[:-5])))
            diffreturn = (np.log(np.array(high))-np.log(np.array(low)))
            closeidx = close[5:]
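
            # Feature alignment: logreturn (1-day log return) is truncated by 4 so all
            # three features start at day 5, matching logreturn5 (5-day log return) and
            # diffreturn (the log high/low range).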

            # data_1 = data_use.copy()
            # data_1['log_close'] = np.log(data_1['close'])
            # data_1['log_ret'] = data_1['log_close'].diff()
            # data_1['log_high'] = np.log(data_1['high'])
            # data_1['log_low'] = np.log(data_1['low'])
            # data_1['log_ret5'] = data_1['log_close'].diff(5)
            # data_1['diff_ret'] = data_1['log_high'] - data_1['log_low']
            # data_1['closeidx'] = data_1['close']
            X = np.column_stack([logreturn,diffreturn,logreturn5])
            datelist= pd.to_datetime(data_use.time[5:])
            hmm=GaussianHMM(n_components = 4, covariance_type ='diag', n_iter = 6000, random_state = 4).fit(X)
            try:
                latent_states_sequence = hmm.predict(X)
            except Exception:
                # If the HMM cannot produce a state sequence, default to hedging
                self.if_hedge = True
                return
            # len(latent_states_sequence)


            # # PLOTTING #
            data_use_2=data_use.drop_duplicates(subset=['time'],keep='last')
            datelist= pd.to_datetime(data_use_2.time[5:])

            # # sns.set_style("white")
            # plt.figure(figsize = (15,8))

            # for i in range(hmm.n_components):
            #     state = (latent_states_sequence == i)
            #     plt.plot(datelist[state],closeidx[state],'.',label='latent state %d'%i,lw =1)
            #     plt.legend()
            #     plt.grid(1)



            data=pd.DataFrame({'datelist':datelist,'logreturn':logreturn,'state':latent_states_sequence}).set_index('datelist')
            # plt.figure(figsize=(15,8))

            max_idx = 0
            max_cumret = -10000
            for i in range(hmm.n_components):
                state = (latent_states_sequence == i)
                idx = np.append(0, state[:-1])
                data['state %d_return'%i] = data.logreturn.multiply(idx, axis = 0)
                cumret = np.exp(data['state %d_return'%i].cumsum()).iloc[-1]
                if cumret > max_cumret:
                    max_idx = i
                    max_cumret = cumret
            #     plt.plot(np.exp(data['state %d_return'%i].cumsum()),label='latent state %d'%i)
            #     plt.legend()
            #     plt.grid(1)


            # buy = (latent_states_sequence ==0)
            # buy = np.append(0,buy[:-1])
            # sell = (latent_states_sequence == 1) 
            # sell = np.append(0,sell[:-1])
            # data['backtest_return'] = data.logreturn.multiply(buy,axis=0)-data.logreturn.multiply(sell,axis=0)

            # plt.figure(figsize = (15,8))
            # plt.plot_date(datelist,np.exp(data['backtest_return'].cumsum()),'-',label='backtest result')
            # plt.legend()
            # plt.grid(1)

            self.latent_states_sequence = latent_states_sequence[-1]
            self.best_status_sequence = max_idx

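            # Hedge unless the most recent latent state is the historically
            # best-performing one.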
            if latent_states_sequence[-1] == max_idx:
                self.if_hedge = False
                

                return

            else:
                self.if_hedge = True
                return


                        
        if self.regime_method == 'VIX':
            '''
            VIX vs EMA(100)
            '''
            ...

        if self.regime_method == 'vote':
            ...

        return

    def get_historical_data(self):
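        '''
        Build four frames from ~500 days of daily closes:
        daily_1  - wide close prices per ticker,
        daily_2  - daily_1 merged with SPY closes,
        daily2_  - long format with slope_20 / ret features and quintile labels (train),
        test_df  - the most recent cross-section with the same features (test).
        '''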
        
        def slope(data):
            return linregress(np.arange(data.shape[0]),data).slope

        ticker_list = self.ticker_list

        daily_dfs = []
        daily_dfs2 = []

        for i in ticker_list:
            try:
                # daily_data = self.history([self.securities[i].symbol], timedelta(500), Resolution.DAILY).droplevel('symbol', axis=0)[['close']].rename(columns = {'close':i})
                # daily_dfs.append(daily_data)
                
                daily_data = self.history(self.securities[i].symbol, timedelta(500), Resolution.DAILY).droplevel('symbol', axis=0)[['close']]
                # test_data = self.history(universe_dict[i].symbol, start_time1, end_time1).droplevel('symbol', axis=0)[['close']]
                daily_data1 = daily_data.rename(columns= {'close':i})
                daily_data2 = daily_data.copy()
                daily_data2['ticker'] = i
                
                daily_data2['slope_20'] = daily_data2['close'].rolling(20,min_periods=20).apply(slope,raw=True)
                daily_data2['ret'] = daily_data2['close'].shift(5)/daily_data2['close'] - 1 # note: close[t-5]/close[t] - 1, the inverted trailing 5-day return used as the ranking label
                
                # test_data['ticker'] = i
                # test_data['slope_20'] = test_data['close'].rolling(20,min_periods=20).apply(slope,raw=True)
                # test_data['ret'] = test_data['close'].shift(5)/test_data['close'] - 1

                # daily_data['label1'] = pd.qcut(daily_data['label'],5, labels= [1,2,3,4,5])
                daily_dfs.append(daily_data1)
                daily_dfs2.append(daily_data2)
                # test_dfs.append(test_data)
            except Exception:
                continue
        
        
        daily_ = pd.concat(daily_dfs, axis = 1, sort = True, join = 'outer')
        daily_1 = daily_.dropna(axis=1).reset_index()
        spy_data = self.history([self.spy.symbol], timedelta(500), Resolution.DAILY).droplevel('symbol', axis=0)[['close']].rename(columns = {'close':'spy'})

        daily_2 = pd.merge(daily_1, spy_data.reset_index(), on = ['time'], how = 'left')


        daily2_ = pd.concat(daily_dfs2, axis = 0, sort = True, join = 'outer')
        daily2_ = daily2_.dropna()
        if len(self.selected_stocks) > 0:
            daily2_ = daily2_[daily2_['ticker'].isin(self.selected_stocks)]
      

        latest_date = daily2_.index[-1]
        test_df = daily2_[daily2_.index > pd.to_datetime((self.Time.date()-timedelta(30))) ]
        daily2_ = daily2_[daily2_.index <= pd.to_datetime((self.Time.date()-timedelta(30))) ]
        daily2_ = daily2_.set_index([daily2_.index, 'ticker']).sort_index()
        daily2_['label'] = daily2_.groupby(['time'])['ret'].transform(lambda x:pd.qcut(x,[0,0.2,0.4,0.6,0.8,1], labels=[1,2,3,4,5]))

        test_df = test_df.set_index([test_df.index, 'ticker']).sort_index()
        test_df['label'] = test_df.groupby(['time'])['ret'].transform(lambda x:pd.qcut(x,[0,0.2,0.4,0.6,0.8,1], labels=[1,2,3,4,5]))
        
        test_df = test_df.loc[(latest_date,slice(None)),:]

        return daily_1, daily_2, daily2_, test_df


    def get_fundamental_data(self):
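        '''
        Pull the last week of Fundamental history per ticker and return a
        DataFrame with ticker, Morningstar sector code, market cap, and a
        human-readable sector name.
        '''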

        fund_ = []
        a_time = self.Time - timedelta(7)
        b_time = self.Time

        for i in self.ticker_list:
            single_fundamental_history = self.history[Fundamental](i, a_time, b_time)
            for fundamental in single_fundamental_history:
                if fundamental.has_fundamental_data:
                    
                    symbol = fundamental.symbol
                    industry1 = fundamental.asset_classification.MorningstarSectorCode
                    market_cap = fundamental.market_cap
                    # self.debug(f"symbol is {symbol}, sector is {industry1}")
                    row = {'ticker': i , 'industry1': industry1, 'market_cap': market_cap}
                    fund_.append(row)
                else:
                    continue
            
        fund_df = pd.DataFrame(fund_)
        fund_df = fund_df.drop_duplicates(subset = 'ticker', keep = 'last')

        MORNINGSTAR_SECTOR_CODES = {
            # code 0 ('Other') is excluded: the mapping has no 0 entry
            101: 'Basic Materials',
            102: 'Consumer Cyclical',
            103: 'Financial Services',
            104: 'Real Estate',
            205: 'Consumer Defensive',
            206: 'Healthcare',
            207: 'Utilities',
            308: 'Communication Services',
            309: 'Energy',
            310: 'Industrials',
            311: 'Technology',
        }

        # Keep only rows whose sector code has a known mapping
        fund_df = fund_df[fund_df['industry1'].isin(MORNINGSTAR_SECTOR_CODES)]
        fund_df['sector'] = fund_df['industry1'].apply(lambda x: MORNINGSTAR_SECTOR_CODES[x])

        return fund_df


    def replicate_portfolio(self, method = 'lasso'):
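        '''
        Select a replicating basket for SPY. 'lasso' / 'elasticNet' rank stocks by
        absolute regression coefficient against the SPY price series;
        'groupSelection' picks the lowest-tracking-error names per
        sector / market-cap bucket. Returns a list of tickers.
        '''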
        random.seed(10)

        daily_1, daily_2, _, _  = self.get_historical_data()
        fund_df = self.get_fundamental_data()


        # train_df = daily_1[:3019]
        # test_df = daily_1[3020:]
        train_df = daily_1.copy()
        train_df = train_df.drop(columns = ['time'])
        # test_df = test_df.drop(columns = {'time'})
        train_features = train_df.copy()
        train_index = daily_2[['spy']]
        # self.debug(f"train feature length is {train_features} and train_index length is {train_features}")
        # test_features = test_df.copy()
        # test_index = spy_data.reset_index()[['spy']][3020:]
        assets_names = train_df.columns

        if method == 'lasso':

            lasso_reg = Lasso(self.lasso_alpha)
            lasso_reg.fit(train_features, train_index)

            # Get the model parameters
            intercept_lasso = lasso_reg.intercept_
            coefficients_lasso = lasso_reg.coef_

            # print("Lasso Intercept: \n", intercept_lasso)
            # print("Lasso Coefficients: \n", coefficients_lasso)

            # Evaluate the model
            y_pred_lasso = lasso_reg.predict(train_features)

            # Calculate the mean squared error of the predictions
            mse_lasso = mean_squared_error(train_index, y_pred_lasso)

            df_Lasso = pd.DataFrame(coefficients_lasso)
            df_Lasso.columns = ['weight']
            df_Lasso['symbol'] = assets_names
            df_Lasso['abs_weight'] = abs(df_Lasso['weight'])
            df_Lasso_select = df_Lasso.sort_values(by=['abs_weight'], ascending= False)
            df_Lasso_select = df_Lasso_select.head(30)
            train_lasso_feature = train_features[df_Lasso_select.symbol]
            train_lasso_feature_T = train_lasso_feature.T.reset_index()
            train_lasso_feature_T = train_lasso_feature_T.rename(columns = {'index':'symbol'})
            d = train_lasso_feature_T.drop(columns=['symbol'])

            # Calculate the replication index
            weights = np.array([list(df_Lasso_select['weight'])])
            data = np.array(d)
            result = np.dot(weights, data)
            result = result[0]
            # result = weights@data

            # Calculate the TrackingError
            df_lasso_TrackingError = pd.DataFrame(train_index)
            df_lasso_TrackingError['RepIdx'] = result
            df_lasso_TrackingError['SPY_ret'] = df_lasso_TrackingError['spy'].pct_change()
            df_lasso_TrackingError['RepIdx_ret'] = df_lasso_TrackingError['RepIdx'].pct_change()
            df_lasso_TrackingError = df_lasso_TrackingError.dropna()
            df_lasso_TrackingError['TD'] = df_lasso_TrackingError['SPY_ret'] - df_lasso_TrackingError['RepIdx_ret']
            df_lasso_TrackingError['TD_squared'] = df_lasso_TrackingError['TD']**2
            TE = np.sqrt(sum(df_lasso_TrackingError['TD_squared'])/(len(df_lasso_TrackingError)-1))
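
            # Ex-post tracking error: TE = sqrt( sum((r_SPY - r_rep)^2) / (N - 1) ).
            # The loop below repeats the construction for basket sizes 10..100 and
            # records the size with the smallest TE.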

            num_trial = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
            min_te_num = 10
            min_te = 10000
            TE_ = []
            for i in num_trial:
                df_Lasso = pd.DataFrame(coefficients_lasso)
                df_Lasso.columns = ['weight']
                df_Lasso['symbol'] = assets_names
                df_Lasso['abs_weight'] = abs(df_Lasso['weight'])
                df_Lasso_select = df_Lasso.sort_values(by=['abs_weight'], ascending= False)
                df_Lasso_select = df_Lasso_select.head(i)
                train_lasso_feature = train_features[df_Lasso_select.symbol]
                train_lasso_feature_T = train_lasso_feature.T.reset_index()
                train_lasso_feature_T = train_lasso_feature_T.rename(columns = {'index':'symbol'})
                d = train_lasso_feature_T.drop(columns=['symbol'])

                weights = np.array([list(df_Lasso_select['weight'])])
                data = np.array(d)
                result = np.dot(weights, data)
                result = result[0]

                df_lasso_TrackingError = pd.DataFrame(train_index)
                df_lasso_TrackingError['RepIdx'] = result
                df_lasso_TrackingError['SPY_ret'] = df_lasso_TrackingError['spy'].pct_change()
                df_lasso_TrackingError['RepIdx_ret'] = df_lasso_TrackingError['RepIdx'].pct_change()
                df_lasso_TrackingError = df_lasso_TrackingError.dropna()
                df_lasso_TrackingError['TD'] = df_lasso_TrackingError['SPY_ret'] - df_lasso_TrackingError['RepIdx_ret']
                df_lasso_TrackingError['TD_squared'] = df_lasso_TrackingError['TD']**2
                TE = np.sqrt(sum(df_lasso_TrackingError['TD_squared'])/(len(df_lasso_TrackingError)-1))
                if TE < min_te:
                    min_te = TE
                    min_te_num = i
                TE_.append(TE)

            if self.if_manual_set_number_lasso:
                return list(df_Lasso_select.head(self.num_lasso)['symbol'])

            else:
                return list(df_Lasso_select.head(min_te_num)['symbol'])



        if method == 'elasticNet':
            elastic_reg = ElasticNet(
                alpha=self.elasticNet_alpha, l1_ratio=self.elasticNet_l1_ratio
            )
            elastic_reg.fit(train_features, train_index)

            # Get the model parameters
            intercept_elastic = elastic_reg.intercept_
            coefficients_elastic = elastic_reg.coef_

            # print("Elastic Net Intercept: \n", intercept_elastic)
            # print("Elastic Net Coefficients: \n", coefficients_elastic)

            # Evaluate the model
            y_pred_elastic = elastic_reg.predict(train_features)

            # Calculate the mean squared error of the predictions
            mse_elastic = mean_squared_error(train_index, y_pred_elastic)


            df_Elastic = pd.DataFrame(coefficients_elastic)
            df_Elastic.columns = ['weight']
            df_Elastic['symbol'] = assets_names
            df_Elastic['abs_weight'] = abs(df_Elastic['weight'])
            df_Elastic_select = df_Elastic.sort_values(by=['abs_weight'], ascending= False)
            df_Elastic_select = df_Elastic_select.head(50)
            train_Elastic_feature = train_features[df_Elastic_select.symbol]
            train_Elastic_feature_T = train_Elastic_feature.T.reset_index()
            train_Elastic_feature_T = train_Elastic_feature_T.rename(columns = {'index':'symbol'})
            d = train_Elastic_feature_T.drop(columns=['symbol'])

            # Calculate the replication index
            weights = np.array([list(df_Elastic_select['weight'])])
            data = np.array(d)
            result = np.dot(weights, data)
            result = result[0]
            # result = weights@data

            # Calculate the TrackingError
            df_Elastic_TrackingError = pd.DataFrame(train_index)
            df_Elastic_TrackingError['RepIdx'] = result
            df_Elastic_TrackingError['SPY_ret'] = df_Elastic_TrackingError['spy'].pct_change()
            df_Elastic_TrackingError['RepIdx_ret'] = df_Elastic_TrackingError['RepIdx'].pct_change()
            df_Elastic_TrackingError = df_Elastic_TrackingError.dropna()
            df_Elastic_TrackingError['TD'] = df_Elastic_TrackingError['SPY_ret'] - df_Elastic_TrackingError['RepIdx_ret']
            df_Elastic_TrackingError['TD_squared'] = df_Elastic_TrackingError['TD']**2
            TE = np.sqrt(sum(df_Elastic_TrackingError['TD_squared'])/(len(df_Elastic_TrackingError)-1))

            num_trial = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
            min_te_num = 10
            min_te = 10000
            TE_ = []
            for i in num_trial:
                df_Elastic = pd.DataFrame(coefficients_elastic)
                df_Elastic.columns = ['weight']
                df_Elastic['symbol'] = assets_names
                df_Elastic['abs_weight'] = abs(df_Elastic['weight'])
                df_Elastic_select = df_Elastic.sort_values(by=['abs_weight'], ascending= False)
                df_Elastic_select = df_Elastic_select.head(i)
                train_Elastic_feature = train_features[df_Elastic_select.symbol]
                train_Elastic_feature_T = train_Elastic_feature.T.reset_index()
                train_Elastic_feature_T = train_Elastic_feature_T.rename(columns = {'index':'symbol'})
                d = train_Elastic_feature_T.drop(columns=['symbol'])

                weights = np.array([list(df_Elastic_select['weight'])])
                data = np.array(d)
                result = np.dot(weights, data)
                result = result[0]

                df_Elastic_TrackingError = pd.DataFrame(train_index)
                df_Elastic_TrackingError['RepIdx'] = result
                df_Elastic_TrackingError['SPY_ret'] = df_Elastic_TrackingError['spy'].pct_change()
                df_Elastic_TrackingError['RepIdx_ret'] = df_Elastic_TrackingError['RepIdx'].pct_change()
                df_Elastic_TrackingError = df_Elastic_TrackingError.dropna()
                df_Elastic_TrackingError['TD'] = df_Elastic_TrackingError['SPY_ret'] - df_Elastic_TrackingError['RepIdx_ret']
                df_Elastic_TrackingError['TD_squared'] = df_Elastic_TrackingError['TD']**2
                TE = np.sqrt(sum(df_Elastic_TrackingError['TD_squared'])/(len(df_Elastic_TrackingError)-1))
                if TE < min_te:
                    min_te = TE
                    min_te_num = i
                TE_.append(TE)

            
            if self.if_manual_set_number_elasticNet:
                return list(df_Elastic_select.head(self.num_elasticNet)['symbol'])
            else:
                return list(df_Elastic_select.head(min_te_num)['symbol'])

        if method == 'groupSelection':

            # Calculate every stock's tracking error
            ret_daily = daily_2.set_index(['time']).pct_change()
            tracking_error_dict = {}
            ticker_list_new = daily_2.columns[1:-1]
            for i in ticker_list_new:
                tmp_df = ret_daily[[i,'spy']].dropna()
                tmp_df['TD'] = tmp_df['spy'] - tmp_df[i]
                tmp_df['TD_squared'] = tmp_df['TD']**2
                TE = np.sqrt(sum(tmp_df['TD_squared'])/(len(tmp_df)-1))
                tracking_error_dict[i] = TE

            fund_df_new = fund_df[fund_df['ticker'].isin(ticker_list_new)]
            fund_df_new = fund_df_new[['ticker','market_cap','sector']]
            fund_df_new['tracking_error'] = fund_df_new['ticker'].apply(lambda x:tracking_error_dict[x])
            fund_df_group_industry = fund_df_new.groupby(['sector'])

            industry_dict = {}
            select_stock = []

            for industry_sec, industry_sec_df in fund_df_group_industry:
                # try:
                industry_sec_df = industry_sec_df.sort_values(by = ['market_cap'])
                industry_sec_df = industry_sec_df[industry_sec_df['market_cap'] > 0 ]
                industry_sec_df['market_size'] = industry_sec_df['market_cap'].transform(lambda x:pd.qcut(x,[0,0.33,0.67,1],labels=['small','medium','large']))
                industry_dict[industry_sec[0]] = industry_sec_df
            
                for a, b in industry_sec_df.groupby(['market_size']):
                    num = int(0.1*len(b)) + 1 # note: hardcoded 10% per bucket; general_setting['pct_num_group'] is loaded but not applied here
                    b = b.sort_values(by = ['tracking_error'], ascending = True)
                    select_stock = select_stock + list(b['ticker'])[:num]

                # except:
                #     continue

            return select_stock
        
        # if method == 'passive_Ownership':
        # if method == 'compare':

        # Fallback: an unknown replication method selects nothing
        return []


    def optimal_portfolio(self, method='markowitz'):
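        '''
        Weight the selected basket. 'markowitz' solves a mean-variance QP (cvxopt);
        'StockRanker' trains a LightGBM lambdarank model and weights by rank;
        'Momentum' tilts index weights by recent daily win rate.
        '''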
        random.seed(10)
        if method == 'markowitz':
            # Obtain returns
            daily_1 = self.get_historical_data()[0]
            opt_df1 = daily_1[['time']+self.selected_stocks]
            returns = opt_df1.set_index(['time']).pct_change().dropna()

            # Optimization
            n = len(returns)
            returns = np.asmatrix(returns)
            
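            # mus: a small log-spaced grid of risk-aversion multipliers (0.1, ~4.6, ~215)
            # used to trace the efficient frontier below.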
            N = 3
            mus = [10**(5.0 * t/N - 1.0) for t in range(N)]

            # Convert to cvxopt matrices
            S = opt.matrix(np.cov(returns))
            pbar = opt.matrix(np.mean(returns, axis=1))

            # Constraints: per-asset lower bound w_i >= -1/500; weights sum to 1.
            # cvxopt's qp minimizes, so expected return enters with a negative sign.
            G = -opt.matrix(np.eye(n))
            h = opt.matrix(1/500, (n ,1))
            A = opt.matrix(1.0, (1, n))
            b = opt.matrix(1.0)

            # Trace the efficient frontier with convex optimization
            portfolios = [solvers.qp(mu*S, -pbar, G, h, A, b)['x'] 
                        for mu in mus]
            ## Returns and risks along the frontier
            returns = [blas.dot(pbar, x) for x in portfolios]
            risks = [np.sqrt(blas.dot(x, S*x)) for x in portfolios]
            m1 = np.polyfit(returns, risks, 2) # least-squares quadratic fit of risk vs. return
            x1 = np.sqrt(m1[2] / m1[0])
            # Solve for the optimal portfolio
            try:
                wt = solvers.qp(opt.matrix(x1 * S), -pbar, G, h, A, b)['x']
                # return np.asarray(wt), returns, risks
                return np.asarray(wt)
            except Exception:
                self.rebalance_signal = False
                return 0 

        if method == 'StockRanker':
            def prepare_groups(df):
                df_copy = df.copy(deep=True)
                df_copy['day'] = df_copy.index
                group = df_copy.groupby('day')['day'].count()
                return group

            def train(x_train, y_train, q_train, model_save_path):
                '''
                Train and save the ranking model.
                '''
                train_data = lgb.Dataset(x_train, label=y_train, group=q_train)
                params = {
                    'task': 'train',  # task type
                    'boosting_type': 'gbrt',  # base learner
                    'objective': 'lambdarank',  # ranking objective
                    'metric': 'ndcg',  # evaluation metric
                    'max_position': 10,  # NDCG position optimization
                    'metric_freq': 1,  # report the metric every iteration
                    'train_metric': True,  # report the metric on the training set
                    'ndcg_at': [10],
                    'max_bin': 255,  # max number of feature bins (default 255); at 255 LightGBM stores each feature value as uint8 to save memory
                    'num_iterations': 500,  # number of boosting iterations
                    'learning_rate': 0.01,
                    'num_leaves': 31,
                    # 'max_depth':6,
                    'tree_learner': 'serial',  # for parallel learning; 'serial' = single-machine tree learner
                    'min_data_in_leaf': 30,  # minimum samples per leaf
                    'verbose': 2  # log training information
                }
                gbm = lgb.train(params, train_data, valid_sets=[train_data])
                # import pickle 
                # serializedObject = pickle.dumps(gbm) 
                # modelKey = "MyModel" 
                # qb.ObjectStore.SaveBytes(modelKey, serializedObject) 
                # serializedObject = bytes(qb.ObjectStore.ReadBytes(modelKey)) 
                
                gbm.save_model(model_save_path)
                # qb.ObjectStore.Save(model_key, clf)

                
            def predict(x_test, model_input_path):
                '''
                Predict scores and return indexes sorted by score.
                '''
                # clf = pickle.loads(serializedObject)
                gbm = lgb.Booster(model_file=model_input_path)  # load the model

                ypred = gbm.predict(x_test)

                predicted_sorted_indexes = np.argsort(ypred)[::-1]  # indexes from highest to lowest score

                return predicted_sorted_indexes


            # daily2_ = pd.concat(daily_dfs2, axis = 0, sort = False, join = 'outer')
            # daily2_ = daily2_.dropna()
            # daily2_ = daily2_.set_index([daily2_.index, 'ticker']).sort_index()
            # daily2_['label'] = daily2_.groupby(['time'])['ret'].transform(lambda x:pd.qcut(x,[0,0.2,0.4,0.6,0.8,1], labels=[1,2,3,4,5]))

            # test_df = pd.concat(test_dfs, axis = 0, sort = False, join = 'outer')
            # test_df = test_df.dropna()
            # # test_df = test_df[test_df['time'] == datetime(2024, 6, 29)]
            # test_df = test_df.set_index([test_df.index, 'ticker']).sort_index()
            # test_df['label'] = test_df.groupby(['time'])['ret'].transform(lambda x:pd.qcut(x,[0,0.2,0.4,0.6,0.8,1], labels=[1,2,3,4,5]))
            # test_df = test_df.loc[(datetime(2024, 6,29),slice(None)),:]

            _, _, daily2_, test_df = self.get_historical_data()
            x_train = daily2_[['slope_20']]
            y_train = daily2_['label'].astype(int)
            q_train = prepare_groups(x_train)
            x_test = test_df[['slope_20']]
            y_test = test_df['label'].astype(int)
            q_test = prepare_groups(x_test)
            train(x_train, y_train, q_train, 'stock_ranker.pkl')
            t_results = predict(x_test, 'stock_ranker.pkl')
            # self.debug(test_df.index)
            new_seq = [i[1] for i in test_df.index]
            rank = [new_seq[x] for x in t_results ]
            self.selected_stocks = rank
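            # Rank-decay weights: w_i ∝ 1/log(i + 2), so the top-ranked stock gets
            # the largest weight and weights decay slowly down the ranking.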
            stock_weights = np.array(
                [1 / math.log(i + 2) for i in range(0, len(rank))]
            )
            stock_weights = stock_weights / stock_weights.sum()

            return stock_weights

        if method == 'Momentum':
            daily_1, _, _, _ = self.get_historical_data()
            pct_daily = daily_1.set_index(['time']).pct_change()
            winrate_daily = pct_daily.dropna().copy()
            winrate_daily = winrate_daily.apply(lambda x:np.where(x>0, 1,0))
            winrate_daily = winrate_daily.tail(self.mom_window)
            winrate_dict = {}
            for i in pct_daily.columns:
                winrate_dict[i] = winrate_daily[i].sum()/self.mom_window

            winrate_df = pd.DataFrame.from_dict(winrate_dict, orient='index',columns=['winrate'])
            winrate_df = winrate_df.reset_index().rename(columns= {'index':'ticker'})
            # stocks = list(df_Lasso_select['symbol'])
            stocks = self.selected_stocks
            winrate_df = winrate_df[winrate_df['ticker'].isin(stocks)]
            winrate_df = winrate_df.sort_values(by = ['winrate'], ascending= False).reset_index(drop=True)

            weight_df = pd.DataFrame.from_dict(self.weight_universe,orient='index',columns=['weight'] ).reset_index().rename(columns= {'index':'ticker'})
            winrate_df = winrate_df.merge(weight_df, how = 'left', on = ['ticker'])
            winrate_df['weight_adj'] = winrate_df['weight']/winrate_df['weight'].sum()
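            # Tilt: add 0.001 per position above the bottom of the winrate ranking,
            # then renormalize so the weights sum to 1.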
            winrate_df['weight_new'] = winrate_df['weight_adj'] + 0.001 * (len(winrate_df) - winrate_df.index)
            winrate_df['weight_new_adj'] = winrate_df['weight_new']/winrate_df['weight_new'].sum()
            self.selected_stocks = list(winrate_df['ticker'])

            return list(winrate_df['weight_new_adj'])


    def rebalance_weights(self):
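        '''
        Recompute the replicating basket and its weights, then flag on_data to
        liquidate and resubmit orders.
        '''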

        self.selected_stocks = self.replicate_portfolio(method = self.replicate_method) # Obtain stocks list

        self.rebalance_signal = True
        weights = self.optimal_portfolio(method = self.opt_method)

        if self.rebalance_signal:
            if self.opt_method == 'markowitz':
                for stock, weight in zip(self.selected_stocks, weights):
                    self.portfolio_weights[stock] = weight[0]

            if self.opt_method == 'StockRanker' or self.opt_method == 'Momentum':
                for stock, weight in zip(self.selected_stocks, weights):
                    self.portfolio_weights[stock] = weight
        
        return 

    def submit_orders(self):

        price = self.curr_spy

        if self.if_hedge:
            for i in self.selected_stocks:
                self.set_holdings(i, self.portfolio_weights[i])

            # self.set_holdings('SPY', - 1)
            if self.contract is None or price is None:
                # No put contract (or SPY price) is available yet, so skip the hedge leg
                self.has_contract = False
                return
            self.debug(price)
            self.contract_size = int(self.portfolio.total_portfolio_value * self.hedge_size / (price * 100))
            self.debug(f"contract size is {self.contract_size}")
            self.market_order(self.contract.Symbol, self.contract_size)
            self.has_contract = True

        else:
            self.contract = None
            self.contract_size = None
            self.strike = None
            for i in self.selected_stocks:
                self.set_holdings(i, self.portfolio_weights[i])
            self.has_contract = False


        

    def on_data(self, slice: Slice):

        # SPY may be absent from a given minute slice, so guard the lookup
        if slice.Bars.ContainsKey("SPY"):
            self.curr_spy = slice["SPY"].close
        chain = slice.OptionChains.get(self.option_symbol)
        # expiry = DateTime(2021,11,19)
        if chain:
            # self.debug('has chain')
            contracts = [i for i in chain if i.Right == OptionRight.PUT and i.Strike < i.UnderlyingLastPrice ]
            #self.debug(f"{len(contracts)}")
            S = sorted(set(i.Strike for i in contracts), reverse=True)

            # e = [i.expiry for i in contracts]
            # e = sorted(list(set(sorted(e))))
            # self.debug(f"strike max is {max(S)}, min is {min(S)}")
            # self.debug(f"expiry max is {max(e)}, min is {min(e)}")
            # strike = sorted(contracts, key=lambda x: x.Strike, reverse = True)[0].Strike
            
            # expiry = sorted(contracts, key=lambda x: x.expiry)[2].expiry

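            # Hedge contract selection: second-highest strike below spot (a slightly
            # OTM put), then the nearest expiry available at that strike.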
            strike = S[1]
            self.strike = strike
            # for i in S:
            #     self.debug(f"{self.Time}: {i}")
            #self.debug(f"Date is {self.time}, the expiry is {expiry}")
            # self.debug( sorted(contracts, key=lambda x: x.expiry))

            # self.contract = [contract for contract in contracts if contract.Strike == strike and contract.expiry == expiry][0]
            contracts = [contract for contract in contracts if contract.Strike == strike]
            
            e = [i.expiry for i in contracts]
            e = sorted(set(e))
            # self.debug(f"expiry max is {max(e)}, min is {min(e)}")
            # for i in e:
            #     self.debug(f'{self.Time}: {i}')

            expiry = e[0]
            # self.debug(f"{self.Time}: expiry is {expiry}, strike is {strike}")
            self.contract = [contract for contract in contracts if contract.expiry == expiry][0]





        # if not self.portfolio.invested:
        #     self.market_order(self.contract.Symbol, 1)

        if self.rebalance_signal:
            self.debug('rebalance')
            # if self.has_contract and slice[self.spy.symbol].close < self.strike:
            #     self.exercise_option(self.contract, self.contract_size)
            self.liquidate()
            self.has_contract = False
            self.debug('before submit')
            self.debug(f"selected stocks:{len(self.selected_stocks)}")
            self.debug(f"selected stocks:{self.selected_stocks[0]}")
            self.debug(f"portfolio {len(self.portfolio_weights)}")
            self.debug(f"portfolio {self.portfolio_weights[self.selected_stocks[0]]}")
            self.submit_orders()
            self.debug('after submit')
            self.rebalance_signal = False
            # self.debug(f"latent_states_sequence is {self.latent_states_sequence}")
            # self.debug(f"best state is {self.best_status_sequence}")
            return


        # elif not self.portfolio.invested:
        #     self.regime_recognization()
        #     self.rebalance_weights()
        #     self.liquidate()
        #     self.submit_orders()
        #     self.rebalance_signal = False
        #     return


        '''
        stop loss and take profit (if necessary)
        '''
        return 

    
    def OnOrderEvent(self, orderEvent):

        
        if orderEvent.Status != OrderStatus.Filled:
            return
        

        # Webhook Notification    
        symbol = orderEvent.symbol
        price = orderEvent.FillPrice
        quantity = orderEvent.quantity
        self.Log(f"SP500 Enhanced-Indexing Paper order update] \nSymbol: {symbol} \nPrice: {price} \nQuantity: {quantity}")
        a = { "text": f"[SP500 Enhanced-Indexing Paper order update] \nSymbol: {symbol} \nPrice: {price} \nQuantity: {quantity}" }
        payload = json.dumps(a)
        self.notify.web("https://hooks.slack.com/services/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", payload)