Overall Statistics

Total Trades: 0
Average Win: 0%
Average Loss: 0%
Compounding Annual Return: 0%
Drawdown: 0%
Expectancy: 0
Net Profit: 0%
Sharpe Ratio: 0
Loss Rate: 0%
Win Rate: 0%
Profit-Loss Ratio: 0
Alpha: 0
Beta: 0
Annual Standard Deviation: 0
Annual Variance: 0
Information Ratio: 0
Tracking Error: 0
Treynor Ratio: 0
Total Fees: $0.00
from clr import AddReference
AddReference("System")
AddReference("QuantConnect.Algorithm")
AddReference("QuantConnect.Common")
from System import *
from QuantConnect import *
from QuantConnect.Algorithm import *
from QuantConnect.Data import *
import tensorflow as tf
import numpy as np
import pandas as pd
from Model import Model
from datetime import datetime, timedelta
from sklearn.preprocessing import normalize
from collections import deque
from decimal import Decimal

### <summary>
### Basic template algorithm simply initializes the date range and cash. This is a skeleton
### framework you can use for designing an algorithm.
### </summary>

class BasicTemplateAlgorithm(QCAlgorithm):
    '''Basic template algorithm simply initializes the date range and cash'''
    
    def Initialize(self):
        '''Initialise the data and resolution required, as well as the cash and start-end dates for your algorithm. All algorithms must be initialized.'''
        
        self.TimeSpan = 15
        # This is the period of the bars we are going to create
        self.BarPeriod = TimeSpan.FromMinutes(self.TimeSpan)
        self.RollingWindowSize = 50
        # Holds the per-symbol data, keyed by symbol
        self.Data = {}
        
        # List of assets
        self.assets = ["AAPL", "IBM", "MSFT"]
        self.equities = []
        self.tickets = []
        for asset in self.assets:
            equity = self.AddEquity(asset, Resolution.Hour)
            self.equities.append(equity)
            self.tickets.append(None)
            self.Data[asset] = SymbolData(equity.Symbol, self.BarPeriod, self.RollingWindowSize)

        # Define features
        self.features = ["close", "high", "low"]
        
        self.f = len(self.features)
        self.m = len(self.assets)
        self.n = self.RollingWindowSize
        
        
        self.initial_value = 10000
        # Start date
        self.SetStartDate(2016, 1, 1)
        # End date
        self.SetEndDate(2016, 1, 2)
        # self.SetEndDate(datetime.now() - timedelta(1))
        # Initial cash
        self.SetCash(self.initial_value)

        # Initial portfolio
        self.portfolio_memory = []
        self.initial_portfolio = np.full( [self.m], 1/self.m)
        self.portfolio_memory.append(self.initial_portfolio)
        self.portfolio_memory.append(self.initial_portfolio)
        
        self.array_portfolio_array = np.array(self.portfolio_memory)
        
        self.cov_mat = np.cov(self.array_portfolio_array.T)
        
        # Number of training iterations
        self.epoch = 1
        
        self.initializer = tf.contrib.layers.variance_scaling_initializer()
        
        self.learning_rate = 0.00028
        
        self.discount_rate = 0.95
        
        self.t_episodes = 20

        self.amplification_learning_factor = 1
        
        self.t_batch = 100
        
        self.t_mini_batch = 1
        
        self.t_steps = 100
        
        self.model = Model(batch_num = self.t_mini_batch, 
                            num_features= self.f, 
                            num_assets=self.m, num_periods=self.n, 
                            initial_value=self.initial_value, 
                            learning_rate=self.learning_rate, 
                            discount_rate=self.discount_rate, 
                            training_episodes = self.t_episodes)
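        # Model (defined in Model.py below) wraps the TensorFlow policy network:
        # predict()/predict_batch() sample portfolio weights from the current
        # policy, while train()/train_batch() apply a policy-gradient update
        # over the rewards accumulated since the last update.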
        
        # Loop over all symbols to request the data subscriptions
        for symbol, symbolData in self.Data.items():
            # define the consolidator that consolidates this symbol's data over the required period
            consolidator = TradeBarConsolidator(self.BarPeriod) if symbolData.Symbol.SecurityType == SecurityType.Equity else QuoteBarConsolidator(self.BarPeriod)
            # attach the handler to the consolidated-data event to keep the data up to date
            consolidator.DataConsolidated += self.OnDataConsolidated
            # register the consolidator so it is updated automatically when the event fires
            self.SubscriptionManager.AddConsolidator(symbolData.Symbol, consolidator)
        
        # Scheduled event every 15 minutes
        self.Schedule.On(self.DateRules.EveryDay(), self.TimeRules.Every(timedelta(minutes=self.TimeSpan)), self.OnDataPeriod)
        
        self.Debug("Primero entrenemos el modelo")
        self.train_model()
        self.model.trained = True
        self.Debug("OK. Modelo entrenedado")

    def OnDataConsolidated(self, sender, bar):
        self.Data[bar.Symbol.Value].BarsData.append(np.array([bar.Close, bar.High, bar.Low]))
    
    def OnDataPeriod(self):
        self.norm_prices, self.current_prices, self.previous_prices = self.obtain_prices()
        if not isinstance(self.norm_prices, np.ndarray):
            return
        self.model.set_prices(self.norm_prices, self.current_prices, self.previous_prices)
        next_w = self.model.predict()
        if (self.epoch % self.t_episodes == 0):
            self.episode_count, self.steps_count, self.current_rewards, self.all_rewards, self.loss = self.model.train()
            self.Debug(" ")
            self.Debug("==========================================")
            self.Debug("Episode: " + str(self.episode_count))
            self.Debug("Step: " + str(self.episode_count * self.steps_count))
            self.Debug("-----------")
            self.Debug("Mean reward: " + str(np.mean(self.current_rewards)))
            self.Debug("Average reward across all training runs: " + str(np.mean(self.all_rewards)))
            self.Debug("Episode loss: " + str(self.loss))
                
        
        if self.epoch > self.t_steps:
            if self.epoch % 48 == 0:
                for asset in range(len(self.assets)): 
                    """
                    quantity = Decimal(float(self.Portfolio.TotalPortfolioValue) * next_w[asset] / self.current_prices[asset])
                    if self.tickets[asset] == None:
                        self.tickets[asset] = self.MarketOrder(self.assets[asset], quantity)
                        if self.tickets[asset] != None:
                            self.Debug("Orden enviada")
                    else:
                        updateOrderFields = UpdateOrderFields()
                        updateOrderFields.Quantity = quantity
                        self.tickets[asset].Update(updateOrderFields)
                    """
                    self.SetHoldings(self.assets[asset], next_w[asset], False)
    
            self.last_portfolio_value = self.Portfolio.TotalPortfolioValue #self.Portfolio.TotalAbsoluteHoldingsCost + self.Portfolio.TotalUnrealisedProfit + self.Portfolio.Cash
            self.Debug("Step: " + str(self.epoch) + " Portafolio Value: " + str(self.last_portfolio_value)) #+ " - Omega: " + str(new_w[0]) + " - Reward: " + str(reward_val))
            self.epoch = self.epoch + 1
        else:
            # self.Debug("Step: " + str(self.model.steps_count)+ " Status: Training")
            self.epoch = self.epoch + 1
        
    # NORMALIZED REWARDS
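    # Worked example (comment only): discount_rewards([1, 0, 1], 0.95) walks
    # backwards through the rewards: step 2 -> 1.0, step 1 -> 0 + 0.95*1.0 = 0.95,
    # step 0 -> 1 + 0.95*0.95 = 1.9025, returning array([1.9025, 0.95, 1.0]).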
    def discount_rewards(self, rewards, discount_rate):
        discounted_rewards = np.empty(len(rewards))
        cumulative_rewards = 0
        for step in reversed(range(len(rewards))):
            cumulative_rewards = rewards[step] + cumulative_rewards * discount_rate
            discounted_rewards[step] = cumulative_rewards
        return discounted_rewards
        
    def discount_and_normalize_rewards(self, rewards, discount_rate):
        discounted_rewards = self.discount_rewards(rewards, discount_rate)
        reward_mean = discounted_rewards.mean()
        reward_std = discounted_rewards.std()
        normalized_rewards = (discounted_rewards - reward_mean)/reward_std     
        return normalized_rewards
    
    def get_training_prices(self):
        # Create the data window.
        back_bars = self.TimeSpan * self.t_batch
        h1 = self.History(self.Securities.Keys, back_bars, Resolution.Minute)
        # Filter the desired features
        h1 = h1.loc[self.assets][self.features].fillna(method='ffill').fillna(method='bfill')
        raw_prices = np.zeros(shape=(self.m,self.t_batch,self.f), dtype=np.float64)
        
        for asset in range(len(self.assets)):
            period = 0
            for i in range(self.t_batch * self.TimeSpan):
                if i % self.TimeSpan == 0 and i > 0:
                    ran = h1.loc[self.assets[asset]].iloc[i:i + self.TimeSpan].values
                    self.Debug(str(ran.shape))
                    close = ran[-1][0]
                    high = np.max(ran[:, 1])
                    low = np.min(ran[:, 2])
                    raw_prices[asset][period][0] = close
                    raw_prices[asset][period][1] = high
                    raw_prices[asset][period][2] = low
                    period += 1
        
        # Normalize the values before feeding them to the neural network
        """
        for asset in range(self.m):
            norm_prices[asset] = normalize(norm_prices[asset])
        """
        
        return raw_prices
    
    def obtain_prices(self):
        # Create the data window.
        h1 = self.History(self.Securities.Keys, 50, Resolution.Minute).fillna(method='ffill').fillna(method='bfill')
        
        # Define the price tensor shape (assets=3, periods=50, features=3)
        norm_prices = np.zeros(shape=(self.m,self.n,self.f), dtype=np.float64)
        
        a = 0
        for k, v in self.Data.items():
            if self.Data[k].IsReady():
                if np.array(v.BarsData).shape == norm_prices[a].shape:
                    norm_prices[a] = np.array(self.Data[k].BarsData)
                    a += 1
                else:
                    self.Debug("Sin datos suficientes")
                    self.Debug(str(np.array(v.BarsData).shape))
                    self.Debug(str(norm_prices[a].shape))
                    return None, None, None
        
        # Normalize the values before feeding them to the neural network
        for asset in range(self.m):
            norm_prices[asset] = normalize(norm_prices[asset])
        
        current_prices = []
        previous_prices = []
        
        for asset in self.assets:
            current_prices.append(h1.loc[asset]["close"][-1])

        for asset in self.assets:
            previous_prices.append(h1.loc[asset]["close"][-2])
                
        return norm_prices, current_prices, previous_prices
    
    def train_model(self):
        # Create the data window.
        back_bars = self.TimeSpan * self.t_batch
        self.Debug("Let's fetch the data")
        data = self.get_training_prices()
        self.Debug("Data obtained: " + str(data.shape))

        
        for i in range(self.t_steps):
            # Get Data Slice
            self.norm_prices = np.empty((0,self.m, self.n, self.f))
            prices_shape = np.zeros(shape=(self.m,self.n,self.f), dtype=np.float64).shape
            data_slice = data[:, i:i + 50, :]
            if data_slice.shape == prices_shape:
                norm_prices = data_slice
                # Normalize the values before feeding them to the neural network
                for asset in range(self.m):
                    norm_prices[asset] = normalize(norm_prices[asset])
            else:
                self.Debug("Formas no consistentes en slice: " + str(data_slice.shape) + " " + str(prices_shape))
                return
            self.norm_prices = np.append(self.norm_prices, [norm_prices], axis=0)
            self.current_prices = []
            self.previous_prices = []
            for batch in range(self.t_mini_batch):
                current_prices = []
                previous_prices = []
                for asset in range(self.m):
                    current_prices.append(data_slice[asset,-1,0])
                    previous_prices.append(data_slice[asset,-2,0])
                self.current_prices.append(current_prices)
                self.previous_prices.append(previous_prices)
                
            if not isinstance(self.norm_prices, np.ndarray):
                return

            self.model.set_prices(self.norm_prices, self.current_prices, self.previous_prices)
            next_w = self.model.predict_batch()
            if (self.epoch % self.t_episodes == 0 and self.epoch > 0):
                self.episode_count, self.steps_count, self.current_rewards, self.all_rewards, self.loss = self.model.train_batch()
                self.Debug(" ")
                self.Debug("==========================================")
                self.Debug("Episode: " + str(self.episode_count))
                self.Debug("Step: " + str(self.episode_count * self.steps_count))
                self.Debug("-----------")
                self.Debug("Mean reward: " + str(np.mean(self.current_rewards)))
                self.Debug("Average reward across all training runs: " + str(np.mean(self.all_rewards)))
                self.Debug("Episode loss: " + str(self.loss))
                    
            self.epoch = self.epoch + 1
    
    def OnData(self, data):
        return

class SymbolData(object):
    
    def __init__(self, symbol, barPeriod, windowSize):
        self.Symbol = symbol
        self.BarPeriod = barPeriod
        self.WindowSize = windowSize
        self.BarsData = deque(maxlen=self.WindowSize)
        
    def IsReady(self):
        return len(self.BarsData) == self.WindowSize
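# SymbolData keeps the last `windowSize` consolidated bars per symbol; each entry
# appended by OnDataConsolidated is a [close, high, low] row, so once IsReady()
# is True, np.array(BarsData) has shape (windowSize, 3) and matches one asset
# slice of the model's (assets, periods, features) input tensor.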
from clr import AddReference
AddReference("System")
AddReference("QuantConnect.Algorithm")
AddReference("QuantConnect.Common")
from System import *
from QuantConnect import *
from QuantConnect.Algorithm import *
from QuantConnect.Data import *
import tensorflow as tf
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from sklearn.preprocessing import normalize

class Model:
    def __init__(self, batch_num, num_features, num_assets, num_periods, initial_value, learning_rate, discount_rate, training_episodes):
        self.t_episodes = training_episodes
        self.batch_num = batch_num
        self.f = num_features
        self.m = num_assets
        self.n = num_periods
        self.initial_value = initial_value
        self.learning_rate = learning_rate
        self.discount_rate = discount_rate
        self.commission_rate = 0.01
        self.there_is_model = False
        self.all_rewards = []
        self.current_rewards = []
        self.portfolio_memory = []
        self.initial_portfolio = np.full( [self.m], 1/self.m)
        self.portfolio_memory.append(self.initial_portfolio)
        self.portfolio_memory.append(self.initial_portfolio)
        self.array_portfolio_array = np.array(self.portfolio_memory)
        self.cov_mat = np.cov(self.array_portfolio_array.T)
        self.policy = np.random.multivariate_normal(self.initial_portfolio, self.cov_mat)
        self.previous_prices = np.zeros(shape=(self.m,self.n,self.f), dtype=np.float64)
        self.norm_prices = np.zeros(shape=(self.m,self.n,self.f), dtype=np.float64)
        self.current_prices = np.zeros(shape=(self.m,self.n,self.f), dtype=np.float64)
        self.episode_count = 1
        self.steps_count = 0
        self.trained = False
        
        self.setup_network()
        
    def discount_rewards(self, rewards, discount_rate):
        discounted_rewards = np.empty(len(rewards))
        cumulative_rewards = 0
        for step in reversed(range(len(rewards))):
            cumulative_rewards = rewards[step] + cumulative_rewards * discount_rate
            discounted_rewards[step] = cumulative_rewards
        return discounted_rewards
        
    def discount_and_normalize_rewards(self, rewards, discount_rate):
        discounted_rewards = self.discount_rewards(rewards, discount_rate)
        reward_mean = discounted_rewards.mean()
        reward_std = discounted_rewards.std()
        normalized_rewards = (discounted_rewards - reward_mean)/reward_std     
        return normalized_rewards   
     
    def set_prices(self, norm_prices, current_prices, previous_prices):
        self.current_prices = current_prices
        self.norm_prices = norm_prices
        self.previous_prices = previous_prices
    
    def next_step(self):
        self.steps_count = self.steps_count + 1
    
    def train_batch(self):
        previous_w_val = self.portfolio_memory[-1]
        self.previous_w_val = np.empty((0,self.m))
        for i in range(self.batch_num):
            self.previous_w_val = np.append(self.previous_w_val, [previous_w_val], axis=0)
        discounted_rewards = np.empty((0,self.t_episodes))
        for i in range(self.batch_num):
            discounted_rewards = np.append(discounted_rewards, [self.discount_and_normalize_rewards(self.current_rewards, self.discount_rate)], axis=0)
        policy =  np.empty((0,self.m))
        for i in range(self.batch_num):
            policy = np.append(policy, [self.policy], axis=0)
        loss_, _ = self.sess.run([self.loss, self.training_op], feed_dict={self.prices: self.current_prices, 
                                                           self.x: self.norm_prices, 
                                                           self.previous_w: self.previous_w_val, 
                                                           self.actions: policy,
                                                           self.discounted_episode_rewards: discounted_rewards
                                                           })
           

        episode_count = self.episode_count
        steps_count = self.steps_count
        current_rewards = self.current_rewards

        self.steps_count = 0
        self.episode_count = self.episode_count + 1
        self.all_rewards.append(self.current_rewards)
        self.current_rewards = []
        
        return episode_count, steps_count, current_rewards, self.all_rewards, loss_
            
    def train(self):
        self.previous_w_val = self.portfolio_memory[-1]
        discounted_rewards = self.discount_and_normalize_rewards(self.current_rewards, self.discount_rate)

        loss_, _ = self.sess.run([self.loss, self.training_op], feed_dict={self.prices: [self.current_prices], 
                                                           self.x: [self.norm_prices], 
                                                           self.previous_w:[self.previous_w_val], 
                                                           self.actions:[self.policy],
                                                           self.discounted_episode_rewards: [discounted_rewards]
                                                           })
           

        episode_count = self.episode_count
        steps_count = self.steps_count
        current_rewards = self.current_rewards

        self.steps_count = 0
        self.episode_count = self.episode_count + 1
        self.all_rewards.append(self.current_rewards)
        self.current_rewards = []
        
        return episode_count, steps_count, current_rewards, self.all_rewards, loss_
    
    def predict_batch(self):
        previous_w_val = self.portfolio_memory[-1]
        self.previous_w_val = np.empty((0,self.m))
        for i in range(self.batch_num):
            self.previous_w_val = np.append(self.previous_w_val, [previous_w_val], axis=0)
        new_w = self.soft_max.eval(session=self.sess, feed_dict={self.x: self.norm_prices, 
                                                                self.previous_w:self.previous_w_val
                                                                })
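        # Reward sketch: the price-weighted change from the previous allocation to
        # the new weights, minus a commission charge proportional to the value of
        # the new allocation (an approximation that works on raw prices and
        # weights rather than actual share quantities).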
        reward = np.sum(self.current_prices * new_w[0] - self.previous_prices * self.previous_w_val) - np.sum(self.current_prices * new_w[0]) * self.commission_rate
        self.current_rewards.append(reward)
        self.portfolio_memory.append(new_w[0])
        
        self.cov_mat = np.cov(np.array(self.portfolio_memory).T)
        self.policy = np.random.multivariate_normal(new_w[0], self.cov_mat)
        self.steps_count = self.steps_count + 1
        return self.policy
    
    def predict(self):
        self.previous_w_val = self.portfolio_memory[-1]
        new_w = self.soft_max.eval(session=self.sess, feed_dict={self.x: [self.norm_prices], 
                                                                self.previous_w:[self.previous_w_val]
                                                                })
                                                                
        reward = np.sum(self.current_prices * new_w[0] - self.previous_prices * self.previous_w_val) - np.sum(self.current_prices * new_w[0]) * self.commission_rate
        self.current_rewards.append(reward)
        self.portfolio_memory.append(new_w[0])
        
        self.cov_mat = np.cov(np.array(self.portfolio_memory).T)
        self.policy = np.random.multivariate_normal(new_w[0], self.cov_mat)
        self.steps_count = self.steps_count + 1
        return self.policy
    
    def setup_network(self):
        self.sess = tf.InteractiveSession()

        self.x = tf.placeholder(tf.float32, [None, self.m, self.n, self.f])
        self.previous_w = tf.placeholder(tf.float32, [None, self.m])
        self.prices = tf.placeholder(tf.float32, [None, self.m])
        self.actions = tf.placeholder(tf.float32, [None, self.m])
        self.discounted_episode_rewards = tf.placeholder(tf.float32, [None, self.t_episodes])
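        # Placeholder shapes: x is (batch, assets, periods, features); previous_w,
        # prices and actions are (batch, assets); discounted_episode_rewards is
        # (batch, training_episodes).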
    
        # The intended final shape is (features, periods, assets); note the permutation below is the identity, so x keeps its (batch, assets, periods, features) layout
        self.x = tf.transpose(self.x, [0, 1, 2, 3])
    
        # Convolutional layer #1
        self.conv1 = tf.layers.conv2d(
            inputs=self.x,
            filters=2,
            kernel_size=[1, 3],
            padding="valid",
            activation=tf.nn.relu,
            kernel_initializer=None,
            bias_initializer=tf.zeros_initializer())
    
        self.width = self.conv1.get_shape()[2]
        
        # Convolutional layer #2
        self.conv2 = tf.layers.conv2d(
            inputs=self.conv1,
            filters=20,
            kernel_size=[1,self.width],
            padding="valid",
            activation=tf.nn.relu,
            kernel_initializer=None,
            bias_initializer=tf.zeros_initializer(),
            kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=5e-09))
        
        # Fully connected layer #3
        
        self.fc = tf.layers.flatten(self.conv2)
        self.fc = tf.concat([self.fc, self.previous_w], axis=1)
        self.fc = tf.contrib.layers.fully_connected(inputs=self.fc, 
                                               num_outputs=self.m,
                                               activation_fn=tf.nn.softmax,
                                               weights_regularizer=tf.contrib.layers.l2_regularizer(scale=5e-09)
                                              )
        # self.soft_max = tf.concat([self.cash, self.fc], axis=1)
        
        self.soft_max  = tf.nn.softmax(self.fc)
        
        self.neg_log_prob = tf.nn.softmax_cross_entropy_with_logits_v2(logits = self.soft_max, labels = self.actions)
        
        self.loss = tf.reduce_mean(self.neg_log_prob * self.discounted_episode_rewards)
        
        """        
        self.loss = ( -tf.reduce_mean(tf.log(tf.reduce_sum(self.prices * self.fc) / self.initial_value)) ) + \
                self.discount_rate * ( tf.reduce_mean(tf.reduce_sum(-tf.log(1 + 1e-6 - self.fc) / self.initial_value)) )
        """
        
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        
        self.training_op = self.optimizer.minimize(self.loss)
        
        self.sess.run(tf.global_variables_initializer())
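# Minimal offline sketch (comment only; assumes TensorFlow 1.x and synthetic data,
# not part of the LEAN backtest):
#
#   model = Model(batch_num=1, num_features=3, num_assets=3, num_periods=50,
#                 initial_value=10000, learning_rate=0.00028,
#                 discount_rate=0.95, training_episodes=20)
#   model.set_prices(np.random.rand(3, 50, 3),   # normalized price window
#                    np.random.rand(3),          # current close prices
#                    np.random.rand(3))          # previous close prices
#   weights = model.predict()                    # sampled portfolio weights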