Not that long ago I started out with QuantConnect, and I began working on modifying one of the example ML algorithms.
Currently the model is behaving very strangely and I don't know what to do. I suspect it may have something to do with the saving and loading of the model, since if I just create a fresh model each time I train, it behaves normally. Here's my code below, along with the code it's inspired by and based on:
# region imports
from AlgorithmImports import *
import gym
from stable_baselines3 import PPO
# endregion

class StableBaselinesExampleAlgorithm(QCAlgorithm):
    def Initialize(self):
        self.SetStartDate(2015, 7, 4)
        self.SetCash(100000)
        self.spy = self.AddEquity("SPY", Resolution.Daily).Symbol
        self.index = True
        training_length = 252*2
        self.training_data = RollingWindow[TradeBar](training_length)
        history = self.History[TradeBar](self.spy, training_length, Resolution.Daily)
        for trade_bar in history:
            self.training_data.Add(trade_bar)
        self.Train(self.my_training_method)
        self.Train(self.DateRules.MonthEnd(), self.TimeRules.At(8, 0), self.my_training_method)

    def get_observations_and_rewards(self, n_step=15):
        training_df = self.PandasConverter.GetDataFrame[TradeBar](list(self.training_data)[::-1])
        daily_pct_change = training_df['close'].pct_change().dropna()
        obs = []
        rewards = []
        for i in range(len(daily_pct_change)-n_step):
            obs.append(training_df.iloc[i:i+n_step].values)
            rewards.append(float(daily_pct_change.iloc[i+n_step]))
        obs = np.array(obs)
        rewards = np.array(rewards)
        return obs, rewards

    def my_training_method(self):
        obs, rewards = self.get_observations_and_rewards()
        self.env = TradingEnv(obs, rewards)
        self.model_key = "ppo_model"  # Key name for the model in the Object Store
        if self.ObjectStore.ContainsKey(self.model_key):
            file_name = self.ObjectStore.GetFilePath(self.model_key)
            self.model = PPO.load(file_name, env=self.env)  # Make sure to initialize `env` before this step
            self.Log("loaded")
        else:
            self.model = PPO("MlpPolicy", self.env, learning_rate=0.0005)
            self.Log("new")
        self.model.learn(total_timesteps=500)
        file_name = self.ObjectStore.GetFilePath(self.model_key)
        self.model.save(file_name)

    def OnData(self, data):
        if not self.model:
            return  # Model not yet initialized
        features, _ = self.get_observations_and_rewards()
        action, _ = self.model.predict(features[-15:], deterministic=True)
        _, _, _, _ = self.env.step(action)
        self.Log(str(action))
        if action == 0:
            self.Liquidate(self.spy)
        elif action == 1:
            # if self.spy.IsShort:
            #     self.Liquidate(self.spy)
            self.SetHoldings(self.spy, 1)
        elif action == 2:
            # if self.spy.IsLong:
            #     self.Liquidate(self.spy)
            self.SetHoldings(self.spy, -1)

class TradingEnv(gym.Env):
    FLAT = 0
    LONG = 1
    SHORT = 2

    def __init__(self, ohlcv, ret):
        super(TradingEnv, self).__init__()
        self.ohlcv = ohlcv
        self.ret = ret
        self.trading_cost = 0.01
        self.reward = 1
        # The number of steps the training has taken; starts at 15 since we use the previous 15 windows for observation
        self.current_step = 15
        # The last action
        self.last_action = 0
        # Define action and observation space
        # Using discrete actions, we have 3: LONG, SHORT and FLAT
        n_actions = 3
        self.action_space = gym.spaces.Discrete(n_actions)
        self.observation_space = gym.spaces.Box(low=-2, high=2, shape=(15, 15, 5), dtype=np.float64)

    def reset(self):
        # Reset the number of steps the training has taken
        self.current_step = 15
        # Reset the last action
        self.last_action = 0
        # Must return np.array type
        return self.ohlcv[self.current_step-15:self.current_step].astype(np.float32)

    def step(self, action):
        if action == self.LONG:
            self.reward *= 1 + self.ret[self.current_step] - (self.trading_cost if self.last_action != action else 0)
        elif action == self.SHORT:
            self.reward *= 1 - self.ret[self.current_step] - (self.trading_cost if self.last_action != action else 0)
        elif action == self.FLAT:
            self.reward *= 1 - (self.trading_cost if self.last_action != action else 0)
        else:
            raise ValueError("Received invalid action={} which is not part of the action space".format(action))
        self.last_action = action
        self.current_step += 1
        # Have we iterated over all data points?
        done = (self.current_step == self.ret.shape[0]-1)
        # Reward as return
        return self.ohlcv[self.current_step-15:self.current_step].astype(np.float32), self.reward, done, {}
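To isolate the save/load step from the rest of the algorithm, here's a minimal round-trip check that can run outside QuantConnect. It's only a sketch: it assumes the same gym/stable_baselines3 versions as above, uses CartPole-v1 as a stand-in for TradingEnv, and the file name is made up. If the deterministic prediction changes after reloading, the round trip itself is lossy; if it doesn't, the strange behaviour more likely comes from how training continues on the reloaded model (e.g. a new env and data window each month).

import gym
from stable_baselines3 import PPO

# Train briefly, save, reload, and compare deterministic predictions
env = gym.make("CartPole-v1")  # stand-in env; the issue shouldn't depend on TradingEnv
model = PPO("MlpPolicy", env, learning_rate=0.0005, seed=0)
model.learn(total_timesteps=500)

obs = env.reset()
action_before, _ = model.predict(obs, deterministic=True)

model.save("ppo_roundtrip_test")  # hypothetical file name
loaded = PPO.load("ppo_roundtrip_test", env=env)
action_after, _ = loaded.predict(obs, deterministic=True)

# If save/load is lossless, the two deterministic actions should match exactly
print("before:", action_before, "after:", action_after)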
And here's the original example the code is based on:
# region imports
from AlgorithmImports import *
import gym
from stable_baselines3 import DQN
# endregion

class StableBaselinesExampleAlgorithm(QCAlgorithm):
    def Initialize(self):
        self.SetStartDate(2022, 7, 4)
        self.SetCash(100000)
        self.spy = self.AddEquity("SPY", Resolution.Daily).Symbol
        training_length = 252*2
        self.training_data = RollingWindow[TradeBar](training_length)
        history = self.History[TradeBar](self.spy, training_length, Resolution.Daily)
        for trade_bar in history:
            self.training_data.Add(trade_bar)
        self.Train(self.my_training_method)
        self.Train(self.DateRules.Every(DayOfWeek.Sunday), self.TimeRules.At(8, 0), self.my_training_method)

    def get_observations_and_rewards(self, n_step=5):
        training_df = self.PandasConverter.GetDataFrame[TradeBar](list(self.training_data)[::-1])
        daily_pct_change = training_df['close'].pct_change().dropna()
        obs = []
        rewards = []
        for i in range(len(daily_pct_change)-n_step):
            obs.append(training_df.iloc[i:i+n_step].values)
            rewards.append(float(daily_pct_change.iloc[i+n_step]))
        obs = np.array(obs)
        rewards = np.array(rewards)
        return obs, rewards

    def my_training_method(self):
        obs, rewards = self.get_observations_and_rewards()
        self.env = TradingEnv(obs, rewards)
        self.model = DQN("MlpPolicy", self.env)
        self.model.learn(total_timesteps=500)

    def OnData(self, data):
        features, _ = self.get_observations_and_rewards()
        action, _ = self.model.predict(features[-5:], deterministic=True)
        _, _, _, _ = self.env.step(action)
        if action == 0:
            self.Liquidate(self.spy)
        elif action == 1:
            self.SetHoldings(self.spy, 1)
        elif action == 2:
            self.SetHoldings(self.spy, -1)

class TradingEnv(gym.Env):
    FLAT = 0
    LONG = 1
    SHORT = 2

    def __init__(self, ohlcv, ret):
        super(TradingEnv, self).__init__()
        self.ohlcv = ohlcv
        self.ret = ret
        self.trading_cost = 0.01
        self.reward = 1
        # The number of steps the training has taken; starts at 5 since we use the previous 5 data points for observation
        self.current_step = 5
        # The last action
        self.last_action = 0
        # Define action and observation space
        # Using discrete actions, we have 3: LONG, SHORT and FLAT
        n_actions = 3
        self.action_space = gym.spaces.Discrete(n_actions)
        # Each observation is a window of the 5 previous data points, each itself a (5, OHLCV) array
        self.observation_space = gym.spaces.Box(low=-2, high=2, shape=(5, 5, 5), dtype=np.float64)

    def reset(self):
        # Reset the number of steps the training has taken
        self.current_step = 5
        # Reset the last action
        self.last_action = 0
        # Must return np.array type
        return self.ohlcv[self.current_step-5:self.current_step].astype(np.float32)

    def step(self, action):
        if action == self.LONG:
            self.reward *= 1 + self.ret[self.current_step] - (self.trading_cost if self.last_action != action else 0)
        elif action == self.SHORT:
            self.reward *= 1 - self.ret[self.current_step] - (self.trading_cost if self.last_action != action else 0)
        elif action == self.FLAT:
            self.reward *= 1 - (self.trading_cost if self.last_action != action else 0)
        else:
            raise ValueError("Received invalid action={} which is not part of the action space".format(action))
        self.last_action = action
        self.current_step += 1
        # Have we iterated over all data points?
        done = (self.current_step == self.ret.shape[0]-1)
        # Reward as return
        return self.ohlcv[self.current_step-5:self.current_step].astype(np.float32), self.reward, done, {}
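One difference between reloading a model and rebuilding everything each time is the environment state that goes with it: in both versions above, TradingEnv.reset() restores current_step and last_action but never resets self.reward, so the compounded reward carries over from one episode to the next (and OnData keeps stepping the same env after training). A quick sketch with synthetic placeholder data, assuming the TradingEnv class from the example above is already defined:

import numpy as np

# Two episodes on the same TradingEnv instance with random actions
rng = np.random.default_rng(0)
ohlcv = rng.normal(size=(600, 5, 5))    # placeholder observation windows, not market data
ret = rng.normal(scale=0.01, size=600)  # placeholder daily returns

env = TradingEnv(ohlcv, ret)  # the TradingEnv class defined above
for episode in range(2):
    obs = env.reset()
    done = False
    while not done:
        obs, reward, done, _ = env.step(env.action_space.sample())
    # A fresh env would start each episode from reward = 1;
    # here episode 2 starts from wherever episode 1 ended
    print("episode", episode, "final compounded reward:", reward)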
Jason Shen
Here's a backtest:
Mia Alissi
To assist Jason after they've posted "Here's a backtest:", you might want to ask for more details about the backtest, or whether they're encountering any specific issue with it. Questions like these can help guide Jason to provide more information, making it easier to offer targeted assistance or advice.