Overall Statistics

| Statistic | Value |
| --- | --- |
| Total Trades | 65 |
| Average Win | 2.16% |
| Average Loss | -0.83% |
| Compounding Annual Return | 16.749% |
| Drawdown | 11.700% |
| Expectancy | 0.350 |
| Net Profit | 8.765% |
| Sharpe Ratio | 0.77 |
| Probabilistic Sharpe Ratio | 40.137% |
| Loss Rate | 62% |
| Win Rate | 38% |
| Profit-Loss Ratio | 2.60 |
| Alpha | 0.121 |
| Beta | 0.086 |
| Annual Standard Deviation | 0.168 |
| Annual Variance | 0.028 |
| Information Ratio | 0.165 |
| Tracking Error | 0.24 |
| Treynor Ratio | 1.495 |
| Total Fees | $77.06 |
| Estimated Strategy Capacity | $740000000.00 |
| Lowest Capacity Asset | SPY R735QTJ8XC9X |
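As a rough sanity check on these figures, expectancy is commonly computed as win rate times the profit-loss ratio minus the loss rate. A minimal sketch using the rounded values from the table (so the result only approximately reproduces the reported 0.350):

```python
# Sanity check on the reported expectancy, assuming the common definition
# expectancy = win_rate * profit_loss_ratio - loss_rate.
# The inputs below are the rounded values from the statistics table.
win_rate, loss_rate, profit_loss_ratio = 0.38, 0.62, 2.60
expectancy = win_rate * profit_loss_ratio - loss_rate
print(round(expectancy, 3))  # 0.368, in the same ballpark as the reported 0.350
```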
```python
# region imports
from AlgorithmImports import *
import gym
from stable_baselines3 import DQN
# endregion


class StableBaselinesExampleAlgorithm(QCAlgorithm):

    def Initialize(self):
        self.SetStartDate(2022, 7, 4)
        self.SetCash(100000)
        self.spy = self.AddEquity("SPY", Resolution.Daily).Symbol

        # Warm up a rolling window with 2 years of daily bars for training.
        training_length = 252*2
        self.training_data = RollingWindow[TradeBar](training_length)
        history = self.History[TradeBar](self.spy, training_length, Resolution.Daily)
        for trade_bar in history:
            self.training_data.Add(trade_bar)

        # Train the model immediately, then retrain every Sunday at 8:00.
        self.Train(self.my_training_method)
        self.Train(self.DateRules.Every(DayOfWeek.Sunday), self.TimeRules.At(8, 0), self.my_training_method)

    def get_observations_and_rewards(self, n_step=5):
        training_df = self.PandasConverter.GetDataFrame[TradeBar](list(self.training_data)[::-1])
        daily_pct_change = training_df['close'].pct_change().dropna()

        # Each observation is the previous n_step OHLCV rows; the reward is the next day's return.
        obs = []
        rewards = []
        for i in range(len(daily_pct_change)-n_step):
            obs.append(training_df.iloc[i:i+n_step].values)
            rewards.append(float(daily_pct_change.iloc[i+n_step]))
        obs = np.array(obs)
        rewards = np.array(rewards)

        return obs, rewards

    def my_training_method(self):
        obs, rewards = self.get_observations_and_rewards()
        self.env = TradingEnv(obs, rewards)
        self.model = DQN("MlpPolicy", self.env)
        self.model.learn(total_timesteps=500)

    def OnData(self, data):
        features, _ = self.get_observations_and_rewards()
        action, _ = self.model.predict(features[-5:], deterministic=True)
        # Advance the environment with the chosen action.
        _, _, _, _ = self.env.step(action)

        if action == 0:      # FLAT
            self.Liquidate(self.spy)
        elif action == 1:    # LONG
            self.SetHoldings(self.spy, 1)
        elif action == 2:    # SHORT
            self.SetHoldings(self.spy, -1)


class TradingEnv(gym.Env):
    FLAT = 0
    LONG = 1
    SHORT = 2

    def __init__(self, ohlcv, ret):
        super(TradingEnv, self).__init__()

        self.ohlcv = ohlcv
        self.ret = ret
        self.trading_cost = 0.01
        self.reward = 1
        # The number of steps the training has taken; starts at 5 since the previous 5 data points form the observation.
        self.current_step = 5
        # The last action
        self.last_action = 0

        # Define the action and observation spaces.
        # Discrete actions, we have 3: FLAT, LONG and SHORT.
        n_actions = 3
        self.action_space = gym.spaces.Discrete(n_actions)
        # Each observation is a window of the 5 most recent feature blocks, each of shape (5 previous data points, OHLCV).
        self.observation_space = gym.spaces.Box(low=-2, high=2, shape=(5, 5, 5), dtype=np.float64)

    def reset(self):
        # Reset the number of steps the training has taken
        self.current_step = 5
        # Reset the last action
        self.last_action = 0
        # must return np.array type
        return self.ohlcv[self.current_step-5:self.current_step].astype(np.float32)

    def step(self, action):
        # Compound the return of the chosen position, paying the trading cost whenever the position changes.
        if action == self.LONG:
            self.reward *= 1 + self.ret[self.current_step] - (self.trading_cost if self.last_action != action else 0)
        elif action == self.SHORT:
            self.reward *= 1 + -1 * self.ret[self.current_step] - (self.trading_cost if self.last_action != action else 0)
        elif action == self.FLAT:
            self.reward *= 1 - (self.trading_cost if self.last_action != action else 0)
        else:
            raise ValueError("Received invalid action={} which is not part of the action space".format(action))

        self.last_action = action
        self.current_step += 1

        # Have we iterated over all data points?
        done = (self.current_step == self.ret.shape[0]-1)

        # Reward is the compounded return so far.
        return self.ohlcv[self.current_step-5:self.current_step].astype(np.float32), self.reward, done, {}
```
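Before wiring the environment into DQN, it can be useful to step it manually and watch the compounded, cost-adjusted reward evolve. The sketch below assumes the `TradingEnv` class above has been copied into a standalone script; the synthetic arrays and the `n_samples` name are purely illustrative stand-ins for the output of `get_observations_and_rewards`.

```python
# A minimal sketch of exercising TradingEnv outside of LEAN, e.g. to debug the
# reward bookkeeping before handing the environment to DQN. Assumes TradingEnv
# (defined above) is available in this script; the data below is synthetic.
import numpy as np

n_samples = 60
rng = np.random.default_rng(0)
obs = rng.normal(size=(n_samples, 5, 5))       # (samples, 5 previous bars, OHLCV)
rets = rng.normal(scale=0.01, size=n_samples)  # stand-in daily percentage changes

env = TradingEnv(obs, rets)
observation = env.reset()
done = False
while not done:
    action = env.action_space.sample()  # random policy, just to step the environment
    observation, reward, done, info = env.step(action)

# The reward is the compounded return of the action sequence, net of trading costs.
print(f"Final compounded reward: {reward:.4f}")
```

Because the reward compounds across the whole episode rather than resetting each step, later steps carry the history of earlier actions, which is worth keeping in mind when interpreting what DQN is actually optimizing here.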