| Overall Statistics | |
| --- | --- |
| Total Orders | 74 |
| Average Win | 8.69% |
| Average Loss | -0.11% |
| Compounding Annual Return | -8.039% |
| Drawdown | 14.000% |
| Expectancy | 0.954 |
| Start Equity | 100000 |
| End Equity | 93838.00 |
| Net Profit | -6.162% |
| Sharpe Ratio | -1.377 |
| Sortino Ratio | -1.563 |
| Probabilistic Sharpe Ratio | 3.855% |
| Loss Rate | 98% |
| Win Rate | 2% |
| Profit-Loss Ratio | 77.17 |
| Alpha | -0.092 |
| Beta | -0.14 |
| Annual Standard Deviation | 0.079 |
| Annual Variance | 0.006 |
| Information Ratio | -1.667 |
| Tracking Error | 0.133 |
| Treynor Ratio | 0.772 |
| Total Fees | $74.77 |
| Estimated Strategy Capacity | $1200000000.00 |
| Lowest Capacity Asset | SPY R735QTJ8XC9X |
| Portfolio Turnover | 2.12% |
# region imports
from AlgorithmImports import *
import gym
from stable_baselines3 import PPO
# endregion


class StableBaselinesExampleAlgorithm(QCAlgorithm):

    def Initialize(self):
        self.SetStartDate(2023, 7, 4)
        self.SetCash(100000)
        self.spy = self.AddEquity("SPY", Resolution.Daily).Symbol
        self.index = True

        # Warm up a rolling window with two years of daily bars for training.
        training_length = 252*2
        self.training_data = RollingWindow[TradeBar](training_length)
        history = self.History[TradeBar](self.spy, training_length, Resolution.Daily)
        for trade_bar in history:
            self.training_data.Add(trade_bar)

        # Train the model immediately, then retrain at every month end at 8:00.
        self.Train(self.my_training_method)
        self.Train(self.DateRules.MonthEnd(), self.TimeRules.At(8, 0), self.my_training_method)
    def get_observations_and_rewards(self, n_step=15):
        # Build observations from windows of the previous `n_step` OHLCV bars and
        # use the following day's percentage change in close as the reward.
        training_df = self.PandasConverter.GetDataFrame[TradeBar](list(self.training_data)[::-1])
        daily_pct_change = training_df['close'].pct_change().dropna()

        obs = []
        rewards = []
        for i in range(len(daily_pct_change)-n_step):
            obs.append(training_df.iloc[i:i+n_step].values)
            rewards.append(float(daily_pct_change.iloc[i+n_step]))
        obs = np.array(obs)
        rewards = np.array(rewards)

        return obs, rewards
    def my_training_method(self):
        obs, rewards = self.get_observations_and_rewards()
        self.env = TradingEnv(obs, rewards)

        self.model_key = "ppo_model"  # Key name for the model in the Object Store
        if self.ObjectStore.ContainsKey(self.model_key):
            # Warm-start from the previously saved model.
            file_name = self.ObjectStore.GetFilePath(self.model_key)
            self.model = PPO.load(file_name, env=self.env)  # Make sure to initialize `env` before this step
            self.Log("loaded")
        else:
            self.model = PPO("MlpPolicy", self.env, learning_rate=0.0005)
            self.Log("new")

        self.model.learn(total_timesteps=500)

        # Save the trained model to the Object Store so the next training can reuse it.
        file_name = self.ObjectStore.GetFilePath(self.model_key)
        self.model.save(file_name)
    def OnData(self, data):
        if not self.model:
            return  # Model not yet initialized

        # Predict an action from the most recent observation windows.
        features, _ = self.get_observations_and_rewards()
        action, _ = self.model.predict(features[-15:], deterministic=True)
        _, _, _, _ = self.env.step(action)
        self.Log(action)

        # Map the discrete action to a position: 0 = flat, 1 = long, 2 = short.
        if action == 0:
            self.Liquidate(self.spy)
        elif action == 1:
            # if self.spy.IsShort:
            #     self.Liquidate(self.spy)
            self.SetHoldings(self.spy, 1)
        elif action == 2:
            # if self.spy.IsLong:
            #     self.Liquidate(self.spy)
            self.SetHoldings(self.spy, -1)


class TradingEnv(gym.Env):
    FLAT = 0
    LONG = 1
    SHORT = 2

    def __init__(self, ohlcv, ret):
        super(TradingEnv, self).__init__()

        self.ohlcv = ohlcv
        self.ret = ret
        self.trading_cost = 0.01
        self.reward = 1
        # The number of steps the environment has taken; starts at 15 since the
        # observation uses the previous 15 data windows.
        self.current_step = 15
        # The last action
        self.last_action = 0

        # Define action and observation space.
        # We use 3 discrete actions: FLAT, LONG and SHORT.
        n_actions = 3
        self.action_space = gym.spaces.Discrete(n_actions)
        # The observation is a stack of the 15 most recent windows, each holding 15 OHLCV bars.
        self.observation_space = gym.spaces.Box(low=-2, high=2, shape=(15, 15, 5), dtype=np.float64)

    def reset(self):
        # Reset the number of steps the environment has taken
        self.current_step = 15
        # Reset the last action
        self.last_action = 0
        # must return np.array type
        return self.ohlcv[self.current_step-15:self.current_step].astype(np.float32)

    def step(self, action):
        if action == self.LONG:
            self.reward *= 1 + self.ret[self.current_step] - (self.trading_cost if self.last_action != action else 0)
        elif action == self.SHORT:
            self.reward *= 1 + -1 * self.ret[self.current_step] - (self.trading_cost if self.last_action != action else 0)
        elif action == self.FLAT:
            self.reward *= 1 - (self.trading_cost if self.last_action != action else 0)
        else:
            raise ValueError("Received invalid action={} which is not part of the action space".format(action))

        self.last_action = action
        self.current_step += 1

        # Have we iterated over all the data points?
        done = (self.current_step == self.ret.shape[0]-1)

        # Return the new observation, the cumulative (compounded) return as the reward, and the done flag.
        return self.ohlcv[self.current_step-15:self.current_step].astype(np.float32), self.reward, done, {}
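
For quick experimentation outside of LEAN, the custom environment can be exercised on its own. The sketch below is an illustrative assumption rather than part of the algorithm above: it feeds `TradingEnv` synthetic arrays whose shapes match the spaces defined above, and it assumes the class is available in the same module plus a stable_baselines3 release (e.g. 1.x) that still accepts classic `gym` environments with the four-value `step` API.

# Minimal local sketch (assumption: synthetic data, not the SPY history used above).
import numpy as np
from stable_baselines3 import PPO

n_samples, n_step, n_features = 300, 15, 5
# Fake stacked observation windows shaped (samples, 15 bars, 5 OHLCV columns)
ohlcv = np.random.uniform(-1, 1, size=(n_samples, n_step, n_features))
# Fake next-day returns aligned with the windows
returns = np.random.normal(0, 0.01, size=n_samples)

env = TradingEnv(ohlcv, returns)  # the environment class defined above
model = PPO("MlpPolicy", env, learning_rate=0.0005)
model.learn(total_timesteps=500)

obs = env.reset()
action, _ = model.predict(obs, deterministic=True)
print(action)  # 0 = FLAT, 1 = LONG, 2 = SHORT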