From 2c7ace4f0600c41d1df70185cd7c383a533cdf6c Mon Sep 17 00:00:00 2001 From: paulg Date: Tue, 12 Oct 2021 09:19:20 +1100 Subject: [PATCH 1/7] Update StockTradingEnv.py --- env/StockTradingEnv.py | 126 +++++++++++++++++++++++++++++------------ 1 file changed, 91 insertions(+), 35 deletions(-) diff --git a/env/StockTradingEnv.py b/env/StockTradingEnv.py index 23c79ec..045bb89 100644 --- a/env/StockTradingEnv.py +++ b/env/StockTradingEnv.py @@ -17,30 +17,55 @@ LOOKBACK_WINDOW_SIZE = 40 - def factor_pairs(val): return [(i, val / i) for i in range(1, int(val**0.5)+1) if val % i == 0] +# np.seterr(all='raise') class StockTradingEnv(gym.Env): - """A stock trading environment for OpenAI gym""" - metadata = {'render.modes': ['live', 'file', 'none']} - visualization = None - - def __init__(self, df): - super(StockTradingEnv, self).__init__() - + """stock trading gym environment + + """ + metadata = {'render.modes':['live','human','file','none']} + + def __init__(self, df) -> None: + super(StockTradingEnv,self).__init__() + self.df = self._adjust_prices(df) - self.reward_range = (0, MAX_ACCOUNT_BALANCE) - - # Actions of the format Buy x%, Sell x%, Hold, etc. - self.action_space = spaces.Box( - low=np.array([0, 0]), high=np.array([3, 1]), dtype=np.float16) - - # Prices contains the OHCL values for the last five prices - self.observation_space = spaces.Box( - low=0, high=1, shape=(5, LOOKBACK_WINDOW_SIZE + 2), dtype=np.float16) - + self.visualization = None + + """ + Reward, we want to incentivize profit that is sustained over long periods of time. + At each step, we will set the reward to the account balance multiplied by + some fraction of the number of time steps so far. + The purpose of this is to delay rewarding the agent too fast in the early stages + and allow it to explore sufficiently before optimizing a single strategy too deeply. 
+ It will also reward agents that maintain a higher balance for longer, + rather than those who rapidly gain money using unsustainable strategies. + """ + self.reward_range = (0, MAX_ACCOUNT_BALANCE) + + """ + Action space that has a discrete number of action types (buy, sell, and hold), + as well as a continuous spectrum of amounts to buy/sell + (0-100% of the account balance/position size respectively). + You’ll notice the amount is not necessary for the hold action, + but will be provided anyway. Our agent does not initially know this, + but over time should learn that the amount is extraneous for this action. + """ + self.action_space = spaces.Box(low=np.array([0,0]), high=np.array([3,1]),dtype=np.float16) + + """ + Observation_space contains all of the input variables we want our agent to consider before making, + or not making, a trade. We want our agent to “see” the forex data points + (open price, high, low, close, and daily volume) for the last five days, + as well as a couple of other data points like its account balance, current stock positions, and current profit. + The intuition here is that for each time step, we want our agent to consider the price action + leading up to the current price, as well as their own portfolio’s status in order to make + an informed decision for the next action. 
+ """ + self.observation_space = spaces.Box(low=0, high=1, shape=(5, LOOKBACK_WINDOW_SIZE + 2), dtype=np.float16) + def _adjust_prices(self, df): adjust_ratio = df['Adjusted_Close'] / df['Close'] @@ -50,7 +75,7 @@ def _adjust_prices(self, df): df['Close'] = df['Close'] * adjust_ratio return df - + def _next_observation(self): frame = np.zeros((5, LOOKBACK_WINDOW_SIZE + 1)) @@ -80,6 +105,7 @@ def _next_observation(self): return obs def _take_action(self, action): + # Set the current price to a random price within the time step current_price = random.uniform( self.df.loc[self.current_step, "Open"], self.df.loc[self.current_step, "Close"]) @@ -94,15 +120,18 @@ def _take_action(self, action): additional_cost = shares_bought * current_price self.balance -= additional_cost - self.cost_basis = ( - prev_cost + additional_cost) / (self.shares_held + shares_bought) + if (self.shares_held + shares_bought) != 0 : + self.cost_basis = (prev_cost + additional_cost) / (self.shares_held + shares_bought) + else: + self.cost_basis = 0 + self.shares_held += shares_bought if shares_bought > 0: self.trades.append({'step': self.current_step, 'shares': shares_bought, 'total': additional_cost, 'type': "buy"}) - + elif action_type < 2: # Sell amount % of shares held shares_sold = int(self.shares_held * amount) @@ -110,12 +139,11 @@ def _take_action(self, action): self.shares_held -= shares_sold self.total_shares_sold += shares_sold self.total_sales_value += shares_sold * current_price - if shares_sold > 0: self.trades.append({'step': self.current_step, 'shares': shares_sold, 'total': shares_sold * current_price, 'type': "sell"}) - + self.net_worth = self.balance + self.shares_held * current_price if self.net_worth > self.max_net_worth: @@ -124,6 +152,24 @@ def _take_action(self, action): if self.shares_held == 0: self.cost_basis = 0 + def step(self, action): + # Execute one time step within the environment + self._take_action(action) + + self.current_step += 1 + + if self.current_step > 
len(self.df.loc[:, 'Open'].values) - 6: + self.current_step = 0 + + delay_modifier = (self.current_step / MAX_STEPS) + + reward = self.balance * delay_modifier + done = self.net_worth <= 0 + + obs = self._next_observation() + + return obs, reward, done, {} + def step(self, action): # Execute one time step within the environment self._take_action(action) @@ -151,7 +197,7 @@ def reset(self): self.total_sales_value = 0 self.current_step = 0 self.trades = [] - + self.visualization = None return self._next_observation() def _render_to_file(self, filename='render.txt'): @@ -171,21 +217,31 @@ def _render_to_file(self, filename='render.txt'): file.close() - def render(self, mode='live', **kwargs): + def render(self, mode='live', title=None, **kwargs): # Render the environment to the screen + if mode == 'human': + profit = self.net_worth - INITIAL_ACCOUNT_BALANCE + + print(f'Step: {self.current_step}') + print(f'Balance: {self.balance}') + print( + f'Shares held: {self.shares_held} (Total sold: {self.total_shares_sold})') + print( + f'Avg cost for held shares: {self.cost_basis} (Total sales value: {self.total_sales_value})') + print( + f'Net worth: {self.net_worth} (Max net worth: {self.max_net_worth})') + print(f'Profit: {profit}') if mode == 'file': self._render_to_file(kwargs.get('filename', 'render.txt')) - elif mode == 'live': if self.visualization == None: - self.visualization = StockTradingGraph( - self.df, kwargs.get('title', None)) - - if self.current_step > LOOKBACK_WINDOW_SIZE: - self.visualization.render( - self.current_step, self.net_worth, self.trades, window_size=LOOKBACK_WINDOW_SIZE) - + self.visualization = StockTradingGraph(self.df, title) + + if self.current_step > LOOKBACK_WINDOW_SIZE: + self.visualization.render(self.current_step, self.net_worth, + self.trades, window_size=LOOKBACK_WINDOW_SIZE) + def close(self): if self.visualization != None: self.visualization.close() - self.visualization = None + self.visualization = None From 
61e2655b9937503bccd24388abb2dbcfb8e5e66b Mon Sep 17 00:00:00 2001 From: paulg Date: Tue, 12 Oct 2021 10:16:42 +1100 Subject: [PATCH 2/7] use latest stable_baselines3 PPO --- main.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/main.py b/main.py index 25213be..0487ed0 100644 --- a/main.py +++ b/main.py @@ -1,8 +1,6 @@ -import gym - -from stable_baselines.common.policies import MlpPolicy -from stable_baselines.common.vec_env import DummyVecEnv -from stable_baselines import PPO2 +from stable_baselines3.ppo import MlpPolicy +from stable_baselines3.common.vec_env import DummyVecEnv +from stable_baselines3 import PPO from env.StockTradingEnv import StockTradingEnv @@ -14,11 +12,12 @@ # The algorithms require a vectorized environment to run env = DummyVecEnv([lambda: StockTradingEnv(df)]) -model = PPO2(MlpPolicy, env, verbose=1) +model = PPO(MlpPolicy, env, verbose=1) model.learn(total_timesteps=50) obs = env.reset() + for i in range(len(df['Date'])): action, _states = model.predict(obs) obs, rewards, done, info = env.step(action) - env.render(title="MSFT") + env.render(mode='live') From a9fd5cf57b85e8d35d67d3483643e423808a53bf Mon Sep 17 00:00:00 2001 From: paulg Date: Tue, 12 Oct 2021 10:18:21 +1100 Subject: [PATCH 3/7] new version mplfinance and datestr2num conversion --- render/StockTradingGraph.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/render/StockTradingGraph.py b/render/StockTradingGraph.py index 7928787..9c4ccfc 100644 --- a/render/StockTradingGraph.py +++ b/render/StockTradingGraph.py @@ -1,15 +1,11 @@ - - import numpy as np -import matplotlib +import pandas as pd import matplotlib.pyplot as plt -import matplotlib.dates as mdates +import matplotlib.dates as dates from matplotlib import style - -# finance module is no longer part of matplotlib -# see: https://github.com/matplotlib/mpl_finance -from mpl_finance import candlestick_ochl as candlestick - +from datetime import datetime 
+# import mplfinance as mpf +from mplfinance.original_flavor import candlestick_ohlc as candlestick style.use('dark_background') VOLUME_CHART_HEIGHT = 0.33 @@ -21,8 +17,8 @@ def date2num(date): - converter = mdates.strpdate2num('%Y-%m-%d') - return converter(date) + converter = dates.datestr2num(datetime.strptime(date,'%Y-%m-%d').strftime('%Y-%m-%d')) + return converter class StockTradingGraph: @@ -30,6 +26,7 @@ class StockTradingGraph: def __init__(self, df, title=None): self.df = df + df['dt'] = pd.to_datetime(df['Date']) self.net_worths = np.zeros(len(df['Date'])) # Create a figure on screen and set the title From 2dca1156a367810ef1e3cfc8cac46509f34e1bd3 Mon Sep 17 00:00:00 2001 From: paulg Date: Tue, 12 Oct 2021 10:20:36 +1100 Subject: [PATCH 4/7] Update __init__.py --- env/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/env/__init__.py b/env/__init__.py index e69de29..072ceb6 100644 --- a/env/__init__.py +++ b/env/__init__.py @@ -0,0 +1 @@ +__version__="0.0.1" From 4d9a9239a21deeeecc0d2e5519d0cb0c0c4ed43c Mon Sep 17 00:00:00 2001 From: paulg Date: Tue, 12 Oct 2021 10:20:56 +1100 Subject: [PATCH 5/7] Update __init__.py --- render/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/render/__init__.py b/render/__init__.py index e69de29..072ceb6 100644 --- a/render/__init__.py +++ b/render/__init__.py @@ -0,0 +1 @@ +__version__="0.0.1" From e43dd93b19d14a5035df294bf2d3efa50abb984c Mon Sep 17 00:00:00 2001 From: paulg Date: Tue, 12 Oct 2021 10:21:10 +1100 Subject: [PATCH 6/7] Update __init__.py --- render/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/render/__init__.py b/render/__init__.py index 072ceb6..625ea6a 100644 --- a/render/__init__.py +++ b/render/__init__.py @@ -1 +1 @@ -__version__="0.0.1" +__version__="0.0.2" From 7d875742945c31a97d7b5f5b07ea0225529b74cd Mon Sep 17 00:00:00 2001 From: paulg Date: Tue, 12 Oct 2021 10:21:33 +1100 Subject: [PATCH 7/7] Update __init__.py --- env/__init__.py | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/env/__init__.py b/env/__init__.py index 072ceb6..625ea6a 100644 --- a/env/__init__.py +++ b/env/__init__.py @@ -1 +1 @@ -__version__="0.0.1" +__version__="0.0.2"