From 2c7ace4f0600c41d1df70185cd7c383a533cdf6c Mon Sep 17 00:00:00 2001
From: paulg <cove9988@gmail.com>
Date: Tue, 12 Oct 2021 09:19:20 +1100
Subject: [PATCH 1/7] Update StockTradingEnv.py

---
 env/StockTradingEnv.py | 126 +++++++++++++++++++++++++++++------------
 1 file changed, 91 insertions(+), 35 deletions(-)

diff --git a/env/StockTradingEnv.py b/env/StockTradingEnv.py
index 23c79ec..045bb89 100644
--- a/env/StockTradingEnv.py
+++ b/env/StockTradingEnv.py
@@ -17,30 +17,55 @@
 
 LOOKBACK_WINDOW_SIZE = 40
 
-
 def factor_pairs(val):
     return [(i, val / i) for i in range(1, int(val**0.5)+1) if val % i == 0]
 
+# np.seterr(all='raise')
 
 class StockTradingEnv(gym.Env):
-    """A stock trading environment for OpenAI gym"""
-    metadata = {'render.modes': ['live', 'file', 'none']}
-    visualization = None
-
-    def __init__(self, df):
-        super(StockTradingEnv, self).__init__()
-
+    """stock trading gym environment
+
+    """
+    metadata = {'render.modes':['live','human','file','none']}
+    
+    def __init__(self, df) -> None:
+        super(StockTradingEnv,self).__init__()
+        
         self.df = self._adjust_prices(df)
-        self.reward_range = (0, MAX_ACCOUNT_BALANCE)
-
-        # Actions of the format Buy x%, Sell x%, Hold, etc.
-        self.action_space = spaces.Box(
-            low=np.array([0, 0]), high=np.array([3, 1]), dtype=np.float16)
-
-        # Prices contains the OHCL values for the last five prices
-        self.observation_space = spaces.Box(
-            low=0, high=1, shape=(5, LOOKBACK_WINDOW_SIZE + 2), dtype=np.float16)
-
+        self.visualization = None
+        
+        """
+        Reward: we want to incentivize profit that is sustained over long periods of time.
+        At each step, we will set the reward to the account balance multiplied by
+        some fraction of the number of time steps so far.
+        The purpose of this is to avoid rewarding the agent too quickly in the early stages
+        and allow it to explore sufficiently before optimizing a single strategy too deeply. 
+        It will also reward agents that maintain a higher balance for longer, 
+        rather than those who rapidly gain money using unsustainable strategies.
+        """
+        self.reward_range = (0, MAX_ACCOUNT_BALANCE) 
+    
+        """
+        The action space has a discrete number of action types (buy, sell, and hold),
+        as well as a continuous spectrum of amounts to buy/sell 
+        (0-100% of the account balance/position size respectively).
+        You’ll notice the amount is not necessary for the hold action, 
+        but will be provided anyway. Our agent does not initially know this, 
+        but over time should learn that the amount is extraneous for this action.
+        """
+        self.action_space = spaces.Box(low=np.array([0,0]), high=np.array([3,1]),dtype=np.float16)
+        
+        """
+        The observation space contains all of the input variables we want our agent to consider before making,
+        or not making, a trade. We want our agent to “see” the stock data points
+        (open price, high, low, close, and daily volume) for the last five days,
+        as well as a couple of other data points like its account balance, current stock positions, and current profit.
+        The intuition here is that for each time step, we want our agent to consider the price action 
+        leading up to the current price, as well as their own portfolio’s status in order to make 
+        an informed decision for the next action.
+        """
+        self.observation_space = spaces.Box(low=0, high=1, shape=(5, LOOKBACK_WINDOW_SIZE + 2), dtype=np.float16)    
+    
     def _adjust_prices(self, df):
         adjust_ratio = df['Adjusted_Close'] / df['Close']
 
@@ -50,7 +75,7 @@ def _adjust_prices(self, df):
         df['Close'] = df['Close'] * adjust_ratio
 
         return df
-
+    
     def _next_observation(self):
         frame = np.zeros((5, LOOKBACK_WINDOW_SIZE + 1))
 
@@ -80,6 +105,7 @@ def _next_observation(self):
         return obs
 
     def _take_action(self, action):
+        # Set the current price to a random price within the time step
         current_price = random.uniform(
             self.df.loc[self.current_step, "Open"], self.df.loc[self.current_step, "Close"])
 
@@ -94,15 +120,18 @@ def _take_action(self, action):
             additional_cost = shares_bought * current_price
 
             self.balance -= additional_cost
-            self.cost_basis = (
-                prev_cost + additional_cost) / (self.shares_held + shares_bought)
+            if (self.shares_held + shares_bought) != 0 :            
+                self.cost_basis = (prev_cost + additional_cost) / (self.shares_held + shares_bought)
+            else:
+                self.cost_basis = 0
+                
             self.shares_held += shares_bought
 
             if shares_bought > 0:
                 self.trades.append({'step': self.current_step,
                                     'shares': shares_bought, 'total': additional_cost,
                                     'type': "buy"})
-
+          
         elif action_type < 2:
             # Sell amount % of shares held
             shares_sold = int(self.shares_held * amount)
@@ -110,12 +139,11 @@ def _take_action(self, action):
             self.shares_held -= shares_sold
             self.total_shares_sold += shares_sold
             self.total_sales_value += shares_sold * current_price
-
             if shares_sold > 0:
                 self.trades.append({'step': self.current_step,
                                     'shares': shares_sold, 'total': shares_sold * current_price,
                                     'type': "sell"})
-
+          
         self.net_worth = self.balance + self.shares_held * current_price
 
         if self.net_worth > self.max_net_worth:
@@ -124,6 +152,24 @@ def _take_action(self, action):
         if self.shares_held == 0:
             self.cost_basis = 0
 
+    def step(self, action):
+        # Execute one time step within the environment
+        self._take_action(action)
+
+        self.current_step += 1
+
+        if self.current_step > len(self.df.loc[:, 'Open'].values) - 6:
+            self.current_step = 0
+
+        delay_modifier = (self.current_step / MAX_STEPS)
+
+        reward = self.balance * delay_modifier
+        done = self.net_worth <= 0
+
+        obs = self._next_observation()
+
+        return obs, reward, done, {}
+
     def step(self, action):
         # Execute one time step within the environment
         self._take_action(action)
@@ -151,7 +197,7 @@ def reset(self):
         self.total_sales_value = 0
         self.current_step = 0
         self.trades = []
-
+        self.visualization = None
         return self._next_observation()
 
     def _render_to_file(self, filename='render.txt'):
@@ -171,21 +217,31 @@ def _render_to_file(self, filename='render.txt'):
 
         file.close()
 
-    def render(self, mode='live', **kwargs):
+    def render(self, mode='live', title=None, **kwargs):
         # Render the environment to the screen
+        if mode == 'human':
+            profit = self.net_worth - INITIAL_ACCOUNT_BALANCE
+
+            print(f'Step: {self.current_step}')
+            print(f'Balance: {self.balance}')
+            print(
+                f'Shares held: {self.shares_held} (Total sold: {self.total_shares_sold})')
+            print(
+                f'Avg cost for held shares: {self.cost_basis} (Total sales value: {self.total_sales_value})')
+            print(
+                f'Net worth: {self.net_worth} (Max net worth: {self.max_net_worth})')
+            print(f'Profit: {profit}')
         if mode == 'file':
             self._render_to_file(kwargs.get('filename', 'render.txt'))
-
         elif mode == 'live':
             if self.visualization == None:
-                self.visualization = StockTradingGraph(
-                    self.df, kwargs.get('title', None))
-
-            if self.current_step > LOOKBACK_WINDOW_SIZE:
-                self.visualization.render(
-                    self.current_step, self.net_worth, self.trades, window_size=LOOKBACK_WINDOW_SIZE)
-
+                self.visualization = StockTradingGraph(self.df, title)
+    
+            if self.current_step > LOOKBACK_WINDOW_SIZE:        
+                self.visualization.render(self.current_step, self.net_worth, 
+                self.trades, window_size=LOOKBACK_WINDOW_SIZE)
+              
     def close(self):
         if self.visualization != None:
             self.visualization.close()
-            self.visualization = None
+            self.visualization = None      

From 61e2655b9937503bccd24388abb2dbcfb8e5e66b Mon Sep 17 00:00:00 2001
From: paulg <cove9988@gmail.com>
Date: Tue, 12 Oct 2021 10:16:42 +1100
Subject: [PATCH 2/7] use latest stable_baselines3 PPO

---
 main.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/main.py b/main.py
index 25213be..0487ed0 100644
--- a/main.py
+++ b/main.py
@@ -1,8 +1,6 @@
-import gym
-
-from stable_baselines.common.policies import MlpPolicy
-from stable_baselines.common.vec_env import DummyVecEnv
-from stable_baselines import PPO2
+from stable_baselines3.ppo import MlpPolicy
+from stable_baselines3.common.vec_env import DummyVecEnv
+from stable_baselines3 import PPO
 
 from env.StockTradingEnv import StockTradingEnv
 
@@ -14,11 +12,12 @@
 # The algorithms require a vectorized environment to run
 env = DummyVecEnv([lambda: StockTradingEnv(df)])
 
-model = PPO2(MlpPolicy, env, verbose=1)
+model = PPO(MlpPolicy, env, verbose=1)
 model.learn(total_timesteps=50)
 
 obs = env.reset()
+
 for i in range(len(df['Date'])):
     action, _states = model.predict(obs)
     obs, rewards, done, info = env.step(action)
-    env.render(title="MSFT")
+    env.render(mode='live')

From a9fd5cf57b85e8d35d67d3483643e423808a53bf Mon Sep 17 00:00:00 2001
From: paulg <cove9988@gmail.com>
Date: Tue, 12 Oct 2021 10:18:21 +1100
Subject: [PATCH 3/7] new version mplfinance and datestr2num conversion

---
 render/StockTradingGraph.py | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/render/StockTradingGraph.py b/render/StockTradingGraph.py
index 7928787..9c4ccfc 100644
--- a/render/StockTradingGraph.py
+++ b/render/StockTradingGraph.py
@@ -1,15 +1,11 @@
-
-
 import numpy as np
-import matplotlib
+import pandas as pd
 import matplotlib.pyplot as plt
-import matplotlib.dates as mdates
+import matplotlib.dates as dates
 from matplotlib import style
-
-# finance module is no longer part of matplotlib
-# see: https://github.com/matplotlib/mpl_finance
-from mpl_finance import candlestick_ochl as candlestick
-
+from datetime import datetime
+# import mplfinance as mpf
+from mplfinance.original_flavor import candlestick_ohlc  as candlestick
 style.use('dark_background')
 
 VOLUME_CHART_HEIGHT = 0.33
@@ -21,8 +17,8 @@
 
 
 def date2num(date):
-    converter = mdates.strpdate2num('%Y-%m-%d')
-    return converter(date)
+    converter = dates.datestr2num(datetime.strptime(date,'%Y-%m-%d').strftime('%Y-%m-%d'))
+    return converter
 
 
 class StockTradingGraph:
@@ -30,6 +26,7 @@ class StockTradingGraph:
 
     def __init__(self, df, title=None):
         self.df = df
+        df['dt'] = pd.to_datetime(df['Date'])
         self.net_worths = np.zeros(len(df['Date']))
 
         # Create a figure on screen and set the title

From 2dca1156a367810ef1e3cfc8cac46509f34e1bd3 Mon Sep 17 00:00:00 2001
From: paulg <cove9988@gmail.com>
Date: Tue, 12 Oct 2021 10:20:36 +1100
Subject: [PATCH 4/7] Update __init__.py

---
 env/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/env/__init__.py b/env/__init__.py
index e69de29..072ceb6 100644
--- a/env/__init__.py
+++ b/env/__init__.py
@@ -0,0 +1 @@
+__version__="0.0.1"

From 4d9a9239a21deeeecc0d2e5519d0cb0c0c4ed43c Mon Sep 17 00:00:00 2001
From: paulg <cove9988@gmail.com>
Date: Tue, 12 Oct 2021 10:20:56 +1100
Subject: [PATCH 5/7] Update __init__.py

---
 render/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/render/__init__.py b/render/__init__.py
index e69de29..072ceb6 100644
--- a/render/__init__.py
+++ b/render/__init__.py
@@ -0,0 +1 @@
+__version__="0.0.1"

From e43dd93b19d14a5035df294bf2d3efa50abb984c Mon Sep 17 00:00:00 2001
From: paulg <cove9988@gmail.com>
Date: Tue, 12 Oct 2021 10:21:10 +1100
Subject: [PATCH 6/7] Update __init__.py

---
 render/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/render/__init__.py b/render/__init__.py
index 072ceb6..625ea6a 100644
--- a/render/__init__.py
+++ b/render/__init__.py
@@ -1 +1 @@
-__version__="0.0.1"
+__version__="0.0.2"

From 7d875742945c31a97d7b5f5b07ea0225529b74cd Mon Sep 17 00:00:00 2001
From: paulg <cove9988@gmail.com>
Date: Tue, 12 Oct 2021 10:21:33 +1100
Subject: [PATCH 7/7] Update __init__.py

---
 env/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/env/__init__.py b/env/__init__.py
index 072ceb6..625ea6a 100644
--- a/env/__init__.py
+++ b/env/__init__.py
@@ -1 +1 @@
-__version__="0.0.1"
+__version__="0.0.2"