From 4be5e4ca61e40e8830d5616fd1a93106be3e0d44 Mon Sep 17 00:00:00 2001 From: litch Date: Thu, 6 Jun 2019 12:27:07 -0500 Subject: [PATCH 01/11] Paths are parameterized to be OS agnostic --- optimize.py | 5 +++-- test.py | 7 ++++--- train.py | 10 +++++----- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/optimize.py b/optimize.py index d758f34..5e18147 100644 --- a/optimize.py +++ b/optimize.py @@ -10,6 +10,7 @@ import optuna +import os import pandas as pd import numpy as np @@ -22,7 +23,7 @@ reward_strategy = 'sortino' -input_data_file = 'data/coinbase_hourly.csv' +input_data_file = os.path.join('data', 'coinbase_hourly.csv') params_db_file = 'sqlite:///params.db' # number of parallel jobs @@ -78,7 +79,7 @@ def optimize_agent(trial): model_params = optimize_ppo2(trial) model = PPO2(MlpLnLstmPolicy, train_env, verbose=0, nminibatches=1, - tensorboard_log="./tensorboard", **model_params) + tensorboard_log=os.path.join('.', '/tensorboard'), **model_params) last_reward = -np.finfo(np.float16).max evaluation_interval = int(len(train_df) / n_evaluations) diff --git a/test.py b/test.py index a2a14eb..8d47518 100644 --- a/test.py +++ b/test.py @@ -1,3 +1,4 @@ +import os import gym import optuna import pandas as pd @@ -11,7 +12,7 @@ curr_idx = 0 reward_strategy = 'sortino' -input_data_file = 'data/coinbase_hourly.csv' +input_data_file = os.path.join('data', 'coinbase_hourly.csv') params_db_file = 'sqlite:///params.db' study_name = 'ppo2' + reward_strategy @@ -21,7 +22,7 @@ print("Testing PPO2 agent with params:", params) print("Best trial:", -1 * study.best_trial.value) -df = pd.read_csv('./data/coinbase_hourly.csv') +df = pd.read_csv(input_data_file) df = df.drop(['Symbol'], axis=1) df = df.sort_values(['Date']) df = add_indicators(df.reset_index()) @@ -44,7 +45,7 @@ 'lam': params['lam'], } -model = PPO2.load('./agents/ppo2_' + reward_strategy + '_' + str(curr_idx) + '.pkl', env=test_env) +model = PPO2.load(os.path.join('.', 'agents', 'ppo2_' + reward_strategy + '_' + str(curr_idx) + '.pkl'), env=test_env) obs, done = test_env.reset(), False while not done: diff --git a/train.py b/train.py index 0183be8..b9a4561 100644 --- a/train.py +++ b/train.py @@ -1,3 +1,4 @@ +import os import gym import optuna import pandas as pd @@ -10,10 +11,9 @@ from env.BitcoinTradingEnv import BitcoinTradingEnv from util.indicators import add_indicators - curr_idx = -1 reward_strategy = 'sortino' -input_data_file = 'data/coinbase_hourly.csv' +input_data_file = os.path.join('data', 'coinbase_hourly.csv') params_db_file = 'sqlite:///params.db' study_name = 'ppo2' + reward_strategy @@ -52,9 +52,9 @@ if curr_idx == -1: model = PPO2(MlpLnLstmPolicy, train_env, verbose=0, nminibatches=1, - tensorboard_log="./tensorboard", **model_params) + tensorboard_log=os.path.join('.', 'tensorboard'), **model_params) else: - model = PPO2.load('./agents/ppo2_' + reward_strategy + '_' + str(curr_idx) + '.pkl', env=train_env) + model = PPO2.load(os.path.join('.', 'agents', 'ppo2_' + reward_strategy + '_' + str(curr_idx) + '.pkl'), env=train_env) for idx in range(curr_idx + 1, 10): print('[', idx, '] Training for: ', train_len, ' time steps') @@ -70,4 +70,4 @@ reward_sum += reward print('[', idx, '] Total reward: ', reward_sum, ' (' + reward_strategy + ')') - model.save('./agents/ppo2_' + reward_strategy + '_' + str(idx) + '.pkl') + model.save(os.path.join('.', 'agents', 'ppo2_' + reward_strategy + '_' + str(idx) + '.pkl')) From 8aa39e455ffb49a938dacc18aa1e1f8bf93e2347 Mon Sep 17 00:00:00 2001 From: litch Date: Thu, 6 Jun 2019 
14:25:36 -0500 Subject: [PATCH 02/11] File/object name references are corrected --- optimize.py | 3 +-- train.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/optimize.py b/optimize.py index 5e18147..8231f9c 100644 --- a/optimize.py +++ b/optimize.py @@ -21,7 +21,6 @@ from env.BitcoinTradingEnv import BitcoinTradingEnv from util.indicators import add_indicators - reward_strategy = 'sortino' input_data_file = os.path.join('data', 'coinbase_hourly.csv') params_db_file = 'sqlite:///params.db' @@ -79,7 +78,7 @@ def optimize_agent(trial): model_params = optimize_ppo2(trial) model = PPO2(MlpLnLstmPolicy, train_env, verbose=0, nminibatches=1, - tensorboard_log=os.path.join('.', '/tensorboard'), **model_params) + tensorboard_log=os.path.join('.', 'tensorboard'), **model_params) last_reward = -np.finfo(np.float16).max evaluation_interval = int(len(train_df) / n_evaluations) diff --git a/train.py b/train.py index b9a4561..ba51c51 100644 --- a/train.py +++ b/train.py @@ -16,7 +16,7 @@ input_data_file = os.path.join('data', 'coinbase_hourly.csv') params_db_file = 'sqlite:///params.db' -study_name = 'ppo2' + reward_strategy +study_name = 'ppo2_' + reward_strategy study = optuna.load_study(study_name=study_name, storage=params_db_file) params = study.best_trial.params From 70d96c6422120d634af084af5810f386667f57b6 Mon Sep 17 00:00:00 2001 From: litch Date: Thu, 6 Jun 2019 14:48:35 -0500 Subject: [PATCH 03/11] Agents directory is preserved --- agents/.gitkeep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 agents/.gitkeep diff --git a/agents/.gitkeep b/agents/.gitkeep new file mode 100644 index 0000000..e69de29 From 2bad70e66ee635cad381ef3456fcc724d02b26da Mon Sep 17 00:00:00 2001 From: litch Date: Sat, 8 Jun 2019 22:48:35 -0500 Subject: [PATCH 04/11] Optimize wrapped into a class for parameterization --- README.md | 42 ++++++++++- optimize.py | 198 +++++++++++++++++++++++++++------------------------- test.py | 2 +- 3 files changed, 141 insertions(+), 101 deletions(-) diff --git a/README.md b/README.md index 2993937..92c7859 100644 --- a/README.md +++ b/README.md @@ -16,11 +16,47 @@ The first thing you will need to do to get started is install the requirements i ```bash pip install -r requirements.txt ``` - + The requirements include the `tensorflow-gpu` library, though if you do not have access to a GPU, you should replace this requirement with `tensorflow`. - + +## Testing workflow + +First let's try the "optimize" strategy with a single run, single evaluation, just to make sure that things are "sane". + +### Expected output + +``` +% date ; python optimize.py; date +Thu Jun 6 14:09:23 CDT 2019 +[I 2019-06-06 14:09:35,557] A new study created with name: ppo2_sortino + + + +[I 2019-06-06 14:21:50,724] Finished trial#1 resulted in value: -956.9744873046875. Current best value is -956.9744873046875 with parameters: {'cliprange': 0.18943365028795878, 'confidence_interval': 0.8286824056507663, 'ent_coef': 8.094794121881875e-08, 'forecast_len': 14.7463$ +0586736364, 'gamma': 0.9834343245286393, 'lam': 0.9646711236104828, 'learning_rate': 0.032564661147532384, 'n_steps': 28.294495666878618, 'noptepochs': 2.3568984946859066}. 
+Number of finished trials: 2 +Best trial: +Value: -956.9744873046875 +Params: + cliprange: 0.18943365028795878 + confidence_interval: 0.8286824056507663 + ent_coef: 8.094794121881875e-08 + forecast_len: 14.746310586736364 + gamma: 0.9834343245286393 + lam: 0.9646711236104828 + learning_rate: 0.032564661147532384 + n_steps: 28.294495666878618 + noptepochs: 2.3568984946859066 + +Thu Jun 6 14:21:51 CDT 2019 + +% +``` + +So that took about 12 minutes on a pretty powerful laptop to run a single trial (at least as of Jun 2019). + # Finding Hyper-Parameters - + While you could just let the agent train and run with the default PPO2 hyper-parameters, your agent would likely not be very profitable. The `stable-baselines` library provides a great set of default parameters that work for most problem domains, but we need to better. To do this, you will need to run `optimize.py`. Within the file, you can define the `reward_strategy` for the environment to use, this is currently defaulted to `sortino`. diff --git a/optimize.py b/optimize.py index 8231f9c..0ba50c8 100644 --- a/optimize.py +++ b/optimize.py @@ -21,105 +21,108 @@ from env.BitcoinTradingEnv import BitcoinTradingEnv from util.indicators import add_indicators -reward_strategy = 'sortino' -input_data_file = os.path.join('data', 'coinbase_hourly.csv') -params_db_file = 'sqlite:///params.db' - -# number of parallel jobs -n_jobs = 4 -# maximum number of trials for finding the best hyperparams -n_trials = 1000 -# number of test episodes per trial -n_test_episodes = 3 -# number of evaluations for pruning per trial -n_evaluations = 4 - - -df = pd.read_csv(input_data_file) -df = df.drop(['Symbol'], axis=1) -df = df.sort_values(['Date']) -df = add_indicators(df.reset_index()) - -train_len = int(len(df) * 0.8) - -df = df[:train_len] - -validation_len = int(train_len * 0.8) -train_df = df[:validation_len] -test_df = df[validation_len:] - - -def optimize_envs(trial): - return { - 'reward_func': reward_strategy, - 'forecast_len': int(trial.suggest_loguniform('forecast_len', 1, 200)), - 'confidence_interval': trial.suggest_uniform('confidence_interval', 0.7, 0.99), - } - - -def optimize_ppo2(trial): - return { - 'n_steps': int(trial.suggest_loguniform('n_steps', 16, 2048)), - 'gamma': trial.suggest_loguniform('gamma', 0.9, 0.9999), - 'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1.), - 'ent_coef': trial.suggest_loguniform('ent_coef', 1e-8, 1e-1), - 'cliprange': trial.suggest_uniform('cliprange', 0.1, 0.4), - 'noptepochs': int(trial.suggest_loguniform('noptepochs', 1, 48)), - 'lam': trial.suggest_uniform('lam', 0.8, 1.) 
- } - - -def optimize_agent(trial): - env_params = optimize_envs(trial) - train_env = DummyVecEnv( - [lambda: BitcoinTradingEnv(train_df, **env_params)]) - test_env = DummyVecEnv( - [lambda: BitcoinTradingEnv(test_df, **env_params)]) - - model_params = optimize_ppo2(trial) - model = PPO2(MlpLnLstmPolicy, train_env, verbose=0, nminibatches=1, - tensorboard_log=os.path.join('.', 'tensorboard'), **model_params) - - last_reward = -np.finfo(np.float16).max - evaluation_interval = int(len(train_df) / n_evaluations) - - for eval_idx in range(n_evaluations): - try: - model.learn(evaluation_interval) - except AssertionError: - raise - - rewards = [] - n_episodes, reward_sum = 0, 0.0 - - obs = test_env.reset() - while n_episodes < n_test_episodes: - action, _ = model.predict(obs) - obs, reward, done, _ = test_env.step(action) - reward_sum += reward - - if done: - rewards.append(reward_sum) - reward_sum = 0.0 - n_episodes += 1 - obs = test_env.reset() - - last_reward = np.mean(rewards) - trial.report(-1 * last_reward, eval_idx) - - if trial.should_prune(eval_idx): - raise optuna.structs.TrialPruned() - - return -1 * last_reward - - -def optimize(): - study_name = 'ppo2_' + reward_strategy +class Optimize: + def __init__(self): + self.reward_strategy = 'sortino' + self.input_data_file = os.path.join('data', 'coinbase_daily.csv') + self.params_db_file = 'sqlite:///params.db' + + # number of parallel jobs + self.n_jobs = 4 + # maximum number of trials for finding the best hyperparams + self.n_trials = 1 + # number of test episodes per trial + self.n_test_episodes = 1 + # number of evaluations for pruning per trial + self.n_evaluations = 1 + self.prepare_data() + + def prepare_data(self): + df = pd.read_csv(self.input_data_file) + df = df.drop(['Symbol'], axis=1) + df = df.sort_values(['Date']) + df = add_indicators(df.reset_index()) + + train_len = int(len(df) * 0.8) + + df = df[:train_len] + + validation_len = int(train_len * 0.8) + self.train_df = df[:validation_len] + self.test_df = df[validation_len:] + + + def optimize_envs(self, trial): + return { + 'reward_func': self.reward_strategy, + 'forecast_len': int(trial.suggest_loguniform('forecast_len', 1, 200)), + 'confidence_interval': trial.suggest_uniform('confidence_interval', 0.7, 0.99), + } + + + def optimize_ppo2(self, trial): + return { + 'n_steps': int(trial.suggest_loguniform('n_steps', 16, 2048)), + 'gamma': trial.suggest_loguniform('gamma', 0.9, 0.9999), + 'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1.), + 'ent_coef': trial.suggest_loguniform('ent_coef', 1e-8, 1e-1), + 'cliprange': trial.suggest_uniform('cliprange', 0.1, 0.4), + 'noptepochs': int(trial.suggest_loguniform('noptepochs', 1, 48)), + 'lam': trial.suggest_uniform('lam', 0.8, 1.) 
+ } + + def optimize_agent(self, trial): + env_params = self.optimize_envs(trial) + train_env = DummyVecEnv( + [lambda: BitcoinTradingEnv(self.train_df, **env_params)]) + test_env = DummyVecEnv( + [lambda: BitcoinTradingEnv(self.test_df, **env_params)]) + + model_params = self.optimize_ppo2(trial) + model = PPO2(MlpLnLstmPolicy, train_env, verbose=0, nminibatches=1, + tensorboard_log=os.path.join('.', 'tensorboard'), **model_params) + + last_reward = -np.finfo(np.float16).max + evaluation_interval = int(len(self.train_df) / self.n_evaluations) + + for eval_idx in range(self.n_evaluations): + try: + model.learn(evaluation_interval) + except AssertionError: + raise + + rewards = [] + n_episodes, reward_sum = 0, 0.0 + + obs = test_env.reset() + while n_episodes < self.n_test_episodes: + action, _ = model.predict(obs) + obs, reward, done, _ = test_env.step(action) + reward_sum += reward + + if done: + rewards.append(reward_sum) + reward_sum = 0.0 + n_episodes += 1 + obs = test_env.reset() + + last_reward = np.mean(rewards) + trial.report(-1 * last_reward, eval_idx) + + if trial.should_prune(eval_idx): + raise optuna.structs.TrialPruned() + + return -1 * last_reward + + + def optimize(self): + + study_name = 'ppo2_' + self.reward_strategy study = optuna.create_study( - study_name=study_name, storage=params_db_file, load_if_exists=True) + study_name=study_name, storage=self.params_db_file, load_if_exists=True) try: - study.optimize(optimize_agent, n_trials=n_trials, n_jobs=n_jobs) + study.optimize(self.optimize_agent, n_trials=self.n_trials, n_jobs=self.n_jobs) except KeyboardInterrupt: pass @@ -138,4 +141,5 @@ def optimize(): if __name__ == '__main__': - optimize() + optimizer = Optimize() + optimizer.optimize() diff --git a/test.py b/test.py index 8d47518..b7711c8 100644 --- a/test.py +++ b/test.py @@ -15,7 +15,7 @@ input_data_file = os.path.join('data', 'coinbase_hourly.csv') params_db_file = 'sqlite:///params.db' -study_name = 'ppo2' + reward_strategy +study_name = 'ppo2_' + reward_strategy study = optuna.load_study(study_name=study_name, storage=params_db_file) params = study.best_trial.params From 7e486254c4b408b865e7c1d37138d8c8a9fe2010 Mon Sep 17 00:00:00 2001 From: litch Date: Mon, 10 Jun 2019 06:25:44 +0530 Subject: [PATCH 05/11] Logging is improved --- .gitignore | 4 +++- log/.gitkeep | 0 optimize.py | 54 +++++++++++++++++++++++++++++++++++++++++----------- util/log.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 97 insertions(+), 12 deletions(-) create mode 100644 log/.gitkeep create mode 100644 util/log.py diff --git a/.gitignore b/.gitignore index 69d100a..8ac6194 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,8 @@ data/bitstamp.csv tensorboard/* agents research/results +research/.ipynb_checkpoints/ **/__pycache__ *.pkl -*.db \ No newline at end of file +*.db +log/ diff --git a/log/.gitkeep b/log/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/optimize.py b/optimize.py index 0ba50c8..c28fa18 100644 --- a/optimize.py +++ b/optimize.py @@ -11,6 +11,7 @@ import optuna import os +from util.log import init_logger import pandas as pd import numpy as np @@ -24,18 +25,24 @@ class Optimize: def __init__(self): self.reward_strategy = 'sortino' - self.input_data_file = os.path.join('data', 'coinbase_daily.csv') + self.input_data_file = os.path.join('data', 'coinbase_hourly.csv') self.params_db_file = 'sqlite:///params.db' # number of parallel jobs self.n_jobs = 4 # maximum number of trials for finding the best hyperparams - self.n_trials = 1 + 
self.n_trials = 1000 # number of test episodes per trial - self.n_test_episodes = 1 + self.n_test_episodes = 3 # number of evaluations for pruning per trial - self.n_evaluations = 1 - self.prepare_data() + self.n_evaluations = 4 + + self.train_df = None + self.test_df = None + + self.logger = init_logger(__name__, testing_mode=True) + + self.logger.debug("Initialized Optimizer") def prepare_data(self): df = pd.read_csv(self.input_data_file) @@ -114,8 +121,27 @@ def optimize_agent(self, trial): return -1 * last_reward + def log_parameters(self): + self.logger.debug("Reward Strategy: %s" % self.reward_strategy) + self.logger.debug("Input Data File: %s" % self.input_data_file) + self.logger.debug("Params DB File: %s" % self.params_db_file) + self.logger.debug("Parallel jobs: %d" % self.n_jobs) + self.logger.debug("Trials: %d" % self.n_trials) + self.logger.debug("Test episodes (per trial): %d" % self.n_test_episodes) + self.logger.debug("Evaluations (per trial): %d" % self.n_evaluations) + self.logger.debug("Train DF Length: %d" % len(self.train_df)) + self.logger.debug("Test DF Length: %d" % len(self.test_df)) + self.logger.debug("Features: %s", self.train_df.columns.str.cat(sep=", ")) + def optimize(self): + if not self.train_df: + self.logger.info("Running built-in data preparation") + self.prepare_data() + else: + self.logger.info("Using provided data (Length: %d)" % len(self.train_df)) + + self.log_parameters() study_name = 'ppo2_' + self.reward_strategy study = optuna.create_study( @@ -126,20 +152,26 @@ def optimize(self): except KeyboardInterrupt: pass - print('Number of finished trials: ', len(study.trials)) + self.logger.info('Number of finished trials: {}'.format(len(study.trials))) - print('Best trial:') + self.logger.info('Best trial:') trial = study.best_trial - print('Value: ', trial.value) + self.logger.info('Value: {}'.format(trial.value)) - print('Params: ') + self.logger.info('Params: ') for key, value in trial.params.items(): - print(' {}: {}'.format(key, value)) + self.logger.info(' {}: {}'.format(key, value)) return study.trials_dataframe() - if __name__ == '__main__': optimizer = Optimize() + test_mode = "FAST" + if test_mode == "FAST": + optimizer.input_data_file = os.path.join('data', 'coinbase_daily.csv') + optimizer.n_jobs = 1 + optimizer.n_trials = 1 + optimizer.n_test_episodes = 1 + optimizer.n_evaluations = 1 optimizer.optimize() diff --git a/util/log.py b/util/log.py new file mode 100644 index 0000000..53842f2 --- /dev/null +++ b/util/log.py @@ -0,0 +1,51 @@ +import os +import logging +import colorlog + +def init_logger(dunder_name, testing_mode) -> logging.Logger: + log_format = ( + '%(asctime)s - ' + '%(name)s - ' + '%(funcName)s - ' + '%(levelname)s - ' + '%(message)s' + ) + bold_seq = '\033[1m' + colorlog_format = ( + f'{bold_seq} ' + '%(log_color)s ' + f'{log_format}' + ) + colorlog.basicConfig(format=colorlog_format) + logger = logging.getLogger(dunder_name) + + if testing_mode: + logger.setLevel(logging.DEBUG) + else: + logger.setLevel(logging.INFO) + + ## Note: these file outputs are left in place as examples + ## Feel free to uncomment and use the outputs as you like + + # Output full log + # fh = logging.FileHandler(os.path.join('log', 'trading.log') + # fh.setLevel(logging.DEBUG) + # formatter = logging.Formatter(log_format) + # fh.setFormatter(formatter) + # logger.addHandler(fh) + + # # Output warning log + # fh = logging.FileHandler(os.path.join('log', 'trading.warning.log') + # fh.setLevel(logging.WARNING) + # formatter = 
logging.Formatter(log_format) + # fh.setFormatter(formatter) + # logger.addHandler(fh) + + # # Output error log + # fh = logging.FileHandler(os.path.join('log', 'trading.error.log') + # fh.setLevel(logging.ERROR) + # formatter = logging.Formatter(log_format) + # fh.setFormatter(formatter) + # logger.addHandler(fh) + + return logger From 16b4a96940789ab9ccbbdd14591d2eb55d70fc4f Mon Sep 17 00:00:00 2001 From: litch Date: Tue, 11 Jun 2019 07:13:11 +0530 Subject: [PATCH 06/11] Train and test functionality is moved into optimize.py --- optimize.py | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 80 insertions(+), 3 deletions(-) diff --git a/optimize.py b/optimize.py index c28fa18..aa9554f 100644 --- a/optimize.py +++ b/optimize.py @@ -133,7 +133,6 @@ def log_parameters(self): self.logger.debug("Test DF Length: %d" % len(self.test_df)) self.logger.debug("Features: %s", self.train_df.columns.str.cat(sep=", ")) - def optimize(self): if not self.train_df: self.logger.info("Running built-in data preparation") @@ -165,13 +164,91 @@ def optimize(self): return study.trials_dataframe() + def model_params(self, params): + return { + 'n_steps': int(params['n_steps']), + 'gamma': params['gamma'], + 'learning_rate': params['learning_rate'], + 'ent_coef': params['ent_coef'], + 'cliprange': params['cliprange'], + 'noptepochs': int(params['noptepochs']), + 'lam': params['lam'], + } + + def train(self): + if not self.train_df: + self.logger.info("Running built-in data preparation") + self.prepare_data() + else: + self.logger.info("Using provided data (Length: %d)" % len(self.train_df)) + + study_name = 'ppo2_' + self.reward_strategy + + study = optuna.load_study(study_name=study_name, storage=self.params_db_file) + params = study.best_trial.params + + train_env = DummyVecEnv([lambda: BitcoinTradingEnv( + self.train_df, reward_func=self.reward_strategy, forecast_len=int(params['forecast_len']), confidence_interval=params['confidence_interval'])]) + + test_env = DummyVecEnv([lambda: BitcoinTradingEnv( + self.test_df, reward_func=self.reward_strategy, forecast_len=int(params['forecast_len']), confidence_interval=params['confidence_interval'])]) + + model_params = self.model_params(params) + + model = PPO2(MlpLnLstmPolicy, train_env, verbose=0, nminibatches=1, + tensorboard_log=os.path.join('.', 'tensorboard'), **model_params) + + models_to_train = 1 + self.logger.info("Training {} model instances".format(models_to_train)) + + for idx in range(0, models_to_train): #Not sure why we are doing this, tbh + self.logger.info('[', idx, '] Training for: ', len(self.train_df), ' time steps') + + model.learn(total_timesteps=len(self.train_df)) + + obs = test_env.reset() + done, reward_sum = False, 0 + + while not done: + action, _states = model.predict(obs) + obs, reward, done, info = test_env.step(action) + reward_sum += reward + + self.logger.info('[', idx, '] Total reward: ', reward_sum, ' (' + self.reward_strategy + ')') + model.save(os.path.join('.', 'agents', 'ppo2_' + self.reward_strategy + '_' + str(idx) + '.pkl')) + + self.logger.info("Trained {} model instances".format(models_to_train)) + + def test(self, model_instance: 0): + + study_name = 'ppo2_' + self.reward_strategy + study = optuna.load_study(study_name=study_name, storage=self.params_db_file) + params = study.best_trial.params + + test_env = DummyVecEnv([lambda: BitcoinTradingEnv( + self.test_df, reward_func=self.reward_strategy, forecast_len=int(params['forecast_len']), confidence_interval=params['confidence_interval'])]) + + 
model_params = self.model_params(params) + + model = PPO2.load(os.path.join('.', 'agents', 'ppo2_' + reward_strategy + '_' + str(model_instance) + '.pkl'), env=test_env) + + obs, done = test_env.reset(), False + while not done: + action, _states = model.predict(obs) + obs, reward, done, info = test_env.step(action) + + test_env.render(mode="human") + + if __name__ == '__main__': optimizer = Optimize() - test_mode = "FAST" + test_mode = "FAST" # I'm hard-coding this for now if test_mode == "FAST": optimizer.input_data_file = os.path.join('data', 'coinbase_daily.csv') optimizer.n_jobs = 1 optimizer.n_trials = 1 optimizer.n_test_episodes = 1 optimizer.n_evaluations = 1 - optimizer.optimize() + # optimizer.optimize() + optimizer.train() + # optimizer.test() From 3f899009315ce2634c871e62af5bc0e1397be7e4 Mon Sep 17 00:00:00 2001 From: adamjking3 Date: Mon, 24 Jun 2019 00:01:26 -0700 Subject: [PATCH 07/11] Move /agents, /tensorboard, /log, and params.db inside /data for cleaner file structure. --- .gitignore | 9 +++------ data/{ => input}/coinbase_daily.csv | 0 data/{ => input}/coinbase_hourly.csv | 0 3 files changed, 3 insertions(+), 6 deletions(-) rename data/{ => input}/coinbase_daily.csv (100%) rename data/{ => input}/coinbase_hourly.csv (100%) diff --git a/.gitignore b/.gitignore index 8ac6194..3cdcf7d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,7 @@ .vscode -data/bitstamp.csv -tensorboard/* -agents -research/results -research/.ipynb_checkpoints/ **/__pycache__ +data/tensorboard/* +data/agents/* +data/log/* *.pkl *.db -log/ diff --git a/data/coinbase_daily.csv b/data/input/coinbase_daily.csv similarity index 100% rename from data/coinbase_daily.csv rename to data/input/coinbase_daily.csv diff --git a/data/coinbase_hourly.csv b/data/input/coinbase_hourly.csv similarity index 100% rename from data/coinbase_hourly.csv rename to data/input/coinbase_hourly.csv From 7e4c1e8d94aa5bddf1f9dd3e583914b6010835aa Mon Sep 17 00:00:00 2001 From: adamjking3 Date: Mon, 24 Jun 2019 00:02:54 -0700 Subject: [PATCH 08/11] Replace MIT license with GNU v3 to prevent this library from ever being resold, etc. --- LICENSE | 695 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 674 insertions(+), 21 deletions(-) diff --git a/LICENSE b/LICENSE index e36694c..61d1860 100644 --- a/LICENSE +++ b/LICENSE @@ -1,21 +1,674 @@ -MIT License - -Copyright (c) 2019 Adam King - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
+GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. 
+ + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. 
However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. 
+ + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. 
+ + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. 
For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. 
+ + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + Copyright (C) + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". 
+ + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. \ No newline at end of file From b87d127cebb333ec089a22aad8b8b58491bccaac Mon Sep 17 00:00:00 2001 From: adamjking3 Date: Mon, 24 Jun 2019 00:07:52 -0700 Subject: [PATCH 09/11] Update readme to support single optimize.py file for easier onboarding. --- README.md | 66 ++++++------------------------------------------------- 1 file changed, 7 insertions(+), 59 deletions(-) diff --git a/README.md b/README.md index 64e0c31..a262b0e 100644 --- a/README.md +++ b/README.md @@ -15,51 +15,15 @@ https://towardsdatascience.com/using-reinforcement-learning-to-trade-bitcoin-for The first thing you will need to do to get started is install the requirements in `requirements.txt`. - ```bash - pip install -r requirements.txt - ``` - - The requirements include the `tensorflow-gpu` library, though if you do not have access to a GPU, you should replace this requirement with `tensorflow`. - -## Testing workflow - -First let's try the "optimize" strategy with a single run, single evaluation, just to make sure that things are "sane". - -### Expected output - -``` -% date ; python optimize.py; date -Thu Jun 6 14:09:23 CDT 2019 -[I 2019-06-06 14:09:35,557] A new study created with name: ppo2_sortino - - - -[I 2019-06-06 14:21:50,724] Finished trial#1 resulted in value: -956.9744873046875. Current best value is -956.9744873046875 with parameters: {'cliprange': 0.18943365028795878, 'confidence_interval': 0.8286824056507663, 'ent_coef': 8.094794121881875e-08, 'forecast_len': 14.7463$ -0586736364, 'gamma': 0.9834343245286393, 'lam': 0.9646711236104828, 'learning_rate': 0.032564661147532384, 'n_steps': 28.294495666878618, 'noptepochs': 2.3568984946859066}. -Number of finished trials: 2 -Best trial: -Value: -956.9744873046875 -Params: - cliprange: 0.18943365028795878 - confidence_interval: 0.8286824056507663 - ent_coef: 8.094794121881875e-08 - forecast_len: 14.746310586736364 - gamma: 0.9834343245286393 - lam: 0.9646711236104828 - learning_rate: 0.032564661147532384 - n_steps: 28.294495666878618 - noptepochs: 2.3568984946859066 - -Thu Jun 6 14:21:51 CDT 2019 - -% +```bash +pip install -r requirements.txt ``` -So that took about 12 minutes on a pretty powerful laptop to run a single trial (at least as of Jun 2019). +The requirements include the `tensorflow-gpu` library, though if you do not have access to a GPU, you should replace this requirement with `tensorflow`. - # Finding Hyper-Parameters +# Optimizing, Training, and Testing -While you could just let the agent train and run with the default PPO2 hyper-parameters, your agent would likely not be very profitable. The `stable-baselines` library provides a great set of default parameters that work for most problem domains, but we need to better. +While you could just let the agent train and run with the default PPO2 hyper-parameters, your agent would likely not be very profitable. 
The `stable-baselines` library provides a great set of default parameters that work for most problem domains, but we need to do better.

To do this, you will need to run `optimize.py`. Within the file, you can define the `reward_strategy` for the environment to use; it currently defaults to `sortino`.

@@ -67,25 +31,9 @@ To do this, you will need to run `optimize.py`. Within the file, you can define
 python ./optimize.py
 ```

-This will take a while (hours to days depending on your hardware setup), but over time it will print to the console as trials are completed. Once a trial is completed, it will be stored in `./params.db`, an SQLite database, from which we can pull hyper-parameters to train our agent.
-
-# Training Agents
-
-Once you've found a good set of hyper-parameters, we can train an agent with that set. To do this, you will want to open `train.py` and ensure the `reward_strategy` is set to the correct strategy. Then let `train.py` run until you've got some saved models to test.
+This can take a while (hours to days depending on your hardware setup), but over time it will print to the console as trials are completed. Once a trial is completed, it will be stored in `./data/params.db`, an SQLite database, from which we can pull hyper-parameters to train our agent.
 
-```bash
-python ./train.py
-```
-
-If you have already trained a model, and would like to resume training from the next epoch, you can set `curr_idx` at the top of the file to the index of the last trained model. Otherwise, leave this at `-1` to start training at epoch 0.
-
-# Testing Agents
-
-Once you've successfully trained and saved a model, it's time to test it. Open up `test.py` and set the `reward_strategy` to the correct strategy and `curr_idx` to the index of the agent you'd like to train. Then run `test.py` to watch your agent trade.
-
-```bash
-python ./test.py
-```
+From there, you can train an agent with the best set of hyper-parameters, and later test it on completely new data to verify the generalization of the algorithm.
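+If you want to inspect what the optimizer has found so far, you can read the best trial back out of that database with Optuna directly. A minimal sketch (the study name `ppo2_sortino` and the storage URL are assumptions based on the defaults described above):
+
+```python
+# Minimal sketch: read the best hyper-parameters back out of the Optuna study.
+# The study name and storage URL below are assumed from the defaults above.
+import optuna
+
+study = optuna.load_study(study_name='ppo2_sortino',
+                          storage='sqlite:///data/params.db')
+
+print('Finished trials:', len(study.trials))
+print('Best reward:', -1 * study.best_trial.value)
+for key, value in study.best_trial.params.items():
+    print(f'  {key}: {value}')
+```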
# Contributing From 5486766b1e40c91f0bbcac21480ea18164903c4f Mon Sep 17 00:00:00 2001 From: adamjking3 Date: Mon, 24 Jun 2019 00:08:15 -0700 Subject: [PATCH 10/11] Add results.py because I'm not sure why it was hidden --- research/results.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/research/results.py b/research/results.py index cad604e..97db2fd 100644 --- a/research/results.py +++ b/research/results.py @@ -7,8 +7,8 @@ from stable_baselines.common.vec_env import SubprocVecEnv, DummyVecEnv from stable_baselines import A2C, ACKTR, PPO2 -from env.BitcoinTradingEnv import BitcoinTradingEnv -from util.indicators import add_indicators +from lib.env.BitcoinTradingEnv import BitcoinTradingEnv +from lib.util.indicators import add_indicators df = pd.read_csv('./data/coinbase_hourly.csv') @@ -22,24 +22,24 @@ test_df = df[train_len:] profit_study = optuna.load_study(study_name='ppo2_profit', - storage='sqlite:///params.db') + storage='sqlite:///params.db') profit_env = DummyVecEnv([lambda: BitcoinTradingEnv( - test_df, reward_func="profit", forecast_len=int(profit_study.best_trial.params['forecast_len']), confidence_interval=profit_study.best_trial.params['confidence_interval'])]) + test_df, reward_func="profit", forecast_steps=int(profit_study.best_trial.params['forecast_steps']), forecast_alpha=profit_study.best_trial.params['forecast_alpha'])]) sortino_study = optuna.load_study(study_name='ppo2_sortino', -storage='sqlite:///params.db') + storage='sqlite:///params.db') sortino_env = DummyVecEnv([lambda: BitcoinTradingEnv( - test_df, reward_func="profit", forecast_len=int(sortino_study.best_trial.params['forecast_len']), confidence_interval=sortino_study.best_trial.params['confidence_interval'])]) + test_df, reward_func="profit", forecast_steps=int(sortino_study.best_trial.params['forecast_steps']), forecast_alpha=sortino_study.best_trial.params['forecast_alpha'])]) # calmar_study = optuna.load_study(study_name='ppo2_sortino', # storage='sqlite:///params.db') # calmar_env = DummyVecEnv([lambda: BitcoinTradingEnv( -# test_df, reward_func="profit", forecast_len=int(calmar_study.best_trial.params['forecast_len']), confidence_interval=calmar_study.best_trial.params['confidence_interval'])]) +# test_df, reward_func="profit", forecast_steps=int(calmar_study.best_trial.params['forecast_steps']), forecast_alpha=calmar_study.best_trial.params['forecast_alpha'])]) omega_study = optuna.load_study(study_name='ppo2_omega', -storage='sqlite:///params.db') + storage='sqlite:///params.db') omega_env = DummyVecEnv([lambda: BitcoinTradingEnv( - test_df, reward_func="profit", forecast_len=int(omega_study.best_trial.params['forecast_len']), confidence_interval=omega_study.best_trial.params['confidence_interval'])]) + test_df, reward_func="profit", forecast_steps=int(omega_study.best_trial.params['forecast_steps']), forecast_alpha=omega_study.best_trial.params['forecast_alpha'])]) profit_model = PPO2.load('./agents/ppo2_profit_4.pkl', env=profit_env) @@ -85,4 +85,3 @@ with open('./research/results/omega_net_worths_4.pkl', 'wb') as handle: pickle.dump(omega_net_worths, handle) - From 147e6e1ea9c9ec7ef434edb05ed94a22d22ae32e Mon Sep 17 00:00:00 2001 From: adamjking3 Date: Mon, 24 Jun 2019 00:39:26 -0700 Subject: [PATCH 11/11] Refactor optimize/train/test into single RLTrader class. 
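The consolidated class exposes `optimize()`, `train()`, and `test()` methods, driven by the new `optimize.py` entry point later in this patch. A rough usage sketch (method names and keyword arguments are taken from `lib/RLTrader.py` below; the argument values are illustrative only):

```python
# Sketch of the consolidated workflow after this refactor; mirrors the new
# optimize.py entry point. Argument values are illustrative, not defaults.
import numpy as np

from lib.RLTrader import RLTrader

np.warnings.filterwarnings('ignore')

trader = RLTrader(reward_strategy='sortino')   # kwargs are optional overrides

trader.optimize(n_trials=10)                   # hyper-parameter search via Optuna
trader.train(n_epochs=3,                       # train with the best trial's params
             test_trained_model=True,          # evaluate each saved epoch
             render_trained_model=False)
trader.test(model_epoch=2,                     # replay a specific saved agent
            should_render=True)
```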
--- lib/RLTrader.py | 252 ++++++++++++++++ {env => lib}/__init__.py | 0 lib/__init__.pyc | Bin 0 -> 144 bytes {env => lib/env}/BitcoinTradingEnv.py | 134 ++++----- {render => lib/env}/__init__.py | 0 .../env/render}/BitcoinTradingGraph.py | 5 +- {util => lib/env/render}/__init__.py | 0 agents/.gitkeep => lib/util/__init__.py | 0 {util => lib/util}/benchmarks.py | 0 lib/util/indicators.py | 78 +++++ {util => lib/util}/log.py | 10 +- lib/util/transform.py | 25 ++ log/.gitkeep | 0 optimize.py | 269 +----------------- test.py | 55 ---- train.py | 78 ----- util/indicators.py | 78 ----- util/transform.py | 37 --- 18 files changed, 434 insertions(+), 587 deletions(-) create mode 100644 lib/RLTrader.py rename {env => lib}/__init__.py (100%) create mode 100644 lib/__init__.pyc rename {env => lib/env}/BitcoinTradingEnv.py (53%) rename {render => lib/env}/__init__.py (100%) rename {render => lib/env/render}/BitcoinTradingGraph.py (97%) rename {util => lib/env/render}/__init__.py (100%) rename agents/.gitkeep => lib/util/__init__.py (100%) rename {util => lib/util}/benchmarks.py (100%) create mode 100644 lib/util/indicators.py rename {util => lib/util}/log.py (78%) create mode 100644 lib/util/transform.py delete mode 100644 log/.gitkeep delete mode 100644 test.py delete mode 100644 train.py delete mode 100644 util/indicators.py delete mode 100644 util/transform.py diff --git a/lib/RLTrader.py b/lib/RLTrader.py new file mode 100644 index 0000000..170671d --- /dev/null +++ b/lib/RLTrader.py @@ -0,0 +1,252 @@ +import optuna +import pandas as pd +import numpy as np + +from os import path +from stable_baselines.common.base_class import BaseRLModel +from stable_baselines.common.policies import BasePolicy, MlpLnLstmPolicy +from stable_baselines.common.vec_env import DummyVecEnv +from stable_baselines import PPO2 + +from lib.env.BitcoinTradingEnv import BitcoinTradingEnv +from lib.util.indicators import add_indicators +from lib.util.log import init_logger + + +class RLTrader: + feature_df = None + + def __init__(self, model: BaseRLModel = PPO2, policy: BasePolicy = MlpLnLstmPolicy, **kwargs): + self.logger = init_logger( + __name__, show_debug=kwargs.get('show_debug', True)) + + self.model = model + self.policy = policy + self.reward_strategy = kwargs.get('reward_strategy', 'sortino') + self.tensorboard_path = kwargs.get( + 'tensorboard_path', path.join('data', 'tensorboard')) + self.input_data_path = kwargs.get('input_data_path', None) + self.params_db_path = kwargs.get( + 'params_db_path', 'sqlite:///data/params.db') + + self.model_verbose = kwargs.get('model_verbose', 1) + self.nminibatches = kwargs.get('nminibatches', 1) + + self.initialize_data(kwargs) + + self.logger.debug(f'Reward Strategy: {self.reward_strategy}') + + def initialize_data(self, kwargs): + if self.input_data_path is None: + self.input_data_path = path.join( + 'data', 'input', 'coinbase_hourly.csv') + + self.feature_df = pd.read_csv(self.input_data_path) + self.feature_df = self.feature_df.drop(['Symbol'], axis=1) + self.feature_df['Date'] = pd.to_datetime( + self.feature_df['Date'], format='%Y-%m-%d %I-%p') + self.feature_df['Date'] = self.feature_df['Date'].astype(str) + self.feature_df = self.feature_df.sort_values(['Date']) + self.feature_df = add_indicators(self.feature_df.reset_index()) + + self.validation_set_percentage = kwargs.get( + 'validation_set_percentage', 0.8) + self.test_set_percentage = kwargs.get('test_set_percentage', 0.8) + + self.logger.debug( + f'Initialized Features: {self.feature_df.columns.str.cat(sep=", ")}') + 
+ def initialize_optuna(self, should_create: bool = False): + self.study_name = f'{self.model.__class__.__name__}__{self.policy.__class__.__name__}__{self.reward_strategy}' + + if should_create: + self.optuna_study = optuna.create_study( + study_name=self.study_name, storage=self.params_db_path, load_if_exists=True) + else: + self.optuna_study = optuna.load_study( + study_name=self.study_name, storage=self.params_db_path) + + self.logger.debug('Initialized Optuna:') + + try: + self.logger.debug( + f'Best reward in ({len(self.optuna_study.trials)}) trials: {-self.optuna_study.best_value}') + except: + self.logger.debug('No trials have been finished yet.') + + def get_env_params(self): + params = self.optuna_study.best_trial.params + return { + 'reward_strategy': self.reward_strategy, + 'forecast_steps': int(params['forecast_steps']), + 'forecast_alpha': params['forecast_alpha'], + } + + def get_model_params(self): + params = self.optuna_study.best_trial.params + return { + 'n_steps': int(params['n_steps']), + 'gamma': params['gamma'], + 'learning_rate': params['learning_rate'], + 'ent_coef': params['ent_coef'], + 'cliprange': params['cliprange'], + 'noptepochs': int(params['noptepochs']), + 'lam': params['lam'], + } + + def optimize_env_params(self, trial): + return { + 'forecast_steps': int(trial.suggest_loguniform('forecast_steps', 1, 200)), + 'forecast_alpha': trial.suggest_uniform('forecast_alpha', 0.001, 0.30), + } + + def optimize_agent_params(self, trial): + if self.model != PPO2: + return {'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1.)} + + return { + 'n_steps': int(trial.suggest_loguniform('n_steps', 16, 2048)), + 'gamma': trial.suggest_loguniform('gamma', 0.9, 0.9999), + 'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1.), + 'ent_coef': trial.suggest_loguniform('ent_coef', 1e-8, 1e-1), + 'cliprange': trial.suggest_uniform('cliprange', 0.1, 0.4), + 'noptepochs': int(trial.suggest_loguniform('noptepochs', 1, 48)), + 'lam': trial.suggest_uniform('lam', 0.8, 1.) 
+ } + + def optimize_params(self, trial, n_prune_evals_per_trial: int = 4, n_tests_per_eval: int = 1, speedup_factor: int = 10): + env_params = self.optimize_env_params(trial) + + full_train_len = self.test_set_percentage * len(self.feature_df) + optimize_train_len = int( + self.validation_set_percentage * full_train_len) + train_len = int(optimize_train_len / speedup_factor) + train_start = optimize_train_len - train_len + + train_df = self.feature_df[train_start:optimize_train_len] + validation_df = self.feature_df[optimize_train_len:] + + train_env = DummyVecEnv( + [lambda: BitcoinTradingEnv(train_df, **env_params)]) + validation_env = DummyVecEnv( + [lambda: BitcoinTradingEnv(validation_df, **env_params)]) + + model_params = self.optimize_agent_params(trial) + model = self.model(self.policy, train_env, verbose=self.model_verbose, nminibatches=self.nminibatches, + tensorboard_log=self.tensorboard_path, **model_params) + + last_reward = -np.finfo(np.float16).max + evaluation_interval = int( + train_len / n_prune_evals_per_trial) + + for eval_idx in range(n_prune_evals_per_trial): + try: + model.learn(evaluation_interval) + except AssertionError: + raise + + rewards = [] + n_episodes, reward_sum = 0, 0.0 + + obs = validation_env.reset() + while n_episodes < n_tests_per_eval: + action, _ = model.predict(obs) + obs, reward, done, _ = validation_env.step(action) + reward_sum += reward + + if done: + rewards.append(reward_sum) + reward_sum = 0.0 + n_episodes += 1 + obs = validation_env.reset() + + last_reward = np.mean(rewards) + trial.report(-1 * last_reward, eval_idx) + + if trial.should_prune(eval_idx): + raise optuna.structs.TrialPruned() + + return -1 * last_reward + + def optimize(self, n_trials: int = 10, n_parallel_jobs: int = 4, *optimize_params): + self.initialize_optuna(should_create=True) + + try: + self.optuna_study.optimize( + self.optimize_params, n_trials=n_trials, n_jobs=n_parallel_jobs, *optimize_params) + except KeyboardInterrupt: + pass + + self.logger.info(f'Finished trials: {len(self.optuna_study.trials)}') + + self.logger.info(f'Best trial: {self.optuna_study.best_trial.value}') + + self.logger.info('Params: ') + for key, value in self.optuna_study.best_trial.params.items(): + self.logger.info(f' {key}: {value}') + + return self.optuna_study.trials_dataframe() + + def train(self, n_epochs: int = 1, iters_per_epoch: int = 1, test_trained_model: bool = False, render_trained_model: bool = False): + self.initialize_optuna() + + env_params = self.get_env_params() + + train_len = int(self.test_set_percentage * len(self.feature_df)) + train_df = self.feature_df[:train_len] + + train_env = DummyVecEnv( + [lambda: BitcoinTradingEnv(train_df, **env_params)]) + + model_params = self.get_model_params() + + model = self.model(self.policy, train_env, verbose=self.model_verbose, nminibatches=self.nminibatches, + tensorboard_log=self.tensorboard_path, **model_params) + + self.logger.info(f'Training for {n_epochs} epochs') + + n_timesteps = len(train_df) * iters_per_epoch + + for model_epoch in range(0, n_epochs): + self.logger.info( + f'[{model_epoch}] Training for: {n_timesteps} time steps') + + model.learn(total_timesteps=n_timesteps) + + model_path = path.join( + 'data', 'agents', f'{self.study_name}__{model_epoch}.pkl') + model.save(model_path) + + if test_trained_model: + self.test(model_epoch, should_render=render_trained_model) + + self.logger.info(f'Trained {n_epochs} models') + + def test(self, model_epoch: int = 0, should_render: bool = True): + env_params = 
self.get_env_params() + + train_len = int(self.test_set_percentage * len(self.feature_df)) + test_df = self.feature_df[train_len:] + + test_env = DummyVecEnv( + [lambda: BitcoinTradingEnv(test_df, **env_params)]) + + model_path = path.join( + 'data', 'agents', f'{self.study_name}__{model_epoch}.pkl') + model = self.model.load(model_path, env=test_env) + + self.logger.info( + f'Testing model ({self.study_name}__{model_epoch})') + + obs, done, reward_sum = test_env.reset(), False, 0 + while not done: + action, _states = model.predict(obs) + obs, reward, done, _ = test_env.step(action) + + reward_sum += reward + + if should_render: + test_env.render(mode='human') + + self.logger.info( + f'Finished testing model ({self.study_name}__{model_epoch}): ${"{:.2f}".format(reward_sum)}') diff --git a/env/__init__.py b/lib/__init__.py similarity index 100% rename from env/__init__.py rename to lib/__init__.py diff --git a/lib/__init__.pyc b/lib/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2581be62e3b630e12d5e4d90e0cc0cc2087dc9e4 GIT binary patch literal 144 zcmZSn%**vyM<6zt0SXv_v;z7ftUdRNBAEs literal 0 HcmV?d00001 diff --git a/env/BitcoinTradingEnv.py b/lib/env/BitcoinTradingEnv.py similarity index 53% rename from env/BitcoinTradingEnv.py rename to lib/env/BitcoinTradingEnv.py index 0f11c7e..1e1ea47 100644 --- a/env/BitcoinTradingEnv.py +++ b/lib/env/BitcoinTradingEnv.py @@ -1,19 +1,14 @@ import gym import pandas as pd import numpy as np -import tensorflow as tf from gym import spaces from statsmodels.tsa.statespace.sarimax import SARIMAX -from empyrical import sortino_ratio, calmar_ratio, omega_ratio +from empyrical import sortino_ratio, sharpe_ratio, omega_ratio -from render.BitcoinTradingGraph import BitcoinTradingGraph -from util.transform import log_and_difference, max_min_normalize -from util.indicators import add_indicators - - -# Delete this if debugging -np.warnings.filterwarnings('ignore') +from lib.env.render.BitcoinTradingGraph import BitcoinTradingGraph +from lib.util.transform import log_and_difference, max_min_normalize +from lib.util.indicators import add_indicators class BitcoinTradingEnv(gym.Env): @@ -21,59 +16,59 @@ class BitcoinTradingEnv(gym.Env): metadata = {'render.modes': ['human', 'system', 'none']} viewer = None - def __init__(self, df, initial_balance=10000, commission=0.0025, reward_func='sortino', **kwargs): + def __init__(self, df, initial_balance=10000, commission=0.0025, reward_strategy='sortino', **kwargs): super(BitcoinTradingEnv, self).__init__() self.initial_balance = initial_balance self.commission = commission - self.reward_func = reward_func + self.reward_strategy = reward_strategy self.df = df.fillna(method='bfill').reset_index() - self.stationary_df = log_and_difference( - self.df, ['Open', 'High', 'Low', 'Close', 'Volume BTC', 'Volume USD']) + self.stationary_df = self.df.copy() + self.stationary_df = self.stationary_df[self.stationary_df.columns.difference([ + 'index', 'Date'])] + self.stationary_df = log_and_difference(self.stationary_df, + ['Open', 'High', 'Low', 'Close', 'Volume BTC', 'Volume USD']) self.benchmarks = kwargs.get('benchmarks', []) - self.forecast_len = kwargs.get('forecast_len', 10) - self.confidence_interval = kwargs.get('confidence_interval', 0.95) - self.obs_shape = (1, 5 + len(self.df.columns) - - 2 + (self.forecast_len * 3)) + self.forecast_steps = kwargs.get('forecast_steps', 2) + self.forecast_alpha = kwargs.get('forecast_alpha', 0.05) + + self.action_space = spaces.Discrete(3) - # Actions of the 
format Buy 1/4, Sell 3/4, Hold (amount ignored), etc. - self.action_space = spaces.Discrete(12) + n_features = 5 + len(self.df.columns) - 2 + n_prediction_features = (self.forecast_steps * 3) + self.obs_shape = (1, n_features + n_prediction_features) - # Observes the price action, indicators, account action, price forecasts self.observation_space = spaces.Box( low=0, high=1, shape=self.obs_shape, dtype=np.float16) def _next_observation(self): - features = self.stationary_df[self.stationary_df.columns.difference([ - 'index', 'Date'])] + current_idx = self.current_step + self.forecast_steps + 1 - scaled = features[:self.current_step + self.forecast_len + 1].values - scaled[np.bitwise_not(np.isfinite(scaled))] = 0 + scaled = self.stationary_df[:current_idx].values - scaled = tf.contrib.eager.py_func( - func=max_min_normalize, inp=scaled, Tout=tf.float16) - scaled = pd.DataFrame(scaled, columns=features.columns) + scaled = pd.DataFrame(scaled, columns=self.stationary_df.columns) + scaled = max_min_normalize(scaled) obs = scaled.values[-1] - past_df = self.stationary_df['Close'][: - self.current_step + self.forecast_len + 1] - forecast_model = SARIMAX( - past_df.values, enforce_stationarity=False, simple_differencing=True) + forecast_model = SARIMAX(self.stationary_df['Close'][:current_idx].values, + enforce_stationarity=False, + simple_differencing=True) + model_fit = forecast_model.fit(method='bfgs', disp=False) - forecast = model_fit.get_forecast( - steps=self.forecast_len, alpha=(1 - self.confidence_interval)) + + forecast = model_fit.get_forecast(steps=self.forecast_steps, + alpha=self.forecast_alpha) obs = np.insert(obs, len(obs), forecast.predicted_mean, axis=0) obs = np.insert(obs, len(obs), forecast.conf_int().flatten(), axis=0) - scaled_history = tf.contrib.eager.py_func( - func=max_min_normalize, inp=self.account_history.astype('float32'), Tout=tf.float16) + scaled_history = max_min_normalize(self.account_history) - obs = np.insert(obs, len(obs), scaled_history[:, -1], axis=0) + obs = np.insert(obs, len(obs), scaled_history.values[-1], axis=0) obs = np.reshape(obs.astype('float16'), self.obs_shape) obs[np.bitwise_not(np.isfinite(obs))] = 0 @@ -81,64 +76,61 @@ def _next_observation(self): return obs def _current_price(self): - return self.df['Close'].values[self.current_step + self.forecast_len] + 0.01 + return self.df['Close'].values[self.current_step + self.forecast_steps] def _take_action(self, action): current_price = self._current_price() - action_type = int(action / 4) - amount = 1 / (action % 4 + 1) btc_bought = 0 btc_sold = 0 - cost = 0 - sales = 0 + cost_of_btc = 0 + revenue_from_sold = 0 - if action_type == 0: + if action == 0: price = current_price * (1 + self.commission) - btc_bought = min(self.balance * amount / - price, self.balance / price) - cost = btc_bought * price + btc_bought = self.balance / price + cost_of_btc = self.balance self.btc_held += btc_bought - self.balance -= cost - elif action_type == 1: + self.balance -= cost_of_btc + elif action == 1: price = current_price * (1 - self.commission) - btc_sold = self.btc_held * amount - sales = btc_sold * price + btc_sold = self.btc_held + revenue_from_sold = btc_sold * price self.btc_held -= btc_sold - self.balance += sales + self.balance += revenue_from_sold if btc_sold > 0 or btc_bought > 0: self.trades.append({'step': self.current_step, - 'amount': btc_sold if btc_sold > 0 else btc_bought, 'total': sales if btc_sold > 0 else cost, + 'amount': btc_sold if btc_sold > 0 else btc_bought, 'total': revenue_from_sold if 
btc_sold > 0 else cost_of_btc, 'type': 'sell' if btc_sold > 0 else 'buy'}) self.net_worths.append( self.balance + self.btc_held * current_price) - self.account_history = np.append(self.account_history, [ - [self.balance], - [btc_bought], - [cost], - [btc_sold], - [sales] - ], axis=1) + self.account_history.append({ + 'balance': self.balance, + 'btc_bought': btc_bought, + 'cost_of_btc': cost_of_btc, + 'btc_sold': btc_sold, + 'revenue_from_sold': revenue_from_sold, + }, ignore_index=True) def _reward(self): - length = min(self.current_step, self.forecast_len) + length = min(self.current_step, self.forecast_steps) returns = np.diff(self.net_worths[-length:]) if np.count_nonzero(returns) < 1: return 0 - if self.reward_func == 'sortino': + if self.reward_strategy == 'sortino': reward = sortino_ratio( returns, annualization=365*24) - elif self.reward_func == 'calmar': - reward = calmar_ratio( + elif self.reward_strategy == 'sharpe': + reward = sharpe_ratio( returns, annualization=365*24) - elif self.reward_func == 'omega': + elif self.reward_strategy == 'omega': reward = omega_ratio( returns, annualization=365*24) else: @@ -147,7 +139,7 @@ def _reward(self): return reward if np.isfinite(reward) else 0 def _done(self): - return self.net_worths[-1] < self.initial_balance / 10 or self.current_step == len(self.df) - self.forecast_len - 1 + return self.net_worths[-1] < self.initial_balance / 10 or self.current_step == len(self.df) - self.forecast_steps - 1 def reset(self): self.balance = self.initial_balance @@ -155,13 +147,13 @@ def reset(self): self.btc_held = 0 self.current_step = 0 - self.account_history = np.array([ - [self.balance], - [0], - [0], - [0], - [0] - ]) + self.account_history = pd.DataFrame([{ + 'balance': self.balance, + 'btc_bought': 0, + 'cost_of_btc': 0, + 'btc_sold': 0, + 'revenue_from_sold': 0, + }]) self.trades = [] return self._next_observation() diff --git a/render/__init__.py b/lib/env/__init__.py similarity index 100% rename from render/__init__.py rename to lib/env/__init__.py diff --git a/render/BitcoinTradingGraph.py b/lib/env/render/BitcoinTradingGraph.py similarity index 97% rename from render/BitcoinTradingGraph.py rename to lib/env/render/BitcoinTradingGraph.py index cf28be7..ab0ad9a 100644 --- a/render/BitcoinTradingGraph.py +++ b/lib/env/render/BitcoinTradingGraph.py @@ -19,7 +19,7 @@ class BitcoinTradingGraph: def __init__(self, df): self.df = df self.df['Time'] = self.df['Date'].apply( - lambda x: datetime.strptime(x, '%Y-%m-%d %I-%p')) + lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S')) self.df = self.df.sort_values('Time') # Create a figure on screen and set the title @@ -74,7 +74,8 @@ def _render_net_worth(self, step_range, dates, current_step, net_worths, benchma min(net_worths) / 1.25, max(net_worths) * 1.25) def _render_benchmarks(self, step_range, dates, benchmarks): - colors = ['orange', 'cyan', 'purple', 'blue', 'magenta', 'yellow', 'black', 'red', 'green'] + colors = ['orange', 'cyan', 'purple', 'blue', + 'magenta', 'yellow', 'black', 'red', 'green'] for i, benchmark in enumerate(benchmarks): self.net_worth_ax.plot( diff --git a/util/__init__.py b/lib/env/render/__init__.py similarity index 100% rename from util/__init__.py rename to lib/env/render/__init__.py diff --git a/agents/.gitkeep b/lib/util/__init__.py similarity index 100% rename from agents/.gitkeep rename to lib/util/__init__.py diff --git a/util/benchmarks.py b/lib/util/benchmarks.py similarity index 100% rename from util/benchmarks.py rename to lib/util/benchmarks.py diff --git 
a/lib/util/indicators.py b/lib/util/indicators.py new file mode 100644 index 0000000..1ed7cc4 --- /dev/null +++ b/lib/util/indicators.py @@ -0,0 +1,78 @@ +import ta + + +def add_indicators(df): + df['RSI'] = ta.rsi(df["Close"]) + # df['MFI'] = ta.money_flow_index( + # df["High"], df["Low"], df["Close"], df["Volume BTC"]) + # df['TSI'] = ta.tsi(df["Close"]) + # df['UO'] = ta.uo(df["High"], df["Low"], df["Close"]) + # df['AO'] = ta.ao(df["High"], df["Low"]) + + df['MACD_diff'] = ta.macd_diff(df["Close"]) + # df['Vortex_pos'] = ta.vortex_indicator_pos( + # df["High"], df["Low"], df["Close"]) + # df['Vortex_neg'] = ta.vortex_indicator_neg( + # df["High"], df["Low"], df["Close"]) + # df['Vortex_diff'] = abs( + # df['Vortex_pos'] - + # df['Vortex_neg']) + # df['Trix'] = ta.trix(df["Close"]) + # df['Mass_index'] = ta.mass_index(df["High"], df["Low"]) + # df['CCI'] = ta.cci(df["High"], df["Low"], df["Close"]) + # df['DPO'] = ta.dpo(df["Close"]) + # df['KST'] = ta.kst(df["Close"]) + # df['KST_sig'] = ta.kst_sig(df["Close"]) + # df['KST_diff'] = ( + # df['KST'] - + # df['KST_sig']) + # df['Aroon_up'] = ta.aroon_up(df["Close"]) + # df['Aroon_down'] = ta.aroon_down(df["Close"]) + # df['Aroon_ind'] = ( + # df['Aroon_up'] - + # df['Aroon_down'] + # ) + + df['BBH'] = ta.bollinger_hband(df["Close"]) + df['BBL'] = ta.bollinger_lband(df["Close"]) + df['BBM'] = ta.bollinger_mavg(df["Close"]) + df['BBHI'] = ta.bollinger_hband_indicator( + df["Close"]) + df['BBLI'] = ta.bollinger_lband_indicator( + df["Close"]) + # df['KCHI'] = ta.keltner_channel_hband_indicator(df["High"], + # df["Low"], + # df["Close"]) + # df['KCLI'] = ta.keltner_channel_lband_indicator(df["High"], + # df["Low"], + # df["Close"]) + # df['DCHI'] = ta.donchian_channel_hband_indicator(df["Close"]) + # df['DCLI'] = ta.donchian_channel_lband_indicator(df["Close"]) + + df['ADI'] = ta.acc_dist_index(df["High"], + df["Low"], + df["Close"], + df["Volume BTC"]) + # df['OBV'] = ta.on_balance_volume(df["Close"], + # df["Volume BTC"]) + # df['CMF'] = ta.chaikin_money_flow(df["High"], + # df["Low"], + # df["Close"], + # df["Volume BTC"]) + # df['FI'] = ta.force_index(df["Close"], + # df["Volume BTC"]) + # df['EM'] = ta.ease_of_movement(df["High"], + # df["Low"], + # df["Close"], + # df["Volume BTC"]) + # df['VPT'] = ta.volume_price_trend(df["Close"], + # df["Volume BTC"]) + # df['NVI'] = ta.negative_volume_index(df["Close"], + # df["Volume BTC"]) + + df['DR'] = ta.daily_return(df["Close"]) + # df['DLR'] = ta.daily_log_return(df["Close"]) + + df.fillna(method='bfill', inplace=True) + + return df diff --git a/util/log.py b/lib/util/log.py similarity index 78% rename from util/log.py rename to lib/util/log.py index 526b946..e9a6e55 100644 --- a/util/log.py +++ b/lib/util/log.py @@ -3,7 +3,7 @@ import colorlog -def init_logger(dunder_name, testing_mode) -> logging.Logger: +def init_logger(dunder_name, show_debug=False) -> logging.Logger: log_format = ( '%(asctime)s - ' '%(name)s - ' @@ -20,7 +20,7 @@ def init_logger(dunder_name, testing_mode) -> logging.Logger: colorlog.basicConfig(format=colorlog_format) logger = logging.getLogger(dunder_name) - if testing_mode: + if show_debug: logger.setLevel(logging.DEBUG) else: logger.setLevel(logging.INFO) @@ -29,21 +29,21 @@ def init_logger(dunder_name, testing_mode) -> logging.Logger: # Feel free to uncomment and use the outputs as you like # Output full log - # fh = logging.FileHandler(os.path.join('log', 'trading.log') + # fh = logging.FileHandler(os.path.join('data', log', 'trading.log') # 
fh.setLevel(logging.DEBUG) # formatter = logging.Formatter(log_format) # fh.setFormatter(formatter) # logger.addHandler(fh) # # Output warning log - # fh = logging.FileHandler(os.path.join('log', 'trading.warning.log') + # fh = logging.FileHandler(os.path.join('data', log', 'trading.warning.log') # fh.setLevel(logging.WARNING) # formatter = logging.Formatter(log_format) # fh.setFormatter(formatter) # logger.addHandler(fh) # # Output error log - # fh = logging.FileHandler(os.path.join('log', 'trading.error.log') + # fh = logging.FileHandler(os.path.join('data', log', 'trading.error.log') # fh.setLevel(logging.ERROR) # formatter = logging.Formatter(log_format) # fh.setFormatter(formatter) diff --git a/lib/util/transform.py b/lib/util/transform.py new file mode 100644 index 0000000..8e14dab --- /dev/null +++ b/lib/util/transform.py @@ -0,0 +1,25 @@ +import numpy as np + + +def transform(df, columns=None, transform_fn=None): + transformed_df = df.copy().fillna(method='bfill') + + if columns is None: + transformed_df = transform_fn(transformed_df) + else: + for column in columns: + transformed_df[column] = transform_fn(transformed_df[column]) + + return transformed_df + + +def max_min_normalize(df, columns=None): + return transform(df, columns, lambda t_df: (t_df - t_df.min()) / (t_df.max() - t_df.min())) + + +def difference(df, columns=None): + return transform(df, columns, lambda t_df: t_df - t_df.shift(1)) + + +def log_and_difference(df, columns=None): + return transform(df, columns, lambda t_df: np.log(t_df) - np.log(t_df).shift(1)) diff --git a/log/.gitkeep b/log/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/optimize.py b/optimize.py index 190def6..e80585a 100644 --- a/optimize.py +++ b/optimize.py @@ -1,266 +1,13 @@ -''' - -A large part of the code in this file was sourced from the rl-baselines-zoo library on GitHub. -In particular, the library provides a great parameter optimization set for the PPO2 algorithm, -as well as a great example implementation using optuna. 
- -Source: https://github.com/araffin/rl-baselines-zoo/blob/master/utils/hyperparams_opt.py - -''' - -import optuna - -import os -import pandas as pd import numpy as np -from stable_baselines.common.policies import MlpLnLstmPolicy -from stable_baselines.common.vec_env import DummyVecEnv -from stable_baselines import PPO2 - -from pathlib import Path - -from env.BitcoinTradingEnv import BitcoinTradingEnv -from util.indicators import add_indicators -from util.log import init_logger - - -class Optimize: - def __init__(self): - self.reward_strategy = 'sortino' - self.input_data_file = os.path.join('data', 'coinbase_hourly.csv') - self.params_db_file = 'sqlite:///params.db' - - # number of parallel jobs - self.n_jobs = 4 - # maximum number of trials for finding the best hyperparams - self.n_trials = 1000 - # number of test episodes per trial - self.n_test_episodes = 3 - # number of evaluations for pruning per trial - self.n_evaluations = 4 - - self.train_df = None - self.test_df = None - - self.logger = init_logger(__name__, testing_mode=True) - - self.logger.debug("Initialized Optimizer") - - def prepare_data(self): - df = pd.read_csv(self.input_data_file) - df = df.drop(['Symbol'], axis=1) - df = df.sort_values(['Date']) - df = add_indicators(df.reset_index()) - - train_len = int(len(df) * 0.8) - - df = df[:train_len] - - validation_len = int(train_len * 0.8) - self.train_df = df[:validation_len] - self.test_df = df[validation_len:] - - def optimize_envs(self, trial): - return { - 'reward_func': self.reward_strategy, - 'forecast_len': int(trial.suggest_loguniform('forecast_len', 1, 200)), - 'confidence_interval': trial.suggest_uniform('confidence_interval', 0.7, 0.99), - } - - def optimize_ppo2(self, trial): - return { - 'n_steps': int(trial.suggest_loguniform('n_steps', 16, 2048)), - 'gamma': trial.suggest_loguniform('gamma', 0.9, 0.9999), - 'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1.), - 'ent_coef': trial.suggest_loguniform('ent_coef', 1e-8, 1e-1), - 'cliprange': trial.suggest_uniform('cliprange', 0.1, 0.4), - 'noptepochs': int(trial.suggest_loguniform('noptepochs', 1, 48)), - 'lam': trial.suggest_uniform('lam', 0.8, 1.) 
- } - - def optimize_agent(self, trial): - env_params = self.optimize_envs(trial) - train_env = DummyVecEnv( - [lambda: BitcoinTradingEnv(self.train_df, **env_params)]) - test_env = DummyVecEnv( - [lambda: BitcoinTradingEnv(self.test_df, **env_params)]) - - model_params = self.optimize_ppo2(trial) - model = PPO2(MlpLnLstmPolicy, train_env, verbose=0, nminibatches=1, - tensorboard_log=os.path.join('.', 'tensorboard'), **model_params) - - last_reward = -np.finfo(np.float16).max - evaluation_interval = int(len(self.train_df) / self.n_evaluations) - - for eval_idx in range(self.n_evaluations): - try: - model.learn(evaluation_interval) - except AssertionError: - raise - - rewards = [] - n_episodes, reward_sum = 0, 0.0 - - obs = test_env.reset() - while n_episodes < self.n_test_episodes: - action, _ = model.predict(obs) - obs, reward, done, _ = test_env.step(action) - reward_sum += reward - - if done: - rewards.append(reward_sum) - reward_sum = 0.0 - n_episodes += 1 - obs = test_env.reset() - - last_reward = np.mean(rewards) - trial.report(-1 * last_reward, eval_idx) - - if trial.should_prune(eval_idx): - raise optuna.structs.TrialPruned() - - return -1 * last_reward - - def log_parameters(self): - self.logger.debug("Reward Strategy: %s" % self.reward_strategy) - self.logger.debug("Input Data File: %s" % self.input_data_file) - self.logger.debug("Params DB File: %s" % self.params_db_file) - self.logger.debug("Parallel jobs: %d" % self.n_jobs) - self.logger.debug("Trials: %d" % self.n_trials) - self.logger.debug("Test episodes (per trial): %d" % - self.n_test_episodes) - self.logger.debug("Evaluations (per trial): %d" % self.n_evaluations) - self.logger.debug("Train DF Length: %d" % len(self.train_df)) - self.logger.debug("Test DF Length: %d" % len(self.test_df)) - self.logger.debug( - "Features: %s", self.train_df.columns.str.cat(sep=", ")) - - def optimize(self): - if not self.train_df: - self.logger.info("Running built-in data preparation") - self.prepare_data() - else: - self.logger.info("Using provided data (Length: %d)" % - len(self.train_df)) - - self.log_parameters() - - study_name = 'ppo2_' + self.reward_strategy - study = optuna.create_study( - study_name=study_name, storage=self.params_db_file, load_if_exists=True) - - try: - study.optimize(self.optimize_agent, - n_trials=self.n_trials, n_jobs=self.n_jobs) - except KeyboardInterrupt: - pass - - self.logger.info( - 'Number of finished trials: {}'.format(len(study.trials))) - - self.logger.info('Best trial:') - trial = study.best_trial - - self.logger.info('Value: {}'.format(trial.value)) - - self.logger.info('Params: ') - for key, value in trial.params.items(): - self.logger.info(' {}: {}'.format(key, value)) - - return study.trials_dataframe() - - def model_params(self, params): - return { - 'n_steps': int(params['n_steps']), - 'gamma': params['gamma'], - 'learning_rate': params['learning_rate'], - 'ent_coef': params['ent_coef'], - 'cliprange': params['cliprange'], - 'noptepochs': int(params['noptepochs']), - 'lam': params['lam'], - } - - def train(self): - if not self.train_df: - self.logger.info("Running built-in data preparation") - self.prepare_data() - else: - self.logger.info("Using provided data (Length: %d)" % - len(self.train_df)) - - study_name = 'ppo2_' + self.reward_strategy - - study = optuna.load_study( - study_name=study_name, storage=self.params_db_file) - params = study.best_trial.params - - train_env = DummyVecEnv([lambda: BitcoinTradingEnv( - self.train_df, reward_func=self.reward_strategy, 
forecast_len=int(params['forecast_len']), confidence_interval=params['confidence_interval'])]) - - test_env = DummyVecEnv([lambda: BitcoinTradingEnv( - self.test_df, reward_func=self.reward_strategy, forecast_len=int(params['forecast_len']), confidence_interval=params['confidence_interval'])]) - - model_params = self.model_params(params) - - model = PPO2(MlpLnLstmPolicy, train_env, verbose=0, nminibatches=1, - tensorboard_log=os.path.join('.', 'tensorboard'), **model_params) - - models_to_train = 1 - self.logger.info("Training {} model instances".format(models_to_train)) - - for idx in range(0, models_to_train): # Not sure why we are doing this, tbh - self.logger.info( - f'[{idx}] Training for: {len(self.train_df)} time steps') - - model.learn(total_timesteps=len(self.train_df)) - - obs = test_env.reset() - done, reward_sum = False, 0 - - while not done: - action, _states = model.predict(obs) - obs, reward, done, info = test_env.step(action) - reward_sum += reward - - self.logger.info( - f'[{idx}] Total reward: {reward_sum} ({self.reward_strategy})') - - model.save(os.path.join('.', 'agents', 'ppo2_' + - self.reward_strategy + '_' + str(idx) + '.pkl')) - - self.logger.info("Trained {} model instances".format(models_to_train)) - - def test(self, model_instance: 0): - - study_name = 'ppo2_' + self.reward_strategy - study = optuna.load_study( - study_name=study_name, storage=self.params_db_file) - params = study.best_trial.params - - test_env = DummyVecEnv([lambda: BitcoinTradingEnv( - self.test_df, reward_func=self.reward_strategy, forecast_len=int(params['forecast_len']), confidence_interval=params['confidence_interval'])]) - - model = PPO2.load(os.path.join('.', 'agents', 'ppo2_' + - self.reward_strategy + '_' + str(model_instance) + '.pkl'), env=test_env) - - obs, done = test_env.reset(), False - while not done: - action, _states = model.predict(obs) - obs, reward, done, info = test_env.step(action) - - test_env.render(mode="human") +from lib.RLTrader import RLTrader +np.warnings.filterwarnings('ignore') if __name__ == '__main__': - optimizer = Optimize() - test_mode = "FAST" # I'm hard-coding this for now - if test_mode == "FAST": - optimizer.input_data_file = os.path.join('data', 'coinbase_daily.csv') - optimizer.n_jobs = 1 - optimizer.n_trials = 1 - optimizer.n_test_episodes = 1 - optimizer.n_evaluations = 1 - # optimizer.optimize() - optimizer.train() - # optimizer.test() + trader = RLTrader() + + trader.optimize(n_trials=1) + trader.train(n_epochs=1, + test_trained_model=True, + render_trained_model=True) diff --git a/test.py b/test.py deleted file mode 100644 index b7711c8..0000000 --- a/test.py +++ /dev/null @@ -1,55 +0,0 @@ -import os -import gym -import optuna -import pandas as pd - -from stable_baselines.common.policies import MlpLnLstmPolicy -from stable_baselines.common.vec_env import SubprocVecEnv, DummyVecEnv -from stable_baselines import A2C, ACKTR, PPO2 - -from env.BitcoinTradingEnv import BitcoinTradingEnv -from util.indicators import add_indicators - -curr_idx = 0 -reward_strategy = 'sortino' -input_data_file = os.path.join('data', 'coinbase_hourly.csv') -params_db_file = 'sqlite:///params.db' - -study_name = 'ppo2_' + reward_strategy -study = optuna.load_study(study_name=study_name, storage=params_db_file) -params = study.best_trial.params - -print("Testing PPO2 agent with params:", params) -print("Best trial:", -1 * study.best_trial.value) - -df = pd.read_csv(input_data_file) -df = df.drop(['Symbol'], axis=1) -df = df.sort_values(['Date']) -df = 
add_indicators(df.reset_index()) - -test_len = int(len(df) * 0.2) -train_len = int(len(df)) - test_len - -test_df = df[train_len:] - -test_env = DummyVecEnv([lambda: BitcoinTradingEnv( - test_df, reward_func=reward_strategy, forecast_len=int(params['forecast_len']), confidence_interval=params['confidence_interval'])]) - -model_params = { - 'n_steps': int(params['n_steps']), - 'gamma': params['gamma'], - 'learning_rate': params['learning_rate'], - 'ent_coef': params['ent_coef'], - 'cliprange': params['cliprange'], - 'noptepochs': int(params['noptepochs']), - 'lam': params['lam'], -} - -model = PPO2.load(os.path.join('.', 'agents', 'ppo2_' + reward_strategy + '_' + str(curr_idx) + '.pkl'), env=test_env) - -obs, done = test_env.reset(), False -while not done: - action, _states = model.predict(obs) - obs, reward, done, info = test_env.step(action) - - test_env.render(mode="human") diff --git a/train.py b/train.py deleted file mode 100644 index 2e076d9..0000000 --- a/train.py +++ /dev/null @@ -1,78 +0,0 @@ -import os -import gym -import optuna -import pandas as pd -import numpy as np - -from stable_baselines.common.policies import MlpLnLstmPolicy -from stable_baselines.common.vec_env import SubprocVecEnv, DummyVecEnv -from stable_baselines import A2C, ACKTR, PPO2 - -from pathlib import Path - -from env.BitcoinTradingEnv import BitcoinTradingEnv -from util.indicators import add_indicators - -curr_idx = -1 -reward_strategy = 'sortino' -input_data_file = os.path.join('data', 'coinbase_hourly.csv') -params_db_file = 'sqlite:///params.db' - -study_name = 'ppo2_' + reward_strategy -study = optuna.load_study(study_name=study_name, storage=params_db_file) -params = study.best_trial.params - -print("Training PPO2 agent with params:", params) -print("Best trial reward:", -1 * study.best_trial.value) - -df = pd.read_csv(input_data_file) -df = df.drop(['Symbol'], axis=1) -df = df.sort_values(['Date']) -df = add_indicators(df.reset_index()) - -test_len = int(len(df) * 0.2) -train_len = int(len(df)) - test_len - -train_df = df[:train_len] -test_df = df[train_len:] - -train_env = DummyVecEnv([lambda: BitcoinTradingEnv( - train_df, reward_func=reward_strategy, forecast_len=int(params['forecast_len']), confidence_interval=params['confidence_interval'])]) - -test_env = DummyVecEnv([lambda: BitcoinTradingEnv( - test_df, reward_func=reward_strategy, forecast_len=int(params['forecast_len']), confidence_interval=params['confidence_interval'])]) - -model_params = { - 'n_steps': int(params['n_steps']), - 'gamma': params['gamma'], - 'learning_rate': params['learning_rate'], - 'ent_coef': params['ent_coef'], - 'cliprange': params['cliprange'], - 'noptepochs': int(params['noptepochs']), - 'lam': params['lam'], -} - -if curr_idx == -1: - model = PPO2(MlpLnLstmPolicy, train_env, verbose=0, nminibatches=1, - tensorboard_log=os.path.join('.', 'tensorboard'), **model_params) -else: - model = PPO2.load(os.path.join('.', 'agents', 'ppo2_' + - reward_strategy + '_' + str(curr_idx) + '.pkl'), env=train_env) - -for idx in range(curr_idx + 1, 10): - print('[', idx, '] Training for: ', train_len, ' time steps') - - model.learn(total_timesteps=train_len) - - obs = test_env.reset() - done, reward_sum = False, 0 - - while not done: - action, _states = model.predict(obs) - obs, reward, done, info = test_env.step(action) - reward_sum += reward - - print('[', idx, '] Total reward: ', - reward_sum, ' (' + reward_strategy + ')') - model.save(os.path.join('.', 'agents', 'ppo2_' + - reward_strategy + '_' + str(idx) + '.pkl')) diff --git 
a/util/indicators.py b/util/indicators.py deleted file mode 100644 index 0a7a1e8..0000000 --- a/util/indicators.py +++ /dev/null @@ -1,78 +0,0 @@ -import ta - - -def add_indicators(df): - df['RSI'] = ta.rsi(df["Close"]) - df['MFI'] = ta.money_flow_index( - df["High"], df["Low"], df["Close"], df["Volume BTC"]) - df['TSI'] = ta.tsi(df["Close"]) - df['UO'] = ta.uo(df["High"], df["Low"], df["Close"]) - df['AO'] = ta.ao(df["High"], df["Low"]) - - df['MACD_diff'] = ta.macd_diff(df["Close"]) - df['Vortex_pos'] = ta.vortex_indicator_pos( - df["High"], df["Low"], df["Close"]) - df['Vortex_neg'] = ta.vortex_indicator_neg( - df["High"], df["Low"], df["Close"]) - df['Vortex_diff'] = abs( - df['Vortex_pos'] - - df['Vortex_neg']) - df['Trix'] = ta.trix(df["Close"]) - df['Mass_index'] = ta.mass_index(df["High"], df["Low"]) - df['CCI'] = ta.cci(df["High"], df["Low"], df["Close"]) - df['DPO'] = ta.dpo(df["Close"]) - df['KST'] = ta.kst(df["Close"]) - df['KST_sig'] = ta.kst_sig(df["Close"]) - df['KST_diff'] = ( - df['KST'] - - df['KST_sig']) - df['Aroon_up'] = ta.aroon_up(df["Close"]) - df['Aroon_down'] = ta.aroon_down(df["Close"]) - df['Aroon_ind'] = ( - df['Aroon_up'] - - df['Aroon_down'] - ) - - df['BBH'] = ta.bollinger_hband(df["Close"]) - df['BBL'] = ta.bollinger_lband(df["Close"]) - df['BBM'] = ta.bollinger_mavg(df["Close"]) - df['BBHI'] = ta.bollinger_hband_indicator( - df["Close"]) - df['BBLI'] = ta.bollinger_lband_indicator( - df["Close"]) - df['KCHI'] = ta.keltner_channel_hband_indicator(df["High"], - df["Low"], - df["Close"]) - df['KCLI'] = ta.keltner_channel_lband_indicator(df["High"], - df["Low"], - df["Close"]) - df['DCHI'] = ta.donchian_channel_hband_indicator(df["Close"]) - df['DCLI'] = ta.donchian_channel_lband_indicator(df["Close"]) - - df['ADI'] = ta.acc_dist_index(df["High"], - df["Low"], - df["Close"], - df["Volume BTC"]) - df['OBV'] = ta.on_balance_volume(df["Close"], - df["Volume BTC"]) - df['CMF'] = ta.chaikin_money_flow(df["High"], - df["Low"], - df["Close"], - df["Volume BTC"]) - df['FI'] = ta.force_index(df["Close"], - df["Volume BTC"]) - df['EM'] = ta.ease_of_movement(df["High"], - df["Low"], - df["Close"], - df["Volume BTC"]) - df['VPT'] = ta.volume_price_trend(df["Close"], - df["Volume BTC"]) - df['NVI'] = ta.negative_volume_index(df["Close"], - df["Volume BTC"]) - - df['DR'] = ta.daily_return(df["Close"]) - df['DLR'] = ta.daily_log_return(df["Close"]) - - df.fillna(method='bfill', inplace=True) - - return df diff --git a/util/transform.py b/util/transform.py deleted file mode 100644 index 69b1401..0000000 --- a/util/transform.py +++ /dev/null @@ -1,37 +0,0 @@ -import tensorflow as tf - - -def transform(df, transform_fn, columns=None): - transformed_df = df.copy() - - if columns is None: - transformed_df = transform_fn(transformed_df) - - for column in columns: - transformed_df[column] = transform_fn(transformed_df[column]) - - transformed_df = transformed_df.fillna(method='bfill') - - return transformed_df - - -def max_min_normalize(df, columns): - def transform_fn(transform_df): - return (transform_df - transform_df.min()) / (transform_df.max() - transform_df.min()) - - return transform(df, transform_fn, columns) - - -def difference(df, columns): - def transform_fn(transform_df): - return transform_df - transform_df.shift(1) - - return transform(df, transform_fn, columns) - - -def log_and_difference(df, columns): - def transform_fn(transform_df): - transform_df.loc[transform_df == 0] = 1E-10 - return tf.log(transform_df) - tf.log(transform_df.shift(1)) - - return transform(df, 
transform_fn, columns)
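The deleted `util/transform.py` above relied on TensorFlow ops, while its replacement, `lib/util/transform.py` earlier in this patch, is plain NumPy/pandas. A quick sketch of the new helpers in use (the tiny DataFrame below is made up purely for illustration):

```python
# Sketch of the NumPy/pandas helpers added in lib/util/transform.py.
# The toy price/volume frame below is invented for illustration only.
import pandas as pd

from lib.util.transform import log_and_difference, max_min_normalize

df = pd.DataFrame({'Close': [100.0, 101.0, 99.5, 102.0],
                   'Volume BTC': [10.0, 12.0, 9.0, 11.0]})

# Make the series stationary (log differences), then scale everything to [0, 1].
stationary = log_and_difference(df, columns=['Close', 'Volume BTC'])
scaled = max_min_normalize(stationary)

print(scaled)
```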