From 4be5e4ca61e40e8830d5616fd1a93106be3e0d44 Mon Sep 17 00:00:00 2001 From: litch Date: Thu, 6 Jun 2019 12:27:07 -0500 Subject: [PATCH 01/11] Paths are parameterized to be OS agnostic --- optimize.py | 5 +++-- test.py | 7 ++++--- train.py | 10 +++++----- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/optimize.py b/optimize.py index d758f34..5e18147 100644 --- a/optimize.py +++ b/optimize.py @@ -10,6 +10,7 @@ import optuna +import os import pandas as pd import numpy as np @@ -22,7 +23,7 @@ reward_strategy = 'sortino' -input_data_file = 'data/coinbase_hourly.csv' +input_data_file = os.path.join('data', 'coinbase_hourly.csv') params_db_file = 'sqlite:///params.db' # number of parallel jobs @@ -78,7 +79,7 @@ def optimize_agent(trial): model_params = optimize_ppo2(trial) model = PPO2(MlpLnLstmPolicy, train_env, verbose=0, nminibatches=1, - tensorboard_log="./tensorboard", **model_params) + tensorboard_log=os.path.join('.', '/tensorboard'), **model_params) last_reward = -np.finfo(np.float16).max evaluation_interval = int(len(train_df) / n_evaluations) diff --git a/test.py b/test.py index a2a14eb..8d47518 100644 --- a/test.py +++ b/test.py @@ -1,3 +1,4 @@ +import os import gym import optuna import pandas as pd @@ -11,7 +12,7 @@ curr_idx = 0 reward_strategy = 'sortino' -input_data_file = 'data/coinbase_hourly.csv' +input_data_file = os.path.join('data', 'coinbase_hourly.csv') params_db_file = 'sqlite:///params.db' study_name = 'ppo2' + reward_strategy @@ -21,7 +22,7 @@ print("Testing PPO2 agent with params:", params) print("Best trial:", -1 * study.best_trial.value) -df = pd.read_csv('./data/coinbase_hourly.csv') +df = pd.read_csv(input_data_file) df = df.drop(['Symbol'], axis=1) df = df.sort_values(['Date']) df = add_indicators(df.reset_index()) @@ -44,7 +45,7 @@ 'lam': params['lam'], } -model = PPO2.load('./agents/ppo2_' + reward_strategy + '_' + str(curr_idx) + '.pkl', env=test_env) +model = PPO2.load(os.path.join('.', 'agents', 'ppo2_' + reward_strategy + '_' + str(curr_idx) + '.pkl'), env=test_env) obs, done = test_env.reset(), False while not done: diff --git a/train.py b/train.py index 0183be8..b9a4561 100644 --- a/train.py +++ b/train.py @@ -1,3 +1,4 @@ +import os import gym import optuna import pandas as pd @@ -10,10 +11,9 @@ from env.BitcoinTradingEnv import BitcoinTradingEnv from util.indicators import add_indicators - curr_idx = -1 reward_strategy = 'sortino' -input_data_file = 'data/coinbase_hourly.csv' +input_data_file = os.path.join('data', 'coinbase_hourly.csv') params_db_file = 'sqlite:///params.db' study_name = 'ppo2' + reward_strategy @@ -52,9 +52,9 @@ if curr_idx == -1: model = PPO2(MlpLnLstmPolicy, train_env, verbose=0, nminibatches=1, - tensorboard_log="./tensorboard", **model_params) + tensorboard_log=os.path.join('.', 'tensorboard'), **model_params) else: - model = PPO2.load('./agents/ppo2_' + reward_strategy + '_' + str(curr_idx) + '.pkl', env=train_env) + model = PPO2.load(os.path.join('.', 'agents', 'ppo2_' + reward_strategy + '_' + str(curr_idx) + '.pkl'), env=train_env) for idx in range(curr_idx + 1, 10): print('[', idx, '] Training for: ', train_len, ' time steps') @@ -70,4 +70,4 @@ reward_sum += reward print('[', idx, '] Total reward: ', reward_sum, ' (' + reward_strategy + ')') - model.save('./agents/ppo2_' + reward_strategy + '_' + str(idx) + '.pkl') + model.save(os.path.join('.', 'agents', 'ppo2_' + reward_strategy + '_' + str(idx) + '.pkl')) From 8aa39e455ffb49a938dacc18aa1e1f8bf93e2347 Mon Sep 17 00:00:00 2001 From: litch Date: Thu, 6 Jun 2019 
14:25:36 -0500 Subject: [PATCH 02/11] File/object name references are corrected --- optimize.py | 3 +-- train.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/optimize.py b/optimize.py index 5e18147..8231f9c 100644 --- a/optimize.py +++ b/optimize.py @@ -21,7 +21,6 @@ from env.BitcoinTradingEnv import BitcoinTradingEnv from util.indicators import add_indicators - reward_strategy = 'sortino' input_data_file = os.path.join('data', 'coinbase_hourly.csv') params_db_file = 'sqlite:///params.db' @@ -79,7 +78,7 @@ def optimize_agent(trial): model_params = optimize_ppo2(trial) model = PPO2(MlpLnLstmPolicy, train_env, verbose=0, nminibatches=1, - tensorboard_log=os.path.join('.', '/tensorboard'), **model_params) + tensorboard_log=os.path.join('.', 'tensorboard'), **model_params) last_reward = -np.finfo(np.float16).max evaluation_interval = int(len(train_df) / n_evaluations) diff --git a/train.py b/train.py index b9a4561..ba51c51 100644 --- a/train.py +++ b/train.py @@ -16,7 +16,7 @@ input_data_file = os.path.join('data', 'coinbase_hourly.csv') params_db_file = 'sqlite:///params.db' -study_name = 'ppo2' + reward_strategy +study_name = 'ppo2_' + reward_strategy study = optuna.load_study(study_name=study_name, storage=params_db_file) params = study.best_trial.params From 70d96c6422120d634af084af5810f386667f57b6 Mon Sep 17 00:00:00 2001 From: litch Date: Thu, 6 Jun 2019 14:48:35 -0500 Subject: [PATCH 03/11] Agents directory is preserved --- agents/.gitkeep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 agents/.gitkeep diff --git a/agents/.gitkeep b/agents/.gitkeep new file mode 100644 index 0000000..e69de29 From 2bad70e66ee635cad381ef3456fcc724d02b26da Mon Sep 17 00:00:00 2001 From: litch Date: Sat, 8 Jun 2019 22:48:35 -0500 Subject: [PATCH 04/11] Optimize wrapped into a class for parameterization --- README.md | 42 ++++++++++- optimize.py | 198 +++++++++++++++++++++++++++------------------------- test.py | 2 +- 3 files changed, 141 insertions(+), 101 deletions(-) diff --git a/README.md b/README.md index 2993937..92c7859 100644 --- a/README.md +++ b/README.md @@ -16,11 +16,47 @@ The first thing you will need to do to get started is install the requirements i ```bash pip install -r requirements.txt ``` - + The requirements include the `tensorflow-gpu` library, though if you do not have access to a GPU, you should replace this requirement with `tensorflow`. - + +## Testing workflow + +First let's try the "optimize" strategy with a single run, single evaluation, just to make sure that things are "sane". + +### Expected output + +``` +% date ; python optimize.py; date +Thu Jun 6 14:09:23 CDT 2019 +[I 2019-06-06 14:09:35,557] A new study created with name: ppo2_sortino + + + +[I 2019-06-06 14:21:50,724] Finished trial#1 resulted in value: -956.9744873046875. Current best value is -956.9744873046875 with parameters: {'cliprange': 0.18943365028795878, 'confidence_interval': 0.8286824056507663, 'ent_coef': 8.094794121881875e-08, 'forecast_len': 14.7463$ +0586736364, 'gamma': 0.9834343245286393, 'lam': 0.9646711236104828, 'learning_rate': 0.032564661147532384, 'n_steps': 28.294495666878618, 'noptepochs': 2.3568984946859066}. 
+Number of finished trials: 2 +Best trial: +Value: -956.9744873046875 +Params: + cliprange: 0.18943365028795878 + confidence_interval: 0.8286824056507663 + ent_coef: 8.094794121881875e-08 + forecast_len: 14.746310586736364 + gamma: 0.9834343245286393 + lam: 0.9646711236104828 + learning_rate: 0.032564661147532384 + n_steps: 28.294495666878618 + noptepochs: 2.3568984946859066 + +Thu Jun 6 14:21:51 CDT 2019 + +% +``` + +So that took about 12 minutes on a pretty powerful laptop to run a single trial (at least as of Jun 2019). + # Finding Hyper-Parameters - + While you could just let the agent train and run with the default PPO2 hyper-parameters, your agent would likely not be very profitable. The `stable-baselines` library provides a great set of default parameters that work for most problem domains, but we need to better. To do this, you will need to run `optimize.py`. Within the file, you can define the `reward_strategy` for the environment to use, this is currently defaulted to `sortino`. diff --git a/optimize.py b/optimize.py index 8231f9c..0ba50c8 100644 --- a/optimize.py +++ b/optimize.py @@ -21,105 +21,108 @@ from env.BitcoinTradingEnv import BitcoinTradingEnv from util.indicators import add_indicators -reward_strategy = 'sortino' -input_data_file = os.path.join('data', 'coinbase_hourly.csv') -params_db_file = 'sqlite:///params.db' - -# number of parallel jobs -n_jobs = 4 -# maximum number of trials for finding the best hyperparams -n_trials = 1000 -# number of test episodes per trial -n_test_episodes = 3 -# number of evaluations for pruning per trial -n_evaluations = 4 - - -df = pd.read_csv(input_data_file) -df = df.drop(['Symbol'], axis=1) -df = df.sort_values(['Date']) -df = add_indicators(df.reset_index()) - -train_len = int(len(df) * 0.8) - -df = df[:train_len] - -validation_len = int(train_len * 0.8) -train_df = df[:validation_len] -test_df = df[validation_len:] - - -def optimize_envs(trial): - return { - 'reward_func': reward_strategy, - 'forecast_len': int(trial.suggest_loguniform('forecast_len', 1, 200)), - 'confidence_interval': trial.suggest_uniform('confidence_interval', 0.7, 0.99), - } - - -def optimize_ppo2(trial): - return { - 'n_steps': int(trial.suggest_loguniform('n_steps', 16, 2048)), - 'gamma': trial.suggest_loguniform('gamma', 0.9, 0.9999), - 'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1.), - 'ent_coef': trial.suggest_loguniform('ent_coef', 1e-8, 1e-1), - 'cliprange': trial.suggest_uniform('cliprange', 0.1, 0.4), - 'noptepochs': int(trial.suggest_loguniform('noptepochs', 1, 48)), - 'lam': trial.suggest_uniform('lam', 0.8, 1.) 
- } - - -def optimize_agent(trial): - env_params = optimize_envs(trial) - train_env = DummyVecEnv( - [lambda: BitcoinTradingEnv(train_df, **env_params)]) - test_env = DummyVecEnv( - [lambda: BitcoinTradingEnv(test_df, **env_params)]) - - model_params = optimize_ppo2(trial) - model = PPO2(MlpLnLstmPolicy, train_env, verbose=0, nminibatches=1, - tensorboard_log=os.path.join('.', 'tensorboard'), **model_params) - - last_reward = -np.finfo(np.float16).max - evaluation_interval = int(len(train_df) / n_evaluations) - - for eval_idx in range(n_evaluations): - try: - model.learn(evaluation_interval) - except AssertionError: - raise - - rewards = [] - n_episodes, reward_sum = 0, 0.0 - - obs = test_env.reset() - while n_episodes < n_test_episodes: - action, _ = model.predict(obs) - obs, reward, done, _ = test_env.step(action) - reward_sum += reward - - if done: - rewards.append(reward_sum) - reward_sum = 0.0 - n_episodes += 1 - obs = test_env.reset() - - last_reward = np.mean(rewards) - trial.report(-1 * last_reward, eval_idx) - - if trial.should_prune(eval_idx): - raise optuna.structs.TrialPruned() - - return -1 * last_reward - - -def optimize(): - study_name = 'ppo2_' + reward_strategy +class Optimize: + def __init__(self): + self.reward_strategy = 'sortino' + self.input_data_file = os.path.join('data', 'coinbase_daily.csv') + self.params_db_file = 'sqlite:///params.db' + + # number of parallel jobs + self.n_jobs = 4 + # maximum number of trials for finding the best hyperparams + self.n_trials = 1 + # number of test episodes per trial + self.n_test_episodes = 1 + # number of evaluations for pruning per trial + self.n_evaluations = 1 + self.prepare_data() + + def prepare_data(self): + df = pd.read_csv(self.input_data_file) + df = df.drop(['Symbol'], axis=1) + df = df.sort_values(['Date']) + df = add_indicators(df.reset_index()) + + train_len = int(len(df) * 0.8) + + df = df[:train_len] + + validation_len = int(train_len * 0.8) + self.train_df = df[:validation_len] + self.test_df = df[validation_len:] + + + def optimize_envs(self, trial): + return { + 'reward_func': self.reward_strategy, + 'forecast_len': int(trial.suggest_loguniform('forecast_len', 1, 200)), + 'confidence_interval': trial.suggest_uniform('confidence_interval', 0.7, 0.99), + } + + + def optimize_ppo2(self, trial): + return { + 'n_steps': int(trial.suggest_loguniform('n_steps', 16, 2048)), + 'gamma': trial.suggest_loguniform('gamma', 0.9, 0.9999), + 'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1.), + 'ent_coef': trial.suggest_loguniform('ent_coef', 1e-8, 1e-1), + 'cliprange': trial.suggest_uniform('cliprange', 0.1, 0.4), + 'noptepochs': int(trial.suggest_loguniform('noptepochs', 1, 48)), + 'lam': trial.suggest_uniform('lam', 0.8, 1.) 
+ } + + def optimize_agent(self, trial): + env_params = self.optimize_envs(trial) + train_env = DummyVecEnv( + [lambda: BitcoinTradingEnv(self.train_df, **env_params)]) + test_env = DummyVecEnv( + [lambda: BitcoinTradingEnv(self.test_df, **env_params)]) + + model_params = self.optimize_ppo2(trial) + model = PPO2(MlpLnLstmPolicy, train_env, verbose=0, nminibatches=1, + tensorboard_log=os.path.join('.', 'tensorboard'), **model_params) + + last_reward = -np.finfo(np.float16).max + evaluation_interval = int(len(self.train_df) / self.n_evaluations) + + for eval_idx in range(self.n_evaluations): + try: + model.learn(evaluation_interval) + except AssertionError: + raise + + rewards = [] + n_episodes, reward_sum = 0, 0.0 + + obs = test_env.reset() + while n_episodes < self.n_test_episodes: + action, _ = model.predict(obs) + obs, reward, done, _ = test_env.step(action) + reward_sum += reward + + if done: + rewards.append(reward_sum) + reward_sum = 0.0 + n_episodes += 1 + obs = test_env.reset() + + last_reward = np.mean(rewards) + trial.report(-1 * last_reward, eval_idx) + + if trial.should_prune(eval_idx): + raise optuna.structs.TrialPruned() + + return -1 * last_reward + + + def optimize(self): + + study_name = 'ppo2_' + self.reward_strategy study = optuna.create_study( - study_name=study_name, storage=params_db_file, load_if_exists=True) + study_name=study_name, storage=self.params_db_file, load_if_exists=True) try: - study.optimize(optimize_agent, n_trials=n_trials, n_jobs=n_jobs) + study.optimize(self.optimize_agent, n_trials=self.n_trials, n_jobs=self.n_jobs) except KeyboardInterrupt: pass @@ -138,4 +141,5 @@ def optimize(): if __name__ == '__main__': - optimize() + optimizer = Optimize() + optimizer.optimize() diff --git a/test.py b/test.py index 8d47518..b7711c8 100644 --- a/test.py +++ b/test.py @@ -15,7 +15,7 @@ input_data_file = os.path.join('data', 'coinbase_hourly.csv') params_db_file = 'sqlite:///params.db' -study_name = 'ppo2' + reward_strategy +study_name = 'ppo2_' + reward_strategy study = optuna.load_study(study_name=study_name, storage=params_db_file) params = study.best_trial.params From 7e486254c4b408b865e7c1d37138d8c8a9fe2010 Mon Sep 17 00:00:00 2001 From: litch Date: Mon, 10 Jun 2019 06:25:44 +0530 Subject: [PATCH 05/11] Logging is improved --- .gitignore | 4 +++- log/.gitkeep | 0 optimize.py | 54 +++++++++++++++++++++++++++++++++++++++++----------- util/log.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 97 insertions(+), 12 deletions(-) create mode 100644 log/.gitkeep create mode 100644 util/log.py diff --git a/.gitignore b/.gitignore index 69d100a..8ac6194 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,8 @@ data/bitstamp.csv tensorboard/* agents research/results +research/.ipynb_checkpoints/ **/__pycache__ *.pkl -*.db \ No newline at end of file +*.db +log/ diff --git a/log/.gitkeep b/log/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/optimize.py b/optimize.py index 0ba50c8..c28fa18 100644 --- a/optimize.py +++ b/optimize.py @@ -11,6 +11,7 @@ import optuna import os +from util.log import init_logger import pandas as pd import numpy as np @@ -24,18 +25,24 @@ class Optimize: def __init__(self): self.reward_strategy = 'sortino' - self.input_data_file = os.path.join('data', 'coinbase_daily.csv') + self.input_data_file = os.path.join('data', 'coinbase_hourly.csv') self.params_db_file = 'sqlite:///params.db' # number of parallel jobs self.n_jobs = 4 # maximum number of trials for finding the best hyperparams - self.n_trials = 1 + 
self.n_trials = 1000 # number of test episodes per trial - self.n_test_episodes = 1 + self.n_test_episodes = 3 # number of evaluations for pruning per trial - self.n_evaluations = 1 - self.prepare_data() + self.n_evaluations = 4 + + self.train_df = None + self.test_df = None + + self.logger = init_logger(__name__, testing_mode=True) + + self.logger.debug("Initialized Optimizer") def prepare_data(self): df = pd.read_csv(self.input_data_file) @@ -114,8 +121,27 @@ def optimize_agent(self, trial): return -1 * last_reward + def log_parameters(self): + self.logger.debug("Reward Strategy: %s" % self.reward_strategy) + self.logger.debug("Input Data File: %s" % self.input_data_file) + self.logger.debug("Params DB File: %s" % self.params_db_file) + self.logger.debug("Parallel jobs: %d" % self.n_jobs) + self.logger.debug("Trials: %d" % self.n_trials) + self.logger.debug("Test episodes (per trial): %d" % self.n_test_episodes) + self.logger.debug("Evaluations (per trial): %d" % self.n_evaluations) + self.logger.debug("Train DF Length: %d" % len(self.train_df)) + self.logger.debug("Test DF Length: %d" % len(self.test_df)) + self.logger.debug("Features: %s", self.train_df.columns.str.cat(sep=", ")) + def optimize(self): + if not self.train_df: + self.logger.info("Running built-in data preparation") + self.prepare_data() + else: + self.logger.info("Using provided data (Length: %d)" % len(self.train_df)) + + self.log_parameters() study_name = 'ppo2_' + self.reward_strategy study = optuna.create_study( @@ -126,20 +152,26 @@ def optimize(self): except KeyboardInterrupt: pass - print('Number of finished trials: ', len(study.trials)) + self.logger.info('Number of finished trials: {}'.format(len(study.trials))) - print('Best trial:') + self.logger.info('Best trial:') trial = study.best_trial - print('Value: ', trial.value) + self.logger.info('Value: {}'.format(trial.value)) - print('Params: ') + self.logger.info('Params: ') for key, value in trial.params.items(): - print(' {}: {}'.format(key, value)) + self.logger.info(' {}: {}'.format(key, value)) return study.trials_dataframe() - if __name__ == '__main__': optimizer = Optimize() + test_mode = "FAST" + if test_mode == "FAST": + optimizer.input_data_file = os.path.join('data', 'coinbase_daily.csv') + optimizer.n_jobs = 1 + optimizer.n_trials = 1 + optimizer.n_test_episodes = 1 + optimizer.n_evaluations = 1 optimizer.optimize() diff --git a/util/log.py b/util/log.py new file mode 100644 index 0000000..53842f2 --- /dev/null +++ b/util/log.py @@ -0,0 +1,51 @@ +import os +import logging +import colorlog + +def init_logger(dunder_name, testing_mode) -> logging.Logger: + log_format = ( + '%(asctime)s - ' + '%(name)s - ' + '%(funcName)s - ' + '%(levelname)s - ' + '%(message)s' + ) + bold_seq = '\033[1m' + colorlog_format = ( + f'{bold_seq} ' + '%(log_color)s ' + f'{log_format}' + ) + colorlog.basicConfig(format=colorlog_format) + logger = logging.getLogger(dunder_name) + + if testing_mode: + logger.setLevel(logging.DEBUG) + else: + logger.setLevel(logging.INFO) + + ## Note: these file outputs are left in place as examples + ## Feel free to uncomment and use the outputs as you like + + # Output full log + # fh = logging.FileHandler(os.path.join('log', 'trading.log') + # fh.setLevel(logging.DEBUG) + # formatter = logging.Formatter(log_format) + # fh.setFormatter(formatter) + # logger.addHandler(fh) + + # # Output warning log + # fh = logging.FileHandler(os.path.join('log', 'trading.warning.log') + # fh.setLevel(logging.WARNING) + # formatter = 
logging.Formatter(log_format) + # fh.setFormatter(formatter) + # logger.addHandler(fh) + + # # Output error log + # fh = logging.FileHandler(os.path.join('log', 'trading.error.log') + # fh.setLevel(logging.ERROR) + # formatter = logging.Formatter(log_format) + # fh.setFormatter(formatter) + # logger.addHandler(fh) + + return logger From 16b4a96940789ab9ccbbdd14591d2eb55d70fc4f Mon Sep 17 00:00:00 2001 From: litch Date: Tue, 11 Jun 2019 07:13:11 +0530 Subject: [PATCH 06/11] Train and test functionality is moved into optimize.py --- optimize.py | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 80 insertions(+), 3 deletions(-) diff --git a/optimize.py b/optimize.py index c28fa18..aa9554f 100644 --- a/optimize.py +++ b/optimize.py @@ -133,7 +133,6 @@ def log_parameters(self): self.logger.debug("Test DF Length: %d" % len(self.test_df)) self.logger.debug("Features: %s", self.train_df.columns.str.cat(sep=", ")) - def optimize(self): if not self.train_df: self.logger.info("Running built-in data preparation") @@ -165,13 +164,91 @@ def optimize(self): return study.trials_dataframe() + def model_params(self, params): + return { + 'n_steps': int(params['n_steps']), + 'gamma': params['gamma'], + 'learning_rate': params['learning_rate'], + 'ent_coef': params['ent_coef'], + 'cliprange': params['cliprange'], + 'noptepochs': int(params['noptepochs']), + 'lam': params['lam'], + } + + def train(self): + if not self.train_df: + self.logger.info("Running built-in data preparation") + self.prepare_data() + else: + self.logger.info("Using provided data (Length: %d)" % len(self.train_df)) + + study_name = 'ppo2_' + self.reward_strategy + + study = optuna.load_study(study_name=study_name, storage=self.params_db_file) + params = study.best_trial.params + + train_env = DummyVecEnv([lambda: BitcoinTradingEnv( + self.train_df, reward_func=self.reward_strategy, forecast_len=int(params['forecast_len']), confidence_interval=params['confidence_interval'])]) + + test_env = DummyVecEnv([lambda: BitcoinTradingEnv( + self.test_df, reward_func=self.reward_strategy, forecast_len=int(params['forecast_len']), confidence_interval=params['confidence_interval'])]) + + model_params = self.model_params(params) + + model = PPO2(MlpLnLstmPolicy, train_env, verbose=0, nminibatches=1, + tensorboard_log=os.path.join('.', 'tensorboard'), **model_params) + + models_to_train = 1 + self.logger.info("Training {} model instances".format(models_to_train)) + + for idx in range(0, models_to_train): #Not sure why we are doing this, tbh + self.logger.info('[', idx, '] Training for: ', len(self.train_df), ' time steps') + + model.learn(total_timesteps=len(self.train_df)) + + obs = test_env.reset() + done, reward_sum = False, 0 + + while not done: + action, _states = model.predict(obs) + obs, reward, done, info = test_env.step(action) + reward_sum += reward + + self.logger.info('[', idx, '] Total reward: ', reward_sum, ' (' + self.reward_strategy + ')') + model.save(os.path.join('.', 'agents', 'ppo2_' + self.reward_strategy + '_' + str(idx) + '.pkl')) + + self.logger.info("Trained {} model instances".format(models_to_train)) + + def test(self, model_instance: 0): + + study_name = 'ppo2_' + self.reward_strategy + study = optuna.load_study(study_name=study_name, storage=self.params_db_file) + params = study.best_trial.params + + test_env = DummyVecEnv([lambda: BitcoinTradingEnv( + self.test_df, reward_func=self.reward_strategy, forecast_len=int(params['forecast_len']), confidence_interval=params['confidence_interval'])]) + + 
model_params = self.model_params(params) + + model = PPO2.load(os.path.join('.', 'agents', 'ppo2_' + reward_strategy + '_' + str(model_instance) + '.pkl'), env=test_env) + + obs, done = test_env.reset(), False + while not done: + action, _states = model.predict(obs) + obs, reward, done, info = test_env.step(action) + + test_env.render(mode="human") + + if __name__ == '__main__': optimizer = Optimize() - test_mode = "FAST" + test_mode = "FAST" # I'm hard-coding this for now if test_mode == "FAST": optimizer.input_data_file = os.path.join('data', 'coinbase_daily.csv') optimizer.n_jobs = 1 optimizer.n_trials = 1 optimizer.n_test_episodes = 1 optimizer.n_evaluations = 1 - optimizer.optimize() + # optimizer.optimize() + optimizer.train() + # optimizer.test() From 3f899009315ce2634c871e62af5bc0e1397be7e4 Mon Sep 17 00:00:00 2001 From: adamjking3 Date: Mon, 24 Jun 2019 00:01:26 -0700 Subject: [PATCH 07/11] Move /agents, /tensorboard, /log, and params.db inside /data for cleaner file structure. --- .gitignore | 9 +++------ data/{ => input}/coinbase_daily.csv | 0 data/{ => input}/coinbase_hourly.csv | 0 3 files changed, 3 insertions(+), 6 deletions(-) rename data/{ => input}/coinbase_daily.csv (100%) rename data/{ => input}/coinbase_hourly.csv (100%) diff --git a/.gitignore b/.gitignore index 8ac6194..3cdcf7d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,7 @@ .vscode -data/bitstamp.csv -tensorboard/* -agents -research/results -research/.ipynb_checkpoints/ **/__pycache__ +data/tensorboard/* +data/agents/* +data/log/* *.pkl *.db -log/ diff --git a/data/coinbase_daily.csv b/data/input/coinbase_daily.csv similarity index 100% rename from data/coinbase_daily.csv rename to data/input/coinbase_daily.csv diff --git a/data/coinbase_hourly.csv b/data/input/coinbase_hourly.csv similarity index 100% rename from data/coinbase_hourly.csv rename to data/input/coinbase_hourly.csv From 7e4c1e8d94aa5bddf1f9dd3e583914b6010835aa Mon Sep 17 00:00:00 2001 From: adamjking3 Date: Mon, 24 Jun 2019 00:02:54 -0700 Subject: [PATCH 08/11] Replace MIT license with GNU v3 to prevent this library from ever being resold, etc. --- LICENSE | 695 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 674 insertions(+), 21 deletions(-) diff --git a/LICENSE b/LICENSE index e36694c..61d1860 100644 --- a/LICENSE +++ b/LICENSE @@ -1,21 +1,674 @@ -MIT License - -Copyright (c) 2019 Adam King - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
+GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. 
+ + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. 
However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. 
+ + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. 
+ + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. 
For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. 
+ + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + Copyright (C) + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". 
+ + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. \ No newline at end of file From b87d127cebb333ec089a22aad8b8b58491bccaac Mon Sep 17 00:00:00 2001 From: adamjking3 Date: Mon, 24 Jun 2019 00:07:52 -0700 Subject: [PATCH 09/11] Update readme to support single optimize.py file for easier onboarding. --- README.md | 66 ++++++------------------------------------------------- 1 file changed, 7 insertions(+), 59 deletions(-) diff --git a/README.md b/README.md index 64e0c31..a262b0e 100644 --- a/README.md +++ b/README.md @@ -15,51 +15,15 @@ https://towardsdatascience.com/using-reinforcement-learning-to-trade-bitcoin-for The first thing you will need to do to get started is install the requirements in `requirements.txt`. - ```bash - pip install -r requirements.txt - ``` - - The requirements include the `tensorflow-gpu` library, though if you do not have access to a GPU, you should replace this requirement with `tensorflow`. - -## Testing workflow - -First let's try the "optimize" strategy with a single run, single evaluation, just to make sure that things are "sane". - -### Expected output - -``` -% date ; python optimize.py; date -Thu Jun 6 14:09:23 CDT 2019 -[I 2019-06-06 14:09:35,557] A new study created with name: ppo2_sortino - - - -[I 2019-06-06 14:21:50,724] Finished trial#1 resulted in value: -956.9744873046875. Current best value is -956.9744873046875 with parameters: {'cliprange': 0.18943365028795878, 'confidence_interval': 0.8286824056507663, 'ent_coef': 8.094794121881875e-08, 'forecast_len': 14.7463$ -0586736364, 'gamma': 0.9834343245286393, 'lam': 0.9646711236104828, 'learning_rate': 0.032564661147532384, 'n_steps': 28.294495666878618, 'noptepochs': 2.3568984946859066}. -Number of finished trials: 2 -Best trial: -Value: -956.9744873046875 -Params: - cliprange: 0.18943365028795878 - confidence_interval: 0.8286824056507663 - ent_coef: 8.094794121881875e-08 - forecast_len: 14.746310586736364 - gamma: 0.9834343245286393 - lam: 0.9646711236104828 - learning_rate: 0.032564661147532384 - n_steps: 28.294495666878618 - noptepochs: 2.3568984946859066 - -Thu Jun 6 14:21:51 CDT 2019 - -% +```bash +pip install -r requirements.txt ``` -So that took about 12 minutes on a pretty powerful laptop to run a single trial (at least as of Jun 2019). +The requirements include the `tensorflow-gpu` library, though if you do not have access to a GPU, you should replace this requirement with `tensorflow`. - # Finding Hyper-Parameters +# Optimizing, Training, and Testing -While you could just let the agent train and run with the default PPO2 hyper-parameters, your agent would likely not be very profitable. The `stable-baselines` library provides a great set of default parameters that work for most problem domains, but we need to better. +While you could just let the agent train and run with the default PPO2 hyper-parameters, your agent would likely not be very profitable. 
The `stable-baselines` library provides a great set of default parameters that work for most problem domains, but we need to do better.

To do this, you will need to run `optimize.py`. Within the file, you can define the `reward_strategy` for the environment to use; it currently defaults to `sortino`.

@@ -67,25 +31,9 @@ To do this, you will need to run `optimize.py`. Within the file, you can define
 python ./optimize.py
 ```

-This will take a while (hours to days depending on your hardware setup), but over time it will print to the console as trials are completed. Once a trial is completed, it will be stored in `./params.db`, an SQLite database, from which we can pull hyper-parameters to train our agent.
-
-# Training Agents
-
-Once you've found a good set of hyper-parameters, we can train an agent with that set. To do this, you will want to open `train.py` and ensure the `reward_strategy` is set to the correct strategy. Then let `train.py` run until you've got some saved models to test.
+This can take a while (hours to days depending on your hardware setup), but over time it will print to the console as trials are completed. Once a trial is completed, it will be stored in `./data/params.db`, an SQLite database, from which we can pull hyper-parameters to train our agent.
 
-```bash
-python ./train.py
-```
-
-If you have already trained a model, and would like to resume training from the next epoch, you can set `curr_idx` at the top of the file to the index of the last trained model. Otherwise, leave this at `-1` to start training at epoch 0.
-
-# Testing Agents
-
-Once you've successfully trained and saved a model, it's time to test it. Open up `test.py` and set the `reward_strategy` to the correct strategy and `curr_idx` to the index of the agent you'd like to train. Then run `test.py` to watch your agent trade.
-
-```bash
-python ./test.py
-```
+From there, you can train an agent with the best set of hyper-parameters, and later test it on completely new data to verify the generalization of the algorithm.
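+If you want to inspect what the optimizer has found so far, you can read the best trial back out of that database with Optuna directly. A minimal sketch (the study name `ppo2_sortino` and the storage URL are assumptions based on the defaults described above):
+
+```python
+# Minimal sketch: read the best hyper-parameters back out of the Optuna study.
+# The study name and storage URL below are assumed from the defaults above.
+import optuna
+
+study = optuna.load_study(study_name='ppo2_sortino',
+                          storage='sqlite:///data/params.db')
+
+print('Finished trials:', len(study.trials))
+print('Best reward:', -1 * study.best_trial.value)
+for key, value in study.best_trial.params.items():
+    print(f'  {key}: {value}')
+```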
# Contributing From 5486766b1e40c91f0bbcac21480ea18164903c4f Mon Sep 17 00:00:00 2001 From: adamjking3 Date: Mon, 24 Jun 2019 00:08:15 -0700 Subject: [PATCH 10/11] Add results.py because I'm not sure why it was hidden --- research/results.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/research/results.py b/research/results.py index cad604e..97db2fd 100644 --- a/research/results.py +++ b/research/results.py @@ -7,8 +7,8 @@ from stable_baselines.common.vec_env import SubprocVecEnv, DummyVecEnv from stable_baselines import A2C, ACKTR, PPO2 -from env.BitcoinTradingEnv import BitcoinTradingEnv -from util.indicators import add_indicators +from lib.env.BitcoinTradingEnv import BitcoinTradingEnv +from lib.util.indicators import add_indicators df = pd.read_csv('./data/coinbase_hourly.csv') @@ -22,24 +22,24 @@ test_df = df[train_len:] profit_study = optuna.load_study(study_name='ppo2_profit', - storage='sqlite:///params.db') + storage='sqlite:///params.db') profit_env = DummyVecEnv([lambda: BitcoinTradingEnv( - test_df, reward_func="profit", forecast_len=int(profit_study.best_trial.params['forecast_len']), confidence_interval=profit_study.best_trial.params['confidence_interval'])]) + test_df, reward_func="profit", forecast_steps=int(profit_study.best_trial.params['forecast_steps']), forecast_alpha=profit_study.best_trial.params['forecast_alpha'])]) sortino_study = optuna.load_study(study_name='ppo2_sortino', -storage='sqlite:///params.db') + storage='sqlite:///params.db') sortino_env = DummyVecEnv([lambda: BitcoinTradingEnv( - test_df, reward_func="profit", forecast_len=int(sortino_study.best_trial.params['forecast_len']), confidence_interval=sortino_study.best_trial.params['confidence_interval'])]) + test_df, reward_func="profit", forecast_steps=int(sortino_study.best_trial.params['forecast_steps']), forecast_alpha=sortino_study.best_trial.params['forecast_alpha'])]) # calmar_study = optuna.load_study(study_name='ppo2_sortino', # storage='sqlite:///params.db') # calmar_env = DummyVecEnv([lambda: BitcoinTradingEnv( -# test_df, reward_func="profit", forecast_len=int(calmar_study.best_trial.params['forecast_len']), confidence_interval=calmar_study.best_trial.params['confidence_interval'])]) +# test_df, reward_func="profit", forecast_steps=int(calmar_study.best_trial.params['forecast_steps']), forecast_alpha=calmar_study.best_trial.params['forecast_alpha'])]) omega_study = optuna.load_study(study_name='ppo2_omega', -storage='sqlite:///params.db') + storage='sqlite:///params.db') omega_env = DummyVecEnv([lambda: BitcoinTradingEnv( - test_df, reward_func="profit", forecast_len=int(omega_study.best_trial.params['forecast_len']), confidence_interval=omega_study.best_trial.params['confidence_interval'])]) + test_df, reward_func="profit", forecast_steps=int(omega_study.best_trial.params['forecast_steps']), forecast_alpha=omega_study.best_trial.params['forecast_alpha'])]) profit_model = PPO2.load('./agents/ppo2_profit_4.pkl', env=profit_env) @@ -85,4 +85,3 @@ with open('./research/results/omega_net_worths_4.pkl', 'wb') as handle: pickle.dump(omega_net_worths, handle) - From 147e6e1ea9c9ec7ef434edb05ed94a22d22ae32e Mon Sep 17 00:00:00 2001 From: adamjking3 Date: Mon, 24 Jun 2019 00:39:26 -0700 Subject: [PATCH 11/11] Refactor optimize/train/test into single RLTrader class. 
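The consolidated class exposes `optimize()`, `train()`, and `test()` methods, driven by the new `optimize.py` entry point later in this patch. A rough usage sketch (method names and keyword arguments are taken from `lib/RLTrader.py` below; the argument values are illustrative only):

```python
# Sketch of the consolidated workflow after this refactor; mirrors the new
# optimize.py entry point. Argument values are illustrative, not defaults.
import numpy as np

from lib.RLTrader import RLTrader

np.warnings.filterwarnings('ignore')

trader = RLTrader(reward_strategy='sortino')   # kwargs are optional overrides

trader.optimize(n_trials=10)                   # hyper-parameter search via Optuna
trader.train(n_epochs=3,                       # train with the best trial's params
             test_trained_model=True,          # evaluate each saved epoch
             render_trained_model=False)
trader.test(model_epoch=2,                     # replay a specific saved agent
            should_render=True)
```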
--- lib/RLTrader.py | 252 ++++++++++++++++ {env => lib}/__init__.py | 0 lib/__init__.pyc | Bin 0 -> 144 bytes {env => lib/env}/BitcoinTradingEnv.py | 134 ++++----- {render => lib/env}/__init__.py | 0 .../env/render}/BitcoinTradingGraph.py | 5 +- {util => lib/env/render}/__init__.py | 0 agents/.gitkeep => lib/util/__init__.py | 0 {util => lib/util}/benchmarks.py | 0 lib/util/indicators.py | 78 +++++ {util => lib/util}/log.py | 10 +- lib/util/transform.py | 25 ++ log/.gitkeep | 0 optimize.py | 269 +----------------- test.py | 55 ---- train.py | 78 ----- util/indicators.py | 78 ----- util/transform.py | 37 --- 18 files changed, 434 insertions(+), 587 deletions(-) create mode 100644 lib/RLTrader.py rename {env => lib}/__init__.py (100%) create mode 100644 lib/__init__.pyc rename {env => lib/env}/BitcoinTradingEnv.py (53%) rename {render => lib/env}/__init__.py (100%) rename {render => lib/env/render}/BitcoinTradingGraph.py (97%) rename {util => lib/env/render}/__init__.py (100%) rename agents/.gitkeep => lib/util/__init__.py (100%) rename {util => lib/util}/benchmarks.py (100%) create mode 100644 lib/util/indicators.py rename {util => lib/util}/log.py (78%) create mode 100644 lib/util/transform.py delete mode 100644 log/.gitkeep delete mode 100644 test.py delete mode 100644 train.py delete mode 100644 util/indicators.py delete mode 100644 util/transform.py diff --git a/lib/RLTrader.py b/lib/RLTrader.py new file mode 100644 index 0000000..170671d --- /dev/null +++ b/lib/RLTrader.py @@ -0,0 +1,252 @@ +import optuna +import pandas as pd +import numpy as np + +from os import path +from stable_baselines.common.base_class import BaseRLModel +from stable_baselines.common.policies import BasePolicy, MlpLnLstmPolicy +from stable_baselines.common.vec_env import DummyVecEnv +from stable_baselines import PPO2 + +from lib.env.BitcoinTradingEnv import BitcoinTradingEnv +from lib.util.indicators import add_indicators +from lib.util.log import init_logger + + +class RLTrader: + feature_df = None + + def __init__(self, model: BaseRLModel = PPO2, policy: BasePolicy = MlpLnLstmPolicy, **kwargs): + self.logger = init_logger( + __name__, show_debug=kwargs.get('show_debug', True)) + + self.model = model + self.policy = policy + self.reward_strategy = kwargs.get('reward_strategy', 'sortino') + self.tensorboard_path = kwargs.get( + 'tensorboard_path', path.join('data', 'tensorboard')) + self.input_data_path = kwargs.get('input_data_path', None) + self.params_db_path = kwargs.get( + 'params_db_path', 'sqlite:///data/params.db') + + self.model_verbose = kwargs.get('model_verbose', 1) + self.nminibatches = kwargs.get('nminibatches', 1) + + self.initialize_data(kwargs) + + self.logger.debug(f'Reward Strategy: {self.reward_strategy}') + + def initialize_data(self, kwargs): + if self.input_data_path is None: + self.input_data_path = path.join( + 'data', 'input', 'coinbase_hourly.csv') + + self.feature_df = pd.read_csv(self.input_data_path) + self.feature_df = self.feature_df.drop(['Symbol'], axis=1) + self.feature_df['Date'] = pd.to_datetime( + self.feature_df['Date'], format='%Y-%m-%d %I-%p') + self.feature_df['Date'] = self.feature_df['Date'].astype(str) + self.feature_df = self.feature_df.sort_values(['Date']) + self.feature_df = add_indicators(self.feature_df.reset_index()) + + self.validation_set_percentage = kwargs.get( + 'validation_set_percentage', 0.8) + self.test_set_percentage = kwargs.get('test_set_percentage', 0.8) + + self.logger.debug( + f'Initialized Features: {self.feature_df.columns.str.cat(sep=", ")}') + 
+ def initialize_optuna(self, should_create: bool = False): + self.study_name = f'{self.model.__class__.__name__}__{self.policy.__class__.__name__}__{self.reward_strategy}' + + if should_create: + self.optuna_study = optuna.create_study( + study_name=self.study_name, storage=self.params_db_path, load_if_exists=True) + else: + self.optuna_study = optuna.load_study( + study_name=self.study_name, storage=self.params_db_path) + + self.logger.debug('Initialized Optuna:') + + try: + self.logger.debug( + f'Best reward in ({len(self.optuna_study.trials)}) trials: {-self.optuna_study.best_value}') + except: + self.logger.debug('No trials have been finished yet.') + + def get_env_params(self): + params = self.optuna_study.best_trial.params + return { + 'reward_strategy': self.reward_strategy, + 'forecast_steps': int(params['forecast_steps']), + 'forecast_alpha': params['forecast_alpha'], + } + + def get_model_params(self): + params = self.optuna_study.best_trial.params + return { + 'n_steps': int(params['n_steps']), + 'gamma': params['gamma'], + 'learning_rate': params['learning_rate'], + 'ent_coef': params['ent_coef'], + 'cliprange': params['cliprange'], + 'noptepochs': int(params['noptepochs']), + 'lam': params['lam'], + } + + def optimize_env_params(self, trial): + return { + 'forecast_steps': int(trial.suggest_loguniform('forecast_steps', 1, 200)), + 'forecast_alpha': trial.suggest_uniform('forecast_alpha', 0.001, 0.30), + } + + def optimize_agent_params(self, trial): + if self.model != PPO2: + return {'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1.)} + + return { + 'n_steps': int(trial.suggest_loguniform('n_steps', 16, 2048)), + 'gamma': trial.suggest_loguniform('gamma', 0.9, 0.9999), + 'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1.), + 'ent_coef': trial.suggest_loguniform('ent_coef', 1e-8, 1e-1), + 'cliprange': trial.suggest_uniform('cliprange', 0.1, 0.4), + 'noptepochs': int(trial.suggest_loguniform('noptepochs', 1, 48)), + 'lam': trial.suggest_uniform('lam', 0.8, 1.) 
+ } + + def optimize_params(self, trial, n_prune_evals_per_trial: int = 4, n_tests_per_eval: int = 1, speedup_factor: int = 10): + env_params = self.optimize_env_params(trial) + + full_train_len = self.test_set_percentage * len(self.feature_df) + optimize_train_len = int( + self.validation_set_percentage * full_train_len) + train_len = int(optimize_train_len / speedup_factor) + train_start = optimize_train_len - train_len + + train_df = self.feature_df[train_start:optimize_train_len] + validation_df = self.feature_df[optimize_train_len:] + + train_env = DummyVecEnv( + [lambda: BitcoinTradingEnv(train_df, **env_params)]) + validation_env = DummyVecEnv( + [lambda: BitcoinTradingEnv(validation_df, **env_params)]) + + model_params = self.optimize_agent_params(trial) + model = self.model(self.policy, train_env, verbose=self.model_verbose, nminibatches=self.nminibatches, + tensorboard_log=self.tensorboard_path, **model_params) + + last_reward = -np.finfo(np.float16).max + evaluation_interval = int( + train_len / n_prune_evals_per_trial) + + for eval_idx in range(n_prune_evals_per_trial): + try: + model.learn(evaluation_interval) + except AssertionError: + raise + + rewards = [] + n_episodes, reward_sum = 0, 0.0 + + obs = validation_env.reset() + while n_episodes < n_tests_per_eval: + action, _ = model.predict(obs) + obs, reward, done, _ = validation_env.step(action) + reward_sum += reward + + if done: + rewards.append(reward_sum) + reward_sum = 0.0 + n_episodes += 1 + obs = validation_env.reset() + + last_reward = np.mean(rewards) + trial.report(-1 * last_reward, eval_idx) + + if trial.should_prune(eval_idx): + raise optuna.structs.TrialPruned() + + return -1 * last_reward + + def optimize(self, n_trials: int = 10, n_parallel_jobs: int = 4, *optimize_params): + self.initialize_optuna(should_create=True) + + try: + self.optuna_study.optimize( + self.optimize_params, n_trials=n_trials, n_jobs=n_parallel_jobs, *optimize_params) + except KeyboardInterrupt: + pass + + self.logger.info(f'Finished trials: {len(self.optuna_study.trials)}') + + self.logger.info(f'Best trial: {self.optuna_study.best_trial.value}') + + self.logger.info('Params: ') + for key, value in self.optuna_study.best_trial.params.items(): + self.logger.info(f' {key}: {value}') + + return self.optuna_study.trials_dataframe() + + def train(self, n_epochs: int = 1, iters_per_epoch: int = 1, test_trained_model: bool = False, render_trained_model: bool = False): + self.initialize_optuna() + + env_params = self.get_env_params() + + train_len = int(self.test_set_percentage * len(self.feature_df)) + train_df = self.feature_df[:train_len] + + train_env = DummyVecEnv( + [lambda: BitcoinTradingEnv(train_df, **env_params)]) + + model_params = self.get_model_params() + + model = self.model(self.policy, train_env, verbose=self.model_verbose, nminibatches=self.nminibatches, + tensorboard_log=self.tensorboard_path, **model_params) + + self.logger.info(f'Training for {n_epochs} epochs') + + n_timesteps = len(train_df) * iters_per_epoch + + for model_epoch in range(0, n_epochs): + self.logger.info( + f'[{model_epoch}] Training for: {n_timesteps} time steps') + + model.learn(total_timesteps=n_timesteps) + + model_path = path.join( + 'data', 'agents', f'{self.study_name}__{model_epoch}.pkl') + model.save(model_path) + + if test_trained_model: + self.test(model_epoch, should_render=render_trained_model) + + self.logger.info(f'Trained {n_epochs} models') + + def test(self, model_epoch: int = 0, should_render: bool = True): + env_params = 
self.get_env_params() + + train_len = int(self.test_set_percentage * len(self.feature_df)) + test_df = self.feature_df[train_len:] + + test_env = DummyVecEnv( + [lambda: BitcoinTradingEnv(test_df, **env_params)]) + + model_path = path.join( + 'data', 'agents', f'{self.study_name}__{model_epoch}.pkl') + model = self.model.load(model_path, env=test_env) + + self.logger.info( + f'Testing model ({self.study_name}__{model_epoch})') + + obs, done, reward_sum = test_env.reset(), False, 0 + while not done: + action, _states = model.predict(obs) + obs, reward, done, _ = test_env.step(action) + + reward_sum += reward + + if should_render: + test_env.render(mode='human') + + self.logger.info( + f'Finished testing model ({self.study_name}__{model_epoch}): ${"{:.2f}".format(reward_sum)}') diff --git a/env/__init__.py b/lib/__init__.py similarity index 100% rename from env/__init__.py rename to lib/__init__.py diff --git a/lib/__init__.pyc b/lib/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2581be62e3b630e12d5e4d90e0cc0cc2087dc9e4 GIT binary patch literal 144 zcmZSn%**vyM<6zt0SXv_v;z7ftUdRNBAEs literal 0 HcmV?d00001 diff --git a/env/BitcoinTradingEnv.py b/lib/env/BitcoinTradingEnv.py similarity index 53% rename from env/BitcoinTradingEnv.py rename to lib/env/BitcoinTradingEnv.py index 0f11c7e..1e1ea47 100644 --- a/env/BitcoinTradingEnv.py +++ b/lib/env/BitcoinTradingEnv.py @@ -1,19 +1,14 @@ import gym import pandas as pd import numpy as np -import tensorflow as tf from gym import spaces from statsmodels.tsa.statespace.sarimax import SARIMAX -from empyrical import sortino_ratio, calmar_ratio, omega_ratio +from empyrical import sortino_ratio, sharpe_ratio, omega_ratio -from render.BitcoinTradingGraph import BitcoinTradingGraph -from util.transform import log_and_difference, max_min_normalize -from util.indicators import add_indicators - - -# Delete this if debugging -np.warnings.filterwarnings('ignore') +from lib.env.render.BitcoinTradingGraph import BitcoinTradingGraph +from lib.util.transform import log_and_difference, max_min_normalize +from lib.util.indicators import add_indicators class BitcoinTradingEnv(gym.Env): @@ -21,59 +16,59 @@ class BitcoinTradingEnv(gym.Env): metadata = {'render.modes': ['human', 'system', 'none']} viewer = None - def __init__(self, df, initial_balance=10000, commission=0.0025, reward_func='sortino', **kwargs): + def __init__(self, df, initial_balance=10000, commission=0.0025, reward_strategy='sortino', **kwargs): super(BitcoinTradingEnv, self).__init__() self.initial_balance = initial_balance self.commission = commission - self.reward_func = reward_func + self.reward_strategy = reward_strategy self.df = df.fillna(method='bfill').reset_index() - self.stationary_df = log_and_difference( - self.df, ['Open', 'High', 'Low', 'Close', 'Volume BTC', 'Volume USD']) + self.stationary_df = self.df.copy() + self.stationary_df = self.stationary_df[self.stationary_df.columns.difference([ + 'index', 'Date'])] + self.stationary_df = log_and_difference(self.stationary_df, + ['Open', 'High', 'Low', 'Close', 'Volume BTC', 'Volume USD']) self.benchmarks = kwargs.get('benchmarks', []) - self.forecast_len = kwargs.get('forecast_len', 10) - self.confidence_interval = kwargs.get('confidence_interval', 0.95) - self.obs_shape = (1, 5 + len(self.df.columns) - - 2 + (self.forecast_len * 3)) + self.forecast_steps = kwargs.get('forecast_steps', 2) + self.forecast_alpha = kwargs.get('forecast_alpha', 0.05) + + self.action_space = spaces.Discrete(3) - # Actions of the 
format Buy 1/4, Sell 3/4, Hold (amount ignored), etc. - self.action_space = spaces.Discrete(12) + n_features = 5 + len(self.df.columns) - 2 + n_prediction_features = (self.forecast_steps * 3) + self.obs_shape = (1, n_features + n_prediction_features) - # Observes the price action, indicators, account action, price forecasts self.observation_space = spaces.Box( low=0, high=1, shape=self.obs_shape, dtype=np.float16) def _next_observation(self): - features = self.stationary_df[self.stationary_df.columns.difference([ - 'index', 'Date'])] + current_idx = self.current_step + self.forecast_steps + 1 - scaled = features[:self.current_step + self.forecast_len + 1].values - scaled[np.bitwise_not(np.isfinite(scaled))] = 0 + scaled = self.stationary_df[:current_idx].values - scaled = tf.contrib.eager.py_func( - func=max_min_normalize, inp=scaled, Tout=tf.float16) - scaled = pd.DataFrame(scaled, columns=features.columns) + scaled = pd.DataFrame(scaled, columns=self.stationary_df.columns) + scaled = max_min_normalize(scaled) obs = scaled.values[-1] - past_df = self.stationary_df['Close'][: - self.current_step + self.forecast_len + 1] - forecast_model = SARIMAX( - past_df.values, enforce_stationarity=False, simple_differencing=True) + forecast_model = SARIMAX(self.stationary_df['Close'][:current_idx].values, + enforce_stationarity=False, + simple_differencing=True) + model_fit = forecast_model.fit(method='bfgs', disp=False) - forecast = model_fit.get_forecast( - steps=self.forecast_len, alpha=(1 - self.confidence_interval)) + + forecast = model_fit.get_forecast(steps=self.forecast_steps, + alpha=self.forecast_alpha) obs = np.insert(obs, len(obs), forecast.predicted_mean, axis=0) obs = np.insert(obs, len(obs), forecast.conf_int().flatten(), axis=0) - scaled_history = tf.contrib.eager.py_func( - func=max_min_normalize, inp=self.account_history.astype('float32'), Tout=tf.float16) + scaled_history = max_min_normalize(self.account_history) - obs = np.insert(obs, len(obs), scaled_history[:, -1], axis=0) + obs = np.insert(obs, len(obs), scaled_history.values[-1], axis=0) obs = np.reshape(obs.astype('float16'), self.obs_shape) obs[np.bitwise_not(np.isfinite(obs))] = 0 @@ -81,64 +76,61 @@ def _next_observation(self): return obs def _current_price(self): - return self.df['Close'].values[self.current_step + self.forecast_len] + 0.01 + return self.df['Close'].values[self.current_step + self.forecast_steps] def _take_action(self, action): current_price = self._current_price() - action_type = int(action / 4) - amount = 1 / (action % 4 + 1) btc_bought = 0 btc_sold = 0 - cost = 0 - sales = 0 + cost_of_btc = 0 + revenue_from_sold = 0 - if action_type == 0: + if action == 0: price = current_price * (1 + self.commission) - btc_bought = min(self.balance * amount / - price, self.balance / price) - cost = btc_bought * price + btc_bought = self.balance / price + cost_of_btc = self.balance self.btc_held += btc_bought - self.balance -= cost - elif action_type == 1: + self.balance -= cost_of_btc + elif action == 1: price = current_price * (1 - self.commission) - btc_sold = self.btc_held * amount - sales = btc_sold * price + btc_sold = self.btc_held + revenue_from_sold = btc_sold * price self.btc_held -= btc_sold - self.balance += sales + self.balance += revenue_from_sold if btc_sold > 0 or btc_bought > 0: self.trades.append({'step': self.current_step, - 'amount': btc_sold if btc_sold > 0 else btc_bought, 'total': sales if btc_sold > 0 else cost, + 'amount': btc_sold if btc_sold > 0 else btc_bought, 'total': revenue_from_sold if 
btc_sold > 0 else cost_of_btc, 'type': 'sell' if btc_sold > 0 else 'buy'}) self.net_worths.append( self.balance + self.btc_held * current_price) - self.account_history = np.append(self.account_history, [ - [self.balance], - [btc_bought], - [cost], - [btc_sold], - [sales] - ], axis=1) + self.account_history.append({ + 'balance': self.balance, + 'btc_bought': btc_bought, + 'cost_of_btc': cost_of_btc, + 'btc_sold': btc_sold, + 'revenue_from_sold': revenue_from_sold, + }, ignore_index=True) def _reward(self): - length = min(self.current_step, self.forecast_len) + length = min(self.current_step, self.forecast_steps) returns = np.diff(self.net_worths[-length:]) if np.count_nonzero(returns) < 1: return 0 - if self.reward_func == 'sortino': + if self.reward_strategy == 'sortino': reward = sortino_ratio( returns, annualization=365*24) - elif self.reward_func == 'calmar': - reward = calmar_ratio( + elif self.reward_strategy == 'sharpe': + reward = sharpe_ratio( returns, annualization=365*24) - elif self.reward_func == 'omega': + elif self.reward_strategy == 'omega': reward = omega_ratio( returns, annualization=365*24) else: @@ -147,7 +139,7 @@ def _reward(self): return reward if np.isfinite(reward) else 0 def _done(self): - return self.net_worths[-1] < self.initial_balance / 10 or self.current_step == len(self.df) - self.forecast_len - 1 + return self.net_worths[-1] < self.initial_balance / 10 or self.current_step == len(self.df) - self.forecast_steps - 1 def reset(self): self.balance = self.initial_balance @@ -155,13 +147,13 @@ def reset(self): self.btc_held = 0 self.current_step = 0 - self.account_history = np.array([ - [self.balance], - [0], - [0], - [0], - [0] - ]) + self.account_history = pd.DataFrame([{ + 'balance': self.balance, + 'btc_bought': 0, + 'cost_of_btc': 0, + 'btc_sold': 0, + 'revenue_from_sold': 0, + }]) self.trades = [] return self._next_observation() diff --git a/render/__init__.py b/lib/env/__init__.py similarity index 100% rename from render/__init__.py rename to lib/env/__init__.py diff --git a/render/BitcoinTradingGraph.py b/lib/env/render/BitcoinTradingGraph.py similarity index 97% rename from render/BitcoinTradingGraph.py rename to lib/env/render/BitcoinTradingGraph.py index cf28be7..ab0ad9a 100644 --- a/render/BitcoinTradingGraph.py +++ b/lib/env/render/BitcoinTradingGraph.py @@ -19,7 +19,7 @@ class BitcoinTradingGraph: def __init__(self, df): self.df = df self.df['Time'] = self.df['Date'].apply( - lambda x: datetime.strptime(x, '%Y-%m-%d %I-%p')) + lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S')) self.df = self.df.sort_values('Time') # Create a figure on screen and set the title @@ -74,7 +74,8 @@ def _render_net_worth(self, step_range, dates, current_step, net_worths, benchma min(net_worths) / 1.25, max(net_worths) * 1.25) def _render_benchmarks(self, step_range, dates, benchmarks): - colors = ['orange', 'cyan', 'purple', 'blue', 'magenta', 'yellow', 'black', 'red', 'green'] + colors = ['orange', 'cyan', 'purple', 'blue', + 'magenta', 'yellow', 'black', 'red', 'green'] for i, benchmark in enumerate(benchmarks): self.net_worth_ax.plot( diff --git a/util/__init__.py b/lib/env/render/__init__.py similarity index 100% rename from util/__init__.py rename to lib/env/render/__init__.py diff --git a/agents/.gitkeep b/lib/util/__init__.py similarity index 100% rename from agents/.gitkeep rename to lib/util/__init__.py diff --git a/util/benchmarks.py b/lib/util/benchmarks.py similarity index 100% rename from util/benchmarks.py rename to lib/util/benchmarks.py diff --git 
a/lib/util/indicators.py b/lib/util/indicators.py new file mode 100644 index 0000000..1ed7cc4 --- /dev/null +++ b/lib/util/indicators.py @@ -0,0 +1,78 @@ +import ta + + +def add_indicators(df): + df['RSI'] = ta.rsi(df["Close"]) + # df['MFI'] = ta.money_flow_index( + # df["High"], df["Low"], df["Close"], df["Volume BTC"]) + # df['TSI'] = ta.tsi(df["Close"]) + # df['UO'] = ta.uo(df["High"], df["Low"], df["Close"]) + # df['AO'] = ta.ao(df["High"], df["Low"]) + + df['MACD_diff'] = ta.macd_diff(df["Close"]) + # df['Vortex_pos'] = ta.vortex_indicator_pos( + # df["High"], df["Low"], df["Close"]) + # df['Vortex_neg'] = ta.vortex_indicator_neg( + # df["High"], df["Low"], df["Close"]) + # df['Vortex_diff'] = abs( + # df['Vortex_pos'] - + # df['Vortex_neg']) + # df['Trix'] = ta.trix(df["Close"]) + # df['Mass_index'] = ta.mass_index(df["High"], df["Low"]) + # df['CCI'] = ta.cci(df["High"], df["Low"], df["Close"]) + # df['DPO'] = ta.dpo(df["Close"]) + # df['KST'] = ta.kst(df["Close"]) + # df['KST_sig'] = ta.kst_sig(df["Close"]) + # df['KST_diff'] = ( + # df['KST'] - + # df['KST_sig']) + # df['Aroon_up'] = ta.aroon_up(df["Close"]) + # df['Aroon_down'] = ta.aroon_down(df["Close"]) + # df['Aroon_ind'] = ( + # df['Aroon_up'] - + # df['Aroon_down'] + # ) + + df['BBH'] = ta.bollinger_hband(df["Close"]) + df['BBL'] = ta.bollinger_lband(df["Close"]) + df['BBM'] = ta.bollinger_mavg(df["Close"]) + df['BBHI'] = ta.bollinger_hband_indicator( + df["Close"]) + df['BBLI'] = ta.bollinger_lband_indicator( + df["Close"]) + # df['KCHI'] = ta.keltner_channel_hband_indicator(df["High"], + # df["Low"], + # df["Close"]) + # df['KCLI'] = ta.keltner_channel_lband_indicator(df["High"], + # df["Low"], + # df["Close"]) + # df['DCHI'] = ta.donchian_channel_hband_indicator(df["Close"]) + # df['DCLI'] = ta.donchian_channel_lband_indicator(df["Close"]) + + df['ADI'] = ta.acc_dist_index(df["High"], + df["Low"], + df["Close"], + df["Volume BTC"]) + # df['OBV'] = ta.on_balance_volume(df["Close"], + # df["Volume BTC"]) + # df['CMF'] = ta.chaikin_money_flow(df["High"], + # df["Low"], + # df["Close"], + # df["Volume BTC"]) + # df['FI'] = ta.force_index(df["Close"], + # df["Volume BTC"]) + # df['EM'] = ta.ease_of_movement(df["High"], + # df["Low"], + # df["Close"], + # df["Volume BTC"]) + # df['VPT'] = ta.volume_price_trend(df["Close"], + # df["Volume BTC"]) + # df['NVI'] = ta.negative_volume_index(df["Close"], + # df["Volume BTC"]) + + df['DR'] = ta.daily_return(df["Close"]) + # df['DLR'] = ta.daily_log_return(df["Close"]) + + df.fillna(method='bfill', inplace=True) + + return df diff --git a/util/log.py b/lib/util/log.py similarity index 78% rename from util/log.py rename to lib/util/log.py index 526b946..e9a6e55 100644 --- a/util/log.py +++ b/lib/util/log.py @@ -3,7 +3,7 @@ import colorlog -def init_logger(dunder_name, testing_mode) -> logging.Logger: +def init_logger(dunder_name, show_debug=False) -> logging.Logger: log_format = ( '%(asctime)s - ' '%(name)s - ' @@ -20,7 +20,7 @@ def init_logger(dunder_name, testing_mode) -> logging.Logger: colorlog.basicConfig(format=colorlog_format) logger = logging.getLogger(dunder_name) - if testing_mode: + if show_debug: logger.setLevel(logging.DEBUG) else: logger.setLevel(logging.INFO) @@ -29,21 +29,21 @@ def init_logger(dunder_name, testing_mode) -> logging.Logger: # Feel free to uncomment and use the outputs as you like # Output full log - # fh = logging.FileHandler(os.path.join('log', 'trading.log') + # fh = logging.FileHandler(os.path.join('data', log', 'trading.log') # 
fh.setLevel(logging.DEBUG) # formatter = logging.Formatter(log_format) # fh.setFormatter(formatter) # logger.addHandler(fh) # # Output warning log - # fh = logging.FileHandler(os.path.join('log', 'trading.warning.log') + # fh = logging.FileHandler(os.path.join('data', log', 'trading.warning.log') # fh.setLevel(logging.WARNING) # formatter = logging.Formatter(log_format) # fh.setFormatter(formatter) # logger.addHandler(fh) # # Output error log - # fh = logging.FileHandler(os.path.join('log', 'trading.error.log') + # fh = logging.FileHandler(os.path.join('data', log', 'trading.error.log') # fh.setLevel(logging.ERROR) # formatter = logging.Formatter(log_format) # fh.setFormatter(formatter) diff --git a/lib/util/transform.py b/lib/util/transform.py new file mode 100644 index 0000000..8e14dab --- /dev/null +++ b/lib/util/transform.py @@ -0,0 +1,25 @@ +import numpy as np + + +def transform(df, columns=None, transform_fn=None): + transformed_df = df.copy().fillna(method='bfill') + + if columns is None: + transformed_df = transform_fn(transformed_df) + else: + for column in columns: + transformed_df[column] = transform_fn(transformed_df[column]) + + return transformed_df + + +def max_min_normalize(df, columns=None): + return transform(df, columns, lambda t_df: (t_df - t_df.min()) / (t_df.max() - t_df.min())) + + +def difference(df, columns=None): + return transform(df, columns, lambda t_df: t_df - t_df.shift(1)) + + +def log_and_difference(df, columns=None): + return transform(df, columns, lambda t_df: np.log(t_df) - np.log(t_df).shift(1)) diff --git a/log/.gitkeep b/log/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/optimize.py b/optimize.py index 190def6..e80585a 100644 --- a/optimize.py +++ b/optimize.py @@ -1,266 +1,13 @@ -''' - -A large part of the code in this file was sourced from the rl-baselines-zoo library on GitHub. -In particular, the library provides a great parameter optimization set for the PPO2 algorithm, -as well as a great example implementation using optuna. 
- -Source: https://github.com/araffin/rl-baselines-zoo/blob/master/utils/hyperparams_opt.py - -''' - -import optuna - -import os -import pandas as pd import numpy as np -from stable_baselines.common.policies import MlpLnLstmPolicy -from stable_baselines.common.vec_env import DummyVecEnv -from stable_baselines import PPO2 - -from pathlib import Path - -from env.BitcoinTradingEnv import BitcoinTradingEnv -from util.indicators import add_indicators -from util.log import init_logger - - -class Optimize: - def __init__(self): - self.reward_strategy = 'sortino' - self.input_data_file = os.path.join('data', 'coinbase_hourly.csv') - self.params_db_file = 'sqlite:///params.db' - - # number of parallel jobs - self.n_jobs = 4 - # maximum number of trials for finding the best hyperparams - self.n_trials = 1000 - # number of test episodes per trial - self.n_test_episodes = 3 - # number of evaluations for pruning per trial - self.n_evaluations = 4 - - self.train_df = None - self.test_df = None - - self.logger = init_logger(__name__, testing_mode=True) - - self.logger.debug("Initialized Optimizer") - - def prepare_data(self): - df = pd.read_csv(self.input_data_file) - df = df.drop(['Symbol'], axis=1) - df = df.sort_values(['Date']) - df = add_indicators(df.reset_index()) - - train_len = int(len(df) * 0.8) - - df = df[:train_len] - - validation_len = int(train_len * 0.8) - self.train_df = df[:validation_len] - self.test_df = df[validation_len:] - - def optimize_envs(self, trial): - return { - 'reward_func': self.reward_strategy, - 'forecast_len': int(trial.suggest_loguniform('forecast_len', 1, 200)), - 'confidence_interval': trial.suggest_uniform('confidence_interval', 0.7, 0.99), - } - - def optimize_ppo2(self, trial): - return { - 'n_steps': int(trial.suggest_loguniform('n_steps', 16, 2048)), - 'gamma': trial.suggest_loguniform('gamma', 0.9, 0.9999), - 'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1.), - 'ent_coef': trial.suggest_loguniform('ent_coef', 1e-8, 1e-1), - 'cliprange': trial.suggest_uniform('cliprange', 0.1, 0.4), - 'noptepochs': int(trial.suggest_loguniform('noptepochs', 1, 48)), - 'lam': trial.suggest_uniform('lam', 0.8, 1.) 
- } - - def optimize_agent(self, trial): - env_params = self.optimize_envs(trial) - train_env = DummyVecEnv( - [lambda: BitcoinTradingEnv(self.train_df, **env_params)]) - test_env = DummyVecEnv( - [lambda: BitcoinTradingEnv(self.test_df, **env_params)]) - - model_params = self.optimize_ppo2(trial) - model = PPO2(MlpLnLstmPolicy, train_env, verbose=0, nminibatches=1, - tensorboard_log=os.path.join('.', 'tensorboard'), **model_params) - - last_reward = -np.finfo(np.float16).max - evaluation_interval = int(len(self.train_df) / self.n_evaluations) - - for eval_idx in range(self.n_evaluations): - try: - model.learn(evaluation_interval) - except AssertionError: - raise - - rewards = [] - n_episodes, reward_sum = 0, 0.0 - - obs = test_env.reset() - while n_episodes < self.n_test_episodes: - action, _ = model.predict(obs) - obs, reward, done, _ = test_env.step(action) - reward_sum += reward - - if done: - rewards.append(reward_sum) - reward_sum = 0.0 - n_episodes += 1 - obs = test_env.reset() - - last_reward = np.mean(rewards) - trial.report(-1 * last_reward, eval_idx) - - if trial.should_prune(eval_idx): - raise optuna.structs.TrialPruned() - - return -1 * last_reward - - def log_parameters(self): - self.logger.debug("Reward Strategy: %s" % self.reward_strategy) - self.logger.debug("Input Data File: %s" % self.input_data_file) - self.logger.debug("Params DB File: %s" % self.params_db_file) - self.logger.debug("Parallel jobs: %d" % self.n_jobs) - self.logger.debug("Trials: %d" % self.n_trials) - self.logger.debug("Test episodes (per trial): %d" % - self.n_test_episodes) - self.logger.debug("Evaluations (per trial): %d" % self.n_evaluations) - self.logger.debug("Train DF Length: %d" % len(self.train_df)) - self.logger.debug("Test DF Length: %d" % len(self.test_df)) - self.logger.debug( - "Features: %s", self.train_df.columns.str.cat(sep=", ")) - - def optimize(self): - if not self.train_df: - self.logger.info("Running built-in data preparation") - self.prepare_data() - else: - self.logger.info("Using provided data (Length: %d)" % - len(self.train_df)) - - self.log_parameters() - - study_name = 'ppo2_' + self.reward_strategy - study = optuna.create_study( - study_name=study_name, storage=self.params_db_file, load_if_exists=True) - - try: - study.optimize(self.optimize_agent, - n_trials=self.n_trials, n_jobs=self.n_jobs) - except KeyboardInterrupt: - pass - - self.logger.info( - 'Number of finished trials: {}'.format(len(study.trials))) - - self.logger.info('Best trial:') - trial = study.best_trial - - self.logger.info('Value: {}'.format(trial.value)) - - self.logger.info('Params: ') - for key, value in trial.params.items(): - self.logger.info(' {}: {}'.format(key, value)) - - return study.trials_dataframe() - - def model_params(self, params): - return { - 'n_steps': int(params['n_steps']), - 'gamma': params['gamma'], - 'learning_rate': params['learning_rate'], - 'ent_coef': params['ent_coef'], - 'cliprange': params['cliprange'], - 'noptepochs': int(params['noptepochs']), - 'lam': params['lam'], - } - - def train(self): - if not self.train_df: - self.logger.info("Running built-in data preparation") - self.prepare_data() - else: - self.logger.info("Using provided data (Length: %d)" % - len(self.train_df)) - - study_name = 'ppo2_' + self.reward_strategy - - study = optuna.load_study( - study_name=study_name, storage=self.params_db_file) - params = study.best_trial.params - - train_env = DummyVecEnv([lambda: BitcoinTradingEnv( - self.train_df, reward_func=self.reward_strategy, 
forecast_len=int(params['forecast_len']), confidence_interval=params['confidence_interval'])]) - - test_env = DummyVecEnv([lambda: BitcoinTradingEnv( - self.test_df, reward_func=self.reward_strategy, forecast_len=int(params['forecast_len']), confidence_interval=params['confidence_interval'])]) - - model_params = self.model_params(params) - - model = PPO2(MlpLnLstmPolicy, train_env, verbose=0, nminibatches=1, - tensorboard_log=os.path.join('.', 'tensorboard'), **model_params) - - models_to_train = 1 - self.logger.info("Training {} model instances".format(models_to_train)) - - for idx in range(0, models_to_train): # Not sure why we are doing this, tbh - self.logger.info( - f'[{idx}] Training for: {len(self.train_df)} time steps') - - model.learn(total_timesteps=len(self.train_df)) - - obs = test_env.reset() - done, reward_sum = False, 0 - - while not done: - action, _states = model.predict(obs) - obs, reward, done, info = test_env.step(action) - reward_sum += reward - - self.logger.info( - f'[{idx}] Total reward: {reward_sum} ({self.reward_strategy})') - - model.save(os.path.join('.', 'agents', 'ppo2_' + - self.reward_strategy + '_' + str(idx) + '.pkl')) - - self.logger.info("Trained {} model instances".format(models_to_train)) - - def test(self, model_instance: 0): - - study_name = 'ppo2_' + self.reward_strategy - study = optuna.load_study( - study_name=study_name, storage=self.params_db_file) - params = study.best_trial.params - - test_env = DummyVecEnv([lambda: BitcoinTradingEnv( - self.test_df, reward_func=self.reward_strategy, forecast_len=int(params['forecast_len']), confidence_interval=params['confidence_interval'])]) - - model = PPO2.load(os.path.join('.', 'agents', 'ppo2_' + - self.reward_strategy + '_' + str(model_instance) + '.pkl'), env=test_env) - - obs, done = test_env.reset(), False - while not done: - action, _states = model.predict(obs) - obs, reward, done, info = test_env.step(action) - - test_env.render(mode="human") +from lib.RLTrader import RLTrader +np.warnings.filterwarnings('ignore') if __name__ == '__main__': - optimizer = Optimize() - test_mode = "FAST" # I'm hard-coding this for now - if test_mode == "FAST": - optimizer.input_data_file = os.path.join('data', 'coinbase_daily.csv') - optimizer.n_jobs = 1 - optimizer.n_trials = 1 - optimizer.n_test_episodes = 1 - optimizer.n_evaluations = 1 - # optimizer.optimize() - optimizer.train() - # optimizer.test() + trader = RLTrader() + + trader.optimize(n_trials=1) + trader.train(n_epochs=1, + test_trained_model=True, + render_trained_model=True) diff --git a/test.py b/test.py deleted file mode 100644 index b7711c8..0000000 --- a/test.py +++ /dev/null @@ -1,55 +0,0 @@ -import os -import gym -import optuna -import pandas as pd - -from stable_baselines.common.policies import MlpLnLstmPolicy -from stable_baselines.common.vec_env import SubprocVecEnv, DummyVecEnv -from stable_baselines import A2C, ACKTR, PPO2 - -from env.BitcoinTradingEnv import BitcoinTradingEnv -from util.indicators import add_indicators - -curr_idx = 0 -reward_strategy = 'sortino' -input_data_file = os.path.join('data', 'coinbase_hourly.csv') -params_db_file = 'sqlite:///params.db' - -study_name = 'ppo2_' + reward_strategy -study = optuna.load_study(study_name=study_name, storage=params_db_file) -params = study.best_trial.params - -print("Testing PPO2 agent with params:", params) -print("Best trial:", -1 * study.best_trial.value) - -df = pd.read_csv(input_data_file) -df = df.drop(['Symbol'], axis=1) -df = df.sort_values(['Date']) -df = 
add_indicators(df.reset_index()) - -test_len = int(len(df) * 0.2) -train_len = int(len(df)) - test_len - -test_df = df[train_len:] - -test_env = DummyVecEnv([lambda: BitcoinTradingEnv( - test_df, reward_func=reward_strategy, forecast_len=int(params['forecast_len']), confidence_interval=params['confidence_interval'])]) - -model_params = { - 'n_steps': int(params['n_steps']), - 'gamma': params['gamma'], - 'learning_rate': params['learning_rate'], - 'ent_coef': params['ent_coef'], - 'cliprange': params['cliprange'], - 'noptepochs': int(params['noptepochs']), - 'lam': params['lam'], -} - -model = PPO2.load(os.path.join('.', 'agents', 'ppo2_' + reward_strategy + '_' + str(curr_idx) + '.pkl'), env=test_env) - -obs, done = test_env.reset(), False -while not done: - action, _states = model.predict(obs) - obs, reward, done, info = test_env.step(action) - - test_env.render(mode="human") diff --git a/train.py b/train.py deleted file mode 100644 index 2e076d9..0000000 --- a/train.py +++ /dev/null @@ -1,78 +0,0 @@ -import os -import gym -import optuna -import pandas as pd -import numpy as np - -from stable_baselines.common.policies import MlpLnLstmPolicy -from stable_baselines.common.vec_env import SubprocVecEnv, DummyVecEnv -from stable_baselines import A2C, ACKTR, PPO2 - -from pathlib import Path - -from env.BitcoinTradingEnv import BitcoinTradingEnv -from util.indicators import add_indicators - -curr_idx = -1 -reward_strategy = 'sortino' -input_data_file = os.path.join('data', 'coinbase_hourly.csv') -params_db_file = 'sqlite:///params.db' - -study_name = 'ppo2_' + reward_strategy -study = optuna.load_study(study_name=study_name, storage=params_db_file) -params = study.best_trial.params - -print("Training PPO2 agent with params:", params) -print("Best trial reward:", -1 * study.best_trial.value) - -df = pd.read_csv(input_data_file) -df = df.drop(['Symbol'], axis=1) -df = df.sort_values(['Date']) -df = add_indicators(df.reset_index()) - -test_len = int(len(df) * 0.2) -train_len = int(len(df)) - test_len - -train_df = df[:train_len] -test_df = df[train_len:] - -train_env = DummyVecEnv([lambda: BitcoinTradingEnv( - train_df, reward_func=reward_strategy, forecast_len=int(params['forecast_len']), confidence_interval=params['confidence_interval'])]) - -test_env = DummyVecEnv([lambda: BitcoinTradingEnv( - test_df, reward_func=reward_strategy, forecast_len=int(params['forecast_len']), confidence_interval=params['confidence_interval'])]) - -model_params = { - 'n_steps': int(params['n_steps']), - 'gamma': params['gamma'], - 'learning_rate': params['learning_rate'], - 'ent_coef': params['ent_coef'], - 'cliprange': params['cliprange'], - 'noptepochs': int(params['noptepochs']), - 'lam': params['lam'], -} - -if curr_idx == -1: - model = PPO2(MlpLnLstmPolicy, train_env, verbose=0, nminibatches=1, - tensorboard_log=os.path.join('.', 'tensorboard'), **model_params) -else: - model = PPO2.load(os.path.join('.', 'agents', 'ppo2_' + - reward_strategy + '_' + str(curr_idx) + '.pkl'), env=train_env) - -for idx in range(curr_idx + 1, 10): - print('[', idx, '] Training for: ', train_len, ' time steps') - - model.learn(total_timesteps=train_len) - - obs = test_env.reset() - done, reward_sum = False, 0 - - while not done: - action, _states = model.predict(obs) - obs, reward, done, info = test_env.step(action) - reward_sum += reward - - print('[', idx, '] Total reward: ', - reward_sum, ' (' + reward_strategy + ')') - model.save(os.path.join('.', 'agents', 'ppo2_' + - reward_strategy + '_' + str(idx) + '.pkl')) diff --git 
a/util/indicators.py b/util/indicators.py deleted file mode 100644 index 0a7a1e8..0000000 --- a/util/indicators.py +++ /dev/null @@ -1,78 +0,0 @@ -import ta - - -def add_indicators(df): - df['RSI'] = ta.rsi(df["Close"]) - df['MFI'] = ta.money_flow_index( - df["High"], df["Low"], df["Close"], df["Volume BTC"]) - df['TSI'] = ta.tsi(df["Close"]) - df['UO'] = ta.uo(df["High"], df["Low"], df["Close"]) - df['AO'] = ta.ao(df["High"], df["Low"]) - - df['MACD_diff'] = ta.macd_diff(df["Close"]) - df['Vortex_pos'] = ta.vortex_indicator_pos( - df["High"], df["Low"], df["Close"]) - df['Vortex_neg'] = ta.vortex_indicator_neg( - df["High"], df["Low"], df["Close"]) - df['Vortex_diff'] = abs( - df['Vortex_pos'] - - df['Vortex_neg']) - df['Trix'] = ta.trix(df["Close"]) - df['Mass_index'] = ta.mass_index(df["High"], df["Low"]) - df['CCI'] = ta.cci(df["High"], df["Low"], df["Close"]) - df['DPO'] = ta.dpo(df["Close"]) - df['KST'] = ta.kst(df["Close"]) - df['KST_sig'] = ta.kst_sig(df["Close"]) - df['KST_diff'] = ( - df['KST'] - - df['KST_sig']) - df['Aroon_up'] = ta.aroon_up(df["Close"]) - df['Aroon_down'] = ta.aroon_down(df["Close"]) - df['Aroon_ind'] = ( - df['Aroon_up'] - - df['Aroon_down'] - ) - - df['BBH'] = ta.bollinger_hband(df["Close"]) - df['BBL'] = ta.bollinger_lband(df["Close"]) - df['BBM'] = ta.bollinger_mavg(df["Close"]) - df['BBHI'] = ta.bollinger_hband_indicator( - df["Close"]) - df['BBLI'] = ta.bollinger_lband_indicator( - df["Close"]) - df['KCHI'] = ta.keltner_channel_hband_indicator(df["High"], - df["Low"], - df["Close"]) - df['KCLI'] = ta.keltner_channel_lband_indicator(df["High"], - df["Low"], - df["Close"]) - df['DCHI'] = ta.donchian_channel_hband_indicator(df["Close"]) - df['DCLI'] = ta.donchian_channel_lband_indicator(df["Close"]) - - df['ADI'] = ta.acc_dist_index(df["High"], - df["Low"], - df["Close"], - df["Volume BTC"]) - df['OBV'] = ta.on_balance_volume(df["Close"], - df["Volume BTC"]) - df['CMF'] = ta.chaikin_money_flow(df["High"], - df["Low"], - df["Close"], - df["Volume BTC"]) - df['FI'] = ta.force_index(df["Close"], - df["Volume BTC"]) - df['EM'] = ta.ease_of_movement(df["High"], - df["Low"], - df["Close"], - df["Volume BTC"]) - df['VPT'] = ta.volume_price_trend(df["Close"], - df["Volume BTC"]) - df['NVI'] = ta.negative_volume_index(df["Close"], - df["Volume BTC"]) - - df['DR'] = ta.daily_return(df["Close"]) - df['DLR'] = ta.daily_log_return(df["Close"]) - - df.fillna(method='bfill', inplace=True) - - return df diff --git a/util/transform.py b/util/transform.py deleted file mode 100644 index 69b1401..0000000 --- a/util/transform.py +++ /dev/null @@ -1,37 +0,0 @@ -import tensorflow as tf - - -def transform(df, transform_fn, columns=None): - transformed_df = df.copy() - - if columns is None: - transformed_df = transform_fn(transformed_df) - - for column in columns: - transformed_df[column] = transform_fn(transformed_df[column]) - - transformed_df = transformed_df.fillna(method='bfill') - - return transformed_df - - -def max_min_normalize(df, columns): - def transform_fn(transform_df): - return (transform_df - transform_df.min()) / (transform_df.max() - transform_df.min()) - - return transform(df, transform_fn, columns) - - -def difference(df, columns): - def transform_fn(transform_df): - return transform_df - transform_df.shift(1) - - return transform(df, transform_fn, columns) - - -def log_and_difference(df, columns): - def transform_fn(transform_df): - transform_df.loc[transform_df == 0] = 1E-10 - return tf.log(transform_df) - tf.log(transform_df.shift(1)) - - return transform(df, 
transform_fn, columns)
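The deleted `util/transform.py` above relied on TensorFlow ops, while its replacement, `lib/util/transform.py` earlier in this patch, is plain NumPy/pandas. A quick sketch of the new helpers in use (the tiny DataFrame below is made up purely for illustration):

```python
# Sketch of the NumPy/pandas helpers added in lib/util/transform.py.
# The toy price/volume frame below is invented for illustration only.
import pandas as pd

from lib.util.transform import log_and_difference, max_min_normalize

df = pd.DataFrame({'Close': [100.0, 101.0, 99.5, 102.0],
                   'Volume BTC': [10.0, 12.0, 9.0, 11.0]})

# Make the series stationary (log differences), then scale everything to [0, 1].
stationary = log_and_difference(df, columns=['Close', 'Volume BTC'])
scaled = max_min_normalize(stationary)

print(scaled)
```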