diff --git a/lib/RLTrader.py b/lib/RLTrader.py
new file mode 100644
index 0000000..170671d
--- /dev/null
+++ b/lib/RLTrader.py
@@ -0,0 +1,252 @@
+import optuna
+import pandas as pd
+import numpy as np
+
+from os import path
+from stable_baselines.common.base_class import BaseRLModel
+from stable_baselines.common.policies import BasePolicy, MlpLnLstmPolicy
+from stable_baselines.common.vec_env import DummyVecEnv
+from stable_baselines import PPO2
+
+from lib.env.BitcoinTradingEnv import BitcoinTradingEnv
+from lib.util.indicators import add_indicators
+from lib.util.log import init_logger
+
+
+class RLTrader:
+    feature_df = None
+
+    def __init__(self, model: BaseRLModel = PPO2, policy: BasePolicy = MlpLnLstmPolicy, **kwargs):
+        self.logger = init_logger(
+            __name__, show_debug=kwargs.get('show_debug', True))
+
+        self.model = model
+        self.policy = policy
+        self.reward_strategy = kwargs.get('reward_strategy', 'sortino')
+        self.tensorboard_path = kwargs.get(
+            'tensorboard_path', path.join('data', 'tensorboard'))
+        self.input_data_path = kwargs.get('input_data_path', None)
+        self.params_db_path = kwargs.get(
+            'params_db_path', 'sqlite:///data/params.db')
+
+        self.model_verbose = kwargs.get('model_verbose', 1)
+        self.nminibatches = kwargs.get('nminibatches', 1)
+
+        self.initialize_data(kwargs)
+
+        self.logger.debug(f'Reward Strategy: {self.reward_strategy}')
+
+    def initialize_data(self, kwargs):
+        if self.input_data_path is None:
+            self.input_data_path = path.join(
+                'data', 'input', 'coinbase_hourly.csv')
+
+        self.feature_df = pd.read_csv(self.input_data_path)
+        self.feature_df = self.feature_df.drop(['Symbol'], axis=1)
+        self.feature_df['Date'] = pd.to_datetime(
+            self.feature_df['Date'], format='%Y-%m-%d %I-%p')
+        self.feature_df['Date'] = self.feature_df['Date'].astype(str)
+        self.feature_df = self.feature_df.sort_values(['Date'])
+        self.feature_df = add_indicators(self.feature_df.reset_index())
+
+        self.validation_set_percentage = kwargs.get(
+            'validation_set_percentage', 0.8)
+        self.test_set_percentage = kwargs.get('test_set_percentage', 0.8)
+
+        self.logger.debug(
+            f'Initialized Features: {self.feature_df.columns.str.cat(sep=", ")}')
+
+    def initialize_optuna(self, should_create: bool = False):
+        self.study_name = f'{self.model.__name__}__{self.policy.__name__}__{self.reward_strategy}'
+
+        if should_create:
+            self.optuna_study = optuna.create_study(
+                study_name=self.study_name, storage=self.params_db_path, load_if_exists=True)
+        else:
+            self.optuna_study = optuna.load_study(
+                study_name=self.study_name, storage=self.params_db_path)
+
+        self.logger.debug('Initialized Optuna:')
+
+        try:
+            self.logger.debug(
+                f'Best reward in ({len(self.optuna_study.trials)}) trials: {-self.optuna_study.best_value}')
+        except ValueError:
+            self.logger.debug('No trials have been finished yet.')
+
+    def get_env_params(self):
+        params = self.optuna_study.best_trial.params
+        return {
+            'reward_strategy': self.reward_strategy,
+            'forecast_steps': int(params['forecast_steps']),
+            'forecast_alpha': params['forecast_alpha'],
+        }
+
+    def get_model_params(self):
+        params = self.optuna_study.best_trial.params
+        return {
+            'n_steps': int(params['n_steps']),
+            'gamma': params['gamma'],
+            'learning_rate': params['learning_rate'],
+            'ent_coef': params['ent_coef'],
+            'cliprange': params['cliprange'],
+            'noptepochs': int(params['noptepochs']),
+            'lam': params['lam'],
+        }
+
+    def optimize_env_params(self, trial):
+        return {
+            'forecast_steps': int(trial.suggest_loguniform('forecast_steps', 1, 200)),
+
'forecast_alpha': trial.suggest_uniform('forecast_alpha', 0.001, 0.30), + } + + def optimize_agent_params(self, trial): + if self.model != PPO2: + return {'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1.)} + + return { + 'n_steps': int(trial.suggest_loguniform('n_steps', 16, 2048)), + 'gamma': trial.suggest_loguniform('gamma', 0.9, 0.9999), + 'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1.), + 'ent_coef': trial.suggest_loguniform('ent_coef', 1e-8, 1e-1), + 'cliprange': trial.suggest_uniform('cliprange', 0.1, 0.4), + 'noptepochs': int(trial.suggest_loguniform('noptepochs', 1, 48)), + 'lam': trial.suggest_uniform('lam', 0.8, 1.) + } + + def optimize_params(self, trial, n_prune_evals_per_trial: int = 4, n_tests_per_eval: int = 1, speedup_factor: int = 10): + env_params = self.optimize_env_params(trial) + + full_train_len = self.test_set_percentage * len(self.feature_df) + optimize_train_len = int( + self.validation_set_percentage * full_train_len) + train_len = int(optimize_train_len / speedup_factor) + train_start = optimize_train_len - train_len + + train_df = self.feature_df[train_start:optimize_train_len] + validation_df = self.feature_df[optimize_train_len:] + + train_env = DummyVecEnv( + [lambda: BitcoinTradingEnv(train_df, **env_params)]) + validation_env = DummyVecEnv( + [lambda: BitcoinTradingEnv(validation_df, **env_params)]) + + model_params = self.optimize_agent_params(trial) + model = self.model(self.policy, train_env, verbose=self.model_verbose, nminibatches=self.nminibatches, + tensorboard_log=self.tensorboard_path, **model_params) + + last_reward = -np.finfo(np.float16).max + evaluation_interval = int( + train_len / n_prune_evals_per_trial) + + for eval_idx in range(n_prune_evals_per_trial): + try: + model.learn(evaluation_interval) + except AssertionError: + raise + + rewards = [] + n_episodes, reward_sum = 0, 0.0 + + obs = validation_env.reset() + while n_episodes < n_tests_per_eval: + action, _ = model.predict(obs) + obs, reward, done, _ = validation_env.step(action) + reward_sum += reward + + if done: + rewards.append(reward_sum) + reward_sum = 0.0 + n_episodes += 1 + obs = validation_env.reset() + + last_reward = np.mean(rewards) + trial.report(-1 * last_reward, eval_idx) + + if trial.should_prune(eval_idx): + raise optuna.structs.TrialPruned() + + return -1 * last_reward + + def optimize(self, n_trials: int = 10, n_parallel_jobs: int = 4, *optimize_params): + self.initialize_optuna(should_create=True) + + try: + self.optuna_study.optimize( + self.optimize_params, n_trials=n_trials, n_jobs=n_parallel_jobs, *optimize_params) + except KeyboardInterrupt: + pass + + self.logger.info(f'Finished trials: {len(self.optuna_study.trials)}') + + self.logger.info(f'Best trial: {self.optuna_study.best_trial.value}') + + self.logger.info('Params: ') + for key, value in self.optuna_study.best_trial.params.items(): + self.logger.info(f' {key}: {value}') + + return self.optuna_study.trials_dataframe() + + def train(self, n_epochs: int = 1, iters_per_epoch: int = 1, test_trained_model: bool = False, render_trained_model: bool = False): + self.initialize_optuna() + + env_params = self.get_env_params() + + train_len = int(self.test_set_percentage * len(self.feature_df)) + train_df = self.feature_df[:train_len] + + train_env = DummyVecEnv( + [lambda: BitcoinTradingEnv(train_df, **env_params)]) + + model_params = self.get_model_params() + + model = self.model(self.policy, train_env, verbose=self.model_verbose, nminibatches=self.nminibatches, + 
tensorboard_log=self.tensorboard_path, **model_params) + + self.logger.info(f'Training for {n_epochs} epochs') + + n_timesteps = len(train_df) * iters_per_epoch + + for model_epoch in range(0, n_epochs): + self.logger.info( + f'[{model_epoch}] Training for: {n_timesteps} time steps') + + model.learn(total_timesteps=n_timesteps) + + model_path = path.join( + 'data', 'agents', f'{self.study_name}__{model_epoch}.pkl') + model.save(model_path) + + if test_trained_model: + self.test(model_epoch, should_render=render_trained_model) + + self.logger.info(f'Trained {n_epochs} models') + + def test(self, model_epoch: int = 0, should_render: bool = True): + env_params = self.get_env_params() + + train_len = int(self.test_set_percentage * len(self.feature_df)) + test_df = self.feature_df[train_len:] + + test_env = DummyVecEnv( + [lambda: BitcoinTradingEnv(test_df, **env_params)]) + + model_path = path.join( + 'data', 'agents', f'{self.study_name}__{model_epoch}.pkl') + model = self.model.load(model_path, env=test_env) + + self.logger.info( + f'Testing model ({self.study_name}__{model_epoch})') + + obs, done, reward_sum = test_env.reset(), False, 0 + while not done: + action, _states = model.predict(obs) + obs, reward, done, _ = test_env.step(action) + + reward_sum += reward + + if should_render: + test_env.render(mode='human') + + self.logger.info( + f'Finished testing model ({self.study_name}__{model_epoch}): ${"{:.2f}".format(reward_sum)}') diff --git a/env/__init__.py b/lib/__init__.py similarity index 100% rename from env/__init__.py rename to lib/__init__.py diff --git a/lib/__init__.pyc b/lib/__init__.pyc new file mode 100644 index 0000000..2581be6 Binary files /dev/null and b/lib/__init__.pyc differ diff --git a/env/BitcoinTradingEnv.py b/lib/env/BitcoinTradingEnv.py similarity index 53% rename from env/BitcoinTradingEnv.py rename to lib/env/BitcoinTradingEnv.py index 0f11c7e..1e1ea47 100644 --- a/env/BitcoinTradingEnv.py +++ b/lib/env/BitcoinTradingEnv.py @@ -1,19 +1,14 @@ import gym import pandas as pd import numpy as np -import tensorflow as tf from gym import spaces from statsmodels.tsa.statespace.sarimax import SARIMAX -from empyrical import sortino_ratio, calmar_ratio, omega_ratio +from empyrical import sortino_ratio, sharpe_ratio, omega_ratio -from render.BitcoinTradingGraph import BitcoinTradingGraph -from util.transform import log_and_difference, max_min_normalize -from util.indicators import add_indicators - - -# Delete this if debugging -np.warnings.filterwarnings('ignore') +from lib.env.render.BitcoinTradingGraph import BitcoinTradingGraph +from lib.util.transform import log_and_difference, max_min_normalize +from lib.util.indicators import add_indicators class BitcoinTradingEnv(gym.Env): @@ -21,59 +16,59 @@ class BitcoinTradingEnv(gym.Env): metadata = {'render.modes': ['human', 'system', 'none']} viewer = None - def __init__(self, df, initial_balance=10000, commission=0.0025, reward_func='sortino', **kwargs): + def __init__(self, df, initial_balance=10000, commission=0.0025, reward_strategy='sortino', **kwargs): super(BitcoinTradingEnv, self).__init__() self.initial_balance = initial_balance self.commission = commission - self.reward_func = reward_func + self.reward_strategy = reward_strategy self.df = df.fillna(method='bfill').reset_index() - self.stationary_df = log_and_difference( - self.df, ['Open', 'High', 'Low', 'Close', 'Volume BTC', 'Volume USD']) + self.stationary_df = self.df.copy() + self.stationary_df = self.stationary_df[self.stationary_df.columns.difference([ + 
'index', 'Date'])] + self.stationary_df = log_and_difference(self.stationary_df, + ['Open', 'High', 'Low', 'Close', 'Volume BTC', 'Volume USD']) self.benchmarks = kwargs.get('benchmarks', []) - self.forecast_len = kwargs.get('forecast_len', 10) - self.confidence_interval = kwargs.get('confidence_interval', 0.95) - self.obs_shape = (1, 5 + len(self.df.columns) - - 2 + (self.forecast_len * 3)) + self.forecast_steps = kwargs.get('forecast_steps', 2) + self.forecast_alpha = kwargs.get('forecast_alpha', 0.05) + + self.action_space = spaces.Discrete(3) - # Actions of the format Buy 1/4, Sell 3/4, Hold (amount ignored), etc. - self.action_space = spaces.Discrete(12) + n_features = 5 + len(self.df.columns) - 2 + n_prediction_features = (self.forecast_steps * 3) + self.obs_shape = (1, n_features + n_prediction_features) - # Observes the price action, indicators, account action, price forecasts self.observation_space = spaces.Box( low=0, high=1, shape=self.obs_shape, dtype=np.float16) def _next_observation(self): - features = self.stationary_df[self.stationary_df.columns.difference([ - 'index', 'Date'])] + current_idx = self.current_step + self.forecast_steps + 1 - scaled = features[:self.current_step + self.forecast_len + 1].values - scaled[np.bitwise_not(np.isfinite(scaled))] = 0 + scaled = self.stationary_df[:current_idx].values - scaled = tf.contrib.eager.py_func( - func=max_min_normalize, inp=scaled, Tout=tf.float16) - scaled = pd.DataFrame(scaled, columns=features.columns) + scaled = pd.DataFrame(scaled, columns=self.stationary_df.columns) + scaled = max_min_normalize(scaled) obs = scaled.values[-1] - past_df = self.stationary_df['Close'][: - self.current_step + self.forecast_len + 1] - forecast_model = SARIMAX( - past_df.values, enforce_stationarity=False, simple_differencing=True) + forecast_model = SARIMAX(self.stationary_df['Close'][:current_idx].values, + enforce_stationarity=False, + simple_differencing=True) + model_fit = forecast_model.fit(method='bfgs', disp=False) - forecast = model_fit.get_forecast( - steps=self.forecast_len, alpha=(1 - self.confidence_interval)) + + forecast = model_fit.get_forecast(steps=self.forecast_steps, + alpha=self.forecast_alpha) obs = np.insert(obs, len(obs), forecast.predicted_mean, axis=0) obs = np.insert(obs, len(obs), forecast.conf_int().flatten(), axis=0) - scaled_history = tf.contrib.eager.py_func( - func=max_min_normalize, inp=self.account_history.astype('float32'), Tout=tf.float16) + scaled_history = max_min_normalize(self.account_history) - obs = np.insert(obs, len(obs), scaled_history[:, -1], axis=0) + obs = np.insert(obs, len(obs), scaled_history.values[-1], axis=0) obs = np.reshape(obs.astype('float16'), self.obs_shape) obs[np.bitwise_not(np.isfinite(obs))] = 0 @@ -81,64 +76,61 @@ def _next_observation(self): return obs def _current_price(self): - return self.df['Close'].values[self.current_step + self.forecast_len] + 0.01 + return self.df['Close'].values[self.current_step + self.forecast_steps] def _take_action(self, action): current_price = self._current_price() - action_type = int(action / 4) - amount = 1 / (action % 4 + 1) btc_bought = 0 btc_sold = 0 - cost = 0 - sales = 0 + cost_of_btc = 0 + revenue_from_sold = 0 - if action_type == 0: + if action == 0: price = current_price * (1 + self.commission) - btc_bought = min(self.balance * amount / - price, self.balance / price) - cost = btc_bought * price + btc_bought = self.balance / price + cost_of_btc = self.balance self.btc_held += btc_bought - self.balance -= cost - elif action_type == 1: + 
self.balance -= cost_of_btc
+    elif action == 1:
             price = current_price * (1 - self.commission)
-            btc_sold = self.btc_held * amount
-            sales = btc_sold * price
+            btc_sold = self.btc_held
+            revenue_from_sold = btc_sold * price

             self.btc_held -= btc_sold
-            self.balance += sales
+            self.balance += revenue_from_sold

         if btc_sold > 0 or btc_bought > 0:
             self.trades.append({'step': self.current_step,
-                                'amount': btc_sold if btc_sold > 0 else btc_bought, 'total': sales if btc_sold > 0 else cost,
+                                'amount': btc_sold if btc_sold > 0 else btc_bought, 'total': revenue_from_sold if btc_sold > 0 else cost_of_btc,
                                 'type': 'sell' if btc_sold > 0 else 'buy'})

         self.net_worths.append(
             self.balance + self.btc_held * current_price)

-        self.account_history = np.append(self.account_history, [
-            [self.balance],
-            [btc_bought],
-            [cost],
-            [btc_sold],
-            [sales]
-        ], axis=1)
+        self.account_history = self.account_history.append({
+            'balance': self.balance,
+            'btc_bought': btc_bought,
+            'cost_of_btc': cost_of_btc,
+            'btc_sold': btc_sold,
+            'revenue_from_sold': revenue_from_sold,
+        }, ignore_index=True)

     def _reward(self):
-        length = min(self.current_step, self.forecast_len)
+        length = min(self.current_step, self.forecast_steps)

         returns = np.diff(self.net_worths[-length:])

         if np.count_nonzero(returns) < 1:
             return 0

-        if self.reward_func == 'sortino':
+        if self.reward_strategy == 'sortino':
             reward = sortino_ratio(
                 returns, annualization=365*24)
-        elif self.reward_func == 'calmar':
-            reward = calmar_ratio(
+        elif self.reward_strategy == 'sharpe':
+            reward = sharpe_ratio(
                 returns, annualization=365*24)
-        elif self.reward_func == 'omega':
+        elif self.reward_strategy == 'omega':
             reward = omega_ratio(
                 returns, annualization=365*24)
         else:
@@ -147,7 +139,7 @@ def _reward(self):
         return reward if np.isfinite(reward) else 0

     def _done(self):
-        return self.net_worths[-1] < self.initial_balance / 10 or self.current_step == len(self.df) - self.forecast_len - 1
+        return self.net_worths[-1] < self.initial_balance / 10 or self.current_step == len(self.df) - self.forecast_steps - 1

     def reset(self):
         self.balance = self.initial_balance
@@ -155,13 +147,13 @@ def reset(self):
         self.btc_held = 0
         self.current_step = 0

-        self.account_history = np.array([
-            [self.balance],
-            [0],
-            [0],
-            [0],
-            [0]
-        ])
+        self.account_history = pd.DataFrame([{
+            'balance': self.balance,
+            'btc_bought': 0,
+            'cost_of_btc': 0,
+            'btc_sold': 0,
+            'revenue_from_sold': 0,
+        }])
         self.trades = []

         return self._next_observation()
diff --git a/render/__init__.py b/lib/env/__init__.py
similarity index 100%
rename from render/__init__.py
rename to lib/env/__init__.py
diff --git a/render/BitcoinTradingGraph.py b/lib/env/render/BitcoinTradingGraph.py
similarity index 97%
rename from render/BitcoinTradingGraph.py
rename to lib/env/render/BitcoinTradingGraph.py
index cf28be7..ab0ad9a 100644
--- a/render/BitcoinTradingGraph.py
+++ b/lib/env/render/BitcoinTradingGraph.py
@@ -19,7 +19,7 @@ class BitcoinTradingGraph:
     def __init__(self, df):
         self.df = df
         self.df['Time'] = self.df['Date'].apply(
-            lambda x: datetime.strptime(x, '%Y-%m-%d %I-%p'))
+            lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S'))
         self.df = self.df.sort_values('Time')

         # Create a figure on screen and set the title
@@ -74,7 +74,8 @@ def _render_net_worth(self, step_range, dates, current_step, net_worths, benchma
             min(net_worths) / 1.25, max(net_worths) * 1.25)

     def _render_benchmarks(self, step_range, dates, benchmarks):
-        colors = ['orange', 'cyan', 'purple', 'blue', 'magenta', 'yellow', 'black', 'red', 'green']
+        colors = ['orange', 'cyan',
'purple', 'blue', + 'magenta', 'yellow', 'black', 'red', 'green'] for i, benchmark in enumerate(benchmarks): self.net_worth_ax.plot( diff --git a/util/__init__.py b/lib/env/render/__init__.py similarity index 100% rename from util/__init__.py rename to lib/env/render/__init__.py diff --git a/agents/.gitkeep b/lib/util/__init__.py similarity index 100% rename from agents/.gitkeep rename to lib/util/__init__.py diff --git a/util/benchmarks.py b/lib/util/benchmarks.py similarity index 100% rename from util/benchmarks.py rename to lib/util/benchmarks.py diff --git a/lib/util/indicators.py b/lib/util/indicators.py new file mode 100644 index 0000000..1ed7cc4 --- /dev/null +++ b/lib/util/indicators.py @@ -0,0 +1,78 @@ +import ta + + +def add_indicators(df): + df['RSI'] = ta.rsi(df["Close"]) + # df['MFI'] = ta.money_flow_index( + # df["High"], df["Low"], df["Close"], df["Volume BTC"]) + # df['TSI'] = ta.tsi(df["Close"]) + # df['UO'] = ta.uo(df["High"], df["Low"], df["Close"]) + # df['AO'] = ta.ao(df["High"], df["Low"]) + + df['MACD_diff'] = ta.macd_diff(df["Close"]) + # df['Vortex_pos'] = ta.vortex_indicator_pos( + # df["High"], df["Low"], df["Close"]) + # df['Vortex_neg'] = ta.vortex_indicator_neg( + # df["High"], df["Low"], df["Close"]) + # df['Vortex_diff'] = abs( + # df['Vortex_pos'] - + # df['Vortex_neg']) + # df['Trix'] = ta.trix(df["Close"]) + # df['Mass_index'] = ta.mass_index(df["High"], df["Low"]) + # df['CCI'] = ta.cci(df["High"], df["Low"], df["Close"]) + # df['DPO'] = ta.dpo(df["Close"]) + # df['KST'] = ta.kst(df["Close"]) + # df['KST_sig'] = ta.kst_sig(df["Close"]) + # df['KST_diff'] = ( + # df['KST'] - + # df['KST_sig']) + # df['Aroon_up'] = ta.aroon_up(df["Close"]) + # df['Aroon_down'] = ta.aroon_down(df["Close"]) + # df['Aroon_ind'] = ( + # df['Aroon_up'] - + # df['Aroon_down'] + # ) + + df['BBH'] = ta.bollinger_hband(df["Close"]) + df['BBL'] = ta.bollinger_lband(df["Close"]) + df['BBM'] = ta.bollinger_mavg(df["Close"]) + df['BBHI'] = ta.bollinger_hband_indicator( + df["Close"]) + df['BBLI'] = ta.bollinger_lband_indicator( + df["Close"]) + # df['KCHI'] = ta.keltner_channel_hband_indicator(df["High"], + # df["Low"], + # df["Close"]) + # df['KCLI'] = ta.keltner_channel_lband_indicator(df["High"], + # df["Low"], + # df["Close"]) + # df['DCHI'] = ta.donchian_channel_hband_indicator(df["Close"]) + # df['DCLI'] = ta.donchian_channel_lband_indicator(df["Close"]) + + df['ADI'] = ta.acc_dist_index(df["High"], + df["Low"], + df["Close"], + df["Volume BTC"]) + # df['OBV'] = ta.on_balance_volume(df["Close"], + # df["Volume BTC"]) + # df['CMF'] = ta.chaikin_money_flow(df["High"], + # df["Low"], + # df["Close"], + # df["Volume BTC"]) + # df['FI'] = ta.force_index(df["Close"], + # df["Volume BTC"]) + # df['EM'] = ta.ease_of_movement(df["High"], + # df["Low"], + # df["Close"], + # df["Volume BTC"]) + # df['VPT'] = ta.volume_price_trend(df["Close"], + # df["Volume BTC"]) + # df['NVI'] = ta.negative_volume_index(df["Close"], + # df["Volume BTC"]) + + df['DR'] = ta.daily_return(df["Close"]) + # df['DLR'] = ta.daily_log_return(df["Close"]) + + df.fillna(method='bfill', inplace=True) + + return df diff --git a/util/log.py b/lib/util/log.py similarity index 78% rename from util/log.py rename to lib/util/log.py index 526b946..e9a6e55 100644 --- a/util/log.py +++ b/lib/util/log.py @@ -3,7 +3,7 @@ import colorlog -def init_logger(dunder_name, testing_mode) -> logging.Logger: +def init_logger(dunder_name, show_debug=False) -> logging.Logger: log_format = ( '%(asctime)s - ' '%(name)s - ' @@ -20,7 +20,7 @@ 
def init_logger(dunder_name, testing_mode) -> logging.Logger: colorlog.basicConfig(format=colorlog_format) logger = logging.getLogger(dunder_name) - if testing_mode: + if show_debug: logger.setLevel(logging.DEBUG) else: logger.setLevel(logging.INFO) @@ -29,21 +29,21 @@ def init_logger(dunder_name, testing_mode) -> logging.Logger: # Feel free to uncomment and use the outputs as you like # Output full log - # fh = logging.FileHandler(os.path.join('log', 'trading.log') + # fh = logging.FileHandler(os.path.join('data', log', 'trading.log') # fh.setLevel(logging.DEBUG) # formatter = logging.Formatter(log_format) # fh.setFormatter(formatter) # logger.addHandler(fh) # # Output warning log - # fh = logging.FileHandler(os.path.join('log', 'trading.warning.log') + # fh = logging.FileHandler(os.path.join('data', log', 'trading.warning.log') # fh.setLevel(logging.WARNING) # formatter = logging.Formatter(log_format) # fh.setFormatter(formatter) # logger.addHandler(fh) # # Output error log - # fh = logging.FileHandler(os.path.join('log', 'trading.error.log') + # fh = logging.FileHandler(os.path.join('data', log', 'trading.error.log') # fh.setLevel(logging.ERROR) # formatter = logging.Formatter(log_format) # fh.setFormatter(formatter) diff --git a/lib/util/transform.py b/lib/util/transform.py new file mode 100644 index 0000000..8e14dab --- /dev/null +++ b/lib/util/transform.py @@ -0,0 +1,25 @@ +import numpy as np + + +def transform(df, columns=None, transform_fn=None): + transformed_df = df.copy().fillna(method='bfill') + + if columns is None: + transformed_df = transform_fn(transformed_df) + else: + for column in columns: + transformed_df[column] = transform_fn(transformed_df[column]) + + return transformed_df + + +def max_min_normalize(df, columns=None): + return transform(df, columns, lambda t_df: (t_df - t_df.min()) / (t_df.max() - t_df.min())) + + +def difference(df, columns=None): + return transform(df, columns, lambda t_df: t_df - t_df.shift(1)) + + +def log_and_difference(df, columns=None): + return transform(df, columns, lambda t_df: np.log(t_df) - np.log(t_df).shift(1)) diff --git a/log/.gitkeep b/log/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/optimize.py b/optimize.py index 190def6..e80585a 100644 --- a/optimize.py +++ b/optimize.py @@ -1,266 +1,13 @@ -''' - -A large part of the code in this file was sourced from the rl-baselines-zoo library on GitHub. -In particular, the library provides a great parameter optimization set for the PPO2 algorithm, -as well as a great example implementation using optuna. 
- -Source: https://github.com/araffin/rl-baselines-zoo/blob/master/utils/hyperparams_opt.py - -''' - -import optuna - -import os -import pandas as pd import numpy as np -from stable_baselines.common.policies import MlpLnLstmPolicy -from stable_baselines.common.vec_env import DummyVecEnv -from stable_baselines import PPO2 - -from pathlib import Path - -from env.BitcoinTradingEnv import BitcoinTradingEnv -from util.indicators import add_indicators -from util.log import init_logger - - -class Optimize: - def __init__(self): - self.reward_strategy = 'sortino' - self.input_data_file = os.path.join('data', 'coinbase_hourly.csv') - self.params_db_file = 'sqlite:///params.db' - - # number of parallel jobs - self.n_jobs = 4 - # maximum number of trials for finding the best hyperparams - self.n_trials = 1000 - # number of test episodes per trial - self.n_test_episodes = 3 - # number of evaluations for pruning per trial - self.n_evaluations = 4 - - self.train_df = None - self.test_df = None - - self.logger = init_logger(__name__, testing_mode=True) - - self.logger.debug("Initialized Optimizer") - - def prepare_data(self): - df = pd.read_csv(self.input_data_file) - df = df.drop(['Symbol'], axis=1) - df = df.sort_values(['Date']) - df = add_indicators(df.reset_index()) - - train_len = int(len(df) * 0.8) - - df = df[:train_len] - - validation_len = int(train_len * 0.8) - self.train_df = df[:validation_len] - self.test_df = df[validation_len:] - - def optimize_envs(self, trial): - return { - 'reward_func': self.reward_strategy, - 'forecast_len': int(trial.suggest_loguniform('forecast_len', 1, 200)), - 'confidence_interval': trial.suggest_uniform('confidence_interval', 0.7, 0.99), - } - - def optimize_ppo2(self, trial): - return { - 'n_steps': int(trial.suggest_loguniform('n_steps', 16, 2048)), - 'gamma': trial.suggest_loguniform('gamma', 0.9, 0.9999), - 'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1.), - 'ent_coef': trial.suggest_loguniform('ent_coef', 1e-8, 1e-1), - 'cliprange': trial.suggest_uniform('cliprange', 0.1, 0.4), - 'noptepochs': int(trial.suggest_loguniform('noptepochs', 1, 48)), - 'lam': trial.suggest_uniform('lam', 0.8, 1.) 
- } - - def optimize_agent(self, trial): - env_params = self.optimize_envs(trial) - train_env = DummyVecEnv( - [lambda: BitcoinTradingEnv(self.train_df, **env_params)]) - test_env = DummyVecEnv( - [lambda: BitcoinTradingEnv(self.test_df, **env_params)]) - - model_params = self.optimize_ppo2(trial) - model = PPO2(MlpLnLstmPolicy, train_env, verbose=0, nminibatches=1, - tensorboard_log=os.path.join('.', 'tensorboard'), **model_params) - - last_reward = -np.finfo(np.float16).max - evaluation_interval = int(len(self.train_df) / self.n_evaluations) - - for eval_idx in range(self.n_evaluations): - try: - model.learn(evaluation_interval) - except AssertionError: - raise - - rewards = [] - n_episodes, reward_sum = 0, 0.0 - - obs = test_env.reset() - while n_episodes < self.n_test_episodes: - action, _ = model.predict(obs) - obs, reward, done, _ = test_env.step(action) - reward_sum += reward - - if done: - rewards.append(reward_sum) - reward_sum = 0.0 - n_episodes += 1 - obs = test_env.reset() - - last_reward = np.mean(rewards) - trial.report(-1 * last_reward, eval_idx) - - if trial.should_prune(eval_idx): - raise optuna.structs.TrialPruned() - - return -1 * last_reward - - def log_parameters(self): - self.logger.debug("Reward Strategy: %s" % self.reward_strategy) - self.logger.debug("Input Data File: %s" % self.input_data_file) - self.logger.debug("Params DB File: %s" % self.params_db_file) - self.logger.debug("Parallel jobs: %d" % self.n_jobs) - self.logger.debug("Trials: %d" % self.n_trials) - self.logger.debug("Test episodes (per trial): %d" % - self.n_test_episodes) - self.logger.debug("Evaluations (per trial): %d" % self.n_evaluations) - self.logger.debug("Train DF Length: %d" % len(self.train_df)) - self.logger.debug("Test DF Length: %d" % len(self.test_df)) - self.logger.debug( - "Features: %s", self.train_df.columns.str.cat(sep=", ")) - - def optimize(self): - if not self.train_df: - self.logger.info("Running built-in data preparation") - self.prepare_data() - else: - self.logger.info("Using provided data (Length: %d)" % - len(self.train_df)) - - self.log_parameters() - - study_name = 'ppo2_' + self.reward_strategy - study = optuna.create_study( - study_name=study_name, storage=self.params_db_file, load_if_exists=True) - - try: - study.optimize(self.optimize_agent, - n_trials=self.n_trials, n_jobs=self.n_jobs) - except KeyboardInterrupt: - pass - - self.logger.info( - 'Number of finished trials: {}'.format(len(study.trials))) - - self.logger.info('Best trial:') - trial = study.best_trial - - self.logger.info('Value: {}'.format(trial.value)) - - self.logger.info('Params: ') - for key, value in trial.params.items(): - self.logger.info(' {}: {}'.format(key, value)) - - return study.trials_dataframe() - - def model_params(self, params): - return { - 'n_steps': int(params['n_steps']), - 'gamma': params['gamma'], - 'learning_rate': params['learning_rate'], - 'ent_coef': params['ent_coef'], - 'cliprange': params['cliprange'], - 'noptepochs': int(params['noptepochs']), - 'lam': params['lam'], - } - - def train(self): - if not self.train_df: - self.logger.info("Running built-in data preparation") - self.prepare_data() - else: - self.logger.info("Using provided data (Length: %d)" % - len(self.train_df)) - - study_name = 'ppo2_' + self.reward_strategy - - study = optuna.load_study( - study_name=study_name, storage=self.params_db_file) - params = study.best_trial.params - - train_env = DummyVecEnv([lambda: BitcoinTradingEnv( - self.train_df, reward_func=self.reward_strategy, 
forecast_len=int(params['forecast_len']), confidence_interval=params['confidence_interval'])]) - - test_env = DummyVecEnv([lambda: BitcoinTradingEnv( - self.test_df, reward_func=self.reward_strategy, forecast_len=int(params['forecast_len']), confidence_interval=params['confidence_interval'])]) - - model_params = self.model_params(params) - - model = PPO2(MlpLnLstmPolicy, train_env, verbose=0, nminibatches=1, - tensorboard_log=os.path.join('.', 'tensorboard'), **model_params) - - models_to_train = 1 - self.logger.info("Training {} model instances".format(models_to_train)) - - for idx in range(0, models_to_train): # Not sure why we are doing this, tbh - self.logger.info( - f'[{idx}] Training for: {len(self.train_df)} time steps') - - model.learn(total_timesteps=len(self.train_df)) - - obs = test_env.reset() - done, reward_sum = False, 0 - - while not done: - action, _states = model.predict(obs) - obs, reward, done, info = test_env.step(action) - reward_sum += reward - - self.logger.info( - f'[{idx}] Total reward: {reward_sum} ({self.reward_strategy})') - - model.save(os.path.join('.', 'agents', 'ppo2_' + - self.reward_strategy + '_' + str(idx) + '.pkl')) - - self.logger.info("Trained {} model instances".format(models_to_train)) - - def test(self, model_instance: 0): - - study_name = 'ppo2_' + self.reward_strategy - study = optuna.load_study( - study_name=study_name, storage=self.params_db_file) - params = study.best_trial.params - - test_env = DummyVecEnv([lambda: BitcoinTradingEnv( - self.test_df, reward_func=self.reward_strategy, forecast_len=int(params['forecast_len']), confidence_interval=params['confidence_interval'])]) - - model = PPO2.load(os.path.join('.', 'agents', 'ppo2_' + - self.reward_strategy + '_' + str(model_instance) + '.pkl'), env=test_env) - - obs, done = test_env.reset(), False - while not done: - action, _states = model.predict(obs) - obs, reward, done, info = test_env.step(action) - - test_env.render(mode="human") +from lib.RLTrader import RLTrader +np.warnings.filterwarnings('ignore') if __name__ == '__main__': - optimizer = Optimize() - test_mode = "FAST" # I'm hard-coding this for now - if test_mode == "FAST": - optimizer.input_data_file = os.path.join('data', 'coinbase_daily.csv') - optimizer.n_jobs = 1 - optimizer.n_trials = 1 - optimizer.n_test_episodes = 1 - optimizer.n_evaluations = 1 - # optimizer.optimize() - optimizer.train() - # optimizer.test() + trader = RLTrader() + + trader.optimize(n_trials=1) + trader.train(n_epochs=1, + test_trained_model=True, + render_trained_model=True) diff --git a/test.py b/test.py deleted file mode 100644 index b7711c8..0000000 --- a/test.py +++ /dev/null @@ -1,55 +0,0 @@ -import os -import gym -import optuna -import pandas as pd - -from stable_baselines.common.policies import MlpLnLstmPolicy -from stable_baselines.common.vec_env import SubprocVecEnv, DummyVecEnv -from stable_baselines import A2C, ACKTR, PPO2 - -from env.BitcoinTradingEnv import BitcoinTradingEnv -from util.indicators import add_indicators - -curr_idx = 0 -reward_strategy = 'sortino' -input_data_file = os.path.join('data', 'coinbase_hourly.csv') -params_db_file = 'sqlite:///params.db' - -study_name = 'ppo2_' + reward_strategy -study = optuna.load_study(study_name=study_name, storage=params_db_file) -params = study.best_trial.params - -print("Testing PPO2 agent with params:", params) -print("Best trial:", -1 * study.best_trial.value) - -df = pd.read_csv(input_data_file) -df = df.drop(['Symbol'], axis=1) -df = df.sort_values(['Date']) -df = 
add_indicators(df.reset_index()) - -test_len = int(len(df) * 0.2) -train_len = int(len(df)) - test_len - -test_df = df[train_len:] - -test_env = DummyVecEnv([lambda: BitcoinTradingEnv( - test_df, reward_func=reward_strategy, forecast_len=int(params['forecast_len']), confidence_interval=params['confidence_interval'])]) - -model_params = { - 'n_steps': int(params['n_steps']), - 'gamma': params['gamma'], - 'learning_rate': params['learning_rate'], - 'ent_coef': params['ent_coef'], - 'cliprange': params['cliprange'], - 'noptepochs': int(params['noptepochs']), - 'lam': params['lam'], -} - -model = PPO2.load(os.path.join('.', 'agents', 'ppo2_' + reward_strategy + '_' + str(curr_idx) + '.pkl'), env=test_env) - -obs, done = test_env.reset(), False -while not done: - action, _states = model.predict(obs) - obs, reward, done, info = test_env.step(action) - - test_env.render(mode="human") diff --git a/train.py b/train.py deleted file mode 100644 index 2e076d9..0000000 --- a/train.py +++ /dev/null @@ -1,78 +0,0 @@ -import os -import gym -import optuna -import pandas as pd -import numpy as np - -from stable_baselines.common.policies import MlpLnLstmPolicy -from stable_baselines.common.vec_env import SubprocVecEnv, DummyVecEnv -from stable_baselines import A2C, ACKTR, PPO2 - -from pathlib import Path - -from env.BitcoinTradingEnv import BitcoinTradingEnv -from util.indicators import add_indicators - -curr_idx = -1 -reward_strategy = 'sortino' -input_data_file = os.path.join('data', 'coinbase_hourly.csv') -params_db_file = 'sqlite:///params.db' - -study_name = 'ppo2_' + reward_strategy -study = optuna.load_study(study_name=study_name, storage=params_db_file) -params = study.best_trial.params - -print("Training PPO2 agent with params:", params) -print("Best trial reward:", -1 * study.best_trial.value) - -df = pd.read_csv(input_data_file) -df = df.drop(['Symbol'], axis=1) -df = df.sort_values(['Date']) -df = add_indicators(df.reset_index()) - -test_len = int(len(df) * 0.2) -train_len = int(len(df)) - test_len - -train_df = df[:train_len] -test_df = df[train_len:] - -train_env = DummyVecEnv([lambda: BitcoinTradingEnv( - train_df, reward_func=reward_strategy, forecast_len=int(params['forecast_len']), confidence_interval=params['confidence_interval'])]) - -test_env = DummyVecEnv([lambda: BitcoinTradingEnv( - test_df, reward_func=reward_strategy, forecast_len=int(params['forecast_len']), confidence_interval=params['confidence_interval'])]) - -model_params = { - 'n_steps': int(params['n_steps']), - 'gamma': params['gamma'], - 'learning_rate': params['learning_rate'], - 'ent_coef': params['ent_coef'], - 'cliprange': params['cliprange'], - 'noptepochs': int(params['noptepochs']), - 'lam': params['lam'], -} - -if curr_idx == -1: - model = PPO2(MlpLnLstmPolicy, train_env, verbose=0, nminibatches=1, - tensorboard_log=os.path.join('.', 'tensorboard'), **model_params) -else: - model = PPO2.load(os.path.join('.', 'agents', 'ppo2_' + - reward_strategy + '_' + str(curr_idx) + '.pkl'), env=train_env) - -for idx in range(curr_idx + 1, 10): - print('[', idx, '] Training for: ', train_len, ' time steps') - - model.learn(total_timesteps=train_len) - - obs = test_env.reset() - done, reward_sum = False, 0 - - while not done: - action, _states = model.predict(obs) - obs, reward, done, info = test_env.step(action) - reward_sum += reward - - print('[', idx, '] Total reward: ', - reward_sum, ' (' + reward_strategy + ')') - model.save(os.path.join('.', 'agents', 'ppo2_' + - reward_strategy + '_' + str(idx) + '.pkl')) diff --git 
a/util/indicators.py b/util/indicators.py deleted file mode 100644 index 0a7a1e8..0000000 --- a/util/indicators.py +++ /dev/null @@ -1,78 +0,0 @@ -import ta - - -def add_indicators(df): - df['RSI'] = ta.rsi(df["Close"]) - df['MFI'] = ta.money_flow_index( - df["High"], df["Low"], df["Close"], df["Volume BTC"]) - df['TSI'] = ta.tsi(df["Close"]) - df['UO'] = ta.uo(df["High"], df["Low"], df["Close"]) - df['AO'] = ta.ao(df["High"], df["Low"]) - - df['MACD_diff'] = ta.macd_diff(df["Close"]) - df['Vortex_pos'] = ta.vortex_indicator_pos( - df["High"], df["Low"], df["Close"]) - df['Vortex_neg'] = ta.vortex_indicator_neg( - df["High"], df["Low"], df["Close"]) - df['Vortex_diff'] = abs( - df['Vortex_pos'] - - df['Vortex_neg']) - df['Trix'] = ta.trix(df["Close"]) - df['Mass_index'] = ta.mass_index(df["High"], df["Low"]) - df['CCI'] = ta.cci(df["High"], df["Low"], df["Close"]) - df['DPO'] = ta.dpo(df["Close"]) - df['KST'] = ta.kst(df["Close"]) - df['KST_sig'] = ta.kst_sig(df["Close"]) - df['KST_diff'] = ( - df['KST'] - - df['KST_sig']) - df['Aroon_up'] = ta.aroon_up(df["Close"]) - df['Aroon_down'] = ta.aroon_down(df["Close"]) - df['Aroon_ind'] = ( - df['Aroon_up'] - - df['Aroon_down'] - ) - - df['BBH'] = ta.bollinger_hband(df["Close"]) - df['BBL'] = ta.bollinger_lband(df["Close"]) - df['BBM'] = ta.bollinger_mavg(df["Close"]) - df['BBHI'] = ta.bollinger_hband_indicator( - df["Close"]) - df['BBLI'] = ta.bollinger_lband_indicator( - df["Close"]) - df['KCHI'] = ta.keltner_channel_hband_indicator(df["High"], - df["Low"], - df["Close"]) - df['KCLI'] = ta.keltner_channel_lband_indicator(df["High"], - df["Low"], - df["Close"]) - df['DCHI'] = ta.donchian_channel_hband_indicator(df["Close"]) - df['DCLI'] = ta.donchian_channel_lband_indicator(df["Close"]) - - df['ADI'] = ta.acc_dist_index(df["High"], - df["Low"], - df["Close"], - df["Volume BTC"]) - df['OBV'] = ta.on_balance_volume(df["Close"], - df["Volume BTC"]) - df['CMF'] = ta.chaikin_money_flow(df["High"], - df["Low"], - df["Close"], - df["Volume BTC"]) - df['FI'] = ta.force_index(df["Close"], - df["Volume BTC"]) - df['EM'] = ta.ease_of_movement(df["High"], - df["Low"], - df["Close"], - df["Volume BTC"]) - df['VPT'] = ta.volume_price_trend(df["Close"], - df["Volume BTC"]) - df['NVI'] = ta.negative_volume_index(df["Close"], - df["Volume BTC"]) - - df['DR'] = ta.daily_return(df["Close"]) - df['DLR'] = ta.daily_log_return(df["Close"]) - - df.fillna(method='bfill', inplace=True) - - return df diff --git a/util/transform.py b/util/transform.py deleted file mode 100644 index 69b1401..0000000 --- a/util/transform.py +++ /dev/null @@ -1,37 +0,0 @@ -import tensorflow as tf - - -def transform(df, transform_fn, columns=None): - transformed_df = df.copy() - - if columns is None: - transformed_df = transform_fn(transformed_df) - - for column in columns: - transformed_df[column] = transform_fn(transformed_df[column]) - - transformed_df = transformed_df.fillna(method='bfill') - - return transformed_df - - -def max_min_normalize(df, columns): - def transform_fn(transform_df): - return (transform_df - transform_df.min()) / (transform_df.max() - transform_df.min()) - - return transform(df, transform_fn, columns) - - -def difference(df, columns): - def transform_fn(transform_df): - return transform_df - transform_df.shift(1) - - return transform(df, transform_fn, columns) - - -def log_and_difference(df, columns): - def transform_fn(transform_df): - transform_df.loc[transform_df == 0] = 1E-10 - return tf.log(transform_df) - tf.log(transform_df.shift(1)) - - return transform(df, 
transform_fn, columns)
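
For reviewers, a minimal usage sketch of the refactored entry point (illustration only, not part of the patch). The keyword arguments are the ones RLTrader.__init__ reads via kwargs.get, the reward strategies are the ones BitcoinTradingEnv._reward understands, and the paths shown are just the defaults from the diff; adjust to your setup.

from lib.RLTrader import RLTrader

# Assumes the default data/input/coinbase_hourly.csv input file exists.
trader = RLTrader(
    reward_strategy='sharpe',            # 'sortino' (default), 'sharpe' or 'omega'
    input_data_path='data/input/coinbase_hourly.csv',
    params_db_path='sqlite:///data/params.db',
    tensorboard_path='data/tensorboard',
    show_debug=True,
)

trader.optimize(n_trials=10, n_parallel_jobs=4)   # hyperparameter search, results stored in the params DB
trader.train(n_epochs=5,                          # saves data/agents/<study_name>__<epoch>.pkl each epoch
             test_trained_model=True,
             render_trained_model=False)
trader.test(model_epoch=4)                        # evaluates a saved agent on the held-out split

Note that train() and test() load the best trial from the Optuna study, so optimize() must have completed at least one trial first.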