Update DQN/QRDQN Atari hyperparams (#271)

* Update DQN/QRDQN Atari hyperparams
* Fix loading of pretrained agents
DLR-RM · Jul 15, 2022 · f1064a7 · f1064a7
1 parent 643fdee
commit f1064a7
Show file tree

Hide file tree

Showing 5 changed files with 16 additions and 4 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -19,6 +19,9 @@
 - Fix `Reacher-v3` name in PPO hyperparameter file
 - Pinned ale-py==0.7.4 until new SB3 version is released
 - Fix enjoy / record videos with LSTM policy
+- Changed `optimize_memory_usage` to `False` for DQN/QR-DQN on Atari games.
+  To save RAM, set it back to `True` and deactivate `handle_timeout_termination`
+  in the `replay_buffer_kwargs`.
 
 ### Documentation
 

diff --git a/enjoy.py b/enjoy.py
@@ -158,6 +158,11 @@ def main():  # noqa: C901
     if algo in off_policy_algos:
         # Dummy buffer size as we don't need memory to enjoy the trained agent
         kwargs.update(dict(buffer_size=1))
+        # Hack due to breaking change in v1.6
+        # handle_timeout_termination cannot be used at the same time
+        # as optimize_memory_usage
+        if "optimize_memory_usage" in hyperparams:
+            kwargs.update(optimize_memory_usage=False)
 
     # Check if we are running python 3.8+
     # we need to patch saved model under python 3.6/3.7 to load them

diff --git a/hyperparams/dqn.yml b/hyperparams/dqn.yml
@@ -13,7 +13,9 @@ atari:
   gradient_steps: 1
   exploration_fraction: 0.1
   exploration_final_eps: 0.01
-  optimize_memory_usage: True
+  # If True, you need to deactivate handle_timeout_termination
+  # in the replay_buffer_kwargs
+  optimize_memory_usage: False
 
 # Almost Tuned
 CartPole-v1:

diff --git a/hyperparams/qrdqn.yml b/hyperparams/qrdqn.yml
@@ -5,7 +5,9 @@ atari:
   policy: 'CnnPolicy'
   n_timesteps: !!float 1e7
   exploration_fraction: 0.025  # explore 250k steps = 10M * 0.025
-  optimize_memory_usage: True
+  # If True, you need to deactivate handle_timeout_termination
+  # in the replay_buffer_kwargs
+  optimize_memory_usage: False
 
 # Tuned
 CartPole-v1:

diff --git a/requirements.txt b/requirements.txt
@@ -1,6 +1,6 @@
 gym==0.21
-stable-baselines3[extra,tests,docs]>=1.5.1a7
-sb3-contrib>=1.5.1a8
+stable-baselines3[extra,tests,docs]>=1.6.0
+sb3-contrib>=1.6.0
 box2d-py==2.3.8
 pybullet
 gym-minigrid