Merge pull request #68 from araffin/feat/update-sb-2.10

Stable Baselines v2.10.0
araffin · Mar 14, 2020 · 1c2a3cf
2 parents 415f383 + 6d498c7
commit 1c2a3cf
Show file tree

Hide file tree

Showing 40 changed files with 75 additions and 54 deletions.
diff --git a/.gitignore b/.gitignore
@@ -6,3 +6,5 @@ logs/
 .idea/
 cluster_sbatch.sh
 cluster_sbatch_mpi.sh
+trained_agents/
+.git/
diff --git a/.travis.yml b/.travis.yml
@@ -7,7 +7,7 @@ notifications:
 
 env:
   global:
-    - DOCKER_IMAGE=stablebaselines/rl-baselines-zoo-cpu:v2.9.0
+    - DOCKER_IMAGE=stablebaselines/rl-baselines-zoo-cpu:v2.10.0
 
 services:
   - docker

diff --git a/Makefile b/Makefile
@@ -0,0 +1,22 @@
+# Every target below is a command, not a build product. Declaring them phony
+# keeps a same-named file or directory (the repo already has a docker/ folder) from shadowing them.
+.PHONY: pytest type docker docker-cpu docker-gpu
+
+# Run pytest and coverage report
+pytest:
+	./scripts/run_tests.sh
+
+# Type check
+type:
+	pytype
+
+# Build both docker images (CPU and GPU)
+docker: docker-cpu docker-gpu
+
+# Build the CPU docker image (USE_GPU left unset)
+docker-cpu:
+	./scripts/build_docker.sh
+
+# Build the GPU docker image
+docker-gpu:
+	USE_GPU=True ./scripts/build_docker.sh
diff --git a/README.md b/README.md
@@ -241,7 +241,7 @@ You can train agents online using [colab notebook](https://colab.research.google
 
 ### Stable-Baselines PyPi Package
 
-Min version: stable-baselines[mpi] >= 2.9.0
+Min version: stable-baselines[mpi] >= 2.10.0
 
 ```
 apt-get install swig cmake libopenmpi-dev zlib1g-dev ffmpeg

diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -4,45 +4,22 @@ FROM $PARENT_IMAGE
 
 RUN apt-get -y update \
     && apt-get -y install \
-    curl \
-    cmake \
     ffmpeg \
     freeglut3-dev \
-    git \
-    jq \
-    python-dev \
-    python-pip \
-    python3-dev \
-    libfontconfig1 \
-    libglib2.0-0 \
-    libsm6 \
-    libxext6 \
-    libxrender1 \
-    libopenmpi-dev \
     swig \
     xvfb \
-    zlib1g-dev \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
 
 ENV CODE_DIR /root/code
 ENV VENV /root/venv
 
 RUN \
-    mkdir -p ${CODE_DIR}/stable_baselines && \
-    pip install virtualenv && \
-    virtualenv $VENV --python=python3 && \
+    mkdir -p ${CODE_DIR}/rl_zoo && \
     . $VENV/bin/activate && \
-    cd $CODE_DIR && \
-    pip install --upgrade pip && \
-    if [ "$USE_GPU" = "True" ]; then \
-        TENSORFLOW_PACKAGE="tensorflow-gpu==1.8.0"; \
-    else \
-        TENSORFLOW_PACKAGE="tensorflow==1.8.0"; \
-    fi; \
-    pip install ${TENSORFLOW_PACKAGE} && \
+    pip uninstall -y stable-baselines && \
+    pip install stable-baselines[mpi]==2.10.0 && \
     pip install box2d-py==2.3.5 && \
-    pip install stable-baselines[mpi,tests]==2.9.0 && \
     pip install pybullet && \
     pip install gym-minigrid && \
     pip install scikit-optimize && \

diff --git a/enjoy.py b/enjoy.py
@@ -26,9 +26,9 @@
 from utils import ALGOS, create_test_env, get_latest_run_id, get_saved_hyperparams, find_saved_model
 
 # Fix for breaking change in v2.6.0
-if pkg_resources.get_distribution("stable_baselines").version >= "2.6.0":
-    sys.modules['stable_baselines.ddpg.memory'] = stable_baselines.deepq.replay_buffer
-    stable_baselines.deepq.replay_buffer.Memory = stable_baselines.deepq.replay_buffer.ReplayBuffer
+sys.modules['stable_baselines.ddpg.memory'] = stable_baselines.common.buffers
+stable_baselines.common.buffers.Memory = stable_baselines.common.buffers.ReplayBuffer
+
 
 def main():
     parser = argparse.ArgumentParser()

diff --git a/hyperparams/a2c.yml b/hyperparams/a2c.yml
@@ -77,7 +77,7 @@ HalfCheetahBulletEnv-v0:
   gamma: 0.99
   learning_rate: 0.002
 
-BipedalWalkerHardcore-v2:
+BipedalWalkerHardcore-v3:
   normalize: true
   n_envs: 16
   n_timesteps: !!float 10e7

diff --git a/hyperparams/acktr.yml b/hyperparams/acktr.yml
@@ -110,7 +110,7 @@ HalfCheetah-v2:
   policy_kwargs: "dict(net_arch=[256, 256])"
 
 # Tuned
-BipedalWalkerHardcore-v2:
+BipedalWalkerHardcore-v3:
   normalize: true
   n_envs: 8
   n_timesteps: !!float 10e7

diff --git a/hyperparams/ppo2.yml b/hyperparams/ppo2.yml
@@ -115,7 +115,7 @@ BipedalWalker-v2:
   learning_rate: !!float 2.5e-4
   cliprange: 0.2
 
-BipedalWalkerHardcore-v2:
+BipedalWalkerHardcore-v3:
   normalize: true
   n_envs: 16
   n_timesteps: !!float 10e7

diff --git a/hyperparams/sac.yml b/hyperparams/sac.yml
@@ -34,7 +34,7 @@ BipedalWalker-v2:
   gradient_steps: 1
   learning_starts: 1000
 
-BipedalWalkerHardcore-v2:
+BipedalWalkerHardcore-v3:
   n_timesteps: !!float 5e7
   policy: 'CustomSACPolicy'
   learning_rate: lin_3e-4

diff --git a/hyperparams/td3.yml b/hyperparams/td3.yml
@@ -64,7 +64,7 @@ BipedalWalker-v2:
   policy_kwargs: "dict(layers=[400, 300])"
 
 # To be tuned
-BipedalWalkerHardcore-v2:
+BipedalWalkerHardcore-v3:
   n_timesteps: !!float 5e7
   policy: 'MlpPolicy'
   gamma: 0.99

diff --git a/hyperparams/trpo.yml b/hyperparams/trpo.yml
@@ -160,7 +160,7 @@ BipedalWalker-v2:
   vf_stepsize: !!float 1e-3
 
 # To be tuned
-BipedalWalkerHardcore-v2:
+BipedalWalkerHardcore-v3:
   n_timesteps: !!float 5e7
   policy: 'MlpPolicy'
   timesteps_per_batch: 4096

diff --git a/scripts/build_docker.sh b/scripts/build_docker.sh
@@ -1,15 +1,14 @@
 #!/bin/bash
 
-CPU_PARENT=ubuntu:16.04
-GPU_PARENT=nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04
+PARENT=stablebaselines/stable-baselines
 
 TAG=stablebaselines/rl-baselines-zoo
-VERSION=v2.9.0
+VERSION=v2.10.0
 
 if [[ ${USE_GPU} == "True" ]]; then
-  PARENT=${GPU_PARENT}
+  PARENT="${PARENT}:${VERSION}"
 else
-  PARENT=${CPU_PARENT}
+  PARENT="${PARENT}-cpu:${VERSION}"
   TAG="${TAG}-cpu"
 fi
 

diff --git a/scripts/run_docker_cpu.sh b/scripts/run_docker_cpu.sh
@@ -7,5 +7,5 @@ echo "Executing in the docker (cpu image):"
 echo $cmd_line
 
 docker run -it --rm --network host --ipc=host \
- --mount src=$(pwd),target=/root/code/stable-baselines,type=bind stablebaselines/rl-baselines-zoo-cpu:v2.9.0\
-  bash -c "cd /root/code/stable-baselines/ && $cmd_line"
+ --mount src=$(pwd),target=/root/code/rl_zoo,type=bind stablebaselines/rl-baselines-zoo-cpu:v2.10.0\
+  bash -c "cd /root/code/rl_zoo/ && $cmd_line"
diff --git a/scripts/run_docker_gpu.sh b/scripts/run_docker_gpu.sh
@@ -7,5 +7,5 @@ echo "Executing in the docker (gpu image):"
 echo $cmd_line
 
 docker run -it --runtime=nvidia --rm --network host --ipc=host \
-  --mount src=$(pwd),target=/root/code/stable-baselines,type=bind stablebaselines/rl-baselines-zoo:v2.9.0\
-  bash -c "cd /root/code/stable-baselines/ && $cmd_line"
+  --mount src=$(pwd),target=/root/code/rl_zoo,type=bind stablebaselines/rl-baselines-zoo:v2.10.0\
+  bash -c "cd /root/code/rl_zoo/ && $cmd_line"
diff --git a/scripts/run_tests_travis.sh b/scripts/run_tests_travis.sh
@@ -1,7 +1,7 @@
 #!/usr/bin/env bash
 
-DOCKER_CMD="docker run -it --rm --network host --ipc=host --mount src=$(pwd),target=/root/code/stable-baselines,type=bind"
-BASH_CMD="cd /root/code/stable-baselines/"
+DOCKER_CMD="docker run -it --rm --network host --ipc=host --mount src=$(pwd),target=/root/code/rl_zoo,type=bind"
+BASH_CMD="cd /root/code/rl_zoo/"
 
 if [[ $# -ne 1 ]]; then
   echo "usage: $0 <test glob>"

diff --git a/tests/test_enjoy.py b/tests/test_enjoy.py
@@ -26,11 +26,15 @@ def test_enjoy(trained_model):
         '--env', env_id,
         '--no-render'
     ]
-    
+
     # Skip mujoco envs
     if 'Fetch' in trained_model:
         return
 
+    # Skip old BipedalWalker version
+    if 'Walker-v2' in trained_model or 'WalkerHardcore-v2' in trained_model:
+        return
+
     if '-MiniGrid-' in trained_model:
         args = args + ['--gym-packages', 'gym_minigrid']
 
@@ -53,12 +57,12 @@ def test_record_video():
     args = [
         '-n', '100',
         '--algo', 'ppo2',
-        '--env', 'BipedalWalkerHardcore-v2',
+        '--env', 'BipedalWalkerHardcore-v3',
         '-o', 'logs/tests/videos/'
     ]
 
     return_code = subprocess.call(['python', '-m', 'utils.record_video'] + args)
     _assert_eq(return_code, 0)
-    video_path = 'logs/tests/videos/ppo2-BipedalWalkerHardcore-v2-step-0-to-step-100.mp4'
+    video_path = 'logs/tests/videos/ppo2-BipedalWalkerHardcore-v3-step-0-to-step-100.mp4'
     # File is not empty
     assert os.stat(video_path).st_size != 0, "Recorded video is empty"
diff --git a/tests/test_train.py b/tests/test_train.py
@@ -22,7 +22,7 @@ def _assert_eq(left, right):
         experiments['{}-{}'.format(algo, env_id)] = (algo, env_id)
 
 # Test for vecnormalize and frame-stack
-experiments['ppo2-BipedalWalkerHardcore-v2'] = ('ppo2', 'BipedalWalkerHardcore-v2')
+experiments['ppo2-BipedalWalkerHardcore-v3'] = ('ppo2', 'BipedalWalkerHardcore-v3')
 # Test for DDPG
 experiments['ddpg-MountainCarContinuous-v0'] = ('ddpg', 'MountainCarContinuous-v0')
 # Test for SAC

diff --git a/train.py b/train.py
@@ -34,7 +34,7 @@
 from stable_baselines.common.cmd_util import make_atari_env
 from stable_baselines.common.vec_env import VecFrameStack, SubprocVecEnv, VecNormalize, DummyVecEnv
 from stable_baselines.common.noise import AdaptiveParamNoiseSpec, NormalActionNoise, OrnsteinUhlenbeckActionNoise
-from stable_baselines.ppo2.ppo2 import constfn
+from stable_baselines.common.schedules import constfn
 
 from utils import make_env, ALGOS, linear_schedule, get_latest_run_id, get_wrapper_class, find_saved_model
 from utils.hyperparams_opt import hyperparam_optimization
@@ -186,7 +186,7 @@
     # Convert to python object if needed
     if 'policy_kwargs' in hyperparams.keys() and isinstance(hyperparams['policy_kwargs'], str):
         hyperparams['policy_kwargs'] = eval(hyperparams['policy_kwargs'])
-        
+
     # Delete keys so the dict can be pass to the model constructor
     if 'n_envs' in hyperparams.keys():
         del hyperparams['n_envs']

diff --git a/trained_agents/a2c/BipedalWalker-v3 b/trained_agents/a2c/BipedalWalker-v3
@@ -0,0 +1 @@
+BipedalWalker-v2
diff --git a/trained_agents/a2c/BipedalWalker-v3.pkl b/trained_agents/a2c/BipedalWalker-v3.pkl
@@ -0,0 +1 @@
+BipedalWalker-v2.pkl
diff --git a/trained_agents/a2c/BipedalWalkerHardcore-v3 b/trained_agents/a2c/BipedalWalkerHardcore-v3
@@ -0,0 +1 @@
+BipedalWalkerHardcore-v2
diff --git a/trained_agents/a2c/BipedalWalkerHardcore-v3.pkl b/trained_agents/a2c/BipedalWalkerHardcore-v3.pkl
@@ -0,0 +1 @@
+BipedalWalkerHardcore-v2.pkl
diff --git a/trained_agents/acktr/BipedalWalker-v3 b/trained_agents/acktr/BipedalWalker-v3
@@ -0,0 +1 @@
+BipedalWalker-v2
diff --git a/trained_agents/acktr/BipedalWalker-v3.zip b/trained_agents/acktr/BipedalWalker-v3.zip
@@ -0,0 +1 @@
+BipedalWalker-v2.zip
diff --git a/trained_agents/ddpg/BipedalWalker-v3 b/trained_agents/ddpg/BipedalWalker-v3
@@ -0,0 +1 @@
+BipedalWalker-v2
diff --git a/trained_agents/ddpg/BipedalWalker-v3.pkl b/trained_agents/ddpg/BipedalWalker-v3.pkl
@@ -0,0 +1 @@
+BipedalWalker-v2.pkl
diff --git a/trained_agents/ppo2/BipedalWalker-v3 b/trained_agents/ppo2/BipedalWalker-v3
@@ -0,0 +1 @@
+BipedalWalker-v2
diff --git a/trained_agents/ppo2/BipedalWalker-v3.pkl b/trained_agents/ppo2/BipedalWalker-v3.pkl
@@ -0,0 +1 @@
+BipedalWalker-v2.pkl
diff --git a/trained_agents/ppo2/BipedalWalkerHardcore-v3 b/trained_agents/ppo2/BipedalWalkerHardcore-v3
@@ -0,0 +1 @@
+BipedalWalkerHardcore-v2
diff --git a/trained_agents/ppo2/BipedalWalkerHardcore-v3.pkl b/trained_agents/ppo2/BipedalWalkerHardcore-v3.pkl
@@ -0,0 +1 @@
+BipedalWalkerHardcore-v2.pkl
diff --git a/trained_agents/sac/BipedalWalker-v3 b/trained_agents/sac/BipedalWalker-v3
@@ -0,0 +1 @@
+BipedalWalker-v2
diff --git a/trained_agents/sac/BipedalWalker-v3.pkl b/trained_agents/sac/BipedalWalker-v3.pkl
@@ -0,0 +1 @@
+BipedalWalker-v2.pkl
diff --git a/trained_agents/sac/BipedalWalkerHardcore-v3 b/trained_agents/sac/BipedalWalkerHardcore-v3
@@ -0,0 +1 @@
+BipedalWalkerHardcore-v2
diff --git a/trained_agents/sac/BipedalWalkerHardcore-v3.pkl b/trained_agents/sac/BipedalWalkerHardcore-v3.pkl
@@ -0,0 +1 @@
+BipedalWalkerHardcore-v2.pkl
diff --git a/trained_agents/td3/BipedalWalker-v3 b/trained_agents/td3/BipedalWalker-v3
@@ -0,0 +1 @@
+BipedalWalker-v2
diff --git a/trained_agents/td3/BipedalWalker-v3.zip b/trained_agents/td3/BipedalWalker-v3.zip
@@ -0,0 +1 @@
+BipedalWalker-v2.zip
diff --git a/trained_agents/trpo/BipedalWalker-v3 b/trained_agents/trpo/BipedalWalker-v3
@@ -0,0 +1 @@
+BipedalWalker-v2
diff --git a/trained_agents/trpo/BipedalWalker-v3.pkl b/trained_agents/trpo/BipedalWalker-v3.pkl
@@ -0,0 +1 @@
+BipedalWalker-v2.pkl
diff --git a/utils/utils.py b/utils/utils.py
@@ -112,6 +112,10 @@ def get_class_name(wrapper_name):
 
     if 'env_wrapper' in hyperparams.keys():
         wrapper_name = hyperparams.get('env_wrapper')
+
+        if wrapper_name is None:
+            return None
+
         if not isinstance(wrapper_name, list):
             wrapper_names = [wrapper_name]
         else: