Skip to content

Commit

Permalink
Merge pull request #68 from araffin/feat/update-sb-2.10
Browse files Browse the repository at this point in the history
Stable Baselines v2.10.0
  • Loading branch information
araffin authored Mar 14, 2020
2 parents 415f383 + 6d498c7 commit 1c2a3cf
Show file tree
Hide file tree
Showing 40 changed files with 75 additions and 54 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ logs/
.idea/
cluster_sbatch.sh
cluster_sbatch_mpi.sh
trained_agents/
.git/
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ notifications:

env:
global:
- DOCKER_IMAGE=stablebaselines/rl-baselines-zoo-cpu:v2.9.0
- DOCKER_IMAGE=stablebaselines/rl-baselines-zoo-cpu:v2.10.0

services:
- docker
Expand Down
15 changes: 15 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Developer shortcuts. Every target below is a command name rather than a
# file to build, so all of them are declared phony: an existing file or
# directory with the same name (e.g. the `docker/` folder) can then never be
# mistaken for the target's output.
.PHONY: pytest type docker docker-cpu docker-gpu

# Run pytest and coverage report
pytest:
	./scripts/run_tests.sh

# Type check
type:
	pytype

# Build both Docker images
docker: docker-cpu docker-gpu

# Build the CPU image (build script invoked without USE_GPU set here)
docker-cpu:
	./scripts/build_docker.sh

# Build the GPU image: USE_GPU=True is passed to the build script
docker-gpu:
	USE_GPU=True ./scripts/build_docker.sh
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ You can train agents online using [colab notebook](https://colab.research.google

### Stable-Baselines PyPi Package

Min version: stable-baselines[mpi] >= 2.9.0
Min version: stable-baselines[mpi] >= 2.10.0

```
apt-get install swig cmake libopenmpi-dev zlib1g-dev ffmpeg
Expand Down
29 changes: 3 additions & 26 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,45 +4,22 @@ FROM $PARENT_IMAGE

RUN apt-get -y update \
&& apt-get -y install \
curl \
cmake \
ffmpeg \
freeglut3-dev \
git \
jq \
python-dev \
python-pip \
python3-dev \
libfontconfig1 \
libglib2.0-0 \
libsm6 \
libxext6 \
libxrender1 \
libopenmpi-dev \
swig \
xvfb \
zlib1g-dev \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

ENV CODE_DIR /root/code
ENV VENV /root/venv

RUN \
mkdir -p ${CODE_DIR}/stable_baselines && \
pip install virtualenv && \
virtualenv $VENV --python=python3 && \
mkdir -p ${CODE_DIR}/rl_zoo && \
. $VENV/bin/activate && \
cd $CODE_DIR && \
pip install --upgrade pip && \
if [ "$USE_GPU" = "True" ]; then \
TENSORFLOW_PACKAGE="tensorflow-gpu==1.8.0"; \
else \
TENSORFLOW_PACKAGE="tensorflow==1.8.0"; \
fi; \
pip install ${TENSORFLOW_PACKAGE} && \
pip uninstall -y stable-baselines && \
pip install stable-baselines[mpi]==2.10.0 && \
pip install box2d-py==2.3.5 && \
pip install stable-baselines[mpi,tests]==2.9.0 && \
pip install pybullet && \
pip install gym-minigrid && \
pip install scikit-optimize && \
Expand Down
6 changes: 3 additions & 3 deletions enjoy.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@
from utils import ALGOS, create_test_env, get_latest_run_id, get_saved_hyperparams, find_saved_model

# Fix for breaking change in v2.6.0
if pkg_resources.get_distribution("stable_baselines").version >= "2.6.0":
sys.modules['stable_baselines.ddpg.memory'] = stable_baselines.deepq.replay_buffer
stable_baselines.deepq.replay_buffer.Memory = stable_baselines.deepq.replay_buffer.ReplayBuffer
sys.modules['stable_baselines.ddpg.memory'] = stable_baselines.common.buffers
stable_baselines.common.buffers.Memory = stable_baselines.common.buffers.ReplayBuffer


def main():
parser = argparse.ArgumentParser()
Expand Down
2 changes: 1 addition & 1 deletion hyperparams/a2c.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ HalfCheetahBulletEnv-v0:
gamma: 0.99
learning_rate: 0.002

BipedalWalkerHardcore-v2:
BipedalWalkerHardcore-v3:
normalize: true
n_envs: 16
n_timesteps: !!float 10e7
Expand Down
2 changes: 1 addition & 1 deletion hyperparams/acktr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ HalfCheetah-v2:
policy_kwargs: "dict(net_arch=[256, 256])"

# Tuned
BipedalWalkerHardcore-v2:
BipedalWalkerHardcore-v3:
normalize: true
n_envs: 8
n_timesteps: !!float 10e7
Expand Down
2 changes: 1 addition & 1 deletion hyperparams/ppo2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ BipedalWalker-v2:
learning_rate: !!float 2.5e-4
cliprange: 0.2

BipedalWalkerHardcore-v2:
BipedalWalkerHardcore-v3:
normalize: true
n_envs: 16
n_timesteps: !!float 10e7
Expand Down
2 changes: 1 addition & 1 deletion hyperparams/sac.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ BipedalWalker-v2:
gradient_steps: 1
learning_starts: 1000

BipedalWalkerHardcore-v2:
BipedalWalkerHardcore-v3:
n_timesteps: !!float 5e7
policy: 'CustomSACPolicy'
learning_rate: lin_3e-4
Expand Down
2 changes: 1 addition & 1 deletion hyperparams/td3.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ BipedalWalker-v2:
policy_kwargs: "dict(layers=[400, 300])"

# To be tuned
BipedalWalkerHardcore-v2:
BipedalWalkerHardcore-v3:
n_timesteps: !!float 5e7
policy: 'MlpPolicy'
gamma: 0.99
Expand Down
2 changes: 1 addition & 1 deletion hyperparams/trpo.yml
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ BipedalWalker-v2:
vf_stepsize: !!float 1e-3

# To be tuned
BipedalWalkerHardcore-v2:
BipedalWalkerHardcore-v3:
n_timesteps: !!float 5e7
policy: 'MlpPolicy'
timesteps_per_batch: 4096
Expand Down
9 changes: 4 additions & 5 deletions scripts/build_docker.sh
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
#!/bin/bash

CPU_PARENT=ubuntu:16.04
GPU_PARENT=nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04
PARENT=stablebaselines/stable-baselines

TAG=stablebaselines/rl-baselines-zoo
VERSION=v2.9.0
VERSION=v2.10.0

if [[ ${USE_GPU} == "True" ]]; then
PARENT=${GPU_PARENT}
PARENT="${PARENT}:${VERSION}"
else
PARENT=${CPU_PARENT}
PARENT="${PARENT}-cpu:${VERSION}"
TAG="${TAG}-cpu"
fi

Expand Down
4 changes: 2 additions & 2 deletions scripts/run_docker_cpu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ echo "Executing in the docker (cpu image):"
echo $cmd_line

docker run -it --rm --network host --ipc=host \
--mount src=$(pwd),target=/root/code/stable-baselines,type=bind stablebaselines/rl-baselines-zoo-cpu:v2.9.0\
bash -c "cd /root/code/stable-baselines/ && $cmd_line"
--mount src=$(pwd),target=/root/code/rl_zoo,type=bind stablebaselines/rl-baselines-zoo-cpu:v2.10.0\
bash -c "cd /root/code/rl_zoo/ && $cmd_line"
4 changes: 2 additions & 2 deletions scripts/run_docker_gpu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ echo "Executing in the docker (gpu image):"
echo $cmd_line

docker run -it --runtime=nvidia --rm --network host --ipc=host \
--mount src=$(pwd),target=/root/code/stable-baselines,type=bind stablebaselines/rl-baselines-zoo:v2.9.0\
bash -c "cd /root/code/stable-baselines/ && $cmd_line"
--mount src=$(pwd),target=/root/code/rl_zoo,type=bind stablebaselines/rl-baselines-zoo:v2.10.0\
bash -c "cd /root/code/rl_zoo/ && $cmd_line"
4 changes: 2 additions & 2 deletions scripts/run_tests_travis.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env bash

DOCKER_CMD="docker run -it --rm --network host --ipc=host --mount src=$(pwd),target=/root/code/stable-baselines,type=bind"
BASH_CMD="cd /root/code/stable-baselines/"
DOCKER_CMD="docker run -it --rm --network host --ipc=host --mount src=$(pwd),target=/root/code/rl_zoo,type=bind"
BASH_CMD="cd /root/code/rl_zoo/"

if [[ $# -ne 1 ]]; then
echo "usage: $0 <test glob>"
Expand Down
10 changes: 7 additions & 3 deletions tests/test_enjoy.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,15 @@ def test_enjoy(trained_model):
'--env', env_id,
'--no-render'
]

# Skip mujoco envs
if 'Fetch' in trained_model:
return

# Skip old BipedalWalker version
if 'Walker-v2' in trained_model or 'WalkerHardcore-v2' in trained_model:
return

if '-MiniGrid-' in trained_model:
args = args + ['--gym-packages', 'gym_minigrid']

Expand All @@ -53,12 +57,12 @@ def test_record_video():
args = [
'-n', '100',
'--algo', 'ppo2',
'--env', 'BipedalWalkerHardcore-v2',
'--env', 'BipedalWalkerHardcore-v3',
'-o', 'logs/tests/videos/'
]

return_code = subprocess.call(['python', '-m', 'utils.record_video'] + args)
_assert_eq(return_code, 0)
video_path = 'logs/tests/videos/ppo2-BipedalWalkerHardcore-v2-step-0-to-step-100.mp4'
video_path = 'logs/tests/videos/ppo2-BipedalWalkerHardcore-v3-step-0-to-step-100.mp4'
# File is not empty
assert os.stat(video_path).st_size != 0, "Recorded video is empty"
2 changes: 1 addition & 1 deletion tests/test_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def _assert_eq(left, right):
experiments['{}-{}'.format(algo, env_id)] = (algo, env_id)

# Test for vecnormalize and frame-stack
experiments['ppo2-BipedalWalkerHardcore-v2'] = ('ppo2', 'BipedalWalkerHardcore-v2')
experiments['ppo2-BipedalWalkerHardcore-v3'] = ('ppo2', 'BipedalWalkerHardcore-v3')
# Test for DDPG
experiments['ddpg-MountainCarContinuous-v0'] = ('ddpg', 'MountainCarContinuous-v0')
# Test for SAC
Expand Down
4 changes: 2 additions & 2 deletions train.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from stable_baselines.common.cmd_util import make_atari_env
from stable_baselines.common.vec_env import VecFrameStack, SubprocVecEnv, VecNormalize, DummyVecEnv
from stable_baselines.common.noise import AdaptiveParamNoiseSpec, NormalActionNoise, OrnsteinUhlenbeckActionNoise
from stable_baselines.ppo2.ppo2 import constfn
from stable_baselines.common.schedules import constfn

from utils import make_env, ALGOS, linear_schedule, get_latest_run_id, get_wrapper_class, find_saved_model
from utils.hyperparams_opt import hyperparam_optimization
Expand Down Expand Up @@ -186,7 +186,7 @@
# Convert to python object if needed
if 'policy_kwargs' in hyperparams.keys() and isinstance(hyperparams['policy_kwargs'], str):
hyperparams['policy_kwargs'] = eval(hyperparams['policy_kwargs'])

# Delete keys so the dict can be passed to the model constructor
if 'n_envs' in hyperparams.keys():
del hyperparams['n_envs']
Expand Down
1 change: 1 addition & 0 deletions trained_agents/a2c/BipedalWalker-v3
1 change: 1 addition & 0 deletions trained_agents/a2c/BipedalWalker-v3.pkl
1 change: 1 addition & 0 deletions trained_agents/a2c/BipedalWalkerHardcore-v3
1 change: 1 addition & 0 deletions trained_agents/a2c/BipedalWalkerHardcore-v3.pkl
1 change: 1 addition & 0 deletions trained_agents/acktr/BipedalWalker-v3
1 change: 1 addition & 0 deletions trained_agents/acktr/BipedalWalker-v3.zip
1 change: 1 addition & 0 deletions trained_agents/ddpg/BipedalWalker-v3
1 change: 1 addition & 0 deletions trained_agents/ddpg/BipedalWalker-v3.pkl
1 change: 1 addition & 0 deletions trained_agents/ppo2/BipedalWalker-v3
1 change: 1 addition & 0 deletions trained_agents/ppo2/BipedalWalker-v3.pkl
1 change: 1 addition & 0 deletions trained_agents/ppo2/BipedalWalkerHardcore-v3
1 change: 1 addition & 0 deletions trained_agents/ppo2/BipedalWalkerHardcore-v3.pkl
1 change: 1 addition & 0 deletions trained_agents/sac/BipedalWalker-v3
1 change: 1 addition & 0 deletions trained_agents/sac/BipedalWalker-v3.pkl
1 change: 1 addition & 0 deletions trained_agents/sac/BipedalWalkerHardcore-v3
1 change: 1 addition & 0 deletions trained_agents/sac/BipedalWalkerHardcore-v3.pkl
1 change: 1 addition & 0 deletions trained_agents/td3/BipedalWalker-v3
1 change: 1 addition & 0 deletions trained_agents/td3/BipedalWalker-v3.zip
1 change: 1 addition & 0 deletions trained_agents/trpo/BipedalWalker-v3
1 change: 1 addition & 0 deletions trained_agents/trpo/BipedalWalker-v3.pkl
4 changes: 4 additions & 0 deletions utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,10 @@ def get_class_name(wrapper_name):

if 'env_wrapper' in hyperparams.keys():
wrapper_name = hyperparams.get('env_wrapper')

if wrapper_name is None:
return None

if not isinstance(wrapper_name, list):
wrapper_names = [wrapper_name]
else:
Expand Down

0 comments on commit 1c2a3cf

Please sign in to comment.