Merge pull request #184 from hill-a/update-doc
Update the documentation
ernestum authored Feb 6, 2019
2 parents cb0662f + d13e68c commit d997d65
Showing 9 changed files with 39 additions and 29 deletions.
14 changes: 14 additions & 0 deletions docs/guide/rl.rst
@@ -0,0 +1,14 @@
.. _rl:

================================
Reinforcement Learning Resources
================================


Stable-Baselines assumes that you already understand the basic concepts of Reinforcement Learning (RL).

However, if you want to learn about RL, there are several good resources to get started:

- `OpenAI Spinning Up <https://spinningup.openai.com/en/latest/>`_
- `David Silver's course <http://www0.cs.ucl.ac.uk/staff/d.silver/web/Teaching.html>`_
- `More resources <https://github.com/dennybritz/reinforcement-learning>`_
6 changes: 4 additions & 2 deletions docs/guide/vec_envs.rst
@@ -21,8 +21,10 @@ Because of that, `actions` passed to the environment are now a vector (of dimens

.. warning::

-   It seems that Windows users are experiencing issues with SubprocVecEnv.
-   We recommend to use the docker image in that case. (See `Issue #42 <https://github.com/hill-a/stable-baselines/issues/40>`_)
+   When using ``SubprocVecEnv``, Windows users must wrap the code
+   in an ``if __name__=="__main__":``.
+   See `stackoverflow question <https://stackoverflow.com/questions/24374288/where-to-put-freeze-support-in-a-python-script>`_
+   for more information about multiprocessing on Windows using python.
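
A minimal sketch of the guard this new warning describes, assuming the stable-baselines 2.x API and an illustrative ``CartPole-v1`` environment (both are assumptions, not part of this commit)::

    import gym

    from stable_baselines import PPO2
    from stable_baselines.common.vec_env import SubprocVecEnv


    def make_env():
        # Each worker process builds its own copy of the environment
        return gym.make('CartPole-v1')


    if __name__ == '__main__':
        # On Windows, multiprocessing re-imports this module in every worker,
        # so anything that spawns subprocesses must sit under the __main__ guard
        env = SubprocVecEnv([make_env for _ in range(4)])
        model = PPO2('MlpPolicy', env, verbose=1)
        model.learn(total_timesteps=10000)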


DummyVecEnv
1 change: 1 addition & 0 deletions docs/index.rst
@@ -38,6 +38,7 @@ This toolset is a fork of OpenAI Baselines, with a major structural refactoring,

guide/install
guide/quickstart
guide/rl
guide/algos
guide/examples
guide/vec_envs
4 changes: 3 additions & 1 deletion docs/misc/changelog.rst
@@ -5,10 +5,12 @@ Changelog

For download links, please look at `Github release page <https://github.com/hill-a/stable-baselines/releases>`_.

- Pre-Release 2.4.1 (WIP)
+ Pre-Release 2.4.1a (WIP)
--------------------------

- fixed computation of training metrics in TRPO and PPO1
+ - fixed custom policy examples in the doc for DQN and DDPG
+ - remove gym spaces patch for equality functions


Release 2.4.0 (2019-01-17)
11 changes: 11 additions & 0 deletions docs/misc/projects.rst
@@ -61,3 +61,14 @@ Experimental - using OpenAI baselines with MarathonEnvs (ML-Agents)

| Author: Joe Booth (@Sohojoe)
| Github repo: https://github.com/Sohojoe/MarathonEnvsBaselines

Learning to drive smoothly in minutes
-------------------------------------

Implementation of reinforcement learning approach to make a car learn to drive smoothly in minutes.
Uses SAC on VAE features.

| Author: Antonin Raffin (@araffin)
| Blog post: https://towardsdatascience.com/learning-to-drive-smoothly-in-minutes-450a7cdb35f4
| Github repo: https://github.com/araffin/learning-to-drive-in-5-minutes
5 changes: 2 additions & 3 deletions docs/modules/ddpg.rst
@@ -147,12 +147,11 @@ You can easily define a custom architecture for the policy network:
  from stable_baselines.common.vec_env import DummyVecEnv
  from stable_baselines import DDPG
- # Custom MLP policy of three layers of size 128 each
+ # Custom MLP policy of two layers of size 16 each
  class CustomPolicy(FeedForwardPolicy):
      def __init__(self, *args, **kwargs):
          super(CustomPolicy, self).__init__(*args, **kwargs,
-                                            net_arch=[dict(pi=[128, 128, 128],
-                                                           vf=[128, 128, 128])],
+                                            layers=[16, 16],
                                             layer_norm=False,
                                             feature_extraction="mlp")
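
For context, a brief usage sketch of the reworked DDPG example above, assuming the stable-baselines 2.x API and an illustrative ``Pendulum-v0`` environment (both are assumptions, not part of this commit)::

    import gym

    from stable_baselines import DDPG
    from stable_baselines.common.vec_env import DummyVecEnv
    from stable_baselines.ddpg.policies import FeedForwardPolicy


    class SmallDDPGPolicy(FeedForwardPolicy):
        # Two hidden layers of 16 units, matching the snippet above
        def __init__(self, *args, **kwargs):
            super(SmallDDPGPolicy, self).__init__(*args, **kwargs,
                                                  layers=[16, 16],
                                                  layer_norm=False,
                                                  feature_extraction="mlp")


    # DDPG needs a continuous action space; Pendulum-v0 is only an illustration
    env = DummyVecEnv([lambda: gym.make('Pendulum-v0')])
    model = DDPG(SmallDDPGPolicy, env, verbose=1)
    model.learn(total_timesteps=10000)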
5 changes: 2 additions & 3 deletions docs/modules/dqn.rst
@@ -147,12 +147,11 @@ You can easily define a custom architecture for the policy network:
  from stable_baselines.common.vec_env import DummyVecEnv
  from stable_baselines import DQN
- # Custom MLP policy of three layers of size 128 each
+ # Custom MLP policy of two layers of size 32 each
  class CustomPolicy(FeedForwardPolicy):
      def __init__(self, *args, **kwargs):
          super(CustomPolicy, self).__init__(*args, **kwargs,
-                                            net_arch=[dict(pi=[128, 128, 128],
-                                                           vf=[128, 128, 128])],
+                                            layers=[32, 32],
                                             layer_norm=False,
                                             feature_extraction="mlp")
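
Similarly, a brief usage sketch for the DQN variant, assuming the stable-baselines 2.x API and an illustrative ``CartPole-v1`` environment (both are assumptions, not part of this commit)::

    import gym

    from stable_baselines import DQN
    from stable_baselines.deepq.policies import FeedForwardPolicy


    class SmallDQNPolicy(FeedForwardPolicy):
        # Two hidden layers of 32 units, matching the snippet above
        def __init__(self, *args, **kwargs):
            super(SmallDQNPolicy, self).__init__(*args, **kwargs,
                                                 layers=[32, 32],
                                                 layer_norm=False,
                                                 feature_extraction="mlp")


    # DQN expects a discrete action space; CartPole-v1 is only an illustration
    env = gym.make('CartPole-v1')
    model = DQN(SmallDQNPolicy, env, verbose=1)
    model.learn(total_timesteps=10000)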
2 changes: 1 addition & 1 deletion setup.py
@@ -112,7 +112,7 @@
license="MIT",
long_description=long_description,
long_description_content_type='text/markdown',
version="2.4.0",
version="2.4.1a",
)

# python setup.py sdist
20 changes: 1 addition & 19 deletions stable_baselines/__init__.py
@@ -1,6 +1,3 @@
- import gym
- import numpy as np
-
from stable_baselines.a2c import A2C
from stable_baselines.acer import ACER
from stable_baselines.acktr import ACKTR
@@ -12,19 +9,4 @@
from stable_baselines.trpo_mpi import TRPO
from stable_baselines.sac import SAC

__version__ = "2.4.0"


# patch Gym spaces to add equality functions, if not implemented
# See https://github.com/openai/gym/issues/1171
if gym.spaces.MultiBinary.__eq__ == object.__eq__: # by default, all classes have the __eq__ function from object.
def _eq(self, other):
return self.n == other.n

gym.spaces.MultiBinary.__eq__ = _eq

if gym.spaces.MultiDiscrete.__eq__ == object.__eq__:
def _eq(self, other):
return np.all(self.nvec == other.nvec)

gym.spaces.MultiDiscrete.__eq__ = _eq
__version__ = "2.4.1a"
