diff --git a/CHANGELOG.md b/CHANGELOG.md index 49fd95cb2..1e3654f90 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,12 +1,18 @@ -## Release 2.0.0a12 (WIP) +## Release 2.0.0 (2023-06-22) + +**Gymnasium support** + +> **Warning** +> Stable-Baselines3 (SB3) v2.0.0 will be the last release supporting Python 3.7 ### Breaking Changes -- Upgraded to gym 0.26+ - Fixed bug in HistoryWrapper, now returns the correct obs space limits - Upgraded to SB3 >= 2.0.0 - Upgraded to Huggingface-SB3 >= 2.2.5 +- Upgraded to Gym API 0.26+, RL Zoo3 no longer works with Gym 0.21 ### New Features +- Added Gymnasium support - Gym 0.26+ patches to continue working with pybullet and TimeLimit wrapper ### Bug fixes diff --git a/requirements.txt b/requirements.txt index 5a77be0ed..28b49e8a7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ gym==0.26.2 -stable-baselines3[extra_no_roms,tests,docs]>=2.0.0a13 -sb3-contrib>=2.0.0a13 +stable-baselines3[extra_no_roms,tests,docs]>=2.0.0 +sb3-contrib>=2.0.0 box2d-py==2.3.8 pybullet # minigrid diff --git a/rl_zoo3/version.txt b/rl_zoo3/version.txt index 2e4adf320..227cea215 100644 --- a/rl_zoo3/version.txt +++ b/rl_zoo3/version.txt @@ -1 +1 @@ -2.0.0a13 +2.0.0 diff --git a/rl_zoo3/wrappers.py b/rl_zoo3/wrappers.py index d5476bf0d..4f51d472b 100644 --- a/rl_zoo3/wrappers.py +++ b/rl_zoo3/wrappers.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, Optional, SupportsFloat, Tuple +from typing import Any, ClassVar, Dict, Optional, SupportsFloat, Tuple import gymnasium as gym import numpy as np @@ -299,7 +299,7 @@ class MaskVelocityWrapper(gym.ObservationWrapper): """ # Supported envs - velocity_indices = { + velocity_indices: ClassVar[Dict[str, np.ndarray]] = { "CartPole-v1": np.array([1, 3]), "MountainCar-v0": np.array([1]), "MountainCarContinuous-v0": np.array([1]), diff --git a/setup.py b/setup.py index c7c3dc6b9..b0dd36492 100644 --- a/setup.py +++ b/setup.py @@ -27,8 +27,8 @@ }, entry_points={"console_scripts": 
["rl_zoo3=rl_zoo3.cli:main"]}, install_requires=[ - "sb3_contrib>=2.0.0a13", - "gym==0.26.2", + "sb3_contrib>=2.0.0", + "gym==0.26.2", # for patches to make gym backward compat "huggingface_sb3>=2.2.5", "tqdm", "rich", @@ -52,6 +52,15 @@ version=__version__, python_requires=">=3.7", # PyPI package information. + # Project links displayed on the PyPI project page. + project_urls={ + "Code": "https://github.com/DLR-RM/rl-baselines3-zoo", + "Documentation": "https://rl-baselines3-zoo.readthedocs.io/en/master/", + "Changelog": "https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/CHANGELOG.md", + "Stable-Baselines3": "https://github.com/DLR-RM/stable-baselines3", + "RL-Zoo": "https://github.com/DLR-RM/rl-baselines3-zoo", + "SBX": "https://github.com/araffin/sbx", + }, classifiers=[ "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.7", diff --git a/tests/dummy_env/test_env/test_env.py b/tests/dummy_env/test_env/test_env.py index 5db419b40..abc7fa53b 100644 --- a/tests/dummy_env/test_env/test_env.py +++ b/tests/dummy_env/test_env/test_env.py @@ -1,10 +1,12 @@ +from typing import ClassVar + import gymnasium as gym import numpy as np from gymnasium import spaces class TestEnv(gym.Env): - metadata = {"render_modes": ["human"], "render_fps": 4} + metadata: ClassVar[dict] = {"render_modes": ["human"], "render_fps": 4} __test__ = False def __init__(self, render_mode=None): diff --git a/tests/test_enjoy.py b/tests/test_enjoy.py index a36794b23..6c9ad6123 100644 --- a/tests/test_enjoy.py +++ b/tests/test_enjoy.py @@ -87,15 +87,15 @@ def test_load(tmp_path): # Load best model args = ["-n", str(N_STEPS), "-f", tmp_path, "--algo", algo, "--env", env_id, "--no-render"] # Test with progress bar - return_code = subprocess.call(["python", "enjoy.py", *args] + ["--load-best", "-P"]) + return_code = subprocess.call(["python", "enjoy.py", *args, "--load-best", "-P"]) _assert_eq(return_code, 0) # Load checkpoint - return_code = subprocess.call(["python", "enjoy.py", *args] + 
["--load-checkpoint", str(500)]) + return_code = subprocess.call(["python", "enjoy.py", *args, "--load-checkpoint", str(500)]) _assert_eq(return_code, 0) # Load last checkpoint - return_code = subprocess.call(["python", "enjoy.py", *args] + ["--load-last-checkpoint"]) + return_code = subprocess.call(["python", "enjoy.py", *args, "--load-last-checkpoint"]) _assert_eq(return_code, 0)