
Commit

Warnings correction (#319)
* suggest_uniform -> suggest_float and suggest_loguniform -> suggest_float(..., log=True)
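
For reference, a minimal sketch of the Optuna API migration applied throughout this commit (the objective function and parameter names are illustrative, not taken from rlberry):

```python
import optuna


def objective(trial):
    # Old API (deprecated, emits warnings):
    #   lr = trial.suggest_loguniform("lr", 1e-5, 1)
    #   coef = trial.suggest_uniform("coef", 0.0, 1.0)
    # Current API: suggest_float, with log=True for log-scaled ranges.
    lr = trial.suggest_float("lr", 1e-5, 1, log=True)
    coef = trial.suggest_float("coef", 0.0, 1.0)
    return lr + coef  # dummy objective, only here to make the sketch runnable


study = optuna.create_study()
study.optimize(objective, n_trials=5)
```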

* Creating a tensor from a list of numpy.ndarrays is extremely slow; convert the list to a single numpy.ndarray before converting it to a tensor.
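
A rough, self-contained illustration of the conversion pattern (array shapes and the list size are made up):

```python
import numpy as np
import torch

# A list of per-step observations, e.g. as accumulated in an agent's memory.
states = [np.random.rand(4).astype(np.float32) for _ in range(10_000)]

# Slow: PyTorch iterates over the Python list element by element and warns
# "Creating a tensor from a list of numpy.ndarrays is extremely slow."
# slow = torch.FloatTensor(states)

# Fast: stack into one contiguous ndarray first, then convert once.
fast = torch.FloatTensor(np.array(states))
```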

* `CartPole-v0` is out of date; upgrading to `CartPole-v1`.
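
For example, with rlberry's gym_make (a minimal sketch; the agent setup shown in the diffs below is omitted):

```python
from rlberry.envs import gym_make

# "CartPole-v0" triggers a deprecation warning in recent Gym releases;
# "CartPole-v1" is the maintained variant (longer episodes, higher reward threshold).
env = gym_make("CartPole-v1")
obs = env.reset()
```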

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Empty commit to trigger CI

* tostring -> tobytes (see pygame/pygame#3316)
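
A small sketch of the renamed pygame calls used by the renderers (the surface size here is arbitrary; the real code round-trips the render window):

```python
import pygame as pg

pg.init()
surface = pg.Surface((640, 480))

# tostring()/fromstring() are deprecated in recent pygame releases in favour of
# tobytes()/frombytes() (pygame/pygame#3316); the byte layout is unchanged.
raw = pg.image.tobytes(surface, "RGB")
restored = pg.image.frombytes(raw, (640, 480), "RGB")
```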

* Remove the pinned PyOpenGL version from requirements (newer releases no longer generate warnings)

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
JulienT01 and pre-commit-ci[bot] authored Jun 19, 2023
1 parent aa6ca14 commit c61172a
Showing 25 changed files with 41 additions and 40 deletions.
4 changes: 2 additions & 2 deletions docs/basics/evaluate_agent.rst
@@ -75,8 +75,8 @@ For :class:`~rlberry.agents.reinforce.reinforce.REINFORCEAgent`, this method loo
"""
batch_size = trial.suggest_categorical("batch_size", [1, 4, 8, 16, 32])
gamma = trial.suggest_categorical("gamma", [0.9, 0.95, 0.99])
-learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
-entr_coef = trial.suggest_loguniform("entr_coef", 1e-8, 0.1)
+learning_rate = trial.suggest_float("learning_rate", 1e-5, 1, log=True)
+entr_coef = trial.suggest_float("entr_coef", 1e-8, 0.1, log=True)
return {
"batch_size": batch_size,
2 changes: 1 addition & 1 deletion docs/contributing.md
@@ -122,7 +122,7 @@ class MyAgent(Agent):
# for example, param_1 could be the batch_size...
param_1 = trial.suggest_categorical("param_1", [1, 4, 8, 16, 32, 64])
# ... and param_2 could be a learning_rate
-param_2 = trial.suggest_loguniform("param_2", 1e-5, 1)
+param_2 = trial.suggest_float("param_2", 1e-5, 1, log=True)
return {
"param_1": param_1,
"param_2": param_2,
6 changes: 3 additions & 3 deletions docs/other/using_stable_baselines.rst
@@ -54,9 +54,9 @@ implementation of `Stable Baselines`_ and evaluate two hyperparameter configurat
@classmethod
def sample_parameters(cls, trial):
-learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
-ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)
-vf_coef = trial.suggest_uniform("vf_coef", 0, 1)
+learning_rate = trial.suggest_float("learning_rate", 1e-5, 1, log=True)
+ent_coef = trial.suggest_float("ent_coef", 0.00000001, 0.1, log=True)
+vf_coef = trial.suggest_float("vf_coef", 0, 1)
normalize_advantage = trial.suggest_categorical(
"normalize_advantage", [False, True]
)
6 changes: 3 additions & 3 deletions examples/demo_agents/demo_SAC.py
@@ -36,17 +36,17 @@ def env_ctor(env, wrap_spaces=True):
)

# basic version
# env_kwargs = dict(id = "CartPole-v0")
# env_kwargs = dict(id = "CartPole-v1")
# agent = AgentManager(SACAgent, (gym_make, env_kwargs), fit_budget=200, n_fit=1)

# # timothe's
# env = gym_make("CartPole-v0")
# env = gym_make("CartPole-v1")
# agent = AgentManager(
# SACAgent, (env.__class__, dict()), fit_budget=200, n_fit=1, enable_tensorboard=True,
# )

# Omar's
# env = gym_make("CartPole-v0")
# env = gym_make("CartPole-v1")
# from copy import deepcopy
# def env_constructor():
# return deepcopy(env)
2 changes: 1 addition & 1 deletion examples/demo_agents/video_plot_dqn.py
@@ -31,7 +31,7 @@

configure_logging(level="INFO")

env = gym_make("CartPole-v0")
env = gym_make("CartPole-v1")
agent = DQNAgent(env, epsilon_decay_interval=1000)
agent.set_writer(SummaryWriter())

2 changes: 1 addition & 1 deletion examples/demo_agents/video_plot_mdqn.py
@@ -31,7 +31,7 @@

configure_logging(level="INFO")

env = gym_make("CartPole-v0")
env = gym_make("CartPole-v1")
agent = MunchausenDQNAgent(env, epsilon_decay_interval=1000)
agent.set_writer(SummaryWriter())

4 changes: 2 additions & 2 deletions examples/demo_network/run_remote_manager.py
@@ -23,7 +23,7 @@

local_manager = AgentManager(
agent_class=REINFORCEAgent,
train_env=(gym_make, dict(id="CartPole-v0")),
train_env=(gym_make, dict(id="CartPole-v1")),
fit_budget=FIT_BUDGET,
init_kwargs=dict(gamma=0.99),
eval_kwargs=dict(eval_horizon=200, n_simulations=20),
@@ -36,7 +36,7 @@
remote_manager = RemoteAgentManager(
client,
agent_class=ResourceRequest(name="REINFORCEAgent"),
train_env=ResourceRequest(name="gym_make", kwargs=dict(id="CartPole-v0")),
train_env=ResourceRequest(name="gym_make", kwargs=dict(id="CartPole-v1")),
fit_budget=FIT_BUDGET,
init_kwargs=dict(gamma=0.99),
eval_kwargs=dict(eval_horizon=200, n_simulations=20),
Binary file modified profile.prof
4 changes: 2 additions & 2 deletions requirements.txt
@@ -11,8 +11,8 @@ pyyaml
numba
optuna
ffmpeg-python
-PyOpenGL==3.1.5
-PyOpenGL_accelerate==3.1.5
+PyOpenGL
+PyOpenGL_accelerate
pyvirtualdisplay
torch>=1.6.0
stable-baselines3
5 changes: 3 additions & 2 deletions rlberry/agents/experimental/torch/sac/sac.py
@@ -363,8 +363,9 @@ def _update(self):
def sample_parameters(cls, trial):
batch_size = trial.suggest_categorical("batch_size", [1, 4, 8, 16, 32])
gamma = trial.suggest_categorical("gamma", [0.9, 0.95, 0.99])
-learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
-entr_coef = trial.suggest_loguniform("entr_coef", 1e-8, 0.1)
+learning_rate = trial.suggest_float("learning_rate", 1e-5, 1, log=True)
+entr_coef = trial.suggest_float("entr_coef", 1e-8, 0.1, log=True)

k_epochs = trial.suggest_categorical("k_epochs", [1, 5, 10, 20])

return {
4 changes: 2 additions & 2 deletions rlberry/agents/torch/a2c/a2c.py
@@ -323,9 +323,9 @@ def _update(self):
def sample_parameters(cls, trial):
batch_size = trial.suggest_categorical("batch_size", [1, 4, 8, 16, 32])
gamma = trial.suggest_categorical("gamma", [0.9, 0.95, 0.99])
-learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
+learning_rate = trial.suggest_float("learning_rate", 1e-5, 1, log=True)

-entr_coef = trial.suggest_loguniform("entr_coef", 1e-8, 0.1)
+entr_coef = trial.suggest_float("entr_coef", 1e-8, 0.1, log=True)

return {
"batch_size": batch_size,
5 changes: 2 additions & 3 deletions rlberry/agents/torch/ppo/ppo.py
@@ -638,9 +638,8 @@ def _to_tensor(x):
def sample_parameters(cls, trial):
batch_size = trial.suggest_categorical("batch_size", [1, 4, 8, 16, 32])
gamma = trial.suggest_categorical("gamma", [0.9, 0.95, 0.99])
-learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
-
-entr_coef = trial.suggest_loguniform("entr_coef", 1e-8, 0.1)
+learning_rate = trial.suggest_float("learning_rate", 1e-5, 1, log=True)
+entr_coef = trial.suggest_float("entr_coef", 1e-8, 0.1, log=True)

clip_eps = trial.suggest_categorical("clip_eps", [0.1, 0.2, 0.3])

7 changes: 4 additions & 3 deletions rlberry/agents/torch/reinforce/reinforce.py
@@ -1,5 +1,6 @@
import torch
import inspect
+import numpy as np

import gym.spaces as spaces
from rlberry.agents import AgentWithSimplePolicy
@@ -197,7 +198,7 @@ def _update(self):
rewards.insert(0, discounted_reward)

# convert list to tensor
-states = torch.FloatTensor(self.memory.states).to(self.device)
+states = torch.FloatTensor(np.array(self.memory.states)).to(self.device)
actions = torch.LongTensor(self.memory.actions).to(self.device)
rewards = torch.FloatTensor(rewards).to(self.device)
if self.normalize:
@@ -225,9 +226,9 @@ def _update(self):
def sample_parameters(cls, trial):
batch_size = trial.suggest_categorical("batch_size", [1, 4, 8, 16, 32])
gamma = trial.suggest_categorical("gamma", [0.9, 0.95, 0.99])
-learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
+learning_rate = trial.suggest_float("learning_rate", 1e-5, 1, log=True)

-entr_coef = trial.suggest_loguniform("entr_coef", 1e-8, 0.1)
+entr_coef = trial.suggest_float("entr_coef", 1e-8, 0.1, log=True)

return {
"batch_size": batch_size,
4 changes: 2 additions & 2 deletions rlberry/agents/torch/tests/test_a2c.py
@@ -6,7 +6,7 @@


def test_a2c():
env = "CartPole-v0"
env = "CartPole-v1"
mdp = make(env)
env_ctor = Wrapper
env_kwargs = dict(env=mdp)
@@ -84,7 +84,7 @@ def test_a2c():
a2crlberry_stats.clear_output_dir()

# test also non default
env = "CartPole-v0"
env = "CartPole-v1"
mdp = make(env)
env_ctor = Wrapper
env_kwargs = dict(env=mdp)
2 changes: 1 addition & 1 deletion rlberry/agents/torch/tests/test_dqn.py
@@ -8,7 +8,7 @@
"use_double_dqn, use_prioritized_replay", [(False, False), (True, True)]
)
def test_dqn_agent(use_double_dqn, use_prioritized_replay):
env = gym_make("CartPole-v0")
env = gym_make("CartPole-v1")
agent = DQNAgent(
env,
learning_starts=5,
2 changes: 1 addition & 1 deletion rlberry/agents/torch/tests/test_mdqn.py
@@ -6,7 +6,7 @@

@pytest.mark.parametrize("use_prioritized_replay", [(False), (True)])
def test_mdqn_agent(use_prioritized_replay):
env = gym_make("CartPole-v0")
env = gym_make("CartPole-v1")
agent = MunchausenDQNAgent(
env,
learning_starts=5,
4 changes: 2 additions & 2 deletions rlberry/agents/torch/tests/test_ppo.py
@@ -20,7 +20,7 @@
@pytest.mark.timeout(300)
@pytest.mark.xfail(sys.platform == "win32", reason="bug with windows???")
def test_ppo():
env = "CartPole-v0"
env = "CartPole-v1"
mdp = make(env)
env_ctor = Wrapper
env_kwargs = dict(env=mdp)
@@ -98,7 +98,7 @@ def test_ppo():
pporlberry_stats.clear_output_dir()

# test also non default
env = "CartPole-v0"
env = "CartPole-v1"
mdp = make(env)
env_ctor = Wrapper
env_kwargs = dict(env=mdp)
2 changes: 1 addition & 1 deletion rlberry/agents/utils/replay.py
@@ -58,7 +58,7 @@ class ReplayBuffer:
>>> buffer.setup_entry("rewards", np.float32)
>>>
>>> # Store data in the replay
>>> env = gym_make("CartPole-v0")
>>> env = gym_make("CartPole-v1")
>>> for _ in range(500):
>>> done = False
>>> obs = env.reset()
2 changes: 1 addition & 1 deletion rlberry/envs/gym_make.py
@@ -23,7 +23,7 @@ def gym_make(id, wrap_spaces=False, **kwargs):
--------
>>> from rlberry.envs import gym_make
>>> env_ctor = gym_make
>>> env_kwargs = {"id": "CartPole-v0"}
>>> env_kwargs = {"id": "CartPole-v1"}
>>> env = env_ctor(**env_kwargs)
"""
if "module_import" in kwargs:
2 changes: 1 addition & 1 deletion rlberry/manager/tests/test_agent_manager.py
@@ -38,7 +38,7 @@ def policy(self, observation):
@classmethod
def sample_parameters(cls, trial):
hyperparameter1 = trial.suggest_categorical("hyperparameter1", [1, 2, 3])
-hyperparameter2 = trial.suggest_uniform("hyperparameter2", -10, 10)
+hyperparameter2 = trial.suggest_float("hyperparameter2", -10, 10)
return {"hyperparameter1": hyperparameter1, "hyperparameter2": hyperparameter2}


2 changes: 1 addition & 1 deletion rlberry/manager/tests/test_hyperparam_optim.py
@@ -35,7 +35,7 @@ def policy(self, observation):
@classmethod
def sample_parameters(cls, trial):
hyperparameter1 = trial.suggest_categorical("hyperparameter1", [1, 2, 3])
-hyperparameter2 = trial.suggest_uniform("hyperparameter2", -10, 10)
+hyperparameter2 = trial.suggest_float("hyperparameter2", -10, 10)
return {"hyperparameter1": hyperparameter1, "hyperparameter2": hyperparameter2}


2 changes: 1 addition & 1 deletion rlberry/network/tests/test_server.py
@@ -67,7 +67,7 @@ def test_remote_manager():
remote_manager = RemoteAgentManager(
client,
agent_class=ResourceRequest(name="REINFORCEAgent"),
train_env=ResourceRequest(name="gym_make", kwargs=dict(id="CartPole-v0")),
train_env=ResourceRequest(name="gym_make", kwargs=dict(id="CartPole-v1")),
fit_budget=10,
init_kwargs=dict(gamma=0.99),
eval_kwargs=dict(eval_horizon=2, n_simulations=2),
2 changes: 1 addition & 1 deletion rlberry/rendering/opengl_render2d.py
@@ -235,7 +235,7 @@ def get_video_data(self):
# See https://stackoverflow.com/a/42754578/5691288
#
string_image = self.get_gl_image_str()
-temp_surf = pg.image.fromstring(
+temp_surf = pg.image.frombytes(
string_image, (self.window_width, self.window_height), "RGB"
)
tmp_arr = pg.surfarray.array3d(temp_surf)
4 changes: 2 additions & 2 deletions rlberry/rendering/pygame_render2d.py
@@ -180,8 +180,8 @@ def get_video_data(self):
#
# See https://stackoverflow.com/a/42754578/5691288
#
-string_image = pg.image.tostring(self.screen, "RGB")
-temp_surf = pg.image.fromstring(
+string_image = pg.image.tobytes(self.screen, "RGB")
+temp_surf = pg.image.frombytes(
string_image, (self.window_width, self.window_height), "RGB"
)
tmp_arr = pg.surfarray.array3d(temp_surf)
2 changes: 1 addition & 1 deletion rlberry/utils/check_agent.py
@@ -27,7 +27,7 @@ def _make_tuple_env(env):
env_kwargs = {}
elif env == "vectorized_env_continuous":
env_ctor = gym_make
env_kwargs = dict(id="CartPole-v0")
env_kwargs = dict(id="CartPole-v1")
else:
raise ValueError("The env given in parameter is not implemented")
elif isinstance(env, tuple): # If env param is a tuple, return it