diff --git a/coltra/envs/subproc_vec_env.py b/coltra/envs/subproc_vec_env.py index 66db2471..ccc50eb7 100644 --- a/coltra/envs/subproc_vec_env.py +++ b/coltra/envs/subproc_vec_env.py @@ -220,7 +220,9 @@ def _flatten_scalar(values: List[Dict[str, Any]]) -> Dict[str, np.ndarray]: return {k: np.array([v[k] for v in values]) for k in keys} -def _flatten_info(infos: List[Dict[str, np.ndarray]]) -> Dict[str, Union[np.ndarray, List]]: +def _flatten_info( + infos: List[Dict[str, np.ndarray]] +) -> Dict[str, Union[np.ndarray, List]]: all_metrics = {} all_keys = set([k for dictionary in infos for k in dictionary]) diff --git a/coltra/envs/unity_envs.py b/coltra/envs/unity_envs.py index 26b9289e..3430d2af 100644 --- a/coltra/envs/unity_envs.py +++ b/coltra/envs/unity_envs.py @@ -212,7 +212,9 @@ def step( return obs_dict, reward_dict, done_dict, info_dict - def reset(self, mode: Optional[Mode] = None, num_agents: Optional[int] = None, **kwargs) -> ObsDict: + def reset( + self, mode: Optional[Mode] = None, num_agents: Optional[int] = None, **kwargs + ) -> ObsDict: if mode: self.param_channel.set_float_parameter("mode", mode.value) if num_agents: diff --git a/coltra/scripts/train_gym.py b/coltra/scripts/train_gym.py index 2b0c8fc5..9d6678c3 100644 --- a/coltra/scripts/train_gym.py +++ b/coltra/scripts/train_gym.py @@ -41,7 +41,7 @@ class Parser(BaseParser): } -if __name__ == '__main__': +if __name__ == "__main__": CUDA = torch.cuda.is_available() args = Parser() @@ -83,7 +83,6 @@ class Parser(BaseParser): model_cls = FancyMLPModel agent_cls = CAgent if isinstance(action_space, gym.spaces.Box) else DAgent - if args.start_dir: agent = agent_cls.load_agent(args.start_dir, weight_idx=args.start_idx) else: @@ -94,4 +93,4 @@ class Parser(BaseParser): agent.cuda() trainer = PPOCrowdTrainer(agent, env, config) - trainer.train(args.iters, disable_tqdm=False, save_path=trainer.path) \ No newline at end of file + trainer.train(args.iters, disable_tqdm=False, save_path=trainer.path) diff --git a/setup.py b/setup.py index 87661b9c..3f642867 100644 --- a/setup.py +++ b/setup.py @@ -1,12 +1,12 @@ from setuptools import setup setup( - name='coltra-rl', - version='0.1.0', - packages=['coltra', 'coltra.envs', 'coltra.models'], - url='https://github.com/redtachyon/coltra-rl', - license='GNU GPLv3', - author='RedTachyon', - author_email='ariel.j.kwiatkowski@gmail.com', - description='Coltra-RL is a simple moddable RL algorithm implementation' + name="coltra-rl", + version="0.1.0", + packages=["coltra", "coltra.envs", "coltra.models"], + url="https://github.com/redtachyon/coltra-rl", + license="GNU GPLv3", + author="RedTachyon", + author_email="ariel.j.kwiatkowski@gmail.com", + description="Coltra-RL is a simple moddable RL algorithm implementation", ) diff --git a/tests/test_agents.py b/tests/test_agents.py index d292b553..fd6398c7 100644 --- a/tests/test_agents.py +++ b/tests/test_agents.py @@ -9,7 +9,7 @@ def test_constant_agent(): obs = Observation(vector=np.random.randn(5, 81), buffer=np.random.randn(5, 10, 4)) - agent = ConstantAgent(np.array([1., 1.], dtype=np.float32)) + agent = ConstantAgent(np.array([1.0, 1.0], dtype=np.float32)) actions, _, _ = agent.act(obs_batch=obs) @@ -79,8 +79,10 @@ def test_constant_agent(): def test_fancy_mlp_agent(): - obs = Observation(vector=np.random.randn(5, 81).astype(np.float32), - buffer=np.random.randn(5, 10, 4).astype(np.float32)) + obs = Observation( + vector=np.random.randn(5, 81).astype(np.float32), + buffer=np.random.randn(5, 10, 4).astype(np.float32), + ) model = FancyMLPModel({"input_size": 81, "hidden_sizes": [32, 32]}) @@ -126,10 +128,14 @@ def test_fancy_mlp_agent(): def test_discrete_fancy_mlp_agent(): - obs = Observation(vector=np.random.randn(5, 81).astype(np.float32), - buffer=np.random.randn(5, 10, 4).astype(np.float32)) - - model = FancyMLPModel({"input_size": 81, "hidden_sizes": [32, 32], "discrete": True}) + obs = Observation( + vector=np.random.randn(5, 81).astype(np.float32), + buffer=np.random.randn(5, 10, 4).astype(np.float32), + ) + + model = FancyMLPModel( + {"input_size": 81, "hidden_sizes": [32, 32], "discrete": True} + ) assert len(model.policy_network.hidden_layers) == 2 assert model.discrete diff --git a/tests/test_buffers.py b/tests/test_buffers.py index 55f501c6..22145a4e 100644 --- a/tests/test_buffers.py +++ b/tests/test_buffers.py @@ -2,8 +2,18 @@ import numpy as np import torch -from coltra.buffers import Observation, Action, TensorArray, Reward, LogProb, Value, Done, MemoryRecord, MemoryBuffer, \ - AgentMemoryBuffer +from coltra.buffers import ( + Observation, + Action, + TensorArray, + Reward, + LogProb, + Value, + Done, + MemoryRecord, + MemoryBuffer, + AgentMemoryBuffer, +) def test_observation_array(): @@ -27,7 +37,9 @@ def test_observation_misshaped(): def test_obs_to_tensor(): - obs = Observation(vector=np.random.randn(5, 81), buffer=np.random.randn(5, 10, 4)).tensor() + obs = Observation( + vector=np.random.randn(5, 81), buffer=np.random.randn(5, 10, 4) + ).tensor() assert obs.batch_size == 5 assert obs.vector.shape == (5, 81) assert obs.buffer.shape == (5, 10, 4) @@ -44,8 +56,10 @@ def test_obs_get(): def test_obs_stack(): - obs_list = [Observation(vector=np.random.randn(81), buffer=np.random.randn(10, 4)) - for _ in range(7)] + obs_list = [ + Observation(vector=np.random.randn(81), buffer=np.random.randn(10, 4)) + for _ in range(7) + ] obs = Observation.stack_tensor(obs_list, dim=0) @@ -55,8 +69,10 @@ def test_obs_stack(): def test_obs_cat(): - obs_list = [Observation(vector=np.random.randn(5, 81), buffer=np.random.randn(5, 10, 4)) - for _ in range(5)] + obs_list = [ + Observation(vector=np.random.randn(5, 81), buffer=np.random.randn(5, 10, 4)) + for _ in range(5) + ] obs = Observation.cat_tensor(obs_list, dim=0) @@ -87,10 +103,10 @@ def test_action_misshaped(): def test_apply(): obs = Observation(vector=np.ones((5, 81)), buffer=np.ones((5, 10, 4))) - new_obs = obs.apply(lambda x: 2*x) + new_obs = obs.apply(lambda x: 2 * x) - assert np.allclose(new_obs.vector, 2*np.ones((5, 81))) - assert np.allclose(new_obs.buffer, 2*np.ones((5, 10, 4))) + assert np.allclose(new_obs.vector, 2 * np.ones((5, 81))) + assert np.allclose(new_obs.buffer, 2 * np.ones((5, 10, 4))) def test_memory_buffer(): @@ -99,10 +115,20 @@ def test_memory_buffer(): batch_size = 100 for _ in range(batch_size): - obs = {agent_id: Observation(vector=np.random.randn(81).astype(np.float32), - buffer=np.random.randn(10, 4).astype(np.float32)) for agent_id in agents} - action = {agent_id: Action(continuous=np.random.randn(2).astype(np.float32)) for agent_id in agents} - reward = {agent_id: np.random.randn(1).astype(np.float32) for agent_id in agents} + obs = { + agent_id: Observation( + vector=np.random.randn(81).astype(np.float32), + buffer=np.random.randn(10, 4).astype(np.float32), + ) + for agent_id in agents + } + action = { + agent_id: Action(continuous=np.random.randn(2).astype(np.float32)) + for agent_id in agents + } + reward = { + agent_id: np.random.randn(1).astype(np.float32) for agent_id in agents + } value = {agent_id: np.random.randn(1).astype(np.float32) for agent_id in agents} done = {agent_id: False for agent_id in agents} @@ -117,8 +143,6 @@ def test_memory_buffer(): crowd_data = memory.crowd_tensorify() assert isinstance(crowd_data, MemoryRecord) - assert crowd_data.obs.vector.shape == (3*batch_size, 81) - assert crowd_data.obs.buffer.shape == (3*batch_size, 10, 4) - assert crowd_data.obs.batch_size == 3*batch_size - - + assert crowd_data.obs.vector.shape == (3 * batch_size, 81) + assert crowd_data.obs.buffer.shape == (3 * batch_size, 10, 4) + assert crowd_data.obs.batch_size == 3 * batch_size diff --git a/tests/test_collection.py b/tests/test_collection.py index e13a276b..e52127ba 100644 --- a/tests/test_collection.py +++ b/tests/test_collection.py @@ -16,7 +16,12 @@ def test_const_reward(): assert data.obs.vector.shape == (1000, 1) assert torch.allclose(data.obs.vector, torch.ones(1000, 1)) - assert torch.allclose(data.reward, torch.ones(1000, )) + assert torch.allclose( + data.reward, + torch.ones( + 1000, + ), + ) assert all(data.done) assert env.render() == 0 diff --git a/tests/test_discounting.py b/tests/test_discounting.py index cc6692b2..dbc52a17 100644 --- a/tests/test_discounting.py +++ b/tests/test_discounting.py @@ -1,12 +1,17 @@ import numpy as np import torch -from coltra.discounting import discount_experience, _discount_bgae, convert_params, get_beta_vector +from coltra.discounting import ( + discount_experience, + _discount_bgae, + convert_params, + get_beta_vector, +) def test_convert(): assert np.allclose(convert_params(0.5, 0), (0.5, np.inf)) - assert np.allclose(convert_params(0.9, 0.5), (9*2, 2)) - assert np.allclose(convert_params(0.99, 0.5), (99*2, 2)) + assert np.allclose(convert_params(0.9, 0.5), (9 * 2, 2)) + assert np.allclose(convert_params(0.99, 0.5), (99 * 2, 2)) assert np.allclose(convert_params(0.9, 1), (9, 1)) @@ -15,16 +20,16 @@ def test_beta_vector(): Γ = get_beta_vector(T=100, α=0.9, β=np.inf) assert Γ.shape == (100,) - assert np.allclose(Γ, np.array([0.9**t for t in range(100)])) + assert np.allclose(Γ, np.array([0.9 ** t for t in range(100)])) # Hyperbolic Γ = get_beta_vector(T=100, α=0.9, β=1) assert Γ.shape == (100,) - assert np.allclose(Γ, np.array([1 / (1 + (1/0.9) * t) for t in range(100)])) + assert np.allclose(Γ, np.array([1 / (1 + (1 / 0.9) * t) for t in range(100)])) # Some intermediate values - Γ = get_beta_vector(T=100, α=0.9, β=2.) + Γ = get_beta_vector(T=100, α=0.9, β=2.0) assert Γ.shape == (100,) Γ = get_beta_vector(T=100, α=0.99, β=0.5) @@ -46,9 +51,9 @@ def test_discounting(): rewards = torch.cat([torch.zeros(10), torch.zeros(10) + 1, torch.zeros(10) + 2]) values = torch.cat([torch.zeros(10), torch.zeros(10) + 1, torch.zeros(10) + 2]) - dones = torch.tensor([False if (t+1) % 5 else True for t in range(30)]) + dones = torch.tensor([False if (t + 1) % 5 else True for t in range(30)]) - returns, advantages = discount_experience(rewards, values, dones, 0.99, 0., 1.) + returns, advantages = discount_experience(rewards, values, dones, 0.99, 0.0, 1.0) assert isinstance(returns, torch.Tensor) assert isinstance(advantages, torch.Tensor) @@ -58,7 +63,7 @@ def test_discounting(): rewards = torch.randn(1000) values = torch.randn(1000) - dones = torch.tensor([False if (t+1) % 500 else True for t in range(1000)]) + dones = torch.tensor([False if (t + 1) % 500 else True for t in range(1000)]) returns, advantages = discount_experience(rewards, values, dones, 0.99, 0.5, 0.95) @@ -72,7 +77,7 @@ def test_discounting(): rewards = torch.ones(2000) values = torch.zeros(2000) - dones = torch.tensor([False if (t+1) % 1000 else True for t in range(2000)]) + dones = torch.tensor([False if (t + 1) % 1000 else True for t in range(2000)]) returns, advantages = discount_experience(rewards, values, dones, 0.99, 1.0, 0.95) @@ -90,4 +95,4 @@ def test_discounting(): # # def test_episode_rewards(): # rewards = torch.cat([torch.zeros(10), torch.zeros(10) + 1, torch.zeros(10) + 2]) -# dones = torch.tensor([...]) \ No newline at end of file +# dones = torch.tensor([...]) diff --git a/tests/test_gym.py b/tests/test_gym.py index 8de3604c..f390a47f 100644 --- a/tests/test_gym.py +++ b/tests/test_gym.py @@ -37,8 +37,7 @@ def test_multigym(): assert isinstance(obs[name], Observation) assert isinstance(obs[name].vector, np.ndarray) - action = {key: Action(discrete=env.action_space.sample()) - for key in obs} + action = {key: Action(discrete=env.action_space.sample()) for key in obs} obs, reward, done, info = env.step(action) assert isinstance(obs, dict) @@ -147,4 +146,3 @@ def test_training(): trainer = PPOCrowdTrainer(agent, env, config) trainer.train(2, disable_tqdm=False, save_path=None) - diff --git a/tests/test_info_stacking.py b/tests/test_info_stacking.py index 572df065..d712e4d9 100644 --- a/tests/test_info_stacking.py +++ b/tests/test_info_stacking.py @@ -7,10 +7,10 @@ def test_flatten_info(): # I had some problems with this, so an explicit test infos = [ - {"foo": "asdf", "m_metric": np_float(1.)}, - {"bar": 1, "m_metric": np_float(2.)}, + {"foo": "asdf", "m_metric": np_float(1.0)}, + {"bar": 1, "m_metric": np_float(2.0)}, {}, - {"foo": "potato", "bar": "saf"} + {"foo": "potato", "bar": "saf"}, ] info = _flatten_info(infos) diff --git a/tests/test_models.py b/tests/test_models.py index 9229cb4e..f532470d 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -15,12 +15,14 @@ def test_fc(): torch.manual_seed(0) - network = FCNetwork(input_size=10, - output_sizes=[2, 2], - hidden_sizes=[64, 64], - activation='tanh', - initializer='kaiming_uniform', - is_policy=True) + network = FCNetwork( + input_size=10, + output_sizes=[2, 2], + hidden_sizes=[64, 64], + activation="tanh", + initializer="kaiming_uniform", + is_policy=True, + ) inp = torch.zeros(5, 10) [out1, out2] = network(inp) @@ -40,12 +42,14 @@ def test_fc(): def test_empty_fc(): - network = FCNetwork(input_size=10, - output_sizes=[32], - hidden_sizes=[], - activation='elu', - initializer='kaiming_uniform', - is_policy=False) + network = FCNetwork( + input_size=10, + output_sizes=[32], + hidden_sizes=[], + activation="elu", + initializer="kaiming_uniform", + is_policy=False, + ) inp = torch.randn(5, 10) [out] = network(inp) @@ -55,10 +59,9 @@ def test_empty_fc(): def test_lee(): - network = LeeNetwork(input_size=4, - output_sizes=[2, 4], - rays_input_size=126, - conv_filters=2) + network = LeeNetwork( + input_size=4, output_sizes=[2, 4], rays_input_size=126, conv_filters=2 + ) obs = Observation(vector=torch.randn(10, 4), rays=torch.randn(10, 126)) @@ -91,10 +94,7 @@ class Config(BaseConfig): config = Config.to_dict() model = RelationModel(config) - obs = Observation( - vector=torch.rand(7, 4), - buffer=torch.rand(7, 11, 5) - ) + obs = Observation(vector=torch.rand(7, 4), buffer=torch.rand(7, 11, 5)) action, state, extra = model(obs, get_value=True) @@ -102,4 +102,4 @@ class Config(BaseConfig): assert action.loc.shape == (7, 2) assert action.scale.shape == (7, 2) assert state == () - assert extra["value"].shape == (7, 1) \ No newline at end of file + assert extra["value"].shape == (7, 1) diff --git a/tests/test_optimization.py b/tests/test_optimization.py index 61d8f957..5d2589fc 100644 --- a/tests/test_optimization.py +++ b/tests/test_optimization.py @@ -23,9 +23,9 @@ def test_minibatches(): assert m_obs.vector.shape == (80, 4) assert m_logprobs.shape == (80,) assert m_values.shape == (80,) - assert torch.allclose(m_obs.vector, obs[i*80: i*80 + 80].vector) - assert torch.allclose(m_logprobs, logprobs[i*80: i*80 + 80]) - assert torch.allclose(m_logprobs, logprobs[i*80: i*80 + 80]) + assert torch.allclose(m_obs.vector, obs[i * 80 : i * 80 + 80].vector) + assert torch.allclose(m_logprobs, logprobs[i * 80 : i * 80 + 80]) + assert torch.allclose(m_logprobs, logprobs[i * 80 : i * 80 + 80]) assert count == 10 @@ -37,7 +37,9 @@ def test_shuffle_minibatches(): logprobs = torch.randn(800, generator=rng) values = torch.randn(800, generator=rng) - batches = minibatches(obs, logprobs, values, batch_size=80, shuffle=True, rng=np_rng) + batches = minibatches( + obs, logprobs, values, batch_size=80, shuffle=True, rng=np_rng + ) count = 0 for i, (m_obs, m_logprobs, m_values) in enumerate(batches): count += 1 @@ -46,9 +48,9 @@ def test_shuffle_minibatches(): assert m_values.shape == (80,) # Due to stochasticity of shuffling, this *could* fail if stars align and something messes up the random seed # But in principle, if a minibatch is not shuffled, something might be wrong - assert not torch.allclose(m_obs.vector, obs[i*80: i*80 + 80].vector) - assert not torch.allclose(m_logprobs, logprobs[i*80: i*80 + 80]) - assert not torch.allclose(m_logprobs, logprobs[i*80: i*80 + 80]) + assert not torch.allclose(m_obs.vector, obs[i * 80 : i * 80 + 80].vector) + assert not torch.allclose(m_logprobs, logprobs[i * 80 : i * 80 + 80]) + assert not torch.allclose(m_logprobs, logprobs[i * 80 : i * 80 + 80]) assert count == 10 @@ -67,16 +69,16 @@ def test_uneven_minibatches(): assert m_obs.vector.shape == (80, 4) assert m_logprobs.shape == (80,) assert m_values.shape == (80,) - assert torch.allclose(m_obs.vector, obs[i*80: i*80 + 80].vector) - assert torch.allclose(m_logprobs, logprobs[i*80: i*80 + 80]) - assert torch.allclose(m_logprobs, logprobs[i*80: i*80 + 80]) + assert torch.allclose(m_obs.vector, obs[i * 80 : i * 80 + 80].vector) + assert torch.allclose(m_logprobs, logprobs[i * 80 : i * 80 + 80]) + assert torch.allclose(m_logprobs, logprobs[i * 80 : i * 80 + 80]) else: assert m_obs.vector.shape == (50, 4) assert m_logprobs.shape == (50,) assert m_values.shape == (50,) - assert torch.allclose(m_obs.vector, obs[i*80: i*80 + 80].vector) - assert torch.allclose(m_logprobs, logprobs[i*80: i*80 + 80]) - assert torch.allclose(m_logprobs, logprobs[i*80: i*80 + 80]) + assert torch.allclose(m_obs.vector, obs[i * 80 : i * 80 + 80].vector) + assert torch.allclose(m_logprobs, logprobs[i * 80 : i * 80 + 80]) + assert torch.allclose(m_logprobs, logprobs[i * 80 : i * 80 + 80]) assert count == 11 @@ -88,12 +90,15 @@ def test_ppo_step(): data, _ = collect_crowd_data(agent, env, num_steps=100) # 1000 steps total - ppo = CrowdPPOptimizer(agent=agent, config={ - # 30 updates total - "minibatch_size": 100, - "ppo_epochs": 3, - "use_gpu": torch.cuda.is_available(), - }) + ppo = CrowdPPOptimizer( + agent=agent, + config={ + # 30 updates total + "minibatch_size": 100, + "ppo_epochs": 3, + "use_gpu": torch.cuda.is_available(), + }, + ) data.cpu() diff --git a/tests/test_vecenv.py b/tests/test_vecenv.py index 96cab7aa..334bce35 100644 --- a/tests/test_vecenv.py +++ b/tests/test_vecenv.py @@ -6,7 +6,7 @@ def test_venv(): - + venv = ConstRewardEnv.get_venv(workers=8, num_agents=10) obs = venv.reset() assert len(obs) == 80 @@ -28,9 +28,9 @@ def test_venv(): def test_collect(): venv = ConstRewardEnv.get_venv(workers=8, num_agents=10) - agent = ConstantAgent(np.array([1.])) + agent = ConstantAgent(np.array([1.0])) data, stats = collect_crowd_data(agent, venv, 500) - assert data.obs.vector.shape == (8*10 * 500, 1) - assert stats["stat"].shape == (500, 3 * 8) \ No newline at end of file + assert data.obs.vector.shape == (8 * 10 * 500, 1) + assert stats["stat"].shape == (500, 3 * 8)