From f3490808bc6dce854a4a747a596343c1e415bb1e Mon Sep 17 00:00:00 2001 From: Alexander Date: Sat, 20 Jun 2020 11:19:30 +0800 Subject: [PATCH 1/3] Save the sacred results separately for each map_name and algo --- src/main.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main.py b/src/main.py index cd0230278..0b05c6da8 100644 --- a/src/main.py +++ b/src/main.py @@ -92,7 +92,9 @@ def config_copy(config): # Save to disk by default for sacred logger.info("Saving to FileStorageObserver in results/sacred.") - file_obs_path = os.path.join(results_path, "sacred") + file_obs_path = os.path.join(os.path.join( + os.path.join(results_path, config_dict['env_args']['map_name']), config_dict['name']), "sacred") + ex.observers.append(FileStorageObserver.create(file_obs_path)) ex.run_commandline(params) From f881975e41215253eb308da2af4d6461b9e1cfce Mon Sep 17 00:00:00 2001 From: Alexander Date: Thu, 13 Aug 2020 13:33:14 +0800 Subject: [PATCH 2/3] Reproducibility when using cuDNN Refer to: https://pytorch.org/docs/stable/notes/randomness.html#cudnn --- src/main.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/main.py b/src/main.py index 0b05c6da8..3c3da5fde 100644 --- a/src/main.py +++ b/src/main.py @@ -31,6 +31,10 @@ def my_main(_run, _config, _log): th.manual_seed(config["seed"]) config['env_args']['seed'] = config["seed"] + if config["use_cuda"]: + th.backends.cudnn.deterministic = True + th.backends.cudnn.benchmark = False + # run the framework run(_run, config, _log) From 494422ec48da1d49fd97f60017df77edb5f4af8a Mon Sep 17 00:00:00 2001 From: Alexander Date: Mon, 16 Nov 2020 20:12:44 +0800 Subject: [PATCH 3/3] Update q_learner.py Avoid the `AttributeError: 'torch.dtype' object has no attribute 'type'` error in later PyTorch versions --- src/learners/q_learner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/learners/q_learner.py b/src/learners/q_learner.py index 02de44c4a..1221a7bf3 100644 --- a/src/learners/q_learner.py +++ 
b/src/learners/q_learner.py @@ -108,7 +108,7 @@ def train(self, batch: EpisodeBatch, t_env: int, episode_num: int): if t_env - self.log_stats_t >= self.args.learner_log_interval: self.logger.log_stat("loss", loss.item(), t_env) - self.logger.log_stat("grad_norm", grad_norm, t_env) + self.logger.log_stat("grad_norm", grad_norm.item(), t_env) mask_elems = mask.sum().item() self.logger.log_stat("td_error_abs", (masked_td_error.abs().sum().item()/mask_elems), t_env) self.logger.log_stat("q_taken_mean", (chosen_action_qvals * mask).sum().item()/(mask_elems * self.args.n_agents), t_env)