# Patch summary (preamble text; everything from the first "diff --git" header on is the unchanged patch).
#
# src/learners/q_learner.py, hunk at -108/+108 (hunk context: train):
#   The "grad_norm" stat is now logged as grad_norm.item() instead of grad_norm,
#   matching the neighbouring stats ("loss", "td_error_abs", "q_taken_mean"),
#   which are all passed through .item() before being handed to log_stat.
#   NOTE(review): this assumes grad_norm is an object with .item() (e.g. a tensor);
#   where grad_norm is produced is not visible in this patch -- confirm that it is
#   never a plain Python float.
#
# src/main.py, hunk at -31/+31 (hunk context: my_main):
#   After th.manual_seed(...) and the env seed assignment, when config["use_cuda"]
#   is truthy the patch sets th.backends.cudnn.deterministic = True and
#   th.backends.cudnn.benchmark = False before run(_run, config, _log) is called.
#
# src/main.py, hunk at -92/+96 (git labels it "config_copy"; that may only be the
# nearest preceding def -- the enclosing scope is not visible here):
#   file_obs_path changes from <results_path>/sacred to
#   <results_path>/<config_dict['env_args']['map_name']>/<config_dict['name']>/sacred.
#   NOTE(review): the unchanged logger.info message still reads
#   "Saving to FileStorageObserver in results/sacred." and no longer names the
#   extra map_name/name path components.
#   NOTE(review): the three nested os.path.join calls join the same four parts that
#   a single os.path.join(a, b, c, "sacred") call would.
#   NOTE(review): assumes config_dict['env_args'] always contains 'map_name' --
#   verify for environments configured without that key.
diff --git a/src/learners/q_learner.py b/src/learners/q_learner.py index 02de44c4a..1221a7bf3 100644 --- a/src/learners/q_learner.py +++ b/src/learners/q_learner.py @@ -108,7 +108,7 @@ def train(self, batch: EpisodeBatch, t_env: int, episode_num: int): if t_env - self.log_stats_t >= self.args.learner_log_interval: self.logger.log_stat("loss", loss.item(), t_env) - self.logger.log_stat("grad_norm", grad_norm, t_env) + self.logger.log_stat("grad_norm", grad_norm.item(), t_env) mask_elems = mask.sum().item() self.logger.log_stat("td_error_abs", (masked_td_error.abs().sum().item()/mask_elems), t_env) self.logger.log_stat("q_taken_mean", (chosen_action_qvals * mask).sum().item()/(mask_elems * self.args.n_agents), t_env) diff --git a/src/main.py b/src/main.py index cd0230278..3c3da5fde 100644 --- a/src/main.py +++ b/src/main.py @@ -31,6 +31,10 @@ def my_main(_run, _config, _log): th.manual_seed(config["seed"]) config['env_args']['seed'] = config["seed"] + if config["use_cuda"]: + th.backends.cudnn.deterministic = True + th.backends.cudnn.benchmark = False + # run the framework run(_run, config, _log) @@ -92,7 +96,9 @@ def config_copy(config): # Save to disk by default for sacred logger.info("Saving to FileStorageObserver in results/sacred.") - file_obs_path = os.path.join(results_path, "sacred") + file_obs_path = os.path.join(os.path.join( + os.path.join(results_path, config_dict['env_args']['map_name']), config_dict['name']), "sacred") + ex.observers.append(FileStorageObserver.create(file_obs_path)) ex.run_commandline(params)