From 7a2b25fc12ebf2a9ba9a05df56e210b35872ebb0 Mon Sep 17 00:00:00 2001
From: Viktor Makoviychuk
Date: Wed, 3 Jul 2024 23:22:40 -0700
Subject: [PATCH] Increased time resolution for more precise performance tracking. (#295)

* Increased time resolution for more precise performance tracking.

* Updated recommended PyTorch version.
---
 README.md                         |  6 +++---
 rl_games/algos_torch/sac_agent.py | 15 +++++++--------
 rl_games/common/a2c_common.py     | 28 ++++++++++++++--------------
 rl_games/torch_runner.py          |  2 +-
 4 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/README.md b/README.md
index 9cf65d70..591166f5 100644
--- a/README.md
+++ b/README.md
@@ -67,10 +67,10 @@ Explore RL Games quick and easily in colab notebooks:
 
 ## Installation
 
-For maximum training performance a preliminary installation of Pytorch 1.9+ with CUDA 11.1+ is highly recommended:
+For maximum training performance, a preliminary installation of PyTorch 2.2 or newer with CUDA 12.1 or newer is highly recommended:
 
-```conda install pytorch torchvision cudatoolkit=11.3 -c pytorch -c nvidia``` or:
-```pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html```
+```conda install pytorch torchvision pytorch-cuda=12.1 -c pytorch -c nvidia``` or:
+```pip install torch torchvision```
 
 Then:
 
diff --git a/rl_games/algos_torch/sac_agent.py b/rl_games/algos_torch/sac_agent.py
index fd79fb7a..d4010fc4 100644
--- a/rl_games/algos_torch/sac_agent.py
+++ b/rl_games/algos_torch/sac_agent.py
@@ -441,7 +441,7 @@ def clear_stats(self):
         self.algo_observer.after_clear_stats()
 
     def play_steps(self, random_exploration = False):
-        total_time_start = time.time()
+        total_time_start = time.perf_counter()
         total_update_time = 0
         total_time = 0
         step_time = 0.0
@@ -466,11 +466,10 @@ def play_steps(self, random_exploration = False):
                 with torch.no_grad():
                     action = self.act(obs.float(), self.env_info["action_space"].shape, sample=True)
 
-            step_start = time.time()
-
+            step_start = time.perf_counter()
             with torch.no_grad():
                 next_obs, rewards, dones, infos = self.env_step(action)
-            step_end = time.time()
+            step_end = time.perf_counter()
 
             self.current_rewards += rewards
             self.current_lengths += 1
@@ -500,7 +499,6 @@ def play_steps(self, random_exploration = False):
             self.obs = next_obs.clone()
 
             rewards = self.rewards_shaper(rewards)
-
             self.replay_buffer.add(obs, action, torch.unsqueeze(rewards, 1), next_obs_processed, torch.unsqueeze(dones, 1))
 
             if isinstance(obs, dict):
@@ -508,9 +506,10 @@ def play_steps(self, random_exploration = False):
 
             if not random_exploration:
                 self.set_train()
-                update_time_start = time.time()
+
+                update_time_start = time.perf_counter()
                 actor_loss_info, critic1_loss, critic2_loss = self.update(self.epoch_num)
-                update_time_end = time.time()
+                update_time_end = time.perf_counter()
                 update_time = update_time_end - update_time_start
 
                 self.extract_actor_stats(actor_losses, entropies, alphas, alpha_losses, actor_loss_info)
@@ -521,7 +520,7 @@ def play_steps(self, random_exploration = False):
 
                 total_update_time += update_time
 
-        total_time_end = time.time()
+        total_time_end = time.perf_counter()
         total_time = total_time_end - total_time_start
         play_time = total_time - total_update_time
 
diff --git a/rl_games/common/a2c_common.py b/rl_games/common/a2c_common.py
index f9bd5a14..19b95985 100644
--- a/rl_games/common/a2c_common.py
+++ b/rl_games/common/a2c_common.py
@@ -757,9 +757,9 @@ def play_steps(self):
             if self.has_central_value:
                 self.experience_buffer.update_data('states', n, self.obs['states'])
 
-            step_time_start = time.time()
+            step_time_start = time.perf_counter()
             self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions'])
-            step_time_end = time.time()
+            step_time_end = time.perf_counter()
 
             step_time += (step_time_end - step_time_start)
 
@@ -830,9 +830,9 @@ def play_steps_rnn(self):
             if self.has_central_value:
                 self.experience_buffer.update_data('states', n, self.obs['states'])
 
-            step_time_start = time.time()
+            step_time_start = time.perf_counter()
             self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions'])
-            step_time_end = time.time()
+            step_time_end = time.perf_counter()
 
             step_time += (step_time_end - step_time_start)
 
@@ -920,7 +920,7 @@ def train_epoch(self):
         super().train_epoch()
 
         self.set_eval()
-        play_time_start = time.time()
+        play_time_start = time.perf_counter()
 
         with torch.no_grad():
             if self.is_rnn:
@@ -930,8 +930,8 @@ def train_epoch(self):
 
         self.set_train()
 
-        play_time_end = time.time()
-        update_time_start = time.time()
+        play_time_end = time.perf_counter()
+        update_time_start = time.perf_counter()
         rnn_masks = batch_dict.get('rnn_masks', None)
 
         self.curr_frames = batch_dict.pop('played_frames')
@@ -966,7 +966,7 @@ def train_epoch(self):
         if self.normalize_input:
             self.model.running_mean_std.eval() # don't need to update statstics more than one miniepoch
 
-        update_time_end = time.time()
+        update_time_end = time.perf_counter()
         play_time = play_time_end - play_time_start
         update_time = update_time_end - update_time_start
         total_time = update_time_end - play_time_start
@@ -1034,7 +1034,7 @@ def prepare_dataset(self, batch_dict):
     def train(self):
         self.init_tensors()
         self.mean_rewards = self.last_mean_rewards = -100500
-        start_time = time.time()
+        start_time = time.perf_counter()
         total_time = 0
         rep_count = 0
         # self.frame = 0 # loading from checkpoint
@@ -1183,15 +1183,15 @@ def train_epoch(self):
         super().train_epoch()
 
         self.set_eval()
-        play_time_start = time.time()
+        play_time_start = time.perf_counter()
 
         with torch.no_grad():
             if self.is_rnn:
                 batch_dict = self.play_steps_rnn()
             else:
                 batch_dict = self.play_steps()
-        play_time_end = time.time()
-        update_time_start = time.time()
+        play_time_end = time.perf_counter()
+        update_time_start = time.perf_counter()
         rnn_masks = batch_dict.get('rnn_masks', None)
 
         self.set_train()
@@ -1240,7 +1240,7 @@ def train_epoch(self):
         if self.normalize_input:
             self.model.running_mean_std.eval() # don't need to update statstics more than one miniepoch
 
-        update_time_end = time.time()
+        update_time_end = time.perf_counter()
         play_time = play_time_end - play_time_start
         update_time = update_time_end - update_time_start
         total_time = update_time_end - play_time_start
@@ -1310,7 +1310,7 @@ def prepare_dataset(self, batch_dict):
     def train(self):
         self.init_tensors()
         self.last_mean_rewards = -100500
-        start_time = time.time()
+        start_time = time.perf_counter()
         total_time = 0
         rep_count = 0
         self.obs = self.env_reset()
diff --git a/rl_games/torch_runner.py b/rl_games/torch_runner.py
index 4377d29b..86be48ac 100644
--- a/rl_games/torch_runner.py
+++ b/rl_games/torch_runner.py
@@ -63,7 +63,7 @@ def __init__(self, algo_observer=None):
         self.algo_observer = algo_observer if algo_observer else DefaultAlgoObserver()
 
         torch.backends.cudnn.benchmark = True
-        ### it didnot help for lots for openai gym envs anyway :(
+        ### it did not help for lots of openai gym envs anyway :(
         #torch.backends.cudnn.deterministic = True
         #torch.use_deterministic_algorithms(True)
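
For context, the pattern this patch standardizes on is sketched below. This is not part of the diff: `timed_env_step` and `dummy_step` are hypothetical names used only for illustration. The motivation is that `time.perf_counter()` is a monotonic, high-resolution clock intended for measuring elapsed intervals, whereas `time.time()` reports wall-clock time, may have coarser resolution on some platforms, and can jump if the system clock is adjusted.

```python
import time

def timed_env_step(env_step, actions):
    """Return (result, elapsed_seconds) for a single environment step.

    Uses time.perf_counter() so short step/update intervals are measured
    with a monotonic, high-resolution clock, mirroring the patch.
    """
    step_start = time.perf_counter()
    result = env_step(actions)
    step_end = time.perf_counter()
    return result, step_end - step_start

if __name__ == "__main__":
    # Stand-in for an actual env_step call, just to make the sketch runnable.
    dummy_step = lambda actions: sum(actions)
    _, elapsed = timed_env_step(dummy_step, [0.1] * 1000)
    print(f"step_time: {elapsed:.9f} s")
```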