From dafed47c5bcce489cb1c6dab2187cce72377c691 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 26 Sep 2024 14:50:44 -0700 Subject: [PATCH] Add support for logging max/percentiles of reward to tensorboard in ES (#375) This patch makes the blackbox learner write percentiles of the reward, along with the maximum reward, to TensorBoard so that more exact information on the per-step reward distribution is available, rather than having to resort to the distribution/histogram views, which do not have great support for numerical analysis within TensorBoard. --- compiler_opt/es/blackbox_learner.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/compiler_opt/es/blackbox_learner.py b/compiler_opt/es/blackbox_learner.py index 79c61acc..ab67ae4e 100644 --- a/compiler_opt/es/blackbox_learner.py +++ b/compiler_opt/es/blackbox_learner.py @@ -35,6 +35,9 @@ # If less than 40% of requests succeed, skip the step. _SKIP_STEP_SUCCESS_RATIO = 0.4 +# The percentiles to report as individual values in Tensorboard. +_PERCENTILES_TO_REPORT = [25, 50, 75] + @gin.configurable @dataclasses.dataclass(frozen=True) @@ -188,6 +191,16 @@ def _log_tf_summary(self, rewards: List[float]) -> None: tf.summary.scalar( 'reward/average_reward_train', np.mean(rewards), step=self._step) + tf.summary.scalar( + 'reward/maximum_reward_train', np.max(rewards), step=self._step) + + for percentile_to_report in _PERCENTILES_TO_REPORT: + percentile_value = np.percentile(rewards, percentile_to_report) + tf.summary.scalar( + f'reward/{percentile_value}_percentile', + percentile_value, + step=self._step) + tf.summary.histogram('reward/reward_train', rewards, step=self._step) train_regressions = [reward for reward in rewards if reward < 0]