Add support for logging max/percentiles of reward to tensorboard in ES (#375)

This patch makes the blackbox learner write percentiles of the reward, along
with the maximum reward, to TensorBoard. This gives more precise per-step
information about the reward distribution than the existing
distribution/histogram summaries, which TensorBoard does not support well for
numerical analysis.
boomanaiden154 authored Sep 26, 2024
1 parent 9cc035f commit dafed47
Showing 1 changed file with 13 additions and 0 deletions.
compiler_opt/es/blackbox_learner.py: 13 additions & 0 deletions
@@ -35,6 +35,9 @@
# If less than 40% of requests succeed, skip the step.
_SKIP_STEP_SUCCESS_RATIO = 0.4

# The percentiles to report as individual values in Tensorboard.
_PERCENTILES_TO_REPORT = [25, 50, 75]


@gin.configurable
@dataclasses.dataclass(frozen=True)
@@ -188,6 +191,16 @@ def _log_tf_summary(self, rewards: List[float]) -> None:
    tf.summary.scalar(
        'reward/average_reward_train', np.mean(rewards), step=self._step)

    tf.summary.scalar(
        'reward/maximum_reward_train', np.max(rewards), step=self._step)

    for percentile_to_report in _PERCENTILES_TO_REPORT:
      percentile_value = np.percentile(rewards, percentile_to_report)
      tf.summary.scalar(
          f'reward/{percentile_to_report}_percentile',
          percentile_value,
          step=self._step)

    tf.summary.histogram('reward/reward_train', rewards, step=self._step)

    train_regressions = [reward for reward in rewards if reward < 0]
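For reference, below is a minimal standalone sketch of the same per-step summaries outside the learner class. The log directory, the `rewards` list, and the `step` value are illustrative assumptions and not part of this patch; only the tag names and the `np.percentile` / `tf.summary.scalar` calls mirror the added code.

    import numpy as np
    import tensorflow as tf

    _PERCENTILES_TO_REPORT = [25, 50, 75]

    # Hypothetical rewards and step for illustration only.
    rewards = [0.10, -0.05, 0.32, 0.27, 0.00]
    step = 0

    # Hypothetical log directory; point TensorBoard at it to view the scalars.
    writer = tf.summary.create_file_writer('/tmp/es_reward_demo')
    with writer.as_default():
      tf.summary.scalar('reward/average_reward_train', np.mean(rewards), step=step)
      tf.summary.scalar('reward/maximum_reward_train', np.max(rewards), step=step)
      for percentile_to_report in _PERCENTILES_TO_REPORT:
        percentile_value = np.percentile(rewards, percentile_to_report)
        # Tags are keyed by the percentile rank, e.g. 'reward/25_percentile'.
        tf.summary.scalar(
            f'reward/{percentile_to_report}_percentile',
            percentile_value,
            step=step)
      tf.summary.histogram('reward/reward_train', rewards, step=step)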
