Skip to content

Commit ff76bc9

Browse files
Add support for logging max/percentiles of reward to tensorboard in ES
This patch makes the blackbox learner write percentiles of the reward, along with the maximum reward, to TensorBoard. This provides more exact per-step information about the reward distribution, rather than requiring use of the distribution/histogram views, which have poor support for numerical analysis within TensorBoard.
1 parent 9cc035f commit ff76bc9

File tree

1 file changed

+13
-0
lines changed

1 file changed

+13
-0
lines changed

compiler_opt/es/blackbox_learner.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,9 @@
3535
# If less than 40% of requests succeed, skip the step.
3636
_SKIP_STEP_SUCCESS_RATIO = 0.4
3737

38+
# The percentiles to report as individual values in Tensorboard.
39+
_PERCENTILES_TO_REPORT = [25, 50, 75]
40+
3841

3942
@gin.configurable
4043
@dataclasses.dataclass(frozen=True)
@@ -188,6 +191,16 @@ def _log_tf_summary(self, rewards: List[float]) -> None:
188191
tf.summary.scalar(
189192
'reward/average_reward_train', np.mean(rewards), step=self._step)
190193

194+
tf.summary.scalar(
195+
'reward/maximum_reward_train', np.max(rewards), step=self._step)
196+
197+
for percentile_to_report in _PERCENTILES_TO_REPORT:
198+
percentile_value = np.percentile(rewards, percentile_to_report)
199+
tf.summary.scalar(
200+
f'reward/{percentile_value}_percentile',
201+
percentile_value,
202+
step=self._step)
203+
191204
tf.summary.histogram('reward/reward_train', rewards, step=self._step)
192205

193206
train_regressions = [reward for reward in rewards if reward < 0]

0 commit comments

Comments (0)