From dafed47c5bcce489cb1c6dab2187cce72377c691 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman@google.com>
Date: Thu, 26 Sep 2024 14:50:44 -0700
Subject: [PATCH] Add support for logging max/percentiles of reward to
 tensorboard in ES (#375)

This patch makes the blackbox learner write percentiles of the reward,
along with the maximum reward, to TensorBoard so that there is more exact
information on the distribution of the reward per step. Previously one
had to resort to the distribution/histogram views, which do not have
great support for numerical analysis within TensorBoard.
---
 compiler_opt/es/blackbox_learner.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/compiler_opt/es/blackbox_learner.py b/compiler_opt/es/blackbox_learner.py
index 79c61acc..ab67ae4e 100644
--- a/compiler_opt/es/blackbox_learner.py
+++ b/compiler_opt/es/blackbox_learner.py
@@ -35,6 +35,9 @@
 # If less than 40% of requests succeed, skip the step.
 _SKIP_STEP_SUCCESS_RATIO = 0.4
 
+# The percentiles to report as individual values in Tensorboard.
+_PERCENTILES_TO_REPORT = [25, 50, 75]
+
 
 @gin.configurable
 @dataclasses.dataclass(frozen=True)
@@ -188,6 +191,16 @@ def _log_tf_summary(self, rewards: List[float]) -> None:
       tf.summary.scalar(
           'reward/average_reward_train', np.mean(rewards), step=self._step)
 
+      tf.summary.scalar(
+          'reward/maximum_reward_train', np.max(rewards), step=self._step)
+
+      for percentile_to_report in _PERCENTILES_TO_REPORT:
+        percentile_value = np.percentile(rewards, percentile_to_report)
+        tf.summary.scalar(
+            f'reward/{percentile_to_report}_percentile',
+            percentile_value,
+            step=self._step)
+
       tf.summary.histogram('reward/reward_train', rewards, step=self._step)
 
       train_regressions = [reward for reward in rewards if reward < 0]