Skip to content

Commit

Permalink
Add support for logging max/percentiles of reward to tensorboard in ES
Browse files Browse the repository at this point in the history
This patch makes the blackbox learner write percentiles of the reward,
along with the maximum reward, to Tensorboard so that there is more
precise information on the per-step reward distribution, rather than
needing to resort to the distribution/histogram views, which do not have
good support for numerical analysis within Tensorboard.
  • Loading branch information
boomanaiden154 committed Sep 26, 2024
1 parent 9cc035f commit ff76bc9
Showing 1 changed file with 13 additions and 0 deletions.
13 changes: 13 additions & 0 deletions compiler_opt/es/blackbox_learner.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@
# If less than 40% of requests succeed, skip the step.
_SKIP_STEP_SUCCESS_RATIO = 0.4

# The percentiles to report as individual values in Tensorboard.
# Each entry is a percentile rank in [0, 100]; one scalar series named
# 'reward/<rank>_percentile' is emitted per entry at every step.
_PERCENTILES_TO_REPORT = [25, 50, 75]


@gin.configurable
@dataclasses.dataclass(frozen=True)
Expand Down Expand Up @@ -188,6 +191,16 @@ def _log_tf_summary(self, rewards: List[float]) -> None:
tf.summary.scalar(
'reward/average_reward_train', np.mean(rewards), step=self._step)

tf.summary.scalar(
'reward/maximum_reward_train', np.max(rewards), step=self._step)

# Log each configured reward percentile as its own scalar series.
# BUG FIX: the tag must be keyed by the percentile *rank*
# (percentile_to_report, e.g. 25/50/75), not by the computed reward
# value. Using percentile_value in the tag created a fresh series name
# every step (e.g. 'reward/3.7214_percentile') instead of three stable
# series, defeating the purpose of this logging.
for percentile_to_report in _PERCENTILES_TO_REPORT:
  percentile_value = np.percentile(rewards, percentile_to_report)
  tf.summary.scalar(
      f'reward/{percentile_to_report}_percentile',
      percentile_value,
      step=self._step)

tf.summary.histogram('reward/reward_train', rewards, step=self._step)

train_regressions = [reward for reward in rewards if reward < 0]
Expand Down

0 comments on commit ff76bc9

Please sign in to comment.