Add support for logging max/percentiles of reward to tensorboard in ES (#375)

This patch makes the blackbox learner write percentiles of the reward, along
with the maximum reward, to TensorBoard. This gives more precise per-step
information about the reward distribution than the existing
distribution/histogram summaries, which TensorBoard does not support well for
numerical analysis.
boomanaiden154 authored Sep 26, 2024
1 parent 9cc035f commit dafed47
Showing 1 changed file with 13 additions and 0 deletions.
compiler_opt/es/blackbox_learner.py: 13 additions & 0 deletions
@@ -35,6 +35,9 @@
# If less than 40% of requests succeed, skip the step.
_SKIP_STEP_SUCCESS_RATIO = 0.4

# The percentiles to report as individual values in Tensorboard.
_PERCENTILES_TO_REPORT = [25, 50, 75]


@gin.configurable
@dataclasses.dataclass(frozen=True)
@@ -188,6 +191,16 @@ def _log_tf_summary(self, rewards: List[float]) -> None:
    tf.summary.scalar(
        'reward/average_reward_train', np.mean(rewards), step=self._step)

    tf.summary.scalar(
        'reward/maximum_reward_train', np.max(rewards), step=self._step)

    for percentile_to_report in _PERCENTILES_TO_REPORT:
      percentile_value = np.percentile(rewards, percentile_to_report)
      tf.summary.scalar(
          f'reward/{percentile_to_report}_percentile',
          percentile_value,
          step=self._step)

    tf.summary.histogram('reward/reward_train', rewards, step=self._step)

    train_regressions = [reward for reward in rewards if reward < 0]
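For reference, below is a minimal standalone sketch of the same per-step summaries outside the learner class. The log directory, the `rewards` list, and the `step` value are illustrative assumptions and not part of this patch; only the tag names and the `np.percentile` / `tf.summary.scalar` calls mirror the added code.

    import numpy as np
    import tensorflow as tf

    _PERCENTILES_TO_REPORT = [25, 50, 75]

    # Hypothetical rewards and step for illustration only.
    rewards = [0.10, -0.05, 0.32, 0.27, 0.00]
    step = 0

    # Hypothetical log directory; point TensorBoard at it to view the scalars.
    writer = tf.summary.create_file_writer('/tmp/es_reward_demo')
    with writer.as_default():
      tf.summary.scalar('reward/average_reward_train', np.mean(rewards), step=step)
      tf.summary.scalar('reward/maximum_reward_train', np.max(rewards), step=step)
      for percentile_to_report in _PERCENTILES_TO_REPORT:
        percentile_value = np.percentile(rewards, percentile_to_report)
        # Tags are keyed by the percentile rank, e.g. 'reward/25_percentile'.
        tf.summary.scalar(
            f'reward/{percentile_to_report}_percentile',
            percentile_value,
            step=step)
      tf.summary.histogram('reward/reward_train', rewards, step=step)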
