Skip to content

Commit ff76bc9

Browse files
Add support for logging max/percentiles of reward to tensorboard in ES
This patch makes the blackbox learner write percentiles of the reward, along with the maximum reward, to TensorBoard. This provides more exact per-step information about the reward distribution, rather than requiring use of the distribution/histogram views, which have poor support for numerical analysis within TensorBoard.
1 parent 9cc035f commit ff76bc9

File tree

1 file changed

+13
-0
lines changed

1 file changed

+13
-0
lines changed

compiler_opt/es/blackbox_learner.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,9 @@
3535
# If less than 40% of requests succeed, skip the step.
3636
_SKIP_STEP_SUCCESS_RATIO = 0.4
3737

38+
# The percentiles to report as individual values in Tensorboard.
39+
_PERCENTILES_TO_REPORT = [25, 50, 75]
40+
3841

3942
@gin.configurable
4043
@dataclasses.dataclass(frozen=True)
@@ -188,6 +191,16 @@ def _log_tf_summary(self, rewards: List[float]) -> None:
188191
tf.summary.scalar(
189192
'reward/average_reward_train', np.mean(rewards), step=self._step)
190193

194+
tf.summary.scalar(
195+
'reward/maximum_reward_train', np.max(rewards), step=self._step)
196+
197+
for percentile_to_report in _PERCENTILES_TO_REPORT:
198+
percentile_value = np.percentile(rewards, percentile_to_report)
199+
tf.summary.scalar(
200+
f'reward/{percentile_value}_percentile',
201+
percentile_value,
202+
step=self._step)
203+
191204
tf.summary.histogram('reward/reward_train', rewards, step=self._step)
192205

193206
train_regressions = [reward for reward in rewards if reward < 0]

0 commit comments

Comments (0)