MapData support for benchmarking (#3058)
Summary:
Pull Request resolved: #3058

**`BenchmarkMapMetric`**
* A `MapMetric`, returning `MapData`.
* Receives an entire learning curve and consults the backend simulator to decide whether to return only partial data, depending on whether the trial is still running or has been early-stopped (see the sketch below).
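
A minimal sketch of that truncation step (the helper name and arguments are illustrative, not the actual Ax API): keep only the rows of the stored curve whose progression value has already elapsed on the simulator clock.

```python
import pandas as pd


def truncate_curve(df: pd.DataFrame, max_t: float) -> pd.DataFrame:
    """Keep only the progression steps that have elapsed so far.

    `df` holds one full learning curve with a "t" column; `max_t` stands in for
    `backend_simulator.time - sim_start_time` (trial still running) or
    `sim_completed_time - sim_start_time` (trial completed or stopped early).
    """
    return df.loc[df["t"] <= max_t]
```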

**`BenchmarkTestFunction`**
* Now produces a 2d tensor rather than a 1d tensor, with the second dimension being the progression along the learning curve or time series.
* Always produces the entire learning curve or time series; if only partial data should be observed, `BenchmarkMapMetric` handles that (a sketch of the new output shape follows this list).
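
To illustrate the new output shape, here is a hypothetical test function (a sketch only; the class and its `evaluate_true` signature are assumptions, not the actual `BenchmarkTestFunction` interface) that returns one row per outcome and one column per progression step:

```python
import torch


class DummyCurveTestFunction:
    """Hypothetical test function that returns a full (noiseless) learning curve."""

    outcome_names = ["loss"]

    def __init__(self, n_steps: int = 10) -> None:
        self.n_steps = n_steps

    def evaluate_true(self, params: dict[str, float]) -> torch.Tensor:
        # Scalar "difficulty" of the parameterization.
        offset = sum(v**2 for v in params.values())
        steps = torch.arange(1, self.n_steps + 1, dtype=torch.double)
        # Shape (n_outcomes, n_steps): the second dimension is the progression
        # along the learning curve.
        return (offset * torch.exp(-steps / self.n_steps)).unsqueeze(0)
```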

**`BenchmarkRunner`**
* Always produces the entire learning curve or time series. If we only observe partial data, `BenchmarkMapMetric` will handle that.
* Now works with Pandas DataFrames rather than torch tensors, making it easier to track the values of "metric_name", "arm_name", and (newly added) "t". This also minimizes the amount of work that needs to be done by `BenchmarkMetric` and `BenchmarkMapMetric`.
* Adds IID noise to each element of a time series (see the sketch below). More sophisticated noise generators can be added in the future.
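
A rough sketch of the noise step (`add_iid_noise` is an illustrative helper, not the runner's actual code): each element of the ground-truth curve gets an independent Gaussian draw.

```python
import numpy as np


def add_iid_noise(
    curve: np.ndarray, noise_sd: float, rng: np.random.Generator
) -> np.ndarray:
    """Return a noisy copy of the full curve, with one IID draw per element."""
    return curve + rng.normal(loc=0.0, scale=noise_sd, size=curve.shape)


# e.g. add_iid_noise(np.array([1.0, 0.6, 0.4]), noise_sd=0.1, rng=np.random.default_rng(0))
```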

**`benchmark_problem.py`**
Updated a helper function for getting an optimization config so that it can use `BenchmarkMapMetric`s.

**`BenchmarkTrialMetadata`**
Now stores a DataFrame formatted the same way as the data that will eventually land on `Data.df` and `MapData.df` (an illustrative example follows).
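
For concreteness, a hypothetical example of one such per-metric DataFrame (column names follow the `Data.df` format and the diff below; the values are made up). Per the diff, `BenchmarkMetric` drops the "t" column, while `BenchmarkMapMetric` uses it as the map key.

```python
import pandas as pd

# Hypothetical contents of BenchmarkTrialMetadata.dfs["loss"] for one arm.
example_df = pd.DataFrame(
    {
        "arm_name": ["0_0", "0_0", "0_0"],
        "metric_name": ["loss", "loss", "loss"],
        "t": [0, 1, 2],
        "mean": [1.00, 0.61, 0.37],
        "sem": [0.1, 0.1, 0.1],
        "trial_index": [0, 0, 0],
    }
)
```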

TODO:
* Allow for different epochs to take different lengths of time, based on parameters.

Reviewed By: Balandat

Differential Revision: D64198634

fbshipit-source-id: b69df0618de77638bbcf6b4b6d115bbdcd5feb2b
esantorella authored and facebook-github-bot committed Nov 15, 2024
1 parent 241ad71 commit 0cab5d2
Showing 10 changed files with 625 additions and 177 deletions.
3 changes: 1 addition & 2 deletions ax/benchmark/benchmark.py
@@ -68,8 +68,7 @@ def compute_score_trace(


def get_benchmark_runner(
problem: BenchmarkProblem,
max_concurrency: int = 1,
problem: BenchmarkProblem, max_concurrency: int = 1
) -> BenchmarkRunner:
"""
Construct a ``BenchmarkRunner`` for the given problem and concurrency.
134 changes: 115 additions & 19 deletions ax/benchmark/benchmark_metric.py
@@ -7,12 +7,14 @@

from typing import Any

import pandas as pd
from ax.core.base_trial import BaseTrial

from ax.core.data import Data

from ax.core.map_data import MapData, MapKeyInfo
from ax.core.map_metric import MapMetric
from ax.core.metric import Metric, MetricFetchE, MetricFetchResult
from ax.utils.common.result import Err, Ok
from pyre_extensions import none_throws


class BenchmarkMetric(Metric):
@@ -56,26 +58,120 @@ def fetch_trial_data(self, trial: BaseTrial, **kwargs: Any) -> MetricFetchResult
f"Arguments {set(kwargs)} are not supported in "
f"{self.__class__.__name__}.fetch_trial_data."
)
df = trial.run_metadata["benchmark_metadata"].dfs[self.name]
if (df["t"] > 0).any():
raise ValueError(
f"Trial {trial.index} has data from multiple time steps. This is"
" not supported by `BenchmarkMetric`; use `BenchmarkMapMetric`."
)
df = df.drop(columns=["t"])
if not self.observe_noise_sd:
df["sem"] = None
try:
return Ok(value=Data(df=df))

except Exception as e:
return Err(
MetricFetchE(
message=f"Failed to obtain data for trial {trial.index}",
exception=e,
)
)


class BenchmarkMapMetric(MapMetric):
# pyre-fixme: Inconsistent override [15]: `map_key_info` overrides attribute
# defined in `MapMetric` inconsistently. Type `MapKeyInfo[int]` is not a
# subtype of the overridden attribute `MapKeyInfo[float]`
map_key_info: MapKeyInfo[int] = MapKeyInfo(key="t", default_value=0)

def __init__(
self,
name: str,
# Needs to be a boolean (not None) for validation of MOO opt configs
lower_is_better: bool,
observe_noise_sd: bool = True,
) -> None:
"""
Args:
name: Name of the metric.
lower_is_better: If `True`, lower metric values are considered better.
observe_noise_sd: If `True`, the standard deviation of the observation
noise is included in the `sem` column of the returned data.
If `False`, `sem` is set to `None` (meaning that the model will
have to infer the noise level).
"""
super().__init__(name=name, lower_is_better=lower_is_better)
# Declare `lower_is_better` as bool (rather than optional as in the base class)
self.lower_is_better: bool = lower_is_better
self.observe_noise_sd: bool = observe_noise_sd

@classmethod
def is_available_while_running(cls) -> bool:
return True

def fetch_trial_data(self, trial: BaseTrial, **kwargs: Any) -> MetricFetchResult:
"""
If the trial has been completed, look up the ``sim_start_time`` and
``sim_completed_time`` on the corresponding ``SimTrial``, and return all
data from keys 0, ..., ``sim_completed_time - sim_start_time``. If the
trial has not completed, return all data from keys 0, ...,
``backend_simulator.time - sim_start_time``.
Args:
trial: The trial from which to fetch data.
kwargs: Unsupported and will raise an exception.
Returns:
A MetricFetchResult containing the data for the requested metric.
"""
if len(kwargs) > 0:
raise NotImplementedError(
f"Arguments {set(kwargs)} are not supported in "
f"{self.__class__.__name__}.fetch_trial_data."
)
if len(trial.run_metadata) == 0:
return Err(
MetricFetchE(
message=f"No metadata available for trial {trial.index}",
exception=None,
)
)

metadata = trial.run_metadata["benchmark_metadata"]
# Look up the index based on the outcome name under which we track the data
# as part of `metadata`.
outcome_index = metadata.outcome_names.index(self.name)

backend_simulator = metadata.backend_simulator

if backend_simulator is None:
max_t = float("inf")
else:
sim_trial = none_throws(
backend_simulator.get_sim_trial_by_index(trial.index)
)
start_time = none_throws(sim_trial.sim_start_time)
if sim_trial.sim_completed_time is None: # Still running
max_t = backend_simulator.time - start_time
else:
if sim_trial.sim_completed_time > backend_simulator.time:
raise RuntimeError(
"The trial's completion time is in the future! This is "
f"unexpected. {sim_trial.sim_completed_time=}, "
f"{backend_simulator.time=}"
)
# Completed, may have stopped early
max_t = none_throws(sim_trial.sim_completed_time) - start_time

df = (
metadata.dfs[self.name]
.loc[lambda x: x["t"] <= max_t]
.rename(columns={"t": self.map_key_info.key})
)
if not self.observe_noise_sd:
df["sem"] = None

# Could fail if no data
try:
records = [
{
"arm_name": arm_name,
"metric_name": self.name,
"mean": metadata.Ys[arm_name][outcome_index],
"sem": metadata.Ystds[arm_name][outcome_index]
if self.observe_noise_sd
else float("nan"),
"trial_index": trial.index,
}
for arm_name in metadata.Ys.keys()
]
df = pd.DataFrame.from_records(records)
return Ok(value=Data(df=df))
return Ok(value=MapData(df=df, map_key_infos=[self.map_key_info]))

except Exception as e:
return Err(
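
As a usage sketch of the new metric (hedged: `fetch_partial_curve` is a hypothetical helper, the trial/simulator setup is not shown, and `unwrap()` is assumed to be available on the fetch result), the metric can be polled repeatedly while a trial runs and will return a growing learning curve:

```python
from ax.benchmark.benchmark_metric import BenchmarkMapMetric
from ax.core.base_trial import BaseTrial
from ax.core.map_data import MapData


def fetch_partial_curve(trial: BaseTrial) -> MapData:
    """Illustrative helper: fetch whatever portion of the "loss" curve has
    already elapsed on the backend simulator for `trial`.

    Assumes `trial.run_metadata["benchmark_metadata"]` was populated by a
    `BenchmarkRunner` tracking an outcome named "loss", and that the returned
    Result exposes `unwrap()` (an assumption about the Result API).
    """
    metric = BenchmarkMapMetric(
        name="loss", lower_is_better=True, observe_noise_sd=False
    )
    result = metric.fetch_trial_data(trial)
    # While the trial is running, the MapData only contains rows with
    # t <= simulator time - start time; after completion (possibly via early
    # stopping) it is truncated at the completion time instead.
    return result.unwrap()
```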
37 changes: 26 additions & 11 deletions ax/benchmark/benchmark_problem.py
@@ -9,9 +9,10 @@
from dataclasses import dataclass, field
from typing import Any

from ax.benchmark.benchmark_metric import BenchmarkMetric
from ax.benchmark.benchmark_metric import BenchmarkMapMetric, BenchmarkMetric
from ax.benchmark.benchmark_test_function import BenchmarkTestFunction
from ax.benchmark.benchmark_test_functions.botorch_test import BoTorchTestFunction

from ax.core.objective import MultiObjective, Objective
from ax.core.optimization_config import (
MultiObjectiveOptimizationConfig,
@@ -148,20 +149,27 @@ def is_moo(self) -> bool:


def _get_constraints(
constraint_names: Sequence[str], observe_noise_sd: bool
constraint_names: Sequence[str],
observe_noise_sd: bool,
use_map_metric: bool = False,
) -> list[OutcomeConstraint]:
"""
Create a list of ``OutcomeConstraint``s.
Each constraint has a ``BenchmarkMetric``; the metrics names match
``constraint_names``, and each has ``observe_noise_sd=observe_noise_sd``.
This doesn't handle the case where only some of the outcomes have noise
levels observed.
"""
Args:
constraint_names: Names of the constraints. One constraint will be
created for each.
observe_noise_sd: Whether the standard deviation of the observation
noise is observed, for each constraint. This doesn't handle the case
where only some of the outcomes have noise levels observed.
use_map_metric: Whether to use a ``BenchmarkMapMetric``.
"""
metric_cls = BenchmarkMapMetric if use_map_metric else BenchmarkMetric
outcome_constraints = [
OutcomeConstraint(
metric=BenchmarkMetric(
metric=metric_cls(
name=name,
lower_is_better=False, # positive slack = feasible
observe_noise_sd=observe_noise_sd,
@@ -180,6 +188,7 @@ def get_soo_opt_config(
outcome_names: Sequence[str],
lower_is_better: bool = True,
observe_noise_sd: bool = False,
use_map_metric: bool = False,
) -> OptimizationConfig:
"""
Create a single-objective ``OptimizationConfig``, potentially with
@@ -195,9 +204,11 @@
better (feasible).
observe_noise_sd: Whether the standard deviation of the observation
noise is observed. Applies to all objective and constraints.
use_map_metric: Whether to use a ``BenchmarkMapMetric``.
"""
metric_cls = BenchmarkMapMetric if use_map_metric else BenchmarkMetric
objective = Objective(
metric=BenchmarkMetric(
metric=metric_cls(
name=outcome_names[0],
lower_is_better=lower_is_better,
observe_noise_sd=observe_noise_sd,
@@ -206,7 +217,9 @@
)

outcome_constraints = _get_constraints(
constraint_names=outcome_names[1:], observe_noise_sd=observe_noise_sd
constraint_names=outcome_names[1:],
observe_noise_sd=observe_noise_sd,
use_map_metric=use_map_metric,
)

return OptimizationConfig(
@@ -221,6 +234,7 @@ def get_moo_opt_config(
num_constraints: int = 0,
lower_is_better: bool = True,
observe_noise_sd: bool = False,
use_map_metric: bool = False,
) -> MultiObjectiveOptimizationConfig:
"""
Create a ``MultiObjectiveOptimizationConfig``, potentially with constraints.
@@ -244,8 +258,9 @@ def get_moo_opt_config(
noise is observed. Applies to all objective and constraints.
"""
n_objectives = len(outcome_names) - num_constraints
metric_cls = BenchmarkMapMetric if use_map_metric else BenchmarkMetric
objective_metrics = [
BenchmarkMetric(
metric_cls(
name=outcome_names[i],
lower_is_better=lower_is_better,
observe_noise_sd=observe_noise_sd,
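
As a usage sketch of the updated helper (the outcome names are made up; the signature matches the diff above), here is a single-objective config whose objective and constraint are backed by `BenchmarkMapMetric`:

```python
from ax.benchmark.benchmark_problem import get_soo_opt_config

opt_config = get_soo_opt_config(
    outcome_names=["loss", "constraint_slack"],  # first name becomes the objective
    lower_is_better=True,
    observe_noise_sd=False,
    use_map_metric=True,  # new flag added in this diff
)
```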
(Diffs for the remaining 7 changed files are not shown here.)