MapData support for benchmarking (#3058)
Summary:
Pull Request resolved: #3058

**`BenchmarkMapMetric`**
* A `MapMetric`, returning `MapData`.
* Receives an entire learning curve and consults the backend simulator to decide whether to return only partial data, depending on whether the trial is still running or has been early-stopped (see the sketch below).
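
A minimal sketch of that truncation step (the helper name and arguments are illustrative, not the actual Ax API): keep only the rows of the stored curve whose progression value has already elapsed on the simulator clock.

```python
import pandas as pd


def truncate_curve(df: pd.DataFrame, max_t: float) -> pd.DataFrame:
    """Keep only the progression steps that have elapsed so far.

    `df` holds one full learning curve with a "t" column; `max_t` stands in for
    `backend_simulator.time - sim_start_time` (trial still running) or
    `sim_completed_time - sim_start_time` (trial completed or stopped early).
    """
    return df.loc[df["t"] <= max_t]
```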

**`BenchmarkTestFunction`**
* Now produces a 2d tensor rather than a 1d tensor, with the second dimension being the progression along the learning curve or time series.
* Always produces the entire learning curve or time series; if only partial data should be observed, `BenchmarkMapMetric` handles that (a sketch of the new output shape follows this list).
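
To illustrate the new output shape, here is a hypothetical test function (a sketch only; the class and its `evaluate_true` signature are assumptions, not the actual `BenchmarkTestFunction` interface) that returns one row per outcome and one column per progression step:

```python
import torch


class DummyCurveTestFunction:
    """Hypothetical test function that returns a full (noiseless) learning curve."""

    outcome_names = ["loss"]

    def __init__(self, n_steps: int = 10) -> None:
        self.n_steps = n_steps

    def evaluate_true(self, params: dict[str, float]) -> torch.Tensor:
        # Scalar "difficulty" of the parameterization.
        offset = sum(v**2 for v in params.values())
        steps = torch.arange(1, self.n_steps + 1, dtype=torch.double)
        # Shape (n_outcomes, n_steps): the second dimension is the progression
        # along the learning curve.
        return (offset * torch.exp(-steps / self.n_steps)).unsqueeze(0)
```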

**`BenchmarkRunner`**
* Always produces the entire learning curve or time series. If we only observe partial data, `BenchmarkMapMetric` will handle that.
* Now works with Pandas DataFrames rather than torch tensors, making it easier to track the values of "metric_name", "arm_name", and (newly added) "t". This also minimizes the amount of work that needs to be done by `BenchmarkMetric` and `BenchmarkMapMetric`.
* Adds IID noise to each element of a time series (see the sketch below). More sophisticated noise generators can be added in the future.
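
A rough sketch of the noise step (`add_iid_noise` is an illustrative helper, not the runner's actual code): each element of the ground-truth curve gets an independent Gaussian draw.

```python
import numpy as np


def add_iid_noise(
    curve: np.ndarray, noise_sd: float, rng: np.random.Generator
) -> np.ndarray:
    """Return a noisy copy of the full curve, with one IID draw per element."""
    return curve + rng.normal(loc=0.0, scale=noise_sd, size=curve.shape)


# e.g. add_iid_noise(np.array([1.0, 0.6, 0.4]), noise_sd=0.1, rng=np.random.default_rng(0))
```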

**`benchmark_problem.py`**
Updated a helper function for getting an optimization config so that it can use `BenchmarkMapMetric`s.

**`BenchmarkTrialMetadata`**
Now stores a DataFrame formatted the same way as the data that will eventually land on `Data.df` and `MapData.df` (an illustrative example follows).
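
For concreteness, a hypothetical example of one such per-metric DataFrame (column names follow the `Data.df` format and the diff below; the values are made up). Per the diff, `BenchmarkMetric` drops the "t" column, while `BenchmarkMapMetric` uses it as the map key.

```python
import pandas as pd

# Hypothetical contents of BenchmarkTrialMetadata.dfs["loss"] for one arm.
example_df = pd.DataFrame(
    {
        "arm_name": ["0_0", "0_0", "0_0"],
        "metric_name": ["loss", "loss", "loss"],
        "t": [0, 1, 2],
        "mean": [1.00, 0.61, 0.37],
        "sem": [0.1, 0.1, 0.1],
        "trial_index": [0, 0, 0],
    }
)
```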

TODO:
* Allow for different epochs to take different lengths of time, based on parameters.

Reviewed By: Balandat

Differential Revision: D64198634

fbshipit-source-id: b69df0618de77638bbcf6b4b6d115bbdcd5feb2b
esantorella authored and facebook-github-bot committed Nov 15, 2024
1 parent 241ad71 commit 0cab5d2
Showing 10 changed files with 625 additions and 177 deletions.
3 changes: 1 addition & 2 deletions ax/benchmark/benchmark.py
@@ -68,8 +68,7 @@ def compute_score_trace(


def get_benchmark_runner(
problem: BenchmarkProblem,
max_concurrency: int = 1,
problem: BenchmarkProblem, max_concurrency: int = 1
) -> BenchmarkRunner:
"""
Construct a ``BenchmarkRunner`` for the given problem and concurrency.
134 changes: 115 additions & 19 deletions ax/benchmark/benchmark_metric.py
@@ -7,12 +7,14 @@

from typing import Any

import pandas as pd
from ax.core.base_trial import BaseTrial

from ax.core.data import Data

from ax.core.map_data import MapData, MapKeyInfo
from ax.core.map_metric import MapMetric
from ax.core.metric import Metric, MetricFetchE, MetricFetchResult
from ax.utils.common.result import Err, Ok
from pyre_extensions import none_throws


class BenchmarkMetric(Metric):
@@ -56,26 +58,120 @@ def fetch_trial_data(self, trial: BaseTrial, **kwargs: Any) -> MetricFetchResult
f"Arguments {set(kwargs)} are not supported in "
f"{self.__class__.__name__}.fetch_trial_data."
)
df = trial.run_metadata["benchmark_metadata"].dfs[self.name]
if (df["t"] > 0).any():
raise ValueError(
f"Trial {trial.index} has data from multiple time steps. This is"
" not supported by `BenchmarkMetric`; use `BenchmarkMapMetric`."
)
df = df.drop(columns=["t"])
if not self.observe_noise_sd:
df["sem"] = None
try:
return Ok(value=Data(df=df))

except Exception as e:
return Err(
MetricFetchE(
message=f"Failed to obtain data for trial {trial.index}",
exception=e,
)
)


class BenchmarkMapMetric(MapMetric):
# pyre-fixme: Inconsistent override [15]: `map_key_info` overrides attribute
# defined in `MapMetric` inconsistently. Type `MapKeyInfo[int]` is not a
# subtype of the overridden attribute `MapKeyInfo[float]`
map_key_info: MapKeyInfo[int] = MapKeyInfo(key="t", default_value=0)

def __init__(
self,
name: str,
# Needs to be a boolean (not None) for validation of MOO opt configs
lower_is_better: bool,
observe_noise_sd: bool = True,
) -> None:
"""
Args:
name: Name of the metric.
lower_is_better: If `True`, lower metric values are considered better.
observe_noise_sd: If `True`, the standard deviation of the observation
noise is included in the `sem` column of the returned data.
If `False`, `sem` is set to `None` (meaning that the model will
have to infer the noise level).
"""
super().__init__(name=name, lower_is_better=lower_is_better)
# Declare `lower_is_better` as bool (rather than optional as in the base class)
self.lower_is_better: bool = lower_is_better
self.observe_noise_sd: bool = observe_noise_sd

@classmethod
def is_available_while_running(cls) -> bool:
return True

def fetch_trial_data(self, trial: BaseTrial, **kwargs: Any) -> MetricFetchResult:
"""
If the trial has been completed, look up the ``sim_start_time`` and
``sim_completed_time`` on the corresponding ``SimTrial``, and return all
data from keys 0, ..., ``sim_completed_time - sim_start_time``. If the
trial has not completed, return all data from keys 0, ...,
``backend_simulator.time - sim_start_time``.
Args:
trial: The trial from which to fetch data.
kwargs: Unsupported and will raise an exception.
Returns:
A MetricFetchResult containing the data for the requested metric.
"""
if len(kwargs) > 0:
raise NotImplementedError(
f"Arguments {set(kwargs)} are not supported in "
f"{self.__class__.__name__}.fetch_trial_data."
)
if len(trial.run_metadata) == 0:
return Err(
MetricFetchE(
message=f"No metadata available for trial {trial.index}",
exception=None,
)
)

metadata = trial.run_metadata["benchmark_metadata"]
# Look up the index based on the outcome name under which we track the data
# as part of `metadata`.
outcome_index = metadata.outcome_names.index(self.name)

backend_simulator = metadata.backend_simulator

if backend_simulator is None:
max_t = float("inf")
else:
sim_trial = none_throws(
backend_simulator.get_sim_trial_by_index(trial.index)
)
start_time = none_throws(sim_trial.sim_start_time)
if sim_trial.sim_completed_time is None: # Still running
max_t = backend_simulator.time - start_time
else:
if sim_trial.sim_completed_time > backend_simulator.time:
raise RuntimeError(
"The trial's completion time is in the future! This is "
f"unexpected. {sim_trial.sim_completed_time=}, "
f"{backend_simulator.time=}"
)
# Completed, may have stopped early
max_t = none_throws(sim_trial.sim_completed_time) - start_time

df = (
metadata.dfs[self.name]
.loc[lambda x: x["t"] <= max_t]
.rename(columns={"t": self.map_key_info.key})
)
if not self.observe_noise_sd:
df["sem"] = None

# Could fail if no data
try:
records = [
{
"arm_name": arm_name,
"metric_name": self.name,
"mean": metadata.Ys[arm_name][outcome_index],
"sem": metadata.Ystds[arm_name][outcome_index]
if self.observe_noise_sd
else float("nan"),
"trial_index": trial.index,
}
for arm_name in metadata.Ys.keys()
]
df = pd.DataFrame.from_records(records)
return Ok(value=Data(df=df))
return Ok(value=MapData(df=df, map_key_infos=[self.map_key_info]))

except Exception as e:
return Err(
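
As a usage sketch of the new metric (hedged: `fetch_partial_curve` is a hypothetical helper, the trial/simulator setup is not shown, and `unwrap()` is assumed to be available on the fetch result), the metric can be polled repeatedly while a trial runs and will return a growing learning curve:

```python
from ax.benchmark.benchmark_metric import BenchmarkMapMetric
from ax.core.base_trial import BaseTrial
from ax.core.map_data import MapData


def fetch_partial_curve(trial: BaseTrial) -> MapData:
    """Illustrative helper: fetch whatever portion of the "loss" curve has
    already elapsed on the backend simulator for `trial`.

    Assumes `trial.run_metadata["benchmark_metadata"]` was populated by a
    `BenchmarkRunner` tracking an outcome named "loss", and that the returned
    Result exposes `unwrap()` (an assumption about the Result API).
    """
    metric = BenchmarkMapMetric(
        name="loss", lower_is_better=True, observe_noise_sd=False
    )
    result = metric.fetch_trial_data(trial)
    # While the trial is running, the MapData only contains rows with
    # t <= simulator time - start time; after completion (possibly via early
    # stopping) it is truncated at the completion time instead.
    return result.unwrap()
```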
37 changes: 26 additions & 11 deletions ax/benchmark/benchmark_problem.py
@@ -9,9 +9,10 @@
from dataclasses import dataclass, field
from typing import Any

from ax.benchmark.benchmark_metric import BenchmarkMetric
from ax.benchmark.benchmark_metric import BenchmarkMapMetric, BenchmarkMetric
from ax.benchmark.benchmark_test_function import BenchmarkTestFunction
from ax.benchmark.benchmark_test_functions.botorch_test import BoTorchTestFunction

from ax.core.objective import MultiObjective, Objective
from ax.core.optimization_config import (
MultiObjectiveOptimizationConfig,
@@ -148,20 +149,27 @@ def is_moo(self) -> bool:


def _get_constraints(
constraint_names: Sequence[str], observe_noise_sd: bool
constraint_names: Sequence[str],
observe_noise_sd: bool,
use_map_metric: bool = False,
) -> list[OutcomeConstraint]:
"""
Create a list of ``OutcomeConstraint``s.
Each constraint has a ``BenchmarkMetric``; the metrics names match
``constraint_names``, and each has ``observe_noise_sd=observe_noise_sd``.
This doesn't handle the case where only some of the outcomes have noise
levels observed.
"""
Args:
constraint_names: Names of the constraints. One constraint will be
created for each.
observe_noise_sd: Whether the standard deviation of the observation
noise is observed, for each constraint. This doesn't handle the case
where only some of the outcomes have noise levels observed.
use_map_metric: Whether to use a ``BenchmarkMapMetric``.
"""
metric_cls = BenchmarkMapMetric if use_map_metric else BenchmarkMetric
outcome_constraints = [
OutcomeConstraint(
metric=BenchmarkMetric(
metric=metric_cls(
name=name,
lower_is_better=False, # positive slack = feasible
observe_noise_sd=observe_noise_sd,
@@ -180,6 +188,7 @@ def get_soo_opt_config(
outcome_names: Sequence[str],
lower_is_better: bool = True,
observe_noise_sd: bool = False,
use_map_metric: bool = False,
) -> OptimizationConfig:
"""
Create a single-objective ``OptimizationConfig``, potentially with
@@ -195,9 +204,11 @@
better (feasible).
observe_noise_sd: Whether the standard deviation of the observation
noise is observed. Applies to all objective and constraints.
use_map_metric: Whether to use a ``BenchmarkMapMetric``.
"""
metric_cls = BenchmarkMapMetric if use_map_metric else BenchmarkMetric
objective = Objective(
metric=BenchmarkMetric(
metric=metric_cls(
name=outcome_names[0],
lower_is_better=lower_is_better,
observe_noise_sd=observe_noise_sd,
@@ -206,7 +217,9 @@
)

outcome_constraints = _get_constraints(
constraint_names=outcome_names[1:], observe_noise_sd=observe_noise_sd
constraint_names=outcome_names[1:],
observe_noise_sd=observe_noise_sd,
use_map_metric=use_map_metric,
)

return OptimizationConfig(
@@ -221,6 +234,7 @@ def get_moo_opt_config(
num_constraints: int = 0,
lower_is_better: bool = True,
observe_noise_sd: bool = False,
use_map_metric: bool = False,
) -> MultiObjectiveOptimizationConfig:
"""
Create a ``MultiObjectiveOptimizationConfig``, potentially with constraints.
@@ -244,8 +258,9 @@ def get_moo_opt_config(
noise is observed. Applies to all objective and constraints.
"""
n_objectives = len(outcome_names) - num_constraints
metric_cls = BenchmarkMapMetric if use_map_metric else BenchmarkMetric
objective_metrics = [
BenchmarkMetric(
metric_cls(
name=outcome_names[i],
lower_is_better=lower_is_better,
observe_noise_sd=observe_noise_sd,
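
As a usage sketch of the updated helper (the outcome names are made up; the signature matches the diff above), here is a single-objective config whose objective and constraint are backed by `BenchmarkMapMetric`:

```python
from ax.benchmark.benchmark_problem import get_soo_opt_config

opt_config = get_soo_opt_config(
    outcome_names=["loss", "constraint_slack"],  # first name becomes the objective
    lower_is_better=True,
    observe_noise_sd=False,
    use_map_metric=True,  # new flag added in this diff
)
```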
(Diffs for the remaining 7 changed files are not shown here.)