Enable batched benchmarks (with BatchTrial) (#2331)

Summary:

- Add scheduler options for running benchmarks with BatchTrial
- Append "_batch" to the names of batched MBM methods

Differential Revision: D55750921
esantorella authored and facebook-github-bot committed Apr 6, 2024
1 parent adcf3a4 commit 1a98877
Showing 4 changed files with 90 additions and 16 deletions.
36 changes: 32 additions & 4 deletions ax/benchmark/benchmark_method.py
@@ -7,10 +7,10 @@

 import logging
 from dataclasses import dataclass
-from typing import Any
+from typing import Any, Optional
 
 from ax.modelbridge.generation_strategy import GenerationStep, GenerationStrategy
-from ax.service.utils.scheduler_options import SchedulerOptions
+from ax.service.utils.scheduler_options import SchedulerOptions, TrialType
 from ax.utils.common.base import Base
 from ax.utils.common.logger import get_logger
 from ax.utils.common.typeutils import not_none
@@ -75,25 +75,53 @@ def __post_init__(self) -> None:
     _assign_frozen_attr(self, name="generation_strategy", value=gs_cloned)
 
 
-def get_sequential_optimization_scheduler_options(
+def get_benchmark_scheduler_options(
     timeout_hours: int = 4,
+    sequential: bool = True,
+    batch_size: Optional[int] = None,
 ) -> SchedulerOptions:
     """The typical SchedulerOptions used in benchmarking.
     Args:
         timeout_hours: The maximum amount of time (in hours) to run each
             benchmark replication. Defaults to 4 hours.
+        sequential: True if trials are generated one at a time rather than in
+            batches. Even when `sequential` is False, the design is "batch
+            sequential" rather than fully asynchronous, in that no new trials
+            will be generated while any are pending. Default: True.
+        batch_size: Number of trials to generate at once. Must be specified
+            if and only if `sequential=False`.
     """
+    if sequential and batch_size is not None:
+        raise ValueError("batch_size must be None when benchmarks are sequential.")
+    if not sequential and batch_size is None:
+        raise ValueError("batch_size must be specified when benchmarks are batched.")
+
     return SchedulerOptions(
-        # Enforce sequential trials by default
+        # No new candidates can be generated while any are pending.
+        # If batched, an entire batch must finish before the next can be
+        # generated.
         max_pending_trials=1,
         # Do not throttle, as is often necessary when polling real endpoints
         init_seconds_between_polls=0,
         min_seconds_before_poll=0,
         timeout_hours=timeout_hours,
+        trial_type=TrialType.TRIAL if sequential else TrialType.BATCH_TRIAL,
+        batch_size=batch_size,
     )
 
 
+def get_sequential_optimization_scheduler_options(
+    timeout_hours: int = 4,
+) -> SchedulerOptions:
+    """The typical SchedulerOptions used in benchmarking.
+    Args:
+        timeout_hours: The maximum amount of time (in hours) to run each
+            benchmark replication. Defaults to 4 hours.
+    """
+    return get_benchmark_scheduler_options(timeout_hours=timeout_hours)
+
+
 def _assign_frozen_attr(obj: Any, name: str, value: Any) -> None:  # pyre-ignore [2]
     """Assign a new value to an attribute of a frozen dataclass.
     This is an ugly hack and shouldn't be used broadly.
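
For reference, a minimal usage sketch of the new helper follows (not part of the commit; the batch size of 5 is illustrative):

    from ax.benchmark.benchmark_method import get_benchmark_scheduler_options

    # Sequential (the historical default): one single-arm trial at a time.
    sequential_options = get_benchmark_scheduler_options()

    # Batched: each trial is a BatchTrial of 5 arms, and the whole batch
    # must complete before the next one is generated.
    batched_options = get_benchmark_scheduler_options(sequential=False, batch_size=5)

    # Per the validation above, mixing the two modes raises a ValueError:
    # get_benchmark_scheduler_options(sequential=True, batch_size=5)
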
6 changes: 5 additions & 1 deletion ax/benchmark/methods/modular_botorch.py
@@ -58,7 +58,11 @@ def get_sobol_botorch_modular_acquisition(
     acqf_name = acqf_name_abbreviations.get(
         acquisition_cls.__name__, acquisition_cls.__name__
     )
-    name = name or f"MBM::{model_name}_{acqf_name}"
+    # Historically, all benchmarks were sequential, so sequential benchmarks
+    # don't get anything appended to their names, for continuity.
+    is_sequential = scheduler_options is None or scheduler_options.batch_size is None
+    batch_name = "" if is_sequential else "_batch"
+    name = name or f"MBM::{model_name}_{acqf_name}{batch_name}"
 
     generation_strategy = GenerationStrategy(
         name=name,
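
A hedged sketch of the resulting names, assuming the imports used in the tests below (the sequential name matches the existing test assertion; the batched variant follows the new suffix logic):

    method = get_sobol_botorch_modular_acquisition(
        model_cls=SingleTaskGP,
        acquisition_cls=qKnowledgeGradient,
        scheduler_options=get_benchmark_scheduler_options(sequential=False, batch_size=2),
        distribute_replications=False,
    )
    # method.name == "MBM::SingleTaskGP_qKnowledgeGradient_batch"
    # With the default sequential options the name stays
    # "MBM::SingleTaskGP_qKnowledgeGradient", for continuity.
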
56 changes: 45 additions & 11 deletions ax/benchmark/tests/methods/test_methods.py
@@ -5,49 +5,66 @@

 # pyre-strict
 
+from typing import Dict
+
 import numpy as np
 
 from ax.benchmark.benchmark import benchmark_replication
-from ax.benchmark.benchmark_method import get_sequential_optimization_scheduler_options
+from ax.benchmark.benchmark_method import get_benchmark_scheduler_options
 from ax.benchmark.methods.modular_botorch import get_sobol_botorch_modular_acquisition
 from ax.benchmark.methods.sobol import get_sobol_benchmark_method
 from ax.benchmark.problems.registry import get_problem
 from ax.modelbridge.registry import Models
+from ax.service.utils.scheduler_options import SchedulerOptions
 from ax.utils.common.testutils import TestCase
-from ax.utils.common.typeutils import not_none
 from ax.utils.testing.mock import fast_botorch_optimize
-from botorch.acquisition.analytic import LogExpectedImprovement
 from botorch.acquisition.knowledge_gradient import qKnowledgeGradient
+from botorch.acquisition.logei import qLogExpectedImprovement
 from botorch.models.gp_regression import SingleTaskGP
+from pyre_extensions import none_throws
 
 
 class TestMethods(TestCase):
-    def test_mbm_acquisition(self) -> None:
+    def setUp(self) -> None:
+        super().setUp()
+        self.scheduler_options_dict: Dict[str, SchedulerOptions] = {
+            "sequential": get_benchmark_scheduler_options(),
+            "batch": get_benchmark_scheduler_options(sequential=False, batch_size=2),
+        }
+
+    def _test_mbm_acquisition(self, scheduler_options: SchedulerOptions) -> None:
         method = get_sobol_botorch_modular_acquisition(
             model_cls=SingleTaskGP,
             acquisition_cls=qKnowledgeGradient,
-            scheduler_options=get_sequential_optimization_scheduler_options(),
+            scheduler_options=scheduler_options,
             distribute_replications=False,
         )
         self.assertEqual(method.name, "MBM::SingleTaskGP_qKnowledgeGradient")
         gs = method.generation_strategy
         sobol, kg = gs._steps
         self.assertEqual(kg.model, Models.BOTORCH_MODULAR)
-        model_kwargs = not_none(kg.model_kwargs)
+        model_kwargs = none_throws(kg.model_kwargs)
         self.assertEqual(model_kwargs["botorch_acqf_class"], qKnowledgeGradient)
         surrogate_spec = next(iter(model_kwargs["surrogate_specs"].values()))
         self.assertEqual(
             surrogate_spec.botorch_model_class.__name__,
             "SingleTaskGP",
         )
 
+    def test_mbm_acquisition(self) -> None:
+        for name, scheduler_options in self.scheduler_options_dict.items():
+            with self.subTest(name=name):
+                self._test_mbm_acquisition(scheduler_options=scheduler_options)
+
     @fast_botorch_optimize
-    def test_benchmark_replication_runs(self) -> None:
+    def _test_benchmark_replication_runs(
+        self, scheduler_options: SchedulerOptions
+    ) -> None:
         problem = get_problem(problem_name="ackley4")
         method = get_sobol_botorch_modular_acquisition(
             model_cls=SingleTaskGP,
-            scheduler_options=get_sequential_optimization_scheduler_options(),
-            acquisition_cls=LogExpectedImprovement,
+            scheduler_options=scheduler_options,
+            acquisition_cls=qLogExpectedImprovement,
             num_sobol_trials=2,
             name="test",
             distribute_replications=False,
@@ -56,13 +73,30 @@ def test_benchmark_replication_runs(self) -> None:
         self.assertEqual(n_sobol_trials, 2)
         self.assertEqual(method.name, "test")
         # Only run one non-Sobol trial
-        problem = get_problem(problem_name="ackley4", num_trials=n_sobol_trials + 1)
+        n_total_trials = n_sobol_trials + 1
+        problem = get_problem(problem_name="ackley4", num_trials=n_total_trials)
         result = benchmark_replication(problem=problem, method=method, seed=0)
         self.assertTrue(np.isfinite(result.score_trace).all())
+        self.assertEqual(result.optimization_trace.shape, (n_total_trials,))
+
+        expected_n_arms_per_batch = (
+            1 if (batch_size := scheduler_options.batch_size) is None else batch_size
+        )
+        self.assertEqual(
+            len(none_throws(result.experiment).arms_by_name),
+            n_total_trials * expected_n_arms_per_batch,
+        )
+
+    def test_benchmark_replication_runs(self) -> None:
+        for name, scheduler_options in self.scheduler_options_dict.items():
+            with self.subTest(name=name):
+                self._test_benchmark_replication_runs(
+                    scheduler_options=scheduler_options
+                )
+
     def test_sobol(self) -> None:
         method = get_sobol_benchmark_method(
-            scheduler_options=get_sequential_optimization_scheduler_options(),
+            scheduler_options=get_benchmark_scheduler_options(),
             distribute_replications=False,
         )
         self.assertEqual(method.name, "Sobol")
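
A worked instance of the arm-count assertion above, using the test's own numbers (illustrative, not part of the diff):

    # Worked example of the arm-count check (values from the test above):
    n_total_trials = 2 + 1          # 2 Sobol trials + 1 model-based trial
    assert n_total_trials * 2 == 6  # batched: batch_size=2 arms per trial
    assert n_total_trials * 1 == 3  # sequential: 1 arm per trial
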
8 changes: 8 additions & 0 deletions ax/benchmark/tests/problems/test_surrogate_problems.py
@@ -8,12 +8,20 @@

 import numpy as np
 from ax.benchmark.benchmark import compute_score_trace
+from ax.benchmark.benchmark_problem import BenchmarkProblemProtocol
 from ax.core.runner import Runner
 from ax.utils.common.testutils import TestCase
 from ax.utils.testing.benchmark_stubs import get_moo_surrogate, get_soo_surrogate
 
 
 class TestSurrogateProblems(TestCase):
+    def test_conforms_to_protocol(self) -> None:
+        sbp = get_soo_surrogate()
+        self.assertIsInstance(sbp, BenchmarkProblemProtocol)
+
+        mbp = get_moo_surrogate()
+        self.assertIsInstance(mbp, BenchmarkProblemProtocol)
+
     def test_lazy_instantiation(self) -> None:
 
         # test instantiation from init
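
The assertIsInstance checks above presume BenchmarkProblemProtocol supports runtime isinstance checks, i.e. that it is a @runtime_checkable typing.Protocol; a generic sketch of that pattern with hypothetical names (not Ax code):

    from typing import Protocol, runtime_checkable

    @runtime_checkable
    class SupportsRunner(Protocol):
        def get_runner(self) -> object: ...

    class FakeProblem:
        def get_runner(self) -> object:
            return object()

    # isinstance() against a @runtime_checkable Protocol is structural:
    # it checks that the required members exist, not their signatures.
    assert isinstance(FakeProblem(), SupportsRunner)
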
