Avoid fetching data in ImprovementGSS; we always want to "look up" data anywhere in the methods stack (#2814)

Summary: Calling `fetch` here was an oversight; we should *always* be using `lookup` anywhere outside of HitL settings or `Scheduler`.

In the long run we would want to:
1. Enforce that `fetch` is only used in the appropriate settings mentioned above.
2. Make the "default to lookup" logic more elaborate to make sure that `fetch` boils down to `lookup` everywhere that it can, but it's not currently straightforward to ensure this.

Reviewed By: esantorella

Differential Revision: D63784231
facebook · Oct 11, 2024 · 81567b7 · 81567b7
1 parent f1b9c02
commit 81567b7
Showing 1 changed file with 17 additions and 2 deletions.
diff --git a/ax/global_stopping/strategies/improvement.py b/ax/global_stopping/strategies/improvement.py
@@ -19,6 +19,7 @@
 from ax.core.outcome_constraint import ObjectiveThreshold
 from ax.core.trial import Trial
 from ax.core.types import ComparisonOp
+from ax.exceptions.core import AxError
 from ax.global_stopping.strategies.base import BaseGlobalStoppingStrategy
 from ax.modelbridge.modelbridge_utils import observed_hypervolume
 from ax.plot.pareto_utils import (
@@ -140,12 +141,20 @@ def _should_stop_optimization(
             )
             return stop, message
 
+        data = experiment.lookup_data()
+        if data.df.empty:
+            raise AxError(
+                f"Experiment {experiment} does not have any data attached "
+                f"to it, despite having {num_completed_trials} completed "
+                f"trials. Data is required for {self}, so this is an invalid "
+                "state of the experiment."
+            )
+
         if isinstance(experiment.optimization_config, MultiObjectiveOptimizationConfig):
             if objective_thresholds is None:
                 # self._inferred_objective_thresholds is cached and only computed once.
                 if self._inferred_objective_thresholds is None:
                     # only infer reference point if there is data on the experiment.
-                    data = experiment.fetch_data()
                     if not data.df.empty:
                         # We infer the nadir reference point to be used by the GSS.
                         self._inferred_objective_thresholds = (
@@ -156,6 +165,12 @@ def _should_stop_optimization(
                 # TODO: move this out into a separate infer_objective_thresholds
                 # instance method or property that handles the caching.
                 objective_thresholds = self._inferred_objective_thresholds
+            if objective_thresholds is None:  # QUESTION: Should empty list go here too?
+                raise AxError(
+                    f"Objective thresholds were not specified and could not be inferred."
+                    f" They are required for {self} when performing multi-objective "
+                    "optimization, so this is an invalid state of the experiment."
+                )
             return self._should_stop_moo(
                 experiment=experiment,
                 trial_to_check=trial_to_check,
@@ -200,7 +215,7 @@ def _should_stop_moo(
                 and a str declaring the reason for stopping.
         """
         reference_trial_index = trial_to_check - self.window_size + 1
-        data_df = experiment.fetch_data().df
+        data_df = experiment.lookup_data().df
         data_df_reference = data_df[data_df["trial_index"] <= reference_trial_index]
         data_df = data_df[data_df["trial_index"] <= trial_to_check]