Avoid fetching data in ImprovementGSS; we always want to "look up" data anywhere in the methods stack (facebook#2814)

Summary:

Calling `fetch` here was an oversight; we should *always* use `lookup` anywhere outside of HitL settings or the `Scheduler` (a brief sketch of this convention follows the list below).

In the long run we would want to:
1. Enforce that `fetch` is only used in the appropriate settings mentioned above.
2. Make the "default to lookup" logic more thorough, so that `fetch` boils down to `lookup` everywhere it can; this is not currently straightforward to ensure.
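
A minimal sketch of the convention, assuming the standard Ax `Experiment` API: `lookup_data` reads whatever data is already attached to the experiment, while `fetch_data` may reach out to metric backends to pull fresh results. The helper name below is hypothetical.

```python
from ax.core.experiment import Experiment
from ax.exceptions.core import AxError


def _lookup_attached_data(experiment: Experiment):
    """Hypothetical helper: read data already attached to the experiment.

    `lookup_data` does not reach out to metric backends, which is what we want
    anywhere inside the methods stack; `fetch_data` is reserved for HitL
    settings and the `Scheduler`.
    """
    data = experiment.lookup_data()
    if data.df.empty:
        raise AxError(f"Experiment {experiment} has no data attached.")
    return data
```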

Reviewed By: esantorella

Differential Revision: D63784231
Lena Kashtelyan authored and facebook-github-bot committed Oct 14, 2024
1 parent 0ef1de0 commit 3b34819
Showing 1 changed file with 23 additions and 2 deletions.
25 changes: 23 additions & 2 deletions ax/global_stopping/strategies/improvement.py
@@ -19,6 +19,7 @@
from ax.core.outcome_constraint import ObjectiveThreshold
from ax.core.trial import Trial
from ax.core.types import ComparisonOp
from ax.exceptions.core import AxError
from ax.global_stopping.strategies.base import BaseGlobalStoppingStrategy
from ax.modelbridge.modelbridge_utils import observed_hypervolume
from ax.plot.pareto_utils import (
@@ -140,12 +141,20 @@ def _should_stop_optimization(
            )
            return stop, message

        data = experiment.lookup_data()
        if data.df.empty:
            raise AxError(
                f"Experiment {experiment} does not have any data attached "
                f"to it, despite having {num_completed_trials} completed "
                f"trials. Data is required for {self}, so this is an invalid "
                "state of the experiment."
            )

        if isinstance(experiment.optimization_config, MultiObjectiveOptimizationConfig):
            if objective_thresholds is None:
                # self._inferred_objective_thresholds is cached and only computed once.
                if self._inferred_objective_thresholds is None:
                    # only infer reference point if there is data on the experiment.
                    data = experiment.fetch_data()
                    if not data.df.empty:
                        # We infer the nadir reference point to be used by the GSS.
                        self._inferred_objective_thresholds = (
@@ -156,6 +165,18 @@
                # TODO: move this out into a separate infer_objective_thresholds
                # instance method or property that handles the caching.
                objective_thresholds = self._inferred_objective_thresholds
            if not objective_thresholds:
                # TODO: This is headed to ax.modelbridge.modelbridge_utils.hypervolume,
                # where an empty list would lead to an opaque indexing error.
                # A list that is nonempty and of the wrong length could be worse,
                # since it might wind up running without error, but with thresholds for
                # the wrong metrics. We should validate correctness of the length of the
                # objective thresholds, ideally in hypervolume utils.
                raise AxError(
                    f"Objective thresholds were not specified and could not be inferred"
                    f". They are required for {self} when performing multi-objective "
                    "optimization, so this is an invalid state of the experiment."
                )
            return self._should_stop_moo(
                experiment=experiment,
                trial_to_check=trial_to_check,
@@ -200,7 +221,7 @@ def _should_stop_moo(
            and a str declaring the reason for stopping.
        """
        reference_trial_index = trial_to_check - self.window_size + 1
        data_df = experiment.fetch_data().df
        data_df = experiment.lookup_data().df
        data_df_reference = data_df[data_df["trial_index"] <= reference_trial_index]
        data_df = data_df[data_df["trial_index"] <= trial_to_check]
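For context, a hedged sketch of how a stopping strategy like this one is typically driven through its public entry point. The constructor argument names and values shown are assumptions recalled from the Ax API, not something prescribed by this commit; `_should_stop_optimization` in the diff above is the private hook behind that entry point.

```python
from ax.core.experiment import Experiment
from ax.global_stopping.strategies.improvement import ImprovementGlobalStoppingStrategy


def check_stopping(experiment: Experiment) -> None:
    # Argument values are arbitrary; parameter names are assumed from the Ax
    # constructor and may differ across versions.
    gss = ImprovementGlobalStoppingStrategy(
        min_trials=10, window_size=5, improvement_bar=0.05
    )
    # The public method returns a (bool, str) tuple: whether to stop and why.
    should_stop, reason = gss.should_stop_optimization(experiment=experiment)
    if should_stop:
        print(f"Stopping optimization: {reason}")
```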
