various speed ups (facebookresearch#319)
Summary:

Made AEPsych faster in the following ways:
- Now takes advantage of optimize_acqf's timeout_sec argument to limit, much more reliably than before, the time spent optimizing the acquisition function via the max_gen_time option in configs.

- Inverse model querying now uses botorch's optimize_acqf instead of scipy.minimize, which should be both faster and more accurate.

- Query messages can now include a "max_time" field to limit the time, in seconds, that the model spends searching for the queried value (using the aforementioned timeout_sec argument); a sketch of such a message follows this list.

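For concreteness, here is a minimal sketch of such a query message. Only the "max_time" field is introduced by this commit; the surrounding envelope and field names are assumed from AEPsych's usual message format rather than copied from this diff.

# Hypothetical query message; only "max_time" is new in this commit.
message = {
    "type": "query",
    "message": {
        "query_type": "max",  # assumed query-type name
        "max_time": 2.0,      # seconds; forwarded to optimize_acqf's timeout_sec
    },
}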
A query method was added to the Python client to exercise these new features.
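A hedged usage sketch of that client method follows; the exact signature of the new query call is an assumption, not taken from this diff.

from aepsych_client import AEPsychClient

# Assumes an AEPsych server is already running at this address.
client = AEPsychClient(ip="0.0.0.0", port=5555)
# ... configure the experiment and run some trials first ...
response = client.query("max", max_time=2.0)  # cap the model search at ~2 s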

Differential Revision: D49388668
Craig Sanders authored and facebook-github-bot committed Sep 21, 2023
1 parent e2795f2 commit 8baf5c1
Showing 12 changed files with 319 additions and 227 deletions.
1 change: 0 additions & 1 deletion aepsych/generators/__init__.py
@@ -16,7 +16,6 @@
 from .optimize_acqf_generator import AxOptimizeAcqfGenerator, OptimizeAcqfGenerator
 from .pairwise_optimize_acqf_generator import PairwiseOptimizeAcqfGenerator
 from .pairwise_sobol_generator import PairwiseSobolGenerator
-from .random_generator import RandomGenerator
 from .random_generator import AxRandomGenerator, RandomGenerator
 from .semi_p import IntensityAwareSemiPGenerator
 from .sobol_generator import AxSobolGenerator, SobolGenerator
76 changes: 20 additions & 56 deletions aepsych/generators/optimize_acqf_generator.py
@@ -22,7 +22,6 @@
 from botorch.acquisition import AcquisitionFunction
 from botorch.acquisition.preference import AnalyticExpectedUtilityOfBestOption
 from botorch.optim import optimize_acqf
-from botorch.utils import draw_sobol_samples

 logger = getLogger()

@@ -47,8 +46,6 @@ def __init__(
             restarts (int): Number of restarts for acquisition function optimization.
             samps (int): Number of samples for quasi-random initialization of the acquisition function optimizer.
             max_gen_time (optional, float): Maximum time (in seconds) to optimize the acquisition function.
-                This is only loosely followed by scipy's optimizer, so consider using a number about 1/3 or
-                less of what your true upper bound is.
         """

         if acqf_kwargs is None:
@@ -103,56 +100,15 @@ def _gen(
         logger.info("Starting gen...")
         starttime = time.time()

-        if self.max_gen_time is None:
-            new_candidate, _ = optimize_acqf(
-                acq_function=acqf,
-                bounds=torch.tensor(np.c_[model.lb, model.ub]).T.to(train_x),
-                q=num_points,
-                num_restarts=self.restarts,
-                raw_samples=self.samps,
-                **gen_options,
-            )
-        else:
-            # figure out how long evaluating a single samp
-            starttime = time.time()
-            _ = acqf(train_x[0:num_points, :])
-            single_eval_time = time.time() - starttime
-
-            # only a heuristic for total num evals since everything is stochastic,
-            # but the reasoning is: we initialize with self.samps samps, subsample
-            # self.restarts from them in proportion to the value of the acqf, and
-            # run that many optimization. So:
-            # total_time = single_eval_time * n_eval * restarts + single_eval_time * samps
-            # and we solve for n_eval
-            n_eval = int(
-                (self.max_gen_time - single_eval_time * self.samps)
-                / (single_eval_time * self.restarts)
-            )
-            if n_eval > 10:
-                # heuristic, if we can't afford 10 evals per restart, just use quasi-random search
-                options = {"maxfun": n_eval}
-                logger.info(f"gen maxfun is {n_eval}")
-
-                new_candidate, _ = optimize_acqf(
-                    acq_function=acqf,
-                    bounds=torch.tensor(np.c_[model.lb, model.ub]).T.to(train_x),
-                    q=num_points,
-                    num_restarts=self.restarts,
-                    raw_samples=self.samps,
-                    options=options,
-                )
-            else:
-                logger.info(f"gen maxfun is {n_eval}, falling back to random search...")
-                nsamp = max(int(self.max_gen_time / single_eval_time), 10)
-                # Generate the points at which to sample
-                bounds = torch.stack((model.lb, model.ub))
-
-                X = draw_sobol_samples(bounds=bounds, n=nsamp, q=num_points)
-
-                acqvals = acqf(X)
-
-                best_indx = torch.argmax(acqvals, dim=0)
-                new_candidate = X[best_indx]
+        new_candidate, _ = optimize_acqf(
+            acq_function=acqf,
+            bounds=torch.tensor(np.c_[model.lb, model.ub]).T.to(train_x),
+            q=num_points,
+            num_restarts=self.restarts,
+            raw_samples=self.samps,
+            timeout_sec=self.max_gen_time,
+            **gen_options,
+        )

         logger.info(f"Gen done, time={time.time()-starttime}")
         return new_candidate
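The rewritten path above hands the whole time budget to BoTorch. For reference, a self-contained sketch of the same timeout_sec mechanism on a toy problem; the model, data, bounds, and acquisition function below are stand-ins and are not part of this diff.

import torch
from botorch.acquisition import qUpperConfidenceBound
from botorch.fit import fit_gpytorch_mll
from botorch.models import SingleTaskGP
from botorch.optim import optimize_acqf
from gpytorch.mlls import ExactMarginalLogLikelihood

# Toy 2-d data on the unit square.
train_X = torch.rand(20, 2, dtype=torch.double)
train_Y = (train_X.sum(dim=-1, keepdim=True) - 1.0).sin()
model = SingleTaskGP(train_X, train_Y)
fit_gpytorch_mll(ExactMarginalLogLikelihood(model.likelihood, model))

acqf = qUpperConfidenceBound(model, beta=1.96)
bounds = torch.tensor([[0.0, 0.0], [1.0, 1.0]], dtype=torch.double)
candidate, value = optimize_acqf(
    acq_function=acqf,
    bounds=bounds,
    q=1,
    num_restarts=10,
    raw_samples=512,
    timeout_sec=0.5,  # stop optimizing after ~0.5 s; return best point found so far
)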
@@ -258,6 +214,14 @@ class MissingValue:
     @classmethod
     def _get_gen_options(cls, config: Config):
         classname = "OptimizeAcqfGenerator"
-        restarts = config.getint(classname, "restarts", fallback=10)
-        samps = config.getint(classname, "samps", fallback=1000)
-        return {"restarts": restarts, "samps": samps}
+        restarts = config.getint(classname, "num_restarts", fallback=10)
+        samps = config.getint(classname, "raw_samples", fallback=None)
+        timeout_sec = config.getfloat(classname, "max_gen_time", fallback=None)
+        optimizer_kwargs = {
+            "optimizer_kwargs": {
+                "num_restarts": restarts,
+                "raw_samples": samps,
+                "timeout_sec": timeout_sec,
+            }
+        }
+        return {"model_gen_options": optimizer_kwargs}
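Given the rewritten _get_gen_options above, these options can be set in an AEPsych config. A sketch: the option names match what the code reads, while the Config(config_str=...) construction and where _get_gen_options is reachable are assumptions about the surrounding API.

from aepsych.config import Config

config = Config(config_str="""
[OptimizeAcqfGenerator]
num_restarts = 10
raw_samples = 1000
max_gen_time = 0.5
""")
# _get_gen_options(config) would then return:
# {"model_gen_options": {"optimizer_kwargs": {
#     "num_restarts": 10, "raw_samples": 1000, "timeout_sec": 0.5}}}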
142 changes: 41 additions & 101 deletions aepsych/models/base.py
@@ -17,17 +17,17 @@

 from aepsych.config import Config, ConfigurableMixin
 from aepsych.factory.factory import default_mean_covar_factory
-from aepsych.models.utils import get_extremum
-from aepsych.utils import dim_grid, get_jnd_multid, make_scaled_sobol, promote_0d
+from aepsych.models.utils import get_extremum, inv_query
+from aepsych.utils import dim_grid, get_jnd_multid, promote_0d
 from aepsych.utils_logging import getLogger
 from botorch.fit import fit_gpytorch_mll, fit_gpytorch_mll_scipy
 from botorch.models.gpytorch import GPyTorchModel
 from botorch.posteriors import GPyTorchPosterior
 from gpytorch.likelihoods import Likelihood
 from gpytorch.mlls import MarginalLogLikelihood
-from scipy.optimize import minimize
 from scipy.stats import norm
+

 logger = getLogger()

 torch.set_default_dtype(torch.double)  # TODO: find a better way to prevent type errors
@@ -120,6 +120,7 @@ def get_max(
         self: ModelProtocol,
         locked_dims: Optional[Mapping[int, List[float]]] = None,
         n_samples: int = 1000,
+        max_time: Optional[float] = None,
     ) -> Tuple[float, np.ndarray]:
         """Return the maximum of the modeled function, subject to constraints
         Returns:
@@ -129,12 +130,15 @@ def get_max(
             n_samples int: number of coarse grid points to sample for optimization estimate.
         """
         locked_dims = locked_dims or {}
-        return get_extremum(self, "max", self.bounds, locked_dims, n_samples)
+        return get_extremum(
+            self, "max", self.bounds, locked_dims, n_samples, max_time=max_time
+        )

     def get_min(
         self: ModelProtocol,
         locked_dims: Optional[Mapping[int, List[float]]] = None,
         n_samples: int = 1000,
+        max_time: Optional[float] = None,
     ) -> Tuple[float, np.ndarray]:
         """Return the minimum of the modeled function, subject to constraints
         Returns:
@@ -144,77 +148,46 @@
             n_samples int: number of coarse grid points to sample for optimization estimate.
         """
         locked_dims = locked_dims or {}
-        return get_extremum(self, "min", self.bounds, locked_dims, n_samples)
+        return get_extremum(
+            self, "min", self.bounds, locked_dims, n_samples, max_time=max_time
+        )

     def inv_query(
-        self: ModelProtocol,
+        self,
         y: float,
         locked_dims: Optional[Mapping[int, List[float]]] = None,
         probability_space: bool = False,
         n_samples: int = 1000,
-    ) -> Tuple[float, torch.Tensor]:
+        max_time: Optional[float] = None,
+    ) -> Tuple[float, Union[torch.Tensor, np.ndarray]]:
         """Query the model inverse.
         Return nearest x such that f(x) = queried y, and also return the
         value of f at that point.
         Args:
             y (float): Points at which to find the inverse.
             locked_dims (Mapping[int, List[float]]): Dimensions to fix, so that the
                 inverse is along a slice of the full surface.
-            probability_space (bool, optional): Is y (and therefore the
+            probability_space (bool): Is y (and therefore the
                 returned nearest_y) in probability space instead of latent
                 function space? Defaults to False.
         Returns:
             Tuple[float, np.ndarray]: Tuple containing the value of f
                 nearest to queried y and the x position of this value.
         """
-        if probability_space:
-            assert (
-                self.outcome_type == "binary"
-            ), f"Cannot get probability space for outcome_type '{self.outcome_type}'"
-
-        locked_dims = locked_dims or {}
-
-        def model_distance(x, pt, probability_space):
-            return np.abs(
-                self.predict(torch.tensor([x]), probability_space=probability_space)[0]
-                .detach()
-                .numpy()
-                - pt
-            )
-
-        # Look for point with value closest to y, subject the dict of locked dims
-
-        query_lb = self.lb.clone()
-        query_ub = self.ub.clone()
-
-        for locked_dim in locked_dims.keys():
-            dim_values = locked_dims[locked_dim]
-            if len(dim_values) == 1:
-                query_lb[locked_dim] = dim_values[0]
-                query_ub[locked_dim] = dim_values[0]
-            else:
-                query_lb[locked_dim] = dim_values[0]
-                query_ub[locked_dim] = dim_values[1]
-
-        d = make_scaled_sobol(query_lb, query_ub, n_samples, seed=0)
-
-        bounds = zip(query_lb.numpy(), query_ub.numpy())
-
-        fmean, _ = self.predict(d, probability_space=probability_space)
-
-        f = torch.abs(fmean - y)
-        estimate = d[torch.where(f == torch.min(f))[0][0]].numpy()
-        a = minimize(
-            model_distance,
-            estimate,
-            args=(y, probability_space),
-            method=self.extremum_solver,
-            bounds=bounds,
+        _, arg = inv_query(
+            self,
+            y=y,
+            bounds=self.bounds,
+            locked_dims=locked_dims,
+            probability_space=probability_space,
+            n_samples=n_samples,
+            max_time=max_time,
         )
-        val = self.predict(torch.tensor([a.x]), probability_space=probability_space)[
-            0
-        ].item()
-        return val, torch.Tensor(a.x)
+        if probability_space:
+            val, _ = self.predict_probability(arg.reshape(1, self.dim))
+        else:
+            val, _ = self.predict(arg.reshape(1, self.dim))
+        return float(val.item()), arg

     def get_jnd(
         self: ModelProtocol,
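A usage sketch of the refactored queries with the new max_time cap; the fitted model is assumed and the numbers are illustrative only.

# Assumes `model` is a fitted AEPsych model implementing ModelProtocol.
fmax, argmax = model.get_max(max_time=1.0)  # time-capped maximum
fmin, argmin = model.get_min(max_time=1.0)  # time-capped minimum
val, x = model.inv_query(
    y=0.75,                  # target value (here, a response probability)
    probability_space=True,  # search in probability space
    max_time=1.0,            # seconds to spend on the inverse search
)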
@@ -475,6 +448,7 @@ def get_max(
         bounds: torch.Tensor,
         locked_dims: Optional[Mapping[int, List[float]]] = None,
         n_samples: int = 1000,
+        max_time: Optional[float] = None,
     ) -> Tuple[float, np.ndarray]:
         """Return the maximum of the modeled function, subject to constraints
         Args:
@@ -487,13 +461,16 @@
             Tuple[torch.Tensor, torch.Tensor]: Tuple containing the max and its location (argmax).
         """
         locked_dims = locked_dims or {}
-        return get_extremum(self, "max", bounds, locked_dims, n_samples)
+        return get_extremum(
+            self, "max", bounds, locked_dims, n_samples, max_time=max_time
+        )

     def get_min(
         self,
         bounds: torch.Tensor,
         locked_dims: Optional[Mapping[int, List[float]]] = None,
         n_samples: int = 1000,
+        max_time: Optional[float] = None,
     ) -> Tuple[float, np.ndarray]:
         """Return the minimum of the modeled function, subject to constraints
         Args:
@@ -505,7 +482,9 @@
             Tuple[torch.Tensor, torch.Tensor]: Tuple containing the min and its location (argmin).
         """
         locked_dims = locked_dims or {}
-        return get_extremum(self, "min", bounds, locked_dims, n_samples)
+        return get_extremum(
+            self, "min", bounds, locked_dims, n_samples, max_time=max_time
+        )

     def inv_query(
         self,
@@ -514,7 +493,7 @@ def inv_query(
         locked_dims: Optional[Mapping[int, List[float]]] = None,
         probability_space: bool = False,
         n_samples: int = 1000,
-    ) -> Tuple[float, torch.Tensor]:
+    ) -> Tuple[float, Union[torch.Tensor, np.ndarray]]:
         """Query the model inverse.
         Return nearest x such that f(x) = queried y, and also return the
         value of f at that point.
@@ -529,51 +508,12 @@
             Tuple[float, np.ndarray]: Tuple containing the value of f
                 nearest to queried y and the x position of this value.
         """
+        _, arg = inv_query(self, y, bounds, locked_dims, probability_space, n_samples)
         if probability_space:
-            assert (
-                self.outcome_type == "binary" or self.outcome_type is None
-            ), f"Cannot get probability space for outcome_type '{self.outcome_type}'"
-            pred_function = self.predict_probability
-
+            val, _ = self.predict_probability(arg.reshape(1, -1))
         else:
-            pred_function = self.predict
-
-        locked_dims = locked_dims or {}
-
-        def model_distance(x, pt, probability_space):
-            return np.abs(pred_function(torch.tensor([x]))[0].detach().numpy() - pt)
-
-        # Look for point with value closest to y, subject the dict of locked dims
-
-        query_lb = bounds[0]
-        query_ub = bounds[-1]
-
-        for locked_dim in locked_dims.keys():
-            dim_values = locked_dims[locked_dim]
-            if len(dim_values) == 1:
-                query_lb[locked_dim] = dim_values[0]
-                query_ub[locked_dim] = dim_values[0]
-            else:
-                query_lb[locked_dim] = dim_values[0]
-                query_ub[locked_dim] = dim_values[1]
-
-        d = make_scaled_sobol(query_lb, query_ub, n_samples, seed=0)
-
-        opt_bounds = zip(query_lb.numpy(), query_ub.numpy())
-
-        fmean, _ = pred_function(d)
-
-        f = torch.abs(fmean - y)
-        estimate = d[torch.where(f == torch.min(f))[0][0]].numpy()
-        a = minimize(
-            model_distance,
-            estimate,
-            args=(y, probability_space),
-            method=self.extremum_solver,
-            bounds=opt_bounds,
-        )
-        val = pred_function(torch.tensor([a.x]))[0].item()
-        return val, torch.Tensor(a.x)
+            val, _ = self.predict(arg)
+        return float(val.item()), arg

     @abc.abstractmethod
     def get_mll_class(self):
(The remaining 9 changed files are collapsed and not shown here.)
