diff --git a/ax/models/torch/botorch_modular/acquisition.py b/ax/models/torch/botorch_modular/acquisition.py
index 77183394ebf..f8aebca2235 100644
--- a/ax/models/torch/botorch_modular/acquisition.py
+++ b/ax/models/torch/botorch_modular/acquisition.py
@@ -382,7 +382,7 @@ def optimize(
         fixed_features: Optional[Dict[int, float]] = None,
         rounding_func: Optional[Callable[[Tensor], Tensor]] = None,
         optimizer_options: Optional[Dict[str, Any]] = None,
-    ) -> Tuple[Tensor, Tensor]:
+    ) -> Tuple[Tensor, Tensor, Tensor]:
         """Generate a set of candidates via multi-start optimization.

         Obtains candidates and their associated acquisition function values.
@@ -409,8 +409,9 @@
                 down these options while constructing a generation strategy.

         Returns:
-            A two-element tuple containing an `n x d`-dim tensor of generated candidates
-            and a tensor with the associated acquisition value.
+            A three-element tuple containing an `n x d`-dim tensor of generated
+            candidates, a tensor with the associated acquisition values, and a tensor
+            with the weight for each candidate.
         """
         # NOTE: Could make use of `optimizer_class` when it's added to BoTorch
         # instead of calling `optimizer_acqf` or `optimize_acqf_discrete` etc.
@@ -434,13 +434,15 @@
             for i in fixed_features:
                 if not 0 <= i < len(ssd.feature_names):
                     raise ValueError(f"Invalid fixed_feature index: {i}")
-
+        # Return a weight of 1 for each arm by default. This can be
+        # customized in subclasses if necessary.
+        arm_weights = torch.ones(n, dtype=self.dtype)
         # 1. Handle the fully continuous search space.
         if (
             optimizer_options_with_defaults.pop("force_use_optimize_acqf", False)
             or not discrete_features
         ):
-            return optimize_acqf(
+            candidates, acqf_values = optimize_acqf(
                 acq_function=self.acqf,
                 bounds=bounds,
                 q=n,
@@ -449,6 +452,7 @@
                 post_processing_func=post_processing_func,
                 **optimizer_options_with_defaults,
             )
+            return candidates, acqf_values, arm_weights

         # 2. Handle search spaces with discrete features.
         discrete_choices = mk_discrete_choices(ssd=ssd, fixed_features=fixed_features)
@@ -468,7 +472,7 @@
                 torch.tensor(c, device=self.device, dtype=self.dtype)
                 for c in discrete_choices.values()
             ]
-            return optimize_acqf_discrete_local_search(
+            candidates, acqf_values = optimize_acqf_discrete_local_search(
                 acq_function=self.acqf,
                 q=n,
                 discrete_choices=discrete_choices,
@@ -476,6 +480,7 @@
                 X_avoid=X_observed,
                 **optimizer_options_with_defaults,
             )
+            return candidates, acqf_values, arm_weights

         # Enumerate all possible choices
         all_choices = (discrete_choices[i] for i in range(len(discrete_choices)))
@@ -520,12 +525,13 @@
                 optimizer_options=optimizer_options,
                 optimizer_is_discrete=True,
             )
-            return optimize_acqf_discrete(
+            candidates, acqf_values = optimize_acqf_discrete(
                 acq_function=self.acqf, q=n, choices=all_choices, **discrete_opt_options
             )
+            return candidates, acqf_values, arm_weights

         # 2b. Handle mixed search spaces that have discrete and continuous features.
-        return optimize_acqf_mixed(
+        candidates, acqf_values = optimize_acqf_mixed(
             acq_function=self.acqf,
             bounds=bounds,
             q=n,
@@ -539,6 +545,7 @@
             post_processing_func=post_processing_func,
             **optimizer_options_with_defaults,
         )
+        return candidates, acqf_values, arm_weights

     def evaluate(self, X: Tensor) -> Tensor:
         """Evaluate the acquisition function on the candidate set `X`.
diff --git a/ax/models/torch/botorch_modular/model.py b/ax/models/torch/botorch_modular/model.py
index 15f445418bf..ab2bb4e098a 100644
--- a/ax/models/torch/botorch_modular/model.py
+++ b/ax/models/torch/botorch_modular/model.py
@@ -427,7 +427,7 @@ def gen(
             acq_options=acq_options,
         )
         botorch_rounding_func = get_rounding_func(torch_opt_config.rounding_func)
-        candidates, expected_acquisition_value = acqf.optimize(
+        candidates, expected_acquisition_value, weights = acqf.optimize(
             n=n,
             search_space_digest=search_space_digest,
             inequality_constraints=_to_inequality_constraints(
@@ -444,7 +444,7 @@
         )
         return TorchGenResults(
             points=candidates.detach().cpu(),
-            weights=torch.ones(n, dtype=self.dtype),
+            weights=weights,
             gen_metadata=gen_metadata,
         )

diff --git a/ax/models/torch/botorch_modular/sebo.py b/ax/models/torch/botorch_modular/sebo.py
index 8bbd1be8834..301173849ed 100644
--- a/ax/models/torch/botorch_modular/sebo.py
+++ b/ax/models/torch/botorch_modular/sebo.py
@@ -214,7 +214,7 @@ def optimize(
         fixed_features: Optional[Dict[int, float]] = None,
         rounding_func: Optional[Callable[[Tensor], Tensor]] = None,
         optimizer_options: Optional[Dict[str, Any]] = None,
-    ) -> Tuple[Tensor, Tensor]:
+    ) -> Tuple[Tensor, Tensor, Tensor]:
         """Generate a set of candidates via multi-start optimization.

         Obtains candidates and their associated acquisition function values.
@@ -232,6 +232,11 @@
                 transformations).
             optimizer_options: Options for the optimizer function, e.g. ``sequential``
                 or ``raw_samples``.
+
+        Returns:
+            A three-element tuple containing an `n x d`-dim tensor of generated
+            candidates, a tensor with the associated acquisition values, and a tensor
+            with the weight for each candidate.
         """
         if self.penalty_name == "L0_norm":
             if inequality_constraints is not None:
                 raise NotImplementedError(
                     "Homotopy does not support optimization with inequality "
                     + "constraints. Use L1 penalty norm instead."
                 )
-            candidates, expected_acquisition_value = self._optimize_with_homotopy(
-                n=n,
-                search_space_digest=search_space_digest,
-                fixed_features=fixed_features,
-                rounding_func=rounding_func,
-                optimizer_options=optimizer_options,
+            candidates, expected_acquisition_value, weights = (
+                self._optimize_with_homotopy(
+                    n=n,
+                    search_space_digest=search_space_digest,
+                    fixed_features=fixed_features,
+                    rounding_func=rounding_func,
+                    optimizer_options=optimizer_options,
+                )
             )
         else:  # if L1 norm use standard moo-opt
-            candidates, expected_acquisition_value = super().optimize(
+            candidates, expected_acquisition_value, weights = super().optimize(
                 n=n,
                 search_space_digest=search_space_digest,
                 inequality_constraints=inequality_constraints,
@@ -265,7 +272,7 @@
             device=self.device,
             dtype=self.dtype,
         )
-        return candidates, expected_acquisition_value
+        return candidates, expected_acquisition_value, weights

     def _optimize_with_homotopy(
         self,
@@ -274,7 +281,7 @@
         fixed_features: Optional[Dict[int, float]] = None,
         rounding_func: Optional[Callable[[Tensor], Tensor]] = None,
         optimizer_options: Optional[Dict[str, Any]] = None,
-    ) -> Tuple[Tensor, Tensor]:
+    ) -> Tuple[Tensor, Tensor, Tensor]:
         """Optimize SEBO ACQF with L0 norm using homotopy."""
         # extend to fixed a no homotopy_schedule schedule
         _tensorize = partial(torch.tensor, dtype=self.dtype, device=self.device)
@@ -346,7 +353,11 @@ def callback():  # pyre-ignore
             batch_initial_conditions=batch_initial_conditions,
         )
-        return candidates, expected_acquisition_value
+        return (
+            candidates,
+            expected_acquisition_value,
+            torch.ones(n, dtype=candidates.dtype),
+        )


 def L1_norm_func(X: Tensor, init_point: Tensor) -> Tensor:
diff --git a/ax/models/torch/botorch_modular/surrogate.py b/ax/models/torch/botorch_modular/surrogate.py
index e1800d1cb6e..8d50c2f3cfe 100644
--- a/ax/models/torch/botorch_modular/surrogate.py
+++ b/ax/models/torch/botorch_modular/surrogate.py
@@ -670,7 +670,7 @@ def best_out_of_sample_point(
         torch_opt_config=torch_opt_config,
         options=acqf_options,
     )
-    candidates, acqf_values = acqf.optimize(
+    candidates, acqf_values, _ = acqf.optimize(
         n=1,
         search_space_digest=search_space_digest,
         inequality_constraints=_to_inequality_constraints(
diff --git a/ax/models/torch/tests/test_acquisition.py b/ax/models/torch/tests/test_acquisition.py
index 81bef0c85aa..81e06965d45 100644
--- a/ax/models/torch/tests/test_acquisition.py
+++ b/ax/models/torch/tests/test_acquisition.py
@@ -299,7 +299,7 @@ def test_init_with_subset_model_false(
             outcome_constraints=self.outcome_constraints
         )

-    @mock.patch(f"{ACQUISITION_PATH}.optimize_acqf")
+    @mock.patch(f"{ACQUISITION_PATH}.optimize_acqf", return_value=(Mock(), Mock()))
     def test_optimize(self, mock_optimize_acqf: Mock) -> None:
         acquisition = self.get_acquisition_function(fixed_features=self.fixed_features)
         acquisition.optimize(
@@ -421,7 +421,7 @@ def test_optimize_discrete(self) -> None:
         # 2 candidates have acqf value 8, but [1, 3, 4] is pending and thus should
         # not be selected. [2, 3, 4] is the best point, but has already been picked
         acquisition = self.get_acquisition_function()
-        X_selected, _ = acquisition.optimize(
+        X_selected, _, weights = acquisition.optimize(
             n=2,
             search_space_digest=ssd1,
             rounding_func=self.rounding_func,
@@ -431,6 +431,7 @@ def test_optimize_discrete(self) -> None:
         self.assertTrue(
             all((x.unsqueeze(0) == expected).all(dim=-1).any() for x in X_selected)
         )
+        self.assertTrue(torch.equal(weights, torch.ones(2)))
         # check with fixed feature
         # Since parameter 1 is fixed to 2, the best 3 candidates are
         # [4, 2, 4], [3, 2, 4], [4, 2, 3]
@@ -444,7 +445,7 @@ def test_optimize_discrete(self) -> None:
             # int]]]` but got `Dict[int, List[int]]`.
             discrete_choices={k: [0, 1, 2, 3, 4] for k in range(3)},
         )
-        X_selected, _ = acquisition.optimize(
+        X_selected, _, weights = acquisition.optimize(
             n=3,
             search_space_digest=ssd2,
             fixed_features=self.fixed_features,
@@ -455,9 +456,10 @@ def test_optimize_discrete(self) -> None:
         self.assertTrue(
             all((x.unsqueeze(0) == expected).all(dim=-1).any() for x in X_selected)
         )
+        self.assertTrue(torch.equal(weights, torch.ones(3)))
         # check with a constraint that -1 * x[0] -1 * x[1] >= 0 which should make
         # [0, 0, 4] the best candidate.
-        X_selected, _ = acquisition.optimize(
+        X_selected, _, weights = acquisition.optimize(
             n=1,
             search_space_digest=ssd2,
             rounding_func=self.rounding_func,
@@ -467,8 +469,9 @@ def test_optimize_discrete(self) -> None:
         )
         expected = torch.tensor([[0, 0, 4]]).to(self.X)
         self.assertTrue(torch.equal(expected, X_selected))
+        self.assertTrue(torch.equal(weights, torch.tensor([1.0], dtype=self.X.dtype)))
         # Same thing but use two constraints instead
-        X_selected, _ = acquisition.optimize(
+        X_selected, _, weights = acquisition.optimize(
             n=1,
             search_space_digest=ssd2,
             rounding_func=self.rounding_func,
@@ -479,8 +482,12 @@ def test_optimize_discrete(self) -> None:
         )
         expected = torch.tensor([[0, 0, 4]]).to(self.X)
         self.assertTrue(torch.equal(expected, X_selected))
+        self.assertTrue(torch.equal(weights, torch.tensor([1.0])))

-    @mock.patch(f"{ACQUISITION_PATH}.optimize_acqf_discrete_local_search")
+    @mock.patch(
+        f"{ACQUISITION_PATH}.optimize_acqf_discrete_local_search",
+        return_value=(Mock(), Mock()),
+    )
     def test_optimize_acqf_discrete_local_search(
         self,
         mock_optimize_acqf_discrete_local_search: Mock,
@@ -524,7 +531,9 @@ def test_optimize_acqf_discrete_local_search(
             all((X_avoid_true == x).all(dim=-1).any().item() for x in kwargs["X_avoid"])
         )

-    @mock.patch(f"{ACQUISITION_PATH}.optimize_acqf_mixed")
+    @mock.patch(
+        f"{ACQUISITION_PATH}.optimize_acqf_mixed", return_value=(Mock(), Mock())
+    )
     def test_optimize_mixed(self, mock_optimize_acqf_mixed: Mock) -> None:
         tkwargs = {"dtype": self.X.dtype, "device": self.X.device}
         ssd = SearchSpaceDigest(
@@ -564,7 +573,9 @@ def test_optimize_mixed(self, mock_optimize_acqf_mixed: Mock) -> None:
         mock_optimize_acqf_mixed.reset_mock()
         optimizer_options = self.optimizer_options.copy()
         optimizer_options["force_use_optimize_acqf"] = True
-        with mock.patch(f"{ACQUISITION_PATH}.optimize_acqf") as mock_optimize_acqf:
+        with mock.patch(
+            f"{ACQUISITION_PATH}.optimize_acqf", return_value=(Mock(), Mock())
+        ) as mock_optimize_acqf:
             acquisition.optimize(
                 n=3,
                 search_space_digest=ssd,
diff --git a/ax/models/torch/tests/test_model.py b/ax/models/torch/tests/test_model.py
index ef81d9a2e17..e3fe127bfdc 100644
--- a/ax/models/torch/tests/test_model.py
+++ b/ax/models/torch/tests/test_model.py
@@ -482,8 +482,9 @@ def _test_gen(
             input_constructor=mock_input_constructor,
         )
         mock_optimize.return_value = (
-            torch.tensor([1.0]),
+            torch.tensor([[1.0]]),
             torch.tensor([2.0]),
+            torch.tensor([1.0]),
         )
         surrogate = Surrogate(botorch_model_class=botorch_model_class)
         model = BoTorchModel(
@@ -824,8 +825,8 @@ def test_model_list_choice(self, _) -> None:  # , mock_extract_training_data):

     @mock.patch(
         f"{ACQUISITION_PATH}.Acquisition.optimize",
-        # Dummy candidates and acquisition function value.
-        return_value=(torch.tensor([[2.0]]), torch.tensor([1.0])),
+        # Dummy candidates, acquisition value, and weights
+        return_value=(torch.tensor([[2.0]]), torch.tensor([1.0]), torch.tensor([1.0])),
     )
     def test_MOO(self, _) -> None:
         # Add mock for qLogNEHVI input constructor to catch arguments passed to it.
diff --git a/ax/models/torch/tests/test_sebo.py b/ax/models/torch/tests/test_sebo.py
index ec97c237eeb..4fb2e77f1fd 100644
--- a/ax/models/torch/tests/test_sebo.py
+++ b/ax/models/torch/tests/test_sebo.py
@@ -244,7 +244,7 @@ def test_optimize_l0_homotopy(
             feature_names=["a"],
             bounds=[(-10.0, 5.0)],
         )
-        candidate, acqf_val = acquisition._optimize_with_homotopy(
+        candidate, acqf_val, weights = acquisition._optimize_with_homotopy(
             n=1,
             search_space_digest=search_space_digest,
             optimizer_options={
@@ -255,6 +255,7 @@ def test_optimize_l0_homotopy(
         )
         self.assertEqual(candidate, torch.zeros(1, **tkwargs))
         self.assertEqual(acqf_val, 5 * torch.ones(1, **tkwargs))
+        self.assertEqual(weights, torch.ones(1, **tkwargs))

     @mock.patch(f"{SEBOACQUISITION_PATH}.optimize_acqf_homotopy")
     def test_optimize_l0(self, mock_optimize_acqf_homotopy: Mock) -> None:
diff --git a/ax/models/torch/tests/test_surrogate.py b/ax/models/torch/tests/test_surrogate.py
index 16cbb15394b..52e56ef156a 100644
--- a/ax/models/torch/tests/test_surrogate.py
+++ b/ax/models/torch/tests/test_surrogate.py
@@ -574,7 +574,11 @@ def test_best_in_sample_point(self) -> None:
     @patch(f"{ACQUISITION_PATH}.Acquisition.__init__", return_value=None)
     @patch(
         f"{ACQUISITION_PATH}.Acquisition.optimize",
-        return_value=([torch.tensor([0.0])], [torch.tensor([1.0])]),
+        return_value=(
+            torch.tensor([[0.0]]),
+            torch.tensor([1.0]),
+            torch.tensor([1.0]),
+        ),
     )
     @patch(
         f"{SURROGATE_PATH}.pick_best_out_of_sample_point_acqf_class",
@@ -615,7 +619,7 @@ def test_best_out_of_sample_point(
             options={Keys.SAMPLER: SobolQMCNormalSampler},
         )
         self.assertTrue(torch.equal(candidate, torch.tensor([0.0])))
-        self.assertTrue(torch.equal(acqf_value, torch.tensor([1.0])))
+        self.assertTrue(torch.equal(acqf_value, torch.tensor(1.0)))

     def test_serialize_attributes_as_kwargs(self) -> None:
         for botorch_model_class in [SaasFullyBayesianSingleTaskGP, SingleTaskGP]:
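Reviewer note (not part of the patch): after this change, any caller that previously unpacked two values from `Acquisition.optimize` must unpack three, with the weights defaulting to a tensor of ones, as `BoTorchModel.gen` does above. A minimal sketch of the new contract, using a mocked `optimize` in the same style as the test updates in this diff; `fake_optimize` is an illustrative stand-in, not code from the repository:

    from unittest import mock

    import torch

    # Stand-in for Acquisition.optimize, which now returns
    # (candidates, acqf_values, arm_weights).
    fake_optimize = mock.Mock(
        return_value=(torch.tensor([[2.0]]), torch.tensor([1.0]), torch.tensor([1.0]))
    )
    candidates, acqf_values, weights = fake_optimize(n=1)
    # Every candidate gets weight 1 by default, so callers can forward `weights`
    # to TorchGenResults instead of constructing torch.ones(n) themselves.
    assert torch.equal(weights, torch.ones(1))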