From 933756b7ef318a2dd7ba1b213b6d6ac2a09390af Mon Sep 17 00:00:00 2001 From: Sait Cakmak Date: Wed, 15 May 2024 08:52:41 -0700 Subject: [PATCH] Remove REMBO & ALEBO (#2458) Summary: Pull Request resolved: https://github.com/facebook/Ax/pull/2458 These were marked for removal in v0.3.9. Last release was v0.4.0. For users interested in BO with high-dimensional inputs, we recommend using SAASBO instead. See https://ax.dev/tutorials/saasbo.html for a tutorial Reviewed By: bletham Differential Revision: D57360740 fbshipit-source-id: d8214b935c9e22c9d690881d12897558742392b2 --- ax/modelbridge/registry.py | 19 - ax/modelbridge/strategies/__init__.py | 5 - ax/modelbridge/strategies/alebo.py | 140 --- ax/modelbridge/strategies/rembo.py | 307 ------ ax/modelbridge/tests/test_alebo_strategy.py | 79 -- ax/modelbridge/tests/test_registry.py | 45 - ax/modelbridge/tests/test_rembo_strategy.py | 113 --- ax/models/random/alebo_initializer.py | 94 -- ax/models/random/rembo_initializer.py | 73 -- ax/models/tests/test_alebo.py | 384 -------- ax/models/tests/test_alebo_initializer.py | 32 - ax/models/tests/test_rembo.py | 123 --- ax/models/tests/test_rembo_initializer.py | 34 - ax/models/torch/alebo.py | 997 -------------------- ax/models/torch/rembo.py | 266 ------ sphinx/source/modelbridge.rst | 16 - sphinx/source/models.rst | 33 - 17 files changed, 2760 deletions(-) delete mode 100644 ax/modelbridge/strategies/__init__.py delete mode 100644 ax/modelbridge/strategies/alebo.py delete mode 100644 ax/modelbridge/strategies/rembo.py delete mode 100644 ax/modelbridge/tests/test_alebo_strategy.py delete mode 100644 ax/modelbridge/tests/test_rembo_strategy.py delete mode 100644 ax/models/random/alebo_initializer.py delete mode 100644 ax/models/random/rembo_initializer.py delete mode 100644 ax/models/tests/test_alebo.py delete mode 100644 ax/models/tests/test_alebo_initializer.py delete mode 100644 ax/models/tests/test_rembo.py delete mode 100644 ax/models/tests/test_rembo_initializer.py delete mode 100644 ax/models/torch/alebo.py delete mode 100644 ax/models/torch/rembo.py diff --git a/ax/modelbridge/registry.py b/ax/modelbridge/registry.py index 29381055f8a..226857e19af 100644 --- a/ax/modelbridge/registry.py +++ b/ax/modelbridge/registry.py @@ -34,7 +34,6 @@ from ax.modelbridge.random import RandomModelBridge from ax.modelbridge.torch import TorchModelBridge from ax.modelbridge.transforms.base import Transform -from ax.modelbridge.transforms.centered_unit_x import CenteredUnitX from ax.modelbridge.transforms.choice_encode import ( ChoiceToNumericChoice, OrderedChoiceToIntegerRange, @@ -59,10 +58,8 @@ from ax.models.discrete.eb_thompson import EmpiricalBayesThompsonSampler from ax.models.discrete.full_factorial import FullFactorialGenerator from ax.models.discrete.thompson import ThompsonSampler -from ax.models.random.alebo_initializer import ALEBOInitializer from ax.models.random.sobol import SobolGenerator from ax.models.random.uniform import UniformGenerator -from ax.models.torch.alebo import ALEBO from ax.models.torch.botorch import BotorchModel from ax.models.torch.botorch_modular.model import ( BoTorchModel as ModularBoTorchModel, @@ -137,9 +134,6 @@ TaskChoiceToIntTaskChoice, ] -ALEBO_X_trans: List[Type[Transform]] = [RemoveFixed, IntToFloat, CenteredUnitX] -ALEBO_Y_trans: List[Type[Transform]] = [Derelativize, StandardizeY] - STANDARD_TORCH_BRIDGE_KWARGS: Dict[str, Any] = {"torch_dtype": torch.double} @@ -218,17 +212,6 @@ class ModelSetup(NamedTuple): transforms=ST_MTGP_trans, 
standard_bridge_kwargs=STANDARD_TORCH_BRIDGE_KWARGS, ), - "ALEBO": ModelSetup( - bridge_class=TorchModelBridge, - model_class=ALEBO, - transforms=ALEBO_X_trans + ALEBO_Y_trans, - standard_bridge_kwargs=STANDARD_TORCH_BRIDGE_KWARGS, - ), - "ALEBO_Initializer": ModelSetup( - bridge_class=RandomModelBridge, - model_class=ALEBOInitializer, - transforms=ALEBO_X_trans, - ), "BO_MIXED": ModelSetup( bridge_class=TorchModelBridge, model_class=ModularBoTorchModel, @@ -444,10 +427,8 @@ class Models(ModelRegistryBase): MOO = "MOO" ST_MTGP_LEGACY = "ST_MTGP_LEGACY" ST_MTGP = "ST_MTGP" - ALEBO = "ALEBO" BO_MIXED = "BO_MIXED" ST_MTGP_NEHVI = "ST_MTGP_NEHVI" - ALEBO_INITIALIZER = "ALEBO_Initializer" CONTEXT_SACBO = "Contextual_SACBO" @classmethod diff --git a/ax/modelbridge/strategies/__init__.py b/ax/modelbridge/strategies/__init__.py deleted file mode 100644 index 4b87eb9e4d0..00000000000 --- a/ax/modelbridge/strategies/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. diff --git a/ax/modelbridge/strategies/alebo.py b/ax/modelbridge/strategies/alebo.py deleted file mode 100644 index 8a7540f1849..00000000000 --- a/ax/modelbridge/strategies/alebo.py +++ /dev/null @@ -1,140 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# pyre-strict - -from typing import Any, Dict, Optional - -import numpy as np -import torch -from ax.core.data import Data -from ax.core.experiment import Experiment -from ax.core.search_space import SearchSpace -from ax.modelbridge.factory import DEFAULT_TORCH_DEVICE -from ax.modelbridge.generation_strategy import GenerationStep, GenerationStrategy -from ax.modelbridge.random import RandomModelBridge -from ax.modelbridge.registry import ALEBO_X_trans, ALEBO_Y_trans -from ax.modelbridge.torch import TorchModelBridge -from ax.models.random.alebo_initializer import ALEBOInitializer -from ax.models.torch.alebo import ALEBO - - -def get_ALEBOInitializer( - search_space: SearchSpace, - B: np.ndarray, - seed: Optional[int] = None, - **model_kwargs: Any, -) -> RandomModelBridge: - return RandomModelBridge( - search_space=search_space, - model=ALEBOInitializer(B=B, seed=seed, **model_kwargs), - transforms=ALEBO_X_trans, - ) - - -def get_ALEBO( - experiment: Experiment, - search_space: SearchSpace, - data: Data, - B: torch.Tensor, - **model_kwargs: Any, -) -> TorchModelBridge: - if search_space is None: - search_space = experiment.search_space - return TorchModelBridge( - experiment=experiment, - search_space=search_space, - data=data, - model=ALEBO(B=B, **model_kwargs), - transforms=ALEBO_X_trans + ALEBO_Y_trans, - torch_dtype=B.dtype, - torch_device=B.device, - ) - - -class ALEBOStrategy(GenerationStrategy): - """Generation strategy for Adaptive Linear Embedding BO. - - Both quasirandom initialization and BO are done with the same random - projection. All function evaluations are done within that projection. 
- - Args: - D: Dimensionality of high-dimensional space - d: Dimensionality of low-dimensional space - init_size: Size of random initialization - name: Name of strategy - dtype: torch dtype - device: torch device - random_kwargs: kwargs passed along to random model - gp_kwargs: kwargs passed along to GP model - gp_gen_kwargs: kwargs passed along to gen call on GP - """ - - def __init__( - self, - D: int, - d: int, - init_size: int, - name: str = "ALEBO", - dtype: torch.dtype = torch.double, - device: torch.device = DEFAULT_TORCH_DEVICE, - random_kwargs: Optional[Dict[str, Any]] = None, - gp_kwargs: Optional[Dict[str, Any]] = None, - gp_gen_kwargs: Optional[Dict[str, Any]] = None, - ) -> None: - self.D = D - self.d = d - self.init_size = init_size - self.dtype = dtype - self.device = device - # pyre-fixme[4]: Attribute must be annotated. - self.random_kwargs = random_kwargs if random_kwargs is not None else {} - # pyre-fixme[4]: Attribute must be annotated. - self.gp_kwargs = gp_kwargs if gp_kwargs is not None else {} - self.gp_gen_kwargs = gp_gen_kwargs - - B = self.gen_projection(d=d, D=D, device=device, dtype=dtype) - - self.gp_kwargs.update({"B": B}) - self.random_kwargs.update({"B": B.cpu().numpy()}) - - steps = [ - GenerationStep( - model=get_ALEBOInitializer, - num_trials=init_size, - model_kwargs=self.random_kwargs, - ), - GenerationStep( - model=get_ALEBO, - num_trials=-1, - model_kwargs=self.gp_kwargs, - model_gen_kwargs=gp_gen_kwargs, - ), - ] - super().__init__(steps=steps, name=name) - - def clone_reset(self) -> "ALEBOStrategy": - """Copy without state.""" - return self.__class__( - D=self.D, - d=self.d, - init_size=self.init_size, - name=self.name, - dtype=self.dtype, - device=self.device, - random_kwargs=self.random_kwargs, - gp_kwargs=self.gp_kwargs, - gp_gen_kwargs=self.gp_gen_kwargs, - ) - - def gen_projection( - self, d: int, D: int, dtype: torch.dtype, device: torch.device - ) -> torch.Tensor: - """Generate the projection matrix B as a (d x D) tensor""" - B0 = torch.randn(d, D, dtype=dtype, device=device) - # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`. - B = B0 / torch.sqrt((B0**2).sum(dim=0)) - return B diff --git a/ax/modelbridge/strategies/rembo.py b/ax/modelbridge/strategies/rembo.py deleted file mode 100644 index a26e0c0c288..00000000000 --- a/ax/modelbridge/strategies/rembo.py +++ /dev/null @@ -1,307 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
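For readers evaluating this removal, a minimal standalone sketch of the projection the deleted module below implemented (names and values here are illustrative, not part of the patch):

import torch

# Sketch of the REMBO projection: optimize in a random d-dimensional
# embedding, then map candidates up to the original D-dimensional box.
def rembo_project_up(A: torch.Tensor, X_d: torch.Tensor) -> torch.Tensor:
    # A is (D x d) with i.i.d. N(0, 1) entries; X_d is (n x d) in the low-d box.
    X_D = X_d @ A.t()
    # Up-projected points outside [-1, 1]^D are clamped to the box, matching
    # REMBOInitializer.project_up below.
    return torch.clamp(X_D, min=-1.0, max=1.0)

D, d = 20, 6
A = torch.randn(D, d, dtype=torch.double)
b = d ** 0.5  # low-d box half-width sqrt(d), as in get_projection below
X_d = (2 * torch.rand(5, d, dtype=torch.double) - 1) * b
X_D = rembo_project_up(A, X_d)
assert X_D.shape == (5, D) and X_D.abs().max() <= 1.0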
- -# pyre-strict - -import math -from collections import defaultdict -from typing import Any, Dict, List, Optional, Set, Tuple - -import numpy as np -import torch -from ax.core.data import Data -from ax.core.experiment import Experiment -from ax.core.generator_run import GeneratorRun -from ax.core.search_space import SearchSpace -from ax.modelbridge.factory import DEFAULT_TORCH_DEVICE -from ax.modelbridge.generation_strategy import GenerationStep, GenerationStrategy -from ax.modelbridge.random import RandomModelBridge -from ax.modelbridge.torch import TorchModelBridge -from ax.modelbridge.transforms.centered_unit_x import CenteredUnitX -from ax.modelbridge.transforms.standardize_y import StandardizeY -from ax.models.random.rembo_initializer import REMBOInitializer -from ax.models.torch.rembo import REMBO -from ax.utils.common.typeutils import not_none - - -def get_rembo_initializer( - search_space: SearchSpace, - A: np.ndarray, - bounds_d: List[Tuple[float, float]], - seed: Optional[int] = None, - **kwargs: Any, -) -> RandomModelBridge: - """Instantiates a uniform random generator. - - Args: - search_space: Search space. - A: Projection matrix. - bounds_d: Bounds in low-d space. - seed: seed. - kwargs: kwargs - - Returns: - RandomModelBridge, with REMBOInitializer as model. - """ - return RandomModelBridge( - search_space=search_space, - model=REMBOInitializer(A=A, bounds_d=bounds_d, seed=seed, **kwargs), - transforms=[CenteredUnitX], - ) - - -def get_REMBO( - experiment: Experiment, - data: Data, - A: torch.Tensor, - initial_X_d: torch.Tensor, - bounds_d: List[Tuple[float, float]], - search_space: Optional[SearchSpace] = None, - dtype: torch.dtype = torch.double, - device: torch.device = DEFAULT_TORCH_DEVICE, - **model_kwargs: Any, -) -> TorchModelBridge: - """Instantiates a BotorchModel.""" - if search_space is None: - search_space = experiment.search_space - if data.df.empty: - raise ValueError("REMBO model requires non-empty data.") - return TorchModelBridge( - experiment=experiment, - search_space=search_space, - data=data, - model=REMBO(A=A, initial_X_d=initial_X_d, bounds_d=bounds_d, **model_kwargs), - transforms=[CenteredUnitX, StandardizeY], - torch_dtype=dtype, - torch_device=device, - ) - - -class REMBOStrategy(GenerationStrategy): - """Generation strategy for REMBO. - - Both quasirandom initialization and BO are done with the same random - projection. As is done in the REMBO paper, k independent optimizations - are done, each with an independently generated projection. - - Args: - D: Dimensionality of high-dimensional space - d: Dimensionality of low-dimensional space - k: Number of random projections - init_per_proj: Number of arms to use for random initialization of each - of the k projections. - name: Name of strategy - dtype: torch dtype - device: torch device - gp_kwargs: kwargs sent along to the GP model - """ - - def __init__( - self, - D: int, - d: int, - init_per_proj: int, - k: int = 4, - name: str = "REMBO", - dtype: torch.dtype = torch.double, - device: torch.device = DEFAULT_TORCH_DEVICE, - gp_kwargs: Optional[Dict[str, Any]] = None, - ) -> None: - self.D = D - self.d = d - self.k = k - self.init_per_proj = init_per_proj - self.dtype = dtype - self.device = device - # pyre-fixme[4]: Attribute must be annotated. - self.gp_kwargs = gp_kwargs if gp_kwargs is not None else {} - - # pyre-fixme[4]: Attribute must be annotated. 
- self.projections = { - i: self.get_projection( - D=self.D, d=self.d, dtype=self.dtype, device=self.device - ) - for i in range(self.k) - } - - # pyre-fixme[4]: Attribute must be annotated. - self.X_d_by_proj = defaultdict(list) - self.current_iteration = 0 - self.arms_by_proj: Dict[int, Set[str]] = {i: set({}) for i in range(self.k)} - - # The first GenerationStep, and super - A, bounds_d = self.projections[0] - steps = [ - GenerationStep( - model=get_rembo_initializer, - num_trials=1, - model_kwargs={"A": A, "bounds_d": bounds_d}, - ) - ] - super().__init__(steps=steps, name=name) - - @property - def model_transitions(self) -> List[int]: - """Generator changes every iteration with rotating strategy""" - return list(range(self.current_iteration)) - - # pyre-fixme[14]: `gen` overrides method defined in `GenerationStrategy` - # inconsistently. - def gen( - self, - experiment: Experiment, - data: Optional[Data] = None, - n: int = 1, - **kwargs: Any, - ) -> GeneratorRun: - """Generate new points, rotating through projections each time.""" - if data is None: - data = experiment.fetch_data() - # Get the next model in the rotation - i = self.current_iteration % self.k - data_by_proj = self._filter_data_to_projection( - experiment=experiment, - data=data, - arm_sigs=self.arms_by_proj[i], - ) - lgr = self.last_generator_run - # NOTE: May need to `model_class.deserialize_model_state` in the - # future if using non-readily serializable state. - model_state = ( - not_none(lgr._model_state_after_gen) - if lgr is not None and lgr._model_state_after_gen is not None - else {} - ) - - A, bounds_d = self.projections[i] - if ( - data_by_proj is None - or len(data_by_proj.df["arm_name"].unique()) < self.init_per_proj - ): - # Not enough data to switch to GP, use Sobol for initialization - m = get_rembo_initializer( - search_space=experiment.search_space, - A=A.double().numpy(), - bounds_d=bounds_d, - **model_state, - ) - else: - # We have enough data to switch to GP. - m = get_REMBO( - experiment=experiment, - data=data_by_proj, - A=A, - initial_X_d=torch.tensor( - self.X_d_by_proj[i], dtype=self.dtype, device=self.device - ), - bounds_d=bounds_d, - **self.gp_kwargs, - ) - - self.current_iteration += 1 - # Call gen - gr = m.gen(n=n) - self.X_d_by_proj[i].extend(not_none(m.model).X_d_gen) # pyre-ignore[16] - self.arms_by_proj[i].update(a.signature for a in gr.arms) - self._generator_runs.append(gr) - return gr - - def clone_reset(self) -> "REMBOStrategy": - """Copy without state.""" - return self.__class__( - D=self.D, - d=self.d, - k=self.k, - init_per_proj=self.init_per_proj, - name=self.name, - dtype=self.dtype, - device=self.device, - gp_kwargs=self.gp_kwargs, - ) - - def _filter_data_to_projection( - self, experiment: Experiment, data: Data, arm_sigs: Set[str] - ) -> Optional[Data]: - """Extract the arms in data that are in arm_sigs. - - Return None if none. - """ - arm_names: Set[str] = set({}) - for arm_name in data.df["arm_name"].unique(): - sig = experiment.arms_by_name[arm_name].signature - if sig in arm_sigs: - arm_names.add(arm_name) - - if len(arm_names) == 0: - return None - # Else, - df_i = data.df[data.df["arm_name"].isin(arm_names)].copy() - return Data(df_i) - - def get_projection( - self, D: int, d: int, dtype: torch.dtype, device: torch.device - ) -> Tuple[torch.Tensor, List[Tuple[float, float]]]: - """Generate the projection matrix A as a (D x d) tensor - - Also return the box bounds for the low-d space. 
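-
-        The box bounds are (-sqrt(d), sqrt(d)) in each of the d coordinates,
-        as suggested in the REMBO paper.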
- """ - A = torch.randn((D, d), dtype=dtype, device=device) - bounds_d = [(-(math.sqrt(d)), math.sqrt(d))] * d - return A, bounds_d - - -class HeSBOStrategy(REMBOStrategy): - """Generation strategy for HeSBO. - - Args: - D: Dimensionality of high-dimensional space - d: Dimensionality of low-dimensional space - k: Number of random projections - init_per_proj: Number of arms to use for random initialization of each - of the k projections. - name: Name of strategy - dtype: torch dtype - device: torch device - gp_kwargs: kwargs sent along to the GP model - """ - - def __init__( - self, - D: int, - d: int, - init_per_proj: int, - k: int = 1, - name: str = "HeSBO", - dtype: torch.dtype = torch.double, - device: torch.device = DEFAULT_TORCH_DEVICE, - gp_kwargs: Optional[Dict[str, Any]] = None, - ) -> None: - super().__init__( - D=D, - d=d, - init_per_proj=init_per_proj, - k=k, - name=name, - dtype=dtype, - device=device, - gp_kwargs=gp_kwargs, - ) - - def get_projection( - self, D: int, d: int, dtype: torch.dtype, device: torch.device - ) -> Tuple[torch.Tensor, List[Tuple[float, float]]]: - """Generate the projection matrix A as a (D x d) tensor - - Also return the box bounds for the low-d space. - """ - A = torch.zeros((D, d), dtype=dtype, device=device) - h = torch.randint(d, size=(D,)) - s = 2 * torch.randint(2, size=(D,)) - 1 - for i in range(D): - A[i, h[i]] = s[i] - - bounds_d = [(-1.0, 1.0)] * d - return A, bounds_d diff --git a/ax/modelbridge/tests/test_alebo_strategy.py b/ax/modelbridge/tests/test_alebo_strategy.py deleted file mode 100644 index eeade2314fd..00000000000 --- a/ax/modelbridge/tests/test_alebo_strategy.py +++ /dev/null @@ -1,79 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# pyre-strict - -from unittest import mock - -import numpy as np -import pandas as pd -import torch -from ax.core.data import Data -from ax.modelbridge.strategies.alebo import ( - ALEBOStrategy, - get_ALEBO, - get_ALEBOInitializer, -) -from ax.utils.common.testutils import TestCase -from ax.utils.testing.core_stubs import get_branin_experiment - - -class ALEBOStrategyTest(TestCase): - def test_factory_functions(self) -> None: - experiment = get_branin_experiment(with_batch=True) - B = np.array([[1.0, 2.0]]) - m1 = get_ALEBOInitializer(search_space=experiment.search_space, B=B) - # pyre-fixme[16]: `RandomModel` has no attribute `Q`. - self.assertTrue(np.allclose(m1.model.Q, np.linalg.pinv(B) @ B)) - data = Data( - pd.DataFrame( - { - "arm_name": ["0_0", "0_1", "0_2"], - "metric_name": "branin", - "mean": [-1.0, 0.0, 1.0], - "sem": 0.1, - } - ) - ) - with mock.patch("ax.modelbridge.strategies.alebo.ALEBO.fit", autospec=True): - m2 = get_ALEBO( - experiment=experiment, - # pyre-fixme[6]: For 2nd param expected `SearchSpace` but got `None`. - search_space=None, - data=data, - B=torch.tensor(B), - ) - - # pyre-fixme[16]: Optional type has no attribute `B`. - self.assertTrue(np.array_equal(m2.model.B.numpy(), B)) - - def test_ALEBOStrategy(self) -> None: - D = 20 - d = 3 - init_size = 5 - s = ALEBOStrategy(D=D, d=d, init_size=init_size) - self.assertEqual(s._steps[0].num_trials, init_size) - # pyre-fixme[16]: Optional type has no attribute `__getitem__`. 
- random_B = s._steps[0].model_kwargs["B"] - gp_B = s._steps[1].model_kwargs["B"] - # Check that random and GP have the same projection - self.assertTrue(np.allclose(random_B, gp_B.numpy())) - # And that the projection has correct properties - self.assertEqual(random_B.shape, (d, D)) - self.assertTrue( - torch.allclose( - torch.sqrt((gp_B**2).sum(dim=0)), torch.ones(D, dtype=torch.double) - ) - ) - - s2 = s.clone_reset() - # Check that attributes copied, but not B - self.assertEqual(s2.d, d) - self.assertEqual(s2.D, D) - self.assertEqual(s2._steps[0].num_trials, init_size) - random_B2 = s2._steps[0].model_kwargs["B"] - self.assertEqual(random_B2.shape, (d, D)) - self.assertFalse(np.allclose(random_B, random_B2)) diff --git a/ax/modelbridge/tests/test_registry.py b/ax/modelbridge/tests/test_registry.py index 2cbd5260305..568e6d04335 100644 --- a/ax/modelbridge/tests/test_registry.py +++ b/ax/modelbridge/tests/test_registry.py @@ -8,10 +8,7 @@ from collections import OrderedDict -import numpy as np -import pandas as pd import torch -from ax.core.data import Data from ax.core.observation import ObservationFeatures from ax.modelbridge.discrete import DiscreteModelBridge from ax.modelbridge.random import RandomModelBridge @@ -26,8 +23,6 @@ from ax.models.base import Model from ax.models.discrete.eb_thompson import EmpiricalBayesThompsonSampler from ax.models.discrete.thompson import ThompsonSampler -from ax.models.random.alebo_initializer import ALEBOInitializer -from ax.models.torch.alebo import ALEBO from ax.models.torch.botorch_modular.acquisition import Acquisition from ax.models.torch.botorch_modular.kernels import ScaleMaternKernel from ax.models.torch.botorch_modular.model import BoTorchModel, SurrogateSpec @@ -371,46 +366,6 @@ def test_ModelSetups_do_not_share_kwargs(self) -> None: # Intersection of two sets should be empty self.assertEqual(model_args & bridge_args, set()) - @fast_botorch_optimize - def test_ALEBO(self) -> None: - """Tests Alebo fitting and generations""" - experiment = get_branin_experiment(with_batch=True) - B = np.array([[1.0, 2.0]]) - data = Data( - pd.DataFrame( - { - "arm_name": ["0_0", "0_1", "0_2"], - "metric_name": "branin", - "mean": [-1.0, 0.0, 1.0], - "sem": 0.1, - } - ) - ) - m = Models.ALEBO( - experiment=experiment, - search_space=None, - data=data, - B=torch.from_numpy(B).double(), - ) - self.assertIsInstance(m, TorchModelBridge) - self.assertIsInstance(m.model, ALEBO) - self.assertTrue(np.array_equal(m.model.B.numpy(), B)) - - def test_ALEBO_Initializer(self) -> None: - """Tests Alebo Initializer generations""" - experiment = get_branin_experiment(with_batch=True) - B = np.array([[1.0, 2.0]]) - m = Models.ALEBO_INITIALIZER( - experiment=experiment, - search_space=None, - B=B, - ) - self.assertIsInstance(m, RandomModelBridge) - self.assertIsInstance(m.model, ALEBOInitializer) - - gr = m.gen(n=2) - self.assertEqual(len(gr.arms), 2) - @fast_botorch_optimize def test_ST_MTGP_LEGACY(self) -> None: """Tests single type MTGP instantiation.""" diff --git a/ax/modelbridge/tests/test_rembo_strategy.py b/ax/modelbridge/tests/test_rembo_strategy.py deleted file mode 100644 index 53d570cd63e..00000000000 --- a/ax/modelbridge/tests/test_rembo_strategy.py +++ /dev/null @@ -1,113 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
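The test below exercises REMBOStrategy's rotation over k independent projections; this toy sketch (illustrative, not part of the patch) shows the round-robin bookkeeping the assertions track:

# REMBOStrategy.gen above selects a projection with i = iteration % k, so
# arms accumulate evenly across the k projections.
k = 4
arms_by_proj = {i: set() for i in range(k)}
for iteration in range(8):
    i = iteration % k
    # Each trial generates n=2 arms, mirroring gs.gen(experiment=exp, n=2).
    arms_by_proj[i].update({f"{iteration}_0", f"{iteration}_1"})
# After 8 trials each projection has received two trials (four arms), which
# is what the deleted test checks via gs.arms_by_proj.
assert all(len(arms) == 4 for arms in arms_by_proj.values())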
- -# pyre-strict - -import torch -from ax.core.experiment import Experiment -from ax.core.objective import Objective -from ax.core.optimization_config import OptimizationConfig -from ax.core.outcome_constraint import ComparisonOp, OutcomeConstraint -from ax.core.parameter import ParameterType, RangeParameter -from ax.core.search_space import SearchSpace -from ax.metrics.hartmann6 import Hartmann6Metric -from ax.metrics.l2norm import L2NormMetric -from ax.modelbridge.strategies.rembo import HeSBOStrategy, REMBOStrategy -from ax.runners.synthetic import SyntheticRunner -from ax.utils.common.testutils import TestCase -from ax.utils.testing.mock import fast_botorch_optimize - - -class REMBOStrategyTest(TestCase): - @fast_botorch_optimize - def test_REMBOStrategy(self) -> None: - # Construct a high-D test experiment with multiple metrics - hartmann_search_space = SearchSpace( - parameters=[ - RangeParameter( - name=f"x{i}", - parameter_type=ParameterType.FLOAT, - lower=0.0, - upper=1.0, - ) - for i in range(20) - ] - ) - - exp = Experiment( - name="test", - search_space=hartmann_search_space, - optimization_config=OptimizationConfig( - objective=Objective( - metric=Hartmann6Metric( - name="hartmann6", param_names=[f"x{i}" for i in range(6)] - ), - minimize=True, - ), - outcome_constraints=[ - OutcomeConstraint( - metric=L2NormMetric( - name="l2norm", - param_names=[f"x{i}" for i in range(6)], - noise_sd=0.2, - ), - op=ComparisonOp.LEQ, - bound=1.25, - relative=False, - ) - ], - ), - runner=SyntheticRunner(), - ) - - # Instantiate the strategy - gs = REMBOStrategy(D=20, d=6, k=4, init_per_proj=4) - - # Check that arms and data are correctly segmented by projection - exp.new_batch_trial(generator_run=gs.gen(experiment=exp, n=2)).run() - self.assertEqual(len(gs.arms_by_proj[0]), 2) - self.assertEqual(len(gs.arms_by_proj[1]), 0) - - exp.new_batch_trial(generator_run=gs.gen(experiment=exp, n=2)).run() - - self.assertEqual(len(gs.arms_by_proj[0]), 2) - self.assertEqual(len(gs.arms_by_proj[1]), 2) - - # Iterate until the first projection fits a GP - for _ in range(4): - exp.new_batch_trial(generator_run=gs.gen(experiment=exp, n=2)).run() - - self.assertEqual(len(gs.arms_by_proj[0]), 4) - self.assertEqual(len(gs.arms_by_proj[1]), 4) - self.assertEqual(len(gs.arms_by_proj[2]), 2) - self.assertEqual(len(gs.arms_by_proj[3]), 2) - - # Keep iterating until GP is used for gen - for i in range(4): - # First two trials will go towards 3rd and 4th proj. getting enough - if i < 1: # data for GP. 
- self.assertLess(len(gs.arms_by_proj[2]), 4) - if i < 2: - self.assertLess(len(gs.arms_by_proj[3]), 4) - - exp.new_batch_trial(generator_run=gs.gen(experiment=exp, n=2)).run() - if i >= 2: - self.assertFalse(any(len(x) < 4 for x in gs.arms_by_proj.values())) - - gs2 = gs.clone_reset() - self.assertEqual(gs2.D, 20) - self.assertEqual(gs2.d, 6) - - def test_HeSBOStrategy(self) -> None: - gs = HeSBOStrategy(D=10, d=4, init_per_proj=2) - self.assertEqual(gs.name, "HeSBO") - self.assertEqual(len(gs.projections), 1) - A, bounds_d = gs.projections[0] - self.assertEqual(bounds_d, [(-1, 1)] * 4) - z = torch.abs(A).sum(dim=1) - self.assertTrue(torch.allclose(z, torch.ones(10, dtype=torch.double))) - gs2 = gs.clone_reset() - self.assertTrue(isinstance(gs2, HeSBOStrategy)) diff --git a/ax/models/random/alebo_initializer.py b/ax/models/random/alebo_initializer.py deleted file mode 100644 index ef88447a485..00000000000 --- a/ax/models/random/alebo_initializer.py +++ /dev/null @@ -1,94 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# pyre-strict - -from typing import Callable, Dict, List, Optional, Tuple -from warnings import warn - -import numpy as np -from ax.models.random.uniform import UniformGenerator -from ax.models.types import TConfig -from ax.utils.common.docutils import copy_doc - - -class ALEBOInitializer(UniformGenerator): - """Sample in a low-dimensional linear embedding, to initialize ALEBO. - - Generates points on a linear subspace of [-1, 1]^D by generating points in - [-b, b]^D, projecting them down with a matrix B, and then projecting them - back up with the pseudoinverse of B. Thus points thus all lie in a linear - subspace defined by B. Points whose up-projection falls outside of [-1, 1]^D - are thrown out, via rejection sampling. - - To generate n points, we start with nsamp points in [-b, b]^D, which are - mapped down to the embedding and back up as described above. If >=n points - fall within [-1, 1]^D after being mapped up, then the first n are returned. - If there are less than n points in [-1, 1]^D, then b is constricted - (halved) and the process is repeated until there are at least n points in - [-1, 1]^D. There exists a b small enough that all points will project to - [-1, 1]^D, so this is guaranteed to terminate, typically after few rounds. - - Args: - B: A (dxD) projection down. - nsamp: Number of samples to use for rejection sampling. - init_bound: b for the initial sampling space described above. - seed: seed for UniformGenerator - """ - - def __init__( - self, - B: np.ndarray, - nsamp: int = 10000, - init_bound: int = 16, - seed: Optional[int] = None, - ) -> None: - warn("ALEBOInitializer is deprecated.", DeprecationWarning) - # pyre-fixme[4]: Attribute must be annotated. - self.Q = np.linalg.pinv(B) @ B # Projects down to B and then back up - self.nsamp = nsamp - self.init_bound = init_bound - super().__init__(seed=seed, deduplicate=False) - - @copy_doc(UniformGenerator.gen) - def gen( - self, - n: int, - bounds: List[Tuple[float, float]], - linear_constraints: Optional[Tuple[np.ndarray, np.ndarray]] = None, - fixed_features: Optional[Dict[int, float]] = None, - model_gen_options: Optional[TConfig] = None, - rounding_func: Optional[Callable[[np.ndarray], np.ndarray]] = None, - ) -> Tuple[np.ndarray, np.ndarray]: - if n > self.nsamp: - raise ValueError("n > nsamp") - # The projection is from [-1, 1]^D. 
- for b in bounds: - assert b == (-1.0, 1.0) - # The following can be easily handled in the future when needed - assert linear_constraints is None - assert fixed_features is None - # Do gen in the high-dimensional space. - X01, w = super().gen( - n=self.nsamp, - bounds=[(0.0, 1.0)] * self.Q.shape[0], - model_gen_options={"max_rs_draws": self.nsamp}, - ) - finished = False - b = float(self.init_bound) - while not finished: - # Map to [-b, b] - X_b = 2 * b * X01 - b - # Project down to B and back up - X = X_b @ np.transpose(self.Q) - # Filter out to points in [-1, 1]^D - X = X[(X >= -1.0).all(axis=1) & (X <= 1.0).all(axis=1)] - if X.shape[0] >= n: - finished = True - else: - b = b / 2.0 # Constrict the space - X = X[:n, :] - return X, np.ones(n) diff --git a/ax/models/random/rembo_initializer.py b/ax/models/random/rembo_initializer.py deleted file mode 100644 index bbf1ca1cb39..00000000000 --- a/ax/models/random/rembo_initializer.py +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# pyre-strict - -from typing import Callable, Dict, List, Optional, Tuple - -import numpy as np -from ax.models.random.uniform import UniformGenerator -from ax.models.types import TConfig -from ax.utils.common.docutils import copy_doc - - -class REMBOInitializer(UniformGenerator): - """Sample in a low-dimensional linear embedding. - - Generates points in [-1, 1]^D by generating points in a d-dimensional - embedding, with box bounds as specified. When points are projected up, if - they fall outside [-1, 1]^D they are clamped to those bounds. - - Args: - A: A (Dxd) linear embedding - bounds_d: Box bounds in the low-d space - seed: seed for UniformGenerator - """ - - def __init__( - self, - A: np.ndarray, - bounds_d: List[Tuple[float, float]], - seed: Optional[int] = None, - ) -> None: - self.bounds_d = bounds_d - self.A = A - # pyre-fixme[4]: Attribute must be annotated. - self.X_d_gen = [] # Store points in low-d space generated here - super().__init__(seed=seed, deduplicate=False) - - def project_up(self, X: np.ndarray) -> np.ndarray: - """Project to high-dimensional space.""" - Z = np.transpose(self.A @ np.transpose(X)) - return np.clip(Z, a_min=-1, a_max=1) - - @copy_doc(UniformGenerator.gen) - def gen( - self, - n: int, - bounds: List[Tuple[float, float]], - linear_constraints: Optional[Tuple[np.ndarray, np.ndarray]] = None, - fixed_features: Optional[Dict[int, float]] = None, - model_gen_options: Optional[TConfig] = None, - rounding_func: Optional[Callable[[np.ndarray], np.ndarray]] = None, - ) -> Tuple[np.ndarray, np.ndarray]: - # The projection is from [-1, 1]^D. - for b in bounds: - assert b == (-1, 1) - # The following can be easily handled in the future when needed - assert linear_constraints is None - assert fixed_features is None - # Do gen in the low-dimensional space. 
First on [0, 1]^d, - X_01, w = super().gen(n=n, bounds=[(0.0, 1.0)] * len(self.bounds_d)) - # Then map to bounds_d - lw, up = zip(*self.bounds_d) - lw = np.array(lw) - up = np.array(up) - X_d = X_01 * (up - lw) + lw - # Store - self.X_d_gen.extend(list(X_d)) - # And finally project up - return self.project_up(X_d), w diff --git a/ax/models/tests/test_alebo.py b/ax/models/tests/test_alebo.py deleted file mode 100644 index d7e96882408..00000000000 --- a/ax/models/tests/test_alebo.py +++ /dev/null @@ -1,384 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# pyre-strict - -from unittest import mock - -import numpy as np -import torch -from ax.core.search_space import SearchSpaceDigest -from ax.models.torch.alebo import ( - ALEBO, - alebo_acqf_optimizer, - ALEBOGP, - ALEBOKernel, - ei_or_nei, - extract_map_statedict, - get_batch_model, - get_fitted_model, - get_map_model, -) -from ax.models.torch_base import TorchOptConfig -from ax.utils.common.testutils import TestCase -from ax.utils.common.typeutils import checked_cast -from ax.utils.testing.mock import fast_botorch_optimize -from botorch.acquisition import qLogNoisyExpectedImprovement -from botorch.acquisition.analytic import ExpectedImprovement -from botorch.models.model_list_gp_regression import ModelListGP -from botorch.utils.datasets import SupervisedDataset -from torch.nn.parameter import Parameter - - -class ALEBOTest(TestCase): - def test_ALEBOKernel(self) -> None: - B = torch.tensor( - [[1.0, 2.0, 3.0, 4.0, 5.0], [2.0, 3.0, 4.0, 5.0, 6.0]], dtype=torch.double - ) - with self.assertWarnsRegex(DeprecationWarning, "ALEBOKernel is deprecated"): - k = ALEBOKernel(B=B, batch_shape=torch.Size([])) - - self.assertEqual(k.d, 2) - self.assertTrue(torch.equal(B, k.B)) - self.assertTrue( - torch.equal(k.triu_indx[0], torch.tensor([0, 0, 1], dtype=torch.long)) - ) - self.assertTrue( - torch.equal(k.triu_indx[1], torch.tensor([0, 1, 1], dtype=torch.long)) - ) - self.assertEqual(k.Uvec.shape, torch.Size([3])) - - k.Uvec.requires_grad_(False) - checked_cast(Parameter, k.Uvec).copy_( - torch.tensor([1.0, 2.0, 3.0], dtype=torch.double) - ) - k.Uvec.requires_grad_(True) - x1 = torch.tensor([[0.0, 0.0], [1.0, 1.0]], dtype=torch.double) - x2 = torch.tensor([[1.0, 1.0], [0.0, 0.0]], dtype=torch.double) - - K = k.forward(x1, x2) - Ktrue = torch.tensor( - [[np.exp(-0.5 * 18), 1.0], [1.0, np.exp(-0.5 * 18)]], dtype=torch.double - ) - self.assertTrue(torch.equal(K, Ktrue)) - - @fast_botorch_optimize - def test_ALEBOGP(self) -> None: - # First non-batch - B = torch.tensor( - [[1.0, 2.0, 3.0, 4.0, 5.0], [2.0, 3.0, 4.0, 5.0, 6.0]], dtype=torch.double - ) - train_X = torch.tensor([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=torch.double) - train_Y = torch.tensor([[1.0], [2.0], [3.0]], dtype=torch.double) - train_Yvar = 0.1 * torch.ones(3, 1, dtype=torch.double) - - with self.assertWarnsRegex( - DeprecationWarning, - "`get_map_model` from ax.models.torch.alebo.py is deprecated", - ): - mll = get_map_model( - B=B, - train_X=train_X, - train_Y=train_Y, - train_Yvar=train_Yvar, - restarts=1, - init_state_dict=None, - ) - m = mll.model - m.eval() - self.assertIsInstance(m, ALEBOGP) - self.assertIsInstance(m.covar_module.base_kernel, ALEBOKernel) - - X = torch.tensor([[2.0, 2.0], [3.0, 3.0], [4.0, 4.0]], dtype=torch.double) - f = m(X) - self.assertEqual(f.mean.shape, torch.Size([3])) - 
self.assertEqual(f.variance.shape, torch.Size([3])) - self.assertEqual(f.covariance_matrix.shape, torch.Size([3, 3])) - - # Batch - Uvec_b = m.covar_module.base_kernel.Uvec.repeat(5, 1) - mean_b = m.mean_module.constant.repeat(5) - output_scale_b = m.covar_module.raw_outputscale.repeat(5) - m_b = get_batch_model( - B=B, - train_X=train_X, - train_Y=train_Y, - train_Yvar=train_Yvar, - Uvec_batch=Uvec_b, - mean_constant_batch=mean_b, - output_scale_batch=output_scale_b, - ) - - self.assertEqual(m_b._aug_batch_shape, torch.Size([5])) - f = m_b(X) - self.assertEqual(f.mean.shape, torch.Size([3])) - self.assertEqual(f.variance.shape, torch.Size([3])) - self.assertEqual(f.covariance_matrix.shape, torch.Size([3, 3])) - self.assertEqual( - m_b.posterior(X).mvn.covariance_matrix.shape, torch.Size([3, 3]) - ) - - # The whole process in get_fitted_model - init_state_dict = m.state_dict() - with self.assertWarnsRegex( - DeprecationWarning, - "`get_fitted_model` from ax.models.torch.alebo.py is deprecated", - ): - m_b2 = get_fitted_model( - B=B, - train_X=train_X, - train_Y=train_Y, - train_Yvar=train_Yvar, - restarts=1, - nsamp=5, - init_state_dict=init_state_dict, - ) - self.assertEqual(m_b2._aug_batch_shape, torch.Size([5])) - - # Test extract_map_statedict - map_sds = extract_map_statedict(m_b=m_b, num_outputs=1) - self.assertEqual(len(map_sds), 1) - self.assertEqual(len(map_sds[0]), 5) - self.assertEqual( - set(map_sds[0]), - { - "covar_module.base_kernel.Uvec", - "covar_module.raw_outputscale", - "mean_module.raw_constant", - "covar_module.raw_outputscale_constraint.lower_bound", - "covar_module.raw_outputscale_constraint.upper_bound", - }, - ) - self.assertEqual( - map_sds[0]["covar_module.base_kernel.Uvec"].shape, torch.Size([3]) - ) - - ml = ModelListGP(m_b, m_b2) - map_sds = extract_map_statedict(m_b=ml, num_outputs=2) - self.assertEqual(len(map_sds), 2) - for i in range(2): - self.assertEqual(len(map_sds[i]), 5) - self.assertEqual( - set(map_sds[i]), - { - "covar_module.base_kernel.Uvec", - "covar_module.raw_outputscale", - "mean_module.raw_constant", - "covar_module.raw_outputscale_constraint.lower_bound", - "covar_module.raw_outputscale_constraint.upper_bound", - }, - ) - self.assertEqual( - map_sds[i]["covar_module.base_kernel.Uvec"].shape, torch.Size([3]) - ) - - def test_Acq(self) -> None: - B = torch.tensor( - [[1.0, 2.0, 3.0, 4.0, 5.0], [2.0, 3.0, 4.0, 5.0, 6.0]], dtype=torch.double - ) - train_X = torch.tensor([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=torch.double) - train_Y = torch.tensor([[1.0], [2.0], [3.0]], dtype=torch.double) - train_Yvar = 0.1 * torch.ones(3, 1, dtype=torch.double) - with self.assertWarnsRegex(DeprecationWarning, "ALEBOGP is deprecated"): - m = ALEBOGP(B=B, train_X=train_X, train_Y=train_Y, train_Yvar=train_Yvar) - m.eval() - - objective_weights = torch.tensor([1.0], dtype=torch.double) - acq = ei_or_nei( - model=m, - objective_weights=objective_weights, - outcome_constraints=None, - X_observed=train_X, - X_pending=None, - q=1, - noiseless=True, - ) - self.assertIsInstance(acq, ExpectedImprovement) - self.assertEqual(acq.best_f.item(), 3.0) - - objective_weights = torch.tensor([-1.0], dtype=torch.double) - acq = ei_or_nei( - model=m, - objective_weights=objective_weights, - outcome_constraints=None, - X_observed=train_X, - X_pending=None, - q=1, - noiseless=True, - ) - self.assertEqual(acq.best_f.item(), 1.0) - with mock.patch( - "ax.models.torch.alebo.optimize_acqf", - autospec=True, - return_value=(train_X, train_Y), - ) as optim_mock: - 
alebo_acqf_optimizer( - acq_function=acq, - # pyre-fixme[6]: For 2nd param expected `Tensor` but got `None`. - bounds=None, - n=1, - # pyre-fixme[6]: For 4th param expected `Optional[List[Tuple[Tensor, - # Tensor, float]]]` but got `float`. - inequality_constraints=5.0, - fixed_features=None, - rounding_func=None, - raw_samples=100, - num_restarts=5, - B=B, - ) - self.assertEqual(optim_mock.call_count, 1) - self.assertIsInstance( - optim_mock.mock_calls[0][2]["acq_function"], ExpectedImprovement - ) - - acq = ei_or_nei( - model=m, - objective_weights=objective_weights, - outcome_constraints=None, - X_observed=train_X, - X_pending=None, - q=1, - noiseless=False, - ) - self.assertIsInstance(acq, qLogNoisyExpectedImprovement) - - with mock.patch( - "ax.models.torch.alebo.optimize_acqf", - autospec=True, - return_value=(train_X, train_Y), - ) as optim_mock: - alebo_acqf_optimizer( - acq_function=acq, - # pyre-fixme[6]: For 2nd param expected `Tensor` but got `None`. - bounds=None, - n=2, - # pyre-fixme[6]: For 4th param expected `Optional[List[Tuple[Tensor, - # Tensor, float]]]` but got `float`. - inequality_constraints=5.0, - fixed_features=None, - rounding_func=None, - raw_samples=100, - num_restarts=5, - B=B, - ) - - self.assertEqual(optim_mock.call_count, 2) - self.assertIsInstance( - optim_mock.mock_calls[0][2]["acq_function"], qLogNoisyExpectedImprovement - ) - self.assertEqual(optim_mock.mock_calls[0][2]["num_restarts"], 5) - self.assertEqual(optim_mock.mock_calls[0][2]["inequality_constraints"], 5.0) - X = optim_mock.mock_calls[0][2]["batch_initial_conditions"] - self.assertEqual(X.shape, torch.Size([5, 1, 2])) - # Make sure initialization is inside subspace - Z = (B @ torch.pinverse(B) @ X[:, 0, :].t()).t() - self.assertTrue(torch.allclose(Z, X[:, 0, :])) - - @fast_botorch_optimize - def test_ALEBO(self) -> None: - B = torch.tensor( - [[1.0, 2.0, 3.0, 4.0, 5.0], [2.0, 3.0, 4.0, 5.0, 6.0]], dtype=torch.double - ) - m = ALEBO(B=B, laplace_nsamp=5, fit_restarts=1) - self.assertTrue(torch.equal(B, m.B)) - self.assertEqual(m.laplace_nsamp, 5) - self.assertEqual(m.fit_restarts, 1) - self.assertEqual(m.refit_on_cv, False) - self.assertEqual(m.warm_start_refitting, False) - - train_X = torch.tensor( - [ - [0.0, 0.0, 0.0, 0.0, 0.0], - [1.0, 1.0, 1.0, 1.0, 1.0], - [2.0, 2.0, 2.0, 2.0, 2.0], - ], - dtype=torch.double, - ) - train_Y = torch.tensor([[1.0], [2.0], [3.0]], dtype=torch.double) - train_Yvar = 0.1 * torch.ones(3, 1, dtype=torch.double) - dataset = SupervisedDataset( - X=train_X, - Y=train_Y, - Yvar=train_Yvar, - feature_names=[f"x{i}" for i in range(5)], - outcome_names=["y"], - ) - - # Test fit - m.fit( - datasets=[dataset, dataset], - search_space_digest=SearchSpaceDigest( - feature_names=[], - # pyre-fixme[6]: For 2nd param expected `List[Tuple[Union[float, - # int], Union[float, int]]]` but got `List[Tuple[int, int]]`. - bounds=[(-1, 1)] * 5, - ), - ) - self.assertIsInstance(m.model, ModelListGP) - self.assertTrue(torch.allclose(m.Xs[0], (B @ train_X.t()).t())) - - # Test predict - f, cov = m.predict(X=B) - self.assertEqual(f.shape, torch.Size([2, 2])) - self.assertEqual(cov.shape, torch.Size([2, 2, 2])) - - # Test best point - objective_weights = torch.tensor([1.0, 0.0], dtype=torch.double) - search_space_digest = SearchSpaceDigest( - feature_names=[], - # pyre-fixme[6]: For 2nd param expected `List[Tuple[Union[float, int], - # Union[float, int]]]` but got `List[Tuple[int, int]]`. 
- bounds=[(-1, 1)] * 5, - ) - torch_opt_config = TorchOptConfig(objective_weights=objective_weights) - with self.assertRaises(NotImplementedError): - m.best_point( - search_space_digest=search_space_digest, - torch_opt_config=torch_opt_config, - ) - - # Test gen - gen_results = m.gen( - n=1, - search_space_digest=search_space_digest, - torch_opt_config=torch_opt_config, - ) - - self.assertFalse(torch.allclose(gen_results.points, train_X)) - self.assertTrue(gen_results.points.min() >= -1) - self.assertTrue(gen_results.points.max() <= 1) - # Without - gen_results = m.gen( - n=1, - search_space_digest=search_space_digest, - torch_opt_config=torch_opt_config, - ) - - self.assertEqual( - gen_results.points.shape, - torch.Size([1, 5]), - ) - - # Test get_and_fit with single metric - gp = m.get_and_fit_model( - Xs=[(B @ train_X.t()).t()], Ys=[train_Y], Yvars=[train_Yvar] - ) - self.assertIsInstance(gp, ALEBOGP) - - # Test cross_validate - f, cov = m.cross_validate( - datasets=[dataset], - X_test=train_X, - ) - self.assertEqual(f.shape, torch.Size([3, 1])) - self.assertEqual(cov.shape, torch.Size([3, 1, 1])) - m.refit_on_cv = True - f, cov = m.cross_validate( - datasets=[dataset], - X_test=train_X, - ) - self.assertEqual(f.shape, torch.Size([3, 1])) - self.assertEqual(cov.shape, torch.Size([3, 1, 1])) diff --git a/ax/models/tests/test_alebo_initializer.py b/ax/models/tests/test_alebo_initializer.py deleted file mode 100644 index 3a53b32ddf3..00000000000 --- a/ax/models/tests/test_alebo_initializer.py +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# pyre-strict - -import numpy as np -from ax.models.random.alebo_initializer import ALEBOInitializer -from ax.utils.common.testutils import TestCase - - -class ALEBOSobolTest(TestCase): - def test_ALEBOSobolModel(self) -> None: - B = np.array([[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]]) - Q = np.linalg.pinv(B) @ B - # Test setting attributes - m = ALEBOInitializer(B=B) - self.assertTrue(np.allclose(Q, m.Q)) - - # Test gen - Z, w = m.gen(5, bounds=[(-1.0, 1.0)] * 3) - self.assertEqual(Z.shape, (5, 3)) - self.assertTrue(Z.min() >= -1.0) - self.assertTrue(Z.max() <= 1.0) - # Verify that it is in the subspace - self.assertTrue(np.allclose(Q @ Z.transpose(), Z.transpose())) - - m = ALEBOInitializer(B=B, nsamp=1) - with self.assertRaises(ValueError): - m.gen(2, bounds=[(-1.0, 1.0)] * 3) diff --git a/ax/models/tests/test_rembo.py b/ax/models/tests/test_rembo.py deleted file mode 100644 index daeafb98cfa..00000000000 --- a/ax/models/tests/test_rembo.py +++ /dev/null @@ -1,123 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
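In contrast with the REMBO tests below, the ALEBOInitializer test above checks the subspace projector Q = pinv(B) @ B; a minimal sketch of that projection and rejection-sampling step (illustrative, not part of the patch):

import numpy as np

# ALEBOInitializer samples widely, projects into B's subspace via
# Q = pinv(B) @ B, and rejects points whose up-projection leaves [-1, 1]^D.
rng = np.random.default_rng(0)
B = rng.standard_normal((2, 5))    # (d x D) projection down
Q = np.linalg.pinv(B) @ B          # projects down to B and back up

X = rng.uniform(-2.0, 2.0, size=(1000, 5)) @ Q.T
X = X[(X >= -1.0).all(axis=1) & (X <= 1.0).all(axis=1)]
# Accepted points lie in the linear subspace defined by B: Q @ x == x.
assert np.allclose(Q @ X.T, X.T)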
- -# pyre-strict - -import torch -from ax.core.search_space import SearchSpaceDigest -from ax.models.torch.rembo import REMBO -from ax.models.torch_base import TorchOptConfig -from ax.utils.common.testutils import TestCase -from ax.utils.common.typeutils import not_none -from ax.utils.testing.mock import fast_botorch_optimize -from botorch.utils.datasets import SupervisedDataset - - -class REMBOTest(TestCase): - @fast_botorch_optimize - def test_REMBOModel(self) -> None: - A = torch.cat((torch.eye(2), -(torch.eye(2)))) - initial_X_d = torch.tensor([[0.25, 0.5], [1, 0], [0, -1]]) - bounds_d = [(-2.0, 2.0), (-2.0, 2.0)] - my_metric_names = ["a", "b"] - - # Test setting attributes - m = REMBO(A=A, initial_X_d=initial_X_d, bounds_d=bounds_d) - self.assertTrue(torch.allclose(A, m.A)) - self.assertTrue(torch.allclose(torch.pinverse(A), m._pinvA)) - self.assertEqual(m.bounds_d, bounds_d) - self.assertEqual(len(m.X_d), 3) - - # Test fit - # Create high-D data - X_D = torch.t(torch.mm(A, torch.t(initial_X_d))) - Xs = [X_D, X_D.clone()] - Ys = [torch.randn(3, 1)] * 2 - Yvars = [0.1 * torch.ones(3, 1)] * 2 - feature_names = [f"x{i}" for i in range(X_D.shape[-1])] - datasets = [ - SupervisedDataset( - X=X, Y=Y, Yvar=Yvar, feature_names=feature_names, outcome_names=[mn] - ) - for X, Y, Yvar, mn in zip(Xs, Ys, Yvars, my_metric_names) - ] - - bounds = [(-1.0, 1.0)] * 4 - with self.assertRaises(AssertionError): - m.fit( - datasets=datasets, - search_space_digest=SearchSpaceDigest( - feature_names=[], bounds=[(0.0, 1.0)] * 4 - ), - ) - search_space_digest = SearchSpaceDigest(feature_names=[], bounds=bounds) - m.fit( - datasets=datasets, - search_space_digest=search_space_digest, - ) - - # Check was fit with the low-d data. - for x in m.Xs: - self.assertTrue(torch.allclose(x, m.to_01(initial_X_d))) - - self.assertEqual(len(m.X_d), 3) - - # Test project up - X_d2 = torch.tensor([[0.25, 0.5], [2.0, 0.0], [-4.0, 4.0]]) - X_D2 = torch.tensor( - [[0.25, 0.5, -0.25, -0.5], [1.0, 0.0, -1.0, 0.0], [-1.0, 1.0, 1.0, -1.0]] - ) - Z = m.project_up(X_d2) - self.assertTrue(torch.allclose(Z, X_D2)) - - # Test predict - f1, var = m.predict(X=X_D) - self.assertEqual(f1.shape, torch.Size([3, 2])) - with self.assertRaises(NotImplementedError): - m.predict(torch.tensor([[0.1, 0.2, 0.3, 0.4]])) - - f2, var = m.predict(initial_X_d) - self.assertTrue(torch.allclose(f1, f2)) - - # Test best_point - torch_opt_config = TorchOptConfig(objective_weights=torch.tensor([1.0, 0.0])) - x_best = m.best_point( - search_space_digest=search_space_digest, - torch_opt_config=torch_opt_config, - ) - self.assertEqual(len(not_none(x_best)), 4) - - # Test cross_validate - f, var = m.cross_validate( - datasets=[ - SupervisedDataset( - X=X_D[:-1, :], - Y=Ys[0][:-1, :], - Yvar=Yvars[0][:-1, :], - feature_names=feature_names, - outcome_names=my_metric_names[:1], - ), - SupervisedDataset( - X=X_D[:-1, :], - Y=Ys[1][:-1, :], - Yvar=Yvars[1][:-1, :], - feature_names=feature_names, - outcome_names=my_metric_names[1:], - ), - ], - X_test=X_D[-1:, :], - ) - self.assertEqual(f.shape, torch.Size([1, 2])) - - # Test gen - gen_results = m.gen( - n=2, - search_space_digest=search_space_digest, - torch_opt_config=torch_opt_config, - ) - - self.assertEqual(gen_results.points.shape[1], 4) - self.assertEqual(len(m.X_d), 5) diff --git a/ax/models/tests/test_rembo_initializer.py b/ax/models/tests/test_rembo_initializer.py deleted file mode 100644 index fed1f76ff87..00000000000 --- a/ax/models/tests/test_rembo_initializer.py +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env 
python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# pyre-strict - -import numpy as np -from ax.models.random.rembo_initializer import REMBOInitializer -from ax.utils.common.testutils import TestCase - - -class REMBOInitializerTest(TestCase): - def test_REMBOInitializerModel(self) -> None: - A = np.vstack((np.eye(2, 2), -(np.eye(2, 2)))) - # Test setting attributes - # pyre-fixme[6]: For 2nd param expected `List[Tuple[float, float]]` but got - # `List[Tuple[int, int]]`. - m = REMBOInitializer(A=A, bounds_d=[(-2, 2)] * 2) - self.assertTrue(np.allclose(A, m.A)) - self.assertEqual(m.bounds_d, [(-2, 2), (-2, 2)]) - - # Test project up - Z = m.project_up(5 * np.random.rand(3, 2)) - self.assertEqual(Z.shape, (3, 4)) - self.assertTrue(Z.min() >= -1.0) - self.assertTrue(Z.max() <= 1.0) - - # Test gen - Z, w = m.gen(3, bounds=[(-1.0, 1.0)] * 4) - self.assertEqual(Z.shape, (3, 4)) - self.assertTrue(Z.min() >= -1.0) - self.assertTrue(Z.max() <= 1.0) diff --git a/ax/models/torch/alebo.py b/ax/models/torch/alebo.py deleted file mode 100644 index 36830d0e8eb..00000000000 --- a/ax/models/torch/alebo.py +++ /dev/null @@ -1,997 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -# pyre-strict - -from __future__ import annotations - -import dataclasses -import re -from collections import OrderedDict -from logging import Logger -from math import inf -from numbers import Number -from typing import ( - Any, - Callable, - Dict, - List, - MutableMapping, - Optional, - Tuple, - TypeVar, - Union, -) -from warnings import warn - -import gpytorch -import numpy as np -import torch -from ax.core.search_space import SearchSpaceDigest -from ax.core.types import TCandidateMetadata -from ax.models.random.alebo_initializer import ALEBOInitializer -from ax.models.torch.botorch import BotorchModel -from ax.models.torch.botorch_defaults import get_qLogNEI -from ax.models.torch.utils import _datasets_to_legacy_inputs -from ax.models.torch_base import TorchGenResults, TorchModel, TorchOptConfig -from ax.utils.common.docutils import copy_doc -from ax.utils.common.logger import get_logger -from ax.utils.common.typeutils import checked_cast -from botorch.acquisition.acquisition import AcquisitionFunction -from botorch.acquisition.analytic import ExpectedImprovement -from botorch.acquisition.objective import PosteriorTransform -from botorch.models.gp_regression import SingleTaskGP -from botorch.models.gpytorch import GPyTorchModel -from botorch.models.model_list_gp_regression import ModelListGP -from botorch.optim.fit import fit_gpytorch_mll_scipy -from botorch.optim.initializers import initialize_q_batch_nonneg -from botorch.optim.optimize import optimize_acqf -from botorch.optim.utils import ( - _handle_numerical_errors, - get_parameters_and_bounds, - TorchAttr, -) -from botorch.posteriors.gpytorch import GPyTorchPosterior -from botorch.utils.datasets import SupervisedDataset -from gpytorch.distributions.multivariate_normal import MultivariateNormal -from gpytorch.kernels.kernel import Kernel -from gpytorch.kernels.rbf_kernel import postprocess_rbf -from gpytorch.kernels.scale_kernel import ScaleKernel -from gpytorch.mlls.exact_marginal_log_likelihood import ExactMarginalLogLikelihood - -from scipy.optimize import 
approx_fprime -from torch import Tensor -from torch.nn.parameter import Parameter - - -logger: Logger = get_logger(__name__) - - -def module_to_array( - module: torch.nn.Module, -) -> Tuple[np.ndarray, Dict[str, TorchAttr], Optional[np.ndarray]]: - r"""Extract named parameters from a module into a numpy array. - - Only extracts parameters with requires_grad, since it is meant for optimizing. - - NOTE: `module_to_array` was originally a BoTorch function and was later - deprecated. It has been copied here because ALEBO depends on it, and because - ALEBO itself is deprecated, it is not worth moving ALEBO to the new syntax. - - Args: - module: A module with parameters. May specify parameter constraints in - a `named_parameters_and_constraints` method. - - Returns: - 3-element tuple containing - - The parameter values as a numpy array. - - An ordered dictionary with the name and tensor attributes of each - parameter. - - A `2 x n_params` numpy array with lower and upper bounds if at least - one constraint is finite, and None otherwise. - - Example: - >>> mll = ExactMarginalLogLikelihood(model.likelihood, model) - >>> parameter_array, property_dict, bounds_out = module_to_array(mll) - """ - param_dict, bounds_dict = get_parameters_and_bounds( - module=module, - name_filter=None, - requires_grad=True, - ) - - # Record tensor metadata and read parameter values to the tape - param_tape: List[Number] = [] - property_dict = OrderedDict() - with torch.no_grad(): - for name, param in param_dict.items(): - property_dict[name] = TorchAttr(param.shape, param.dtype, param.device) - param_tape.extend(param.view(-1).cpu().double().tolist()) - - # Extract lower and upper bounds - start = 0 - bounds_np = None - params_np = np.asarray(param_tape) - for name, param in param_dict.items(): - numel = param.numel() - if name in bounds_dict: - for row, bound in enumerate(bounds_dict[name]): - if bound is None: - continue - - if isinstance(bound, Tensor): - if torch.eq(bound, (2 * row - 1) * inf).all(): - continue - bound = bound.detach().cpu() - - elif bound == (2 * row - 1) * inf: - continue - - if bounds_np is None: - bounds_np = np.full((2, len(params_np)), ((-inf,), (inf,))) - - bounds_np[row, start : start + numel] = bound - start += numel - - return params_np, property_dict, bounds_np - - -TModule = TypeVar("TModule", bound=torch.nn.Module) - - -def set_params_with_array( - module: TModule, x: np.ndarray, property_dict: Dict[str, TorchAttr] -) -> TModule: - r"""Set module parameters with values from numpy array. - - NOTE: `set_params_with_array` was originally a BoTorch function and was - later deprecated. It has been copied here because ALEBO depends on it, and - because ALEBO itself is deprecated, it is not worth moving ALEBO to the new - syntax. - - Args: - module: Module with parameters to be set - x: Numpy array with parameter values - property_dict: Dictionary of parameter names and torch attributes as - returned by module_to_array. - - Returns: - Module: module with parameters updated in-place. 
- - Example: - >>> mll = ExactMarginalLogLikelihood(model.likelihood, model) - >>> parameter_array, property_dict, bounds_out = module_to_array(mll) - >>> parameter_array += 0.1 # perturb parameters (for example only) - >>> mll = set_params_with_array(mll, parameter_array, property_dict) - """ - param_dict = OrderedDict(module.named_parameters()) - start_idx = 0 - for p_name, attrs in property_dict.items(): - # Construct the new tensor - if len(attrs.shape) == 0: # deal with scalar tensors - end_idx = start_idx + 1 - new_data = torch.tensor( - x[start_idx], dtype=attrs.dtype, device=attrs.device - ) - else: - end_idx = start_idx + np.prod(attrs.shape) - new_data = torch.tensor( - x[start_idx:end_idx], dtype=attrs.dtype, device=attrs.device - ).view(*attrs.shape) - start_idx = end_idx - # Update corresponding parameter in-place. Disable autograd to update. - param_dict[p_name].requires_grad_(False) - param_dict[p_name].copy_(new_data) - param_dict[p_name].requires_grad_(True) - return module - - -def _scipy_objective_and_grad( - x: np.ndarray, mll: ExactMarginalLogLikelihood, property_dict: Dict[str, TorchAttr] -) -> Tuple[Union[float, np.ndarray], np.ndarray]: - r"""Get objective and gradient in format that scipy expects. - - - NOTE: `_scipy_objective_and_grad` was originally a BoTorch function and was later - deprecated. It has been copied here because ALEBO depends on it, and because - ALEBO itself is deprecated, it is not worth moving ALEBO to the new syntax. - - Args: - x: The (flattened) input parameters. - mll: The MarginalLogLikelihood module to evaluate. - property_dict: The property dictionary required to "unflatten" the input - parameter vector, as generated by `module_to_array`. - - Returns: - 2-element tuple containing - - - The objective value. - - The gradient of the objective. - """ - mll = set_params_with_array(mll, x, property_dict) - train_inputs, train_targets = mll.model.train_inputs, mll.model.train_targets - mll.zero_grad() - try: # catch linear algebra errors in gpytorch - output = mll.model(*train_inputs) - args = [output, train_targets] + list(mll.model.train_inputs) - # pyre-fixme[16]: Undefined attribute. Item - # `torch.distributions.distribution.Distribution` of - # `typing.Union[linear_operator.operators._linear_operator.LinearOperator, - # torch._tensor.Tensor, torch.distributions.distribution.Distribution]` - # has no attribute `sum`. - loss = -mll(*args).sum() - except RuntimeError as e: - return _handle_numerical_errors(error=e, x=x) - loss.backward() - - i = 0 - param_dict = OrderedDict(mll.named_parameters()) - grad = np.zeros(sum([tattr.shape.numel() for tattr in property_dict.values()])) - for p_name in property_dict: - t = param_dict[p_name] - size = t.numel() - t_grad = t.grad - if t.requires_grad and t_grad is not None: - grad[i : i + size] = t_grad.detach().view(-1).cpu().double().clone().numpy() - i += size - - mll.zero_grad() - return loss.item(), grad - - -class ALEBOKernel(Kernel): - """The kernel for ALEBO. - - Suppose there exists an ARD RBF GP on an (unknown) linear embedding with - projection matrix A. We make function evaluations in a different linear - embedding with projection matrix B (known). This is the appropriate kernel - for fitting those data. - - This kernel computes a Mahalanobis distance, and the (d x d) PD distance - matrix Gamma is a parameter that must be fit. This is done by fitting its - upper Cholesky decomposition, U. - - Args: - B: (d x D) Projection matrix. - batch_shape: Batch shape as usual for gpytorch kernels. 
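-
-        Only the upper-triangular entries of U are stored, flattened into the
-        `Uvec` parameter that is fit.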
- """ - - def __init__(self, B: Tensor, batch_shape: torch.Size) -> None: - super().__init__( - has_lengthscale=False, ard_num_dims=None, eps=0.0, batch_shape=batch_shape - ) - warn( - "ALEBOKernel is deprecated and should be removed in Ax 0.3.9.", - DeprecationWarning, - ) - # pyre-fixme[4]: Attribute must be annotated. - self.d, D = B.shape - if not self.d < D: - raise ValueError(f"Expected B.shape[0] < B.shape[1], but got {B.shape=}.") - self.B = B - # Initialize U - Arnd = torch.randn(D, D, dtype=B.dtype, device=B.device) - Arnd = torch.linalg.qr(Arnd)[0] - ABinv = Arnd[: self.d, :] @ torch.pinverse(B) - # U is the upper Cholesky decomposition of Gamma, the Mahalanobis - # matrix. Uvec is the upper triangular portion of U squeezed out into - # a vector. - U = torch.linalg.cholesky(torch.mm(ABinv.t(), ABinv)).t() - # pyre-fixme[4]: Attribute must be annotated. - self.triu_indx = torch.triu_indices(self.d, self.d, device=B.device) - Uvec = U[self.triu_indx.tolist()].repeat(*batch_shape, 1) - self.register_parameter(name="Uvec", parameter=torch.nn.Parameter(Uvec)) - - def forward( - self, - x1: Tensor, - x2: Tensor, - diag: bool = False, - last_dim_is_batch: bool = False, - **params: Any, - ) -> Tensor: - """Compute kernel distance.""" - # Unpack Uvec into an upper triangular matrix U - shapeU = self.Uvec.shape[:-1] + torch.Size([self.d, self.d]) - U_t = torch.zeros(shapeU, dtype=self.B.dtype, device=self.B.device) - U_t[..., self.triu_indx[1], self.triu_indx[0]] = checked_cast(Tensor, self.Uvec) - # Compute kernel distance - z1 = torch.matmul(x1, U_t) - z2 = torch.matmul(x2, U_t) - - diff = self.covar_dist( - z1, - z2, - square_dist=True, - diag=diag, - **params, - ) - return postprocess_rbf(diff) - - -class ALEBOGP(SingleTaskGP): - """The GP for ALEBO. - - Uses the Mahalanobis kernel defined in ALEBOKernel, along with a - ScaleKernel to add a kernel variance and a fitted constant mean. - - In non-batch mode, there is a single kernel that produces MVN predictions - as usual for a GP. - With b batches, each batch has its own set of kernel hyperparameters and - each batch represents a sample from the hyperparameter posterior - distribution. When making a prediction (with `__call__`), these samples are - integrated over using moment matching. So, the predictions are an MVN as - usual with the same shape as in non-batch mode. - - Args: - B: (d x D) Projection matrix. - train_X: (n x d) X training data. - train_Y: (n x 1) Y training data. - train_Yvar: (n x 1) Noise variances of each training Y. - """ - - def __init__( - self, B: Tensor, train_X: Tensor, train_Y: Tensor, train_Yvar: Tensor - ) -> None: - warn( - "ALEBOGP is deprecated and should be removed in Ax 0.3.9. SAASBO " - "(Models.SAASBO from ax.modelbridge.registry) likely provides better " - "performance.", - DeprecationWarning, - ) - super().__init__(train_X=train_X, train_Y=train_Y, train_Yvar=train_Yvar) - self.covar_module = ScaleKernel( - base_kernel=ALEBOKernel(B=B, batch_shape=self._aug_batch_shape), - batch_shape=self._aug_batch_shape, - ) - self.to(train_X) - - def __call__(self, x: Tensor) -> MultivariateNormal: - """ - If model is non-batch, then just make a prediction. If model has - multiple batches, then these are samples from the kernel hyperparameter - posterior and we integrate over them with moment matching. - - The shape of the MVN that this outputs will be the same regardless of - whether the model is batched or not. - - Args: - x: Point to be predicted. - - Returns: MultivariateNormal distribution of prediction. 
- """ - if len(self._aug_batch_shape) == 0: - return super().__call__(x) - # Else, approximately integrate over batches with moment matching. - # Take X as (b) x q x d, and expand to (b) x ns x q x d - if x.ndim > 3: - raise ValueError("Don't know how to predict this shape") - x = x.unsqueeze(-3).expand( - x.shape[:-2] - + torch.Size([self._aug_batch_shape[0]]) # pyre-ignore - # pyre-fixme[58]: `+` is not supported for operand types `Tuple[int, - # ...]` and `Size`. - + x.shape[-2:] - ) - mvn_b = super().__call__(x) - mu = mvn_b.mean.mean(dim=-2) - C = ( - mvn_b.covariance_matrix.mean(dim=-3) - + torch.matmul(mvn_b.mean.transpose(-2, -1), mvn_b.mean) - / mvn_b.mean.shape[-2] - - torch.matmul(mu.unsqueeze(-1), mu.unsqueeze(-2)) - ) # Law of Total Covariance - mvn = MultivariateNormal(mu, C) - return mvn - - def posterior( - self, - X: Tensor, - output_indices: Optional[List[int]] = None, - observation_noise: Union[bool, Tensor] = False, - posterior_transform: Optional[PosteriorTransform] = None, - **kwargs: Any, - ) -> GPyTorchPosterior: - assert output_indices is None - assert not observation_noise - mvn = self(X) - posterior = GPyTorchPosterior(distribution=mvn) - if posterior_transform is not None: - return posterior_transform(posterior) - return posterior - - -def get_fitted_model( - B: Tensor, - train_X: Tensor, - train_Y: Tensor, - train_Yvar: Tensor, - restarts: int, - nsamp: int, - init_state_dict: Optional[Dict[str, Tensor]], -) -> ALEBOGP: - """Get a fitted ALEBO GP. - - We do random restart optimization to get a MAP model, then use the Laplace - approximation to draw posterior samples of kernel hyperparameters, and - finally construct a batch-mode model where each batch is one of those - sampled sets of kernel hyperparameters. - - Args: - B: Projection matrix. - train_X: X training data. - train_Y: Y training data. - train_Yvar: Noise variances of each training Y. - restarts: Number of restarts for MAP estimation. - nsamp: Number of samples to draw from kernel hyperparameter posterior. - init_state_dict: Optionally begin MAP estimation with this state dict. - - Returns: Batch-mode (nsamp batches) fitted ALEBO GP. - """ - warn( - "`get_fitted_model` from ax.models.torch.alebo.py is deprecated and " - "should be removed in Ax 0.3.9.", - DeprecationWarning, - ) - # Get MAP estimate. - mll = get_map_model( - B=B, - train_X=train_X, - train_Y=train_Y, - train_Yvar=train_Yvar, - restarts=restarts, - init_state_dict=init_state_dict, - ) - # Compute Laplace approximation of posterior - Uvec_batch, mean_constant_batch, output_scale_batch = laplace_sample_U( - mll=mll, nsamp=nsamp - ) - # Construct batch model with samples - m_b = get_batch_model( - B=B, - train_X=train_X, - train_Y=train_Y, - train_Yvar=train_Yvar, - Uvec_batch=Uvec_batch, - mean_constant_batch=mean_constant_batch, - output_scale_batch=output_scale_batch, - ) - return m_b - - -def get_map_model( - B: Tensor, - train_X: Tensor, - train_Y: Tensor, - train_Yvar: Tensor, - restarts: int, - init_state_dict: Optional[Dict[str, Tensor]], -) -> ExactMarginalLogLikelihood: - """Do random-restart optimization for MAP fitting of an ALEBO GP model. - - Args: - B: Projection matrix. - train_X: X training data. - train_Y: Y training data. - train_Yvar: Noise variances of each training Y. - restarts: Number of restarts for MAP estimation. - init_state_dict: Optionally begin MAP estimation with this state dict. - - Returns: non-batch ALEBO GP with MAP kernel hyperparameters. 
- """ - warn( - "`get_map_model` from ax.models.torch.alebo.py is deprecated and should " - "be removed in Ax 0.3.9.", - DeprecationWarning, - ) - f_best = 1e8 - sd_best = {} - # Fit with random restarts - for _ in range(restarts): - m = ALEBOGP(B=B, train_X=train_X, train_Y=train_Y, train_Yvar=train_Yvar) - if init_state_dict is not None: - m.load_state_dict(init_state_dict) - mll = ExactMarginalLogLikelihood(m.likelihood, m) - mll.train() - result = fit_gpytorch_mll_scipy(mll, method="tnc") - logger.debug(result) - if result.fval < f_best: - f_best = float(result.fval) - sd_best = m.state_dict() - # Set the final value - m = ALEBOGP(B=B, train_X=train_X, train_Y=train_Y, train_Yvar=train_Yvar) - m.load_state_dict(sd_best) - mll = ExactMarginalLogLikelihood(m.likelihood, m) - return mll - - -def laplace_sample_U( - mll: ExactMarginalLogLikelihood, nsamp: int -) -> Tuple[Tensor, Tensor, Tensor]: - """Draw posterior samples of kernel hyperparameters using Laplace - approximation. - - Only the Mahalanobis distance matrix is sampled. - - The diagonal of the Hessian is estimated using finite differences of the - autograd gradients. The Laplace approximation is then N(p_map, inv(-H)). - We construct a set of nsamp kernel hyperparameters by drawing nsamp-1 - values from this distribution, and prepending as the first sample the MAP - parameters. - - Args: - mll: MLL object of MAP ALEBO GP. - nsamp: Number of samples to return. - - Returns: Batch tensors of the kernel hyperparameters Uvec, mean constant, - and output scale. - """ - warn( - "laplace_sample_U is deprecated and should be removed in Ax 0.3.9.", - DeprecationWarning, - ) - # Estimate diagonal of the Hessian - mll.train() - x0, property_dict, bounds = module_to_array(module=mll) - x0 = x0.astype(np.float64) # This is the MAP parameters - H = np.zeros((len(x0), len(x0))) - epsilon = 1e-4 + 1e-3 * np.abs(x0) - for i, _ in enumerate(x0): - # Compute gradient of df/dx_i wrt x_i - # pyre-fixme[53]: Captured variable `property_dict` is not annotated. - # pyre-fixme[53]: Captured variable `x0` is not annotated. - # pyre-fixme[53]: Captured variable `i` is not annotated. - # pyre-fixme[3]: Return type must be annotated. - # pyre-fixme[2]: Parameter must be annotated. - def f(x): - x_all = x0.copy() - x_all[i] = x[0] - return -_scipy_objective_and_grad(x_all, mll, property_dict)[1][i] - - H[i, i] = approx_fprime(np.array([x0[i]]), f, epsilon=epsilon[i]) # pyre-ignore - - # Sample only Uvec; leave mean and output scale fixed. 
- assert list(property_dict.keys()) == [ - "model.mean_module.raw_constant", - "model.covar_module.raw_outputscale", - "model.covar_module.base_kernel.Uvec", - ] - H = H[2:, 2:] - H += np.diag(-1e-3 * np.ones(H.shape[0])) # Add a nugget for inverse stability - Sigma = np.linalg.inv(-H) - samples = np.random.multivariate_normal(mean=x0[2:], cov=Sigma, size=(nsamp - 1)) - # Include the MAP estimate - samples = np.vstack((x0[2:], samples)) - # Reshape - attrs = property_dict["model.covar_module.base_kernel.Uvec"] - Uvec_batch = torch.tensor(samples, dtype=attrs.dtype, device=attrs.device).reshape( - nsamp, *attrs.shape - ) - # Get the other properties into batch mode - mean_constant_batch = mll.model.mean_module.constant.repeat(nsamp) - output_scale_batch = mll.model.covar_module.raw_outputscale.repeat(nsamp) - return Uvec_batch, mean_constant_batch, output_scale_batch - - -def get_batch_model( - B: Tensor, - train_X: Tensor, - train_Y: Tensor, - train_Yvar: Tensor, - Uvec_batch: Tensor, - mean_constant_batch: Tensor, - output_scale_batch: Tensor, -) -> ALEBOGP: - """Construct a batch-mode ALEBO GP using batch tensors of hyperparameters. - - Args: - B: Projection matrix. - train_X: X training data. - train_Y: Y training data. - train_Yvar: Noise variances of each training Y. - Uvec_batch: Batch tensor of Uvec hyperparameters. - mean_constant_batch: Batch tensor of mean constant hyperparameter. - output_scale_batch: Batch tensor of output scale hyperparameter. - - Returns: Batch-mode ALEBO GP. - """ - warn( - "`get_batch_model` from ax.models.torch.alebo.py is deprecated and " - "should be removed in Ax 0.3.9.", - DeprecationWarning, - ) - b = Uvec_batch.size(0) - m_b = ALEBOGP( - B=B, - train_X=train_X.repeat(b, 1, 1), - train_Y=train_Y.repeat(b, 1, 1), - train_Yvar=train_Yvar.repeat(b, 1, 1), - ) - m_b.train() - # Set mean constant - # pyre-fixme[16]: `Optional` has no attribute `raw_constant`. - m_b.mean_module.raw_constant.requires_grad_(False) - m_b.mean_module.raw_constant.copy_(mean_constant_batch) - m_b.mean_module.raw_constant.requires_grad_(True) - # Set output scale - m_b.covar_module.raw_outputscale.requires_grad_(False) - checked_cast(Parameter, m_b.covar_module.raw_outputscale).copy_(output_scale_batch) - m_b.covar_module.raw_outputscale.requires_grad_(True) - # Set Uvec - m_b.covar_module.base_kernel.Uvec.requires_grad_(False) - checked_cast(Parameter, m_b.covar_module.base_kernel.Uvec).copy_(Uvec_batch) - m_b.covar_module.base_kernel.Uvec.requires_grad_(True) - m_b.eval() - return m_b - - -def extract_map_statedict( - m_b: Union[ALEBOGP, ModelListGP], num_outputs: int -) -> List[MutableMapping[str, Tensor]]: - """Extract MAP statedict from the batch-mode ALEBO GP. - - The batch GP can be either a single ALEBO GP or a ModelListGP of ALEBO GPs. - - Args: - m_b: Batch-mode GP. - num_outputs: Number of outputs being modeled. 
- """ - warn( - "`extract_map_statedict` from ax.models.torch.alebo.py is deprecated and " - "should be removed in Ax 0.3.9.", - DeprecationWarning, - ) - is_modellist = num_outputs > 1 - map_sds: List[MutableMapping[str, Tensor]] = [ - OrderedDict() for i in range(num_outputs) - ] - sd = m_b.state_dict() - for k, v in sd.items(): - # Extract model index and parameter name - if is_modellist: - g = re.match(r"^models\.([0-9]+)\.(.*)$", k) - if g is None: - raise Exception("Unable to parse ModelList structure") - model_idx = int(g.group(1)) - param_name = g.group(2) - else: - model_idx = 0 - param_name = k - if len(v.shape) > 1: - v = torch.select(v, 0, 0) - map_sds[model_idx][param_name] = v - return map_sds - - -def ei_or_nei( - model: Union[ALEBOGP, ModelListGP], - objective_weights: Tensor, - outcome_constraints: Optional[Tuple[Tensor, Tensor]], - X_observed: Tensor, - X_pending: Optional[Tensor], - q: int, - noiseless: bool, -) -> AcquisitionFunction: - """Use analytic EI if appropriate, otherwise Monte Carlo NEI. - - Analytic EI can be used if: Single outcome, no constraints, no pending - points, not batch, and no noise. - - Args: - model: GP. - objective_weights: Weights on each outcome for the objective. - outcome_constraints: Outcome constraints. - X_observed: Observed points for NEI. - X_pending: Pending points. - q: Batch size. - noiseless: True if evaluations are noiseless. - - Returns: An AcquisitionFunction, either analytic EI or MC NEI. - """ - warn( - "`ei_or_nei` from ax.models.torch.alebo.py is deprecated and should be " - "removed in Ax 0.5.0.", - DeprecationWarning, - ) - if ( - len(objective_weights) == 1 - and outcome_constraints is None - and X_pending is None - and q == 1 - and noiseless - ): - maximize = objective_weights[0] > 0 - if maximize: - best_f = model.train_targets.max() - else: - best_f = model.train_targets.min() - # pyre-fixme[6]: For 3rd param expected `bool` but got `Tensor`. - return ExpectedImprovement(model=model, best_f=best_f, maximize=maximize) - else: - with gpytorch.settings.max_cholesky_size(2000): - acq = get_qLogNEI( - model=model, - objective_weights=objective_weights, - outcome_constraints=outcome_constraints, - X_observed=X_observed, - X_pending=X_pending, - ) - return acq - - -def alebo_acqf_optimizer( - acq_function: AcquisitionFunction, - bounds: Tensor, - n: int, - inequality_constraints: Optional[List[Tuple[Tensor, Tensor, float]]], - fixed_features: Optional[Dict[int, float]], - rounding_func: Optional[Callable[[Tensor], Tensor]], - raw_samples: int, - num_restarts: int, - B: Tensor, -) -> Tuple[Tensor, Tensor]: - """ - Optimize the acquisition function for ALEBO. - - We are optimizing over a polytope within the subspace, and so begin each - random restart of the acquisition function optimization with points that - lie within that polytope. 
- """ - warn( - "`alebo_acqf_optimizer` is deprecated and should be removed in Ax 0.3.9.", - DeprecationWarning, - ) - candidate_list, acq_value_list = [], [] - candidates = torch.tensor([], device=B.device, dtype=B.dtype) - try: - base_X_pending = acq_function.X_pending - acq_has_X_pend = True - except AttributeError: - base_X_pending = None - acq_has_X_pend = False - assert n == 1 - for i in range(n): - # Generate initial points for optimization inside embedding - m_init = ALEBOInitializer(B.cpu().numpy(), nsamp=10 * raw_samples) - Xrnd_npy, _ = m_init.gen(n=raw_samples, bounds=[(-1.0, 1.0)] * B.shape[1]) - - Xrnd = torch.tensor(Xrnd_npy, dtype=B.dtype, device=B.device).unsqueeze(1) - Yrnd = torch.matmul(Xrnd, B.t()) # Project down to the embedding - - with gpytorch.settings.max_cholesky_size(2000): - with torch.no_grad(): - alpha = acq_function(Yrnd) - - Yinit = initialize_q_batch_nonneg(X=Yrnd, Y=alpha, n=num_restarts) - inf_bounds = ( # all constraints are encoded via inequality_constraints - torch.tensor([[-float("inf")], [float("inf")]]) - .expand(2, Yrnd.shape[-1]) - .to(Yrnd) - ) - # Optimize the acquisition function, separately for each random restart. - candidate, acq_value = optimize_acqf( - acq_function=acq_function, - bounds=inf_bounds, - q=1, - num_restarts=num_restarts, - raw_samples=0, - options={"method": "SLSQP", "batch_limit": 1}, - inequality_constraints=inequality_constraints, - batch_initial_conditions=Yinit, - sequential=False, - ) - candidate_list.append(candidate) - acq_value_list.append(acq_value) - candidates = torch.cat(candidate_list, dim=-2) - if acq_has_X_pend: - acq_function.set_X_pending( - torch.cat([base_X_pending, candidates], dim=-2) - if base_X_pending is not None - else candidates - ) - logger.info(f"Generated sequential candidate {i + 1} of {n}") - if acq_has_X_pend: - acq_function.set_X_pending(base_X_pending) - return candidates, torch.stack(acq_value_list) - - -class ALEBO(BotorchModel): - """Does Bayesian optimization in a linear subspace with ALEBO. - - The (d x D) projection down matrix B must be provided, and must be that - used for the initialization. - - Function evaluations happen in the high-D space. We only evaluate points - such that x = pinverse(B) @ B @ x (that is, points inside the subspace). - Under that constraint, the projection is invertible. - - Args: - B: (d x D) projection matrix (projects down). - laplace_nsamp: Number of samples for posterior sampling of kernel - hyperparameters. - fit_restarts: Number of random restarts for MAP estimation. - """ - - def __init__( - self, B: Tensor, laplace_nsamp: int = 25, fit_restarts: int = 10 - ) -> None: - warn( - "ALEBO is deprecated and should be removed in Ax 0.3.9.", - DeprecationWarning, - ) - self.B = B - # pyre-fixme[4]: Attribute must be annotated. - self.Binv = torch.pinverse(B) - self.laplace_nsamp = laplace_nsamp - self.fit_restarts = fit_restarts - super().__init__( - refit_on_cv=False, - warm_start_refitting=False, - acqf_constructor=ei_or_nei, # pyre-ignore - # pyre-fixme[6]: Expected `(AcquisitionFunction, Tensor, int, Optional[Li... 
- acqf_optimizer=alebo_acqf_optimizer, - ) - - @copy_doc(TorchModel.fit) - def fit( - self, - datasets: List[SupervisedDataset], - search_space_digest: SearchSpaceDigest, - candidate_metadata: Optional[List[List[TCandidateMetadata]]] = None, - ) -> None: - Xs, Ys, Yvars = _datasets_to_legacy_inputs(datasets=datasets) - assert len(search_space_digest.task_features) == 0 - assert len(search_space_digest.fidelity_features) == 0 - for b in search_space_digest.bounds: - assert b == (-1, 1) - # GP is fit in the low-d space, so project Xs down. - self.Xs = [(self.B @ X.t()).t() for X in Xs] - self.Ys = Ys - self.Yvars = Yvars - self.device = self.B.device - self.dtype = self.B.dtype - self.model = self.get_and_fit_model(Xs=self.Xs, Ys=self.Ys, Yvars=self.Yvars) - - @copy_doc(TorchModel.predict) - def predict(self, X: Tensor) -> Tuple[Tensor, Tensor]: - Xd = (self.B @ X.t()).t() # Project down - with gpytorch.settings.max_cholesky_size(2000): - return super().predict(X=Xd) - - @copy_doc(TorchModel.best_point) - def best_point( - self, - search_space_digest: SearchSpaceDigest, - torch_opt_config: TorchOptConfig, - ) -> Optional[Tensor]: - raise NotImplementedError - - def gen( - self, - n: int, - search_space_digest: SearchSpaceDigest, - torch_opt_config: TorchOptConfig, - ) -> TorchGenResults: - """Generate candidates. - - Candidates are generated in the linear embedding with the polytope - constraints described in the paper. - - model_gen_options can contain 'raw_samples' (number of samples used for - initializing the acquisition function optimization) and 'num_restarts' - (number of restarts for acquisition function optimization). - """ - for b in search_space_digest.bounds: - assert b == (-1, 1) - # The following can be easily handled in the future when needed - assert torch_opt_config.linear_constraints is None - assert torch_opt_config.fixed_features is None - assert torch_opt_config.pending_observations is None - # Setup constraints - A = torch.cat((self.Binv, -self.Binv)) - b = torch.ones(2 * self.Binv.shape[0], 1, dtype=self.dtype, device=self.device) - linear_constraints = (A, b) - noiseless = max(Yvar.min().item() for Yvar in self.Yvars) < 1e-5 - model_gen_options = { - "acquisition_function_kwargs": {"q": n, "noiseless": noiseless}, - "optimizer_kwargs": { - "raw_samples": torch_opt_config.model_gen_options.get( - "raw_samples", 1000 - ), - "num_restarts": torch_opt_config.model_gen_options.get( - "num_restarts", 10 - ), - "B": self.B, - }, - } - gen_results = super().gen( - n=n, - search_space_digest=dataclasses.replace( - search_space_digest, - bounds=[(-1e8, 1e8)] * self.B.shape[0], - ), - torch_opt_config=dataclasses.replace( - torch_opt_config, - linear_constraints=linear_constraints, - model_gen_options=model_gen_options, - ), - ) - # Project up - Xopt = (self.Binv @ gen_results.points.t()).t() - # Sometimes numerical tolerance can have Xopt epsilon outside [-1, 1], - # so clip it back. 
-        if Xopt.min() < -1 or Xopt.max() > 1:
-            logger.debug(f"Clipping from [{Xopt.min()}, {Xopt.max()}]")
-            Xopt = torch.clamp(Xopt, min=-1.0, max=1.0)
-
-        return dataclasses.replace(gen_results, points=Xopt)
-
-    @copy_doc(TorchModel.cross_validate)
-    def cross_validate(
-        self,
-        datasets: List[SupervisedDataset],
-        X_test: Tensor,
-        **kwargs: Any,
-    ) -> Tuple[Tensor, Tensor]:
-        if self.model is None:
-            raise RuntimeError("Cannot cross-validate model that has not been fit")
-        if self.refit_on_cv:
-            state_dicts = None
-        else:
-            state_dicts = extract_map_statedict(
-                m_b=self.model, num_outputs=len(self.Xs)  # pyre-ignore
-            )
-        Xs, Ys, Yvars = _datasets_to_legacy_inputs(datasets=datasets)
-        Xs = [X @ self.B.t() for X in Xs]  # Project down.
-        X_test = X_test @ self.B.t()
-        model = self.get_and_fit_model(
-            Xs=Xs, Ys=Ys, Yvars=Yvars, state_dicts=state_dicts
-        )
-        return self.model_predictor(model=model, X=X_test)  # pyre-ignore: [28]
-
-    def get_and_fit_model(
-        self,
-        Xs: List[Tensor],
-        Ys: List[Tensor],
-        Yvars: List[Tensor],
-        state_dicts: Optional[List[MutableMapping[str, Tensor]]] = None,
-    ) -> GPyTorchModel:
-        """Get a fitted ALEBO model for each outcome.
-
-        Args:
-            Xs: X for each outcome, already projected down.
-            Ys: Y for each outcome.
-            Yvars: Noise variance of Y for each outcome.
-            state_dicts: State dicts to initialize model fitting.
-
-        Returns: Fitted ALEBO model.
-        """
-        if state_dicts is None:
-            state_dicts = [None] * len(Xs)
-            fit_restarts = self.fit_restarts
-        else:
-            fit_restarts = 1  # Warm-started
-        Yvars = [Yvar.clamp_min_(1e-7) for Yvar in Yvars]
-        models = [
-            get_fitted_model(
-                B=self.B,
-                train_X=X,
-                train_Y=Ys[i],
-                train_Yvar=Yvars[i],
-                restarts=fit_restarts,
-                nsamp=self.laplace_nsamp,
-                # pyre-fixme[6]: Expected `Optional[Dict[str, Tensor]]` for 7th
-                #  param but got `Optional[MutableMapping[str, Tensor]]`.
-                init_state_dict=state_dicts[i],
-            )
-            for i, X in enumerate(Xs)
-        ]
-        if len(models) == 1:
-            model = models[0]
-        else:
-            model = ModelListGP(*models)
-        model.to(Xs[0])
-        return model
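For readers tracing what this diff removes: `get_and_fit_model` composed three of the deleted helpers, in order: random-restart MAP fitting (`get_map_model`), Laplace sampling of the Mahalanobis kernel hyperparameters (`laplace_sample_U`), and batch-model construction (`get_batch_model`). A minimal usage sketch, assuming an Ax version that still ships `ax.models.torch.alebo`, with purely illustrative shapes (D=20 ambient dimensions, d=4 embedding dimensions, n=8 observations):

import torch
from ax.models.torch.alebo import get_fitted_model  # removed by this diff

D, d, n = 20, 4, 8
B = torch.randn(d, D, dtype=torch.double)       # (d x D) projection matrix
train_X = torch.rand(n, d, dtype=torch.double)  # inputs already projected down
train_Y = torch.randn(n, 1, dtype=torch.double)
train_Yvar = torch.full((n, 1), 1e-6, dtype=torch.double)
# Returns a batch-mode ALEBOGP whose 25 batches are posterior samples of
# the kernel hyperparameters, with the MAP sample first.
model = get_fitted_model(
    B=B,
    train_X=train_X,
    train_Y=train_Y,
    train_Yvar=train_Yvar,
    restarts=10,
    nsamp=25,
    init_state_dict=None,
)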
diff --git a/ax/models/torch/rembo.py b/ax/models/torch/rembo.py
deleted file mode 100644
index 44235a65180..00000000000
--- a/ax/models/torch/rembo.py
+++ /dev/null
@@ -1,266 +0,0 @@
-#!/usr/bin/env python3
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-
-# pyre-strict
-
-import dataclasses
-from typing import Any, List, Optional, Tuple
-from warnings import warn
-
-import torch
-from ax.core.search_space import SearchSpaceDigest
-from ax.core.types import TCandidateMetadata
-from ax.models.torch.botorch import BotorchModel
-from ax.models.torch_base import TorchGenResults, TorchModel, TorchOptConfig
-from ax.utils.common.docutils import copy_doc
-from botorch.utils.datasets import SupervisedDataset
-from torch import Tensor
-
-
-class REMBO(BotorchModel):
-    """Implements REMBO (Bayesian optimization in a linear subspace).
-
-    The (D x d) projection matrix A must be provided, and must be that used
-    for the initialization. In the original REMBO paper A ~ N(0, 1). Box
-    bounds in the low-d space must also be provided, which in the REMBO
-    paper should be [-sqrt(d), sqrt(d)]^d.
-
-    Function evaluations happen in the high-D space, and so the arms on the
-    experiment will also be tracked in the high-D space. This class maintains
-    a list of points in the low-d space that have been launched, so we can
-    match arms in high-D space back to their low-d point on update.
-
-    Args:
-        A: (D x d) projection matrix.
-        initial_X_d: Points in low-d space for initial data.
-        bounds_d: Box bounds in the low-d space.
-        kwargs: kwargs for BotorchModel init
-    """
-
-    def __init__(
-        self,
-        A: Tensor,
-        initial_X_d: Tensor,
-        bounds_d: List[Tuple[float, float]],
-        **kwargs: Any,
-    ) -> None:
-        warn(
-            "REMBO is deprecated and does not guarantee correctness. "
-            "It will be removed in Ax 0.3.9.",
-            DeprecationWarning,
-        )
-        self.A = A
-        # compute pseudo inverse once and cache it
-        self._pinvA: Tensor = torch.pinverse(A)
-        # Projected points in low-d space generated in the optimization
-        self.X_d: List[Tensor] = list(initial_X_d)
-        # Projected points that were generated by this model
-        self.X_d_gen: List[Tensor] = []
-        self.bounds_d = bounds_d
-        self.num_outputs = 0
-        super().__init__(**kwargs)
-
-    @copy_doc(TorchModel.fit)
-    def fit(
-        self,
-        datasets: List[SupervisedDataset],
-        search_space_digest: SearchSpaceDigest,
-        candidate_metadata: Optional[List[List[TCandidateMetadata]]] = None,
-    ) -> None:
-        assert len(search_space_digest.task_features) == 0
-        assert len(search_space_digest.fidelity_features) == 0
-        for b in search_space_digest.bounds:
-            # REMBO assumes the input space is [-1, 1]^D
-            assert b == (-1, 1)
-        self.num_outputs = len(datasets)
-        # For convenience for now, assume X for all outcomes the same
-        low_d_datasets = self._convert_and_normalize_datasets(datasets=datasets)
-        super().fit(
-            datasets=low_d_datasets,
-            search_space_digest=SearchSpaceDigest(
-                feature_names=[f"x{i}" for i in range(self.A.shape[1])],
-                bounds=[(0.0, 1.0)] * len(self.bounds_d),
-                task_features=search_space_digest.task_features,
-                fidelity_features=search_space_digest.fidelity_features,
-            ),
-            candidate_metadata=candidate_metadata,
-        )
-
-    def to_01(self, X_d: Tensor) -> Tensor:
-        """Map points from bounds_d to [0, 1].
-
-        Args:
-            X_d: Tensor in bounds_d
-
-        Returns: Tensor in [0, 1].
-        """
-        X_d01 = X_d.clone()
-        for i, (lb, ub) in enumerate(self.bounds_d):
-            X_d01[:, i] = (X_d01[:, i] - lb) / (ub - lb)
-        return X_d01
-
-    def from_01(self, X_d01: Tensor) -> Tensor:
-        """Map points from [0, 1] to bounds_d.
-
-        Args:
-            X_d01: Tensor in [0, 1]
-
-        Returns: Tensor in bounds_d.
-        """
-        X_d = X_d01.clone()
-        for i, (lb, ub) in enumerate(self.bounds_d):
-            X_d[:, i] = X_d[:, i] * (ub - lb) + lb
-        return X_d
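`to_01` and `from_01` above are inverse coordinate-wise affine maps between the REMBO box `bounds_d` and the unit cube. A self-contained check of the round trip (illustrative bounds, not Ax API):

import torch

lb, ub = -2.0, 2.0                  # one coordinate of a hypothetical bounds_d
X_d = torch.tensor([[-1.0], [0.5]]) # points in [lb, ub]
X_01 = (X_d - lb) / (ub - lb)       # to_01: tensor([[0.2500], [0.6250]])
assert torch.allclose(X_01 * (ub - lb) + lb, X_d)  # from_01 inverts to_01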
- """ - X_d = [] - unmatched = list(range(len(self.X_d))) - for x_D in X_D: - idx_match = None - for d_idx in unmatched: - if torch.allclose(x_D, self.project_up(self.X_d[d_idx])): - idx_match = d_idx - break - if idx_match is not None: - X_d.append(self.X_d[idx_match]) - unmatched.remove(idx_match) - else: - raise ValueError("Failed to project X down.") - return torch.stack(X_d) - - def project_up(self, X: Tensor) -> Tensor: - """Project to high-dimensional space.""" - Z = torch.t(self.A @ torch.t(X)) - Z = torch.clamp(Z, min=-1, max=1) - return Z - - @copy_doc(TorchModel.predict) - def predict(self, X: Tensor) -> Tuple[Tensor, Tensor]: - # Suports preditions in both low-d and high-D space, depending on shape - # of X. For high-D, predictions are restricted to within the linear - # embedding, so can project down with pseudoinverse. - if X.shape[1] == self.A.shape[1]: - # X is in low-d space - X_d = X - else: - # Project down to low-d space - X_d = X @ torch.t(self._pinvA) - # Project X_d back up to verify X was within linear embedding - if not torch.allclose(X, X_d @ torch.t(self.A)): - raise NotImplementedError( - "Predictions outside the linear embedding not supported." - ) - return super().predict(X=self.to_01(X_d)) - - @copy_doc(TorchModel.gen) - def gen( - self, - n: int, - search_space_digest: SearchSpaceDigest, - torch_opt_config: TorchOptConfig, - ) -> TorchGenResults: - for b in search_space_digest.bounds: - assert b == (-1, 1) - # The following can be easily handled in the future when needed - assert torch_opt_config.linear_constraints is None - assert torch_opt_config.fixed_features is None - assert torch_opt_config.pending_observations is None - # Do gen in the low-dimensional space and project up - rounding_func = torch_opt_config.rounding_func - gen_results = super().gen( - n=n, - search_space_digest=dataclasses.replace( - search_space_digest, - bounds=[(0.0, 1.0)] * len(self.bounds_d), - ), - torch_opt_config=dataclasses.replace(torch_opt_config, rounding_func=None), - ) - Xopt = self.from_01(gen_results.points) - self.X_d.extend([x.clone() for x in Xopt]) - self.X_d_gen.extend([x.clone() for x in Xopt]) - gen_points = self.project_up(Xopt) - if rounding_func is not None: - for i in range(len(gen_points)): - gen_points[i] = rounding_func(gen_points[i]) - return TorchGenResults( - points=gen_points, - weights=gen_results.weights, - ) - - @copy_doc(TorchModel.best_point) - def best_point( - self, - search_space_digest: SearchSpaceDigest, - torch_opt_config: TorchOptConfig, - ) -> Optional[Tensor]: - for b in search_space_digest.bounds: - assert b == (-1, 1) - assert torch_opt_config.linear_constraints is None - assert torch_opt_config.fixed_features is None - x_best = super().best_point( - search_space_digest=dataclasses.replace( - search_space_digest, - bounds=self.bounds_d, - ), - torch_opt_config=torch_opt_config, - ) - if x_best is not None: - x_best = self.project_up(self.from_01(x_best.unsqueeze(0))).squeeze(0) - return x_best - - @copy_doc(TorchModel.cross_validate) - def cross_validate( - self, - datasets: List[SupervisedDataset], - X_test: Tensor, - **kwargs: Any, - ) -> Tuple[Tensor, Tensor]: - low_d_datasets = self._convert_and_normalize_datasets(datasets=datasets) - X_test_d = self.project_down(X_test) - return super().cross_validate( - datasets=low_d_datasets, - X_test=self.to_01(X_test_d), - ) - - def _convert_and_normalize_datasets( - self, datasets: List[SupervisedDataset] - ) -> List[SupervisedDataset]: - X_D = _get_single_X([dataset.X for dataset in 
-    def _convert_and_normalize_datasets(
-        self, datasets: List[SupervisedDataset]
-    ) -> List[SupervisedDataset]:
-        X_D = _get_single_X([dataset.X for dataset in datasets])
-        X_d_01 = self.to_01(self.project_down(X_D))
-        # Fit model in low-d space (adjusted to [0, 1]^d)
-        for dataset in datasets:
-            dataset._X = X_d_01
-        return datasets
-
-
-def _get_single_X(Xs: List[Tensor]) -> Tensor:
-    """Verify all X are identical, and return one.
-
-    Args:
-        Xs: A list of X tensors
-
-    Returns: Xs[0], after verifying they are all identical.
-    """
-    X = Xs[0]
-    for i in range(1, len(Xs)):
-        assert torch.allclose(X, Xs[i])
-    return X
diff --git a/sphinx/source/modelbridge.rst b/sphinx/source/modelbridge.rst
index 96f8367d9f7..c956557ac5b 100644
--- a/sphinx/source/modelbridge.rst
+++ b/sphinx/source/modelbridge.rst
@@ -416,19 +416,3 @@ Transforms
    :members:
    :undoc-members:
    :show-inheritance:
-
-Strategies
--------------
-
-`ax.modelbridge.strategies.alebo`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.. automodule:: ax.modelbridge.strategies.alebo
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-.. automodule:: ax.modelbridge.strategies.rembo
-   :members:
-   :undoc-members:
-   :show-inheritance:
diff --git a/sphinx/source/models.rst b/sphinx/source/models.rst
index 449f81440c9..1b973b7f9f7 100644
--- a/sphinx/source/models.rst
+++ b/sphinx/source/models.rst
@@ -116,34 +116,9 @@ ax.models.random.sobol module
    :undoc-members:
    :show-inheritance:
 
-ax.models.random.alebo_initializer module
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.. automodule:: ax.models.random.alebo_initializer
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-ax.models.random.rembo_initializer module
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.. automodule:: ax.models.random.rembo_initializer
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-
 Torch Models & Utilities
 ------------------------
 
-ax.models.torch.alebo module
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.. automodule:: ax.models.torch.alebo
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
 ax.models.torch.botorch module
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -344,14 +319,6 @@ ax.models.torch.posterior_mean module
    :undoc-members:
    :show-inheritance:
 
-ax.models.torch.rembo module
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.. automodule:: ax.models.torch.rembo
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
 ax.models.torch.utils module
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
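A closing note on the numerics removed by this diff: the moment matching in `ALEBOGP.__call__` collapses a mixture over `ns` hyperparameter samples into a single Gaussian via the Law of Total Covariance. A self-contained check of that identity (illustrative shapes only, no Ax dependency):

import torch

ns, q = 5, 3                    # hyperparameter samples, test points
mu_b = torch.randn(ns, q)       # per-sample predictive means
L = torch.randn(ns, q, q)
C_b = L @ L.transpose(-2, -1) + 0.1 * torch.eye(q)  # per-sample PSD covariances
mu = mu_b.mean(dim=0)           # mixture mean
# As in ALEBOGP.__call__: E[C_b] + E[mu_b mu_b^T] - mu mu^T ...
C = C_b.mean(dim=0) + mu_b.t() @ mu_b / ns - torch.outer(mu, mu)
# ... equals the mean covariance plus the covariance of the means:
centered = mu_b - mu
assert torch.allclose(C, C_b.mean(dim=0) + centered.t() @ centered / ns, atol=1e-5)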