diff --git a/botorch/acquisition/input_constructors.py b/botorch/acquisition/input_constructors.py
index deb8433321..d98fe13642 100644
--- a/botorch/acquisition/input_constructors.py
+++ b/botorch/acquisition/input_constructors.py
@@ -1800,7 +1800,6 @@ def construct_inputs_qJES(
     model: Model,
     bounds: list[tuple[float, float]],
     num_optima: int = 64,
-    maximize: bool = True,
     condition_noiseless: bool = True,
     X_pending: Tensor | None = None,
     estimation_type: str = "LB",
@@ -1811,7 +1810,6 @@ def construct_inputs_qJES(
         model=model,
         bounds=torch.as_tensor(bounds, dtype=dtype).T,
         num_optima=num_optima,
-        maximize=maximize,
     )
 
     inputs = {
@@ -1819,7 +1817,6 @@ def construct_inputs_qJES(
         "optimal_inputs": optimal_inputs,
         "optimal_outputs": optimal_outputs,
         "condition_noiseless": condition_noiseless,
-        "maximize": maximize,
         "X_pending": X_pending,
         "estimation_type": estimation_type,
         "num_samples": num_samples,
diff --git a/botorch/acquisition/utils.py b/botorch/acquisition/utils.py
index e53bc0a7d0..d486629b76 100644
--- a/botorch/acquisition/utils.py
+++ b/botorch/acquisition/utils.py
@@ -18,6 +18,7 @@
     IdentityMCObjective,
     MCAcquisitionObjective,
     PosteriorTransform,
+    ScalarizedPosteriorTransform,
 )
 from botorch.exceptions.errors import (
     BotorchTensorDimensionError,
@@ -28,10 +29,11 @@
 from botorch.models.model import Model
 from botorch.sampling.base import MCSampler
 from botorch.sampling.get_sampler import get_sampler
-from botorch.sampling.pathwise import draw_matheron_paths
+from botorch.sampling.pathwise.posterior_samplers import get_matheron_path_model
 from botorch.utils.objective import compute_feasibility_indicator
 from botorch.utils.sampling import optimize_posterior_samples
 from botorch.utils.transforms import is_ensemble, normalize_indices
+from gpytorch.models import GP
 from torch import Tensor
 
 
@@ -486,36 +488,62 @@ def project_to_sample_points(X: Tensor, sample_points: Tensor) -> Tensor:
 
 
 def get_optimal_samples(
-    model: Model,
+    model: GP,
     bounds: Tensor,
     num_optima: int,
     raw_samples: int = 1024,
     num_restarts: int = 20,
-    maximize: bool = True,
+    posterior_transform: ScalarizedPosteriorTransform | None = None,
+    objective: MCAcquisitionObjective | None = None,
+    return_transformed: bool = False,
 ) -> tuple[Tensor, Tensor]:
     """Draws sample paths from the posterior and maximizes the samples using GD.
 
     Args:
-        model (Model): The model from which samples are drawn.
-        bounds: (Tensor): Bounds of the search space. If the model inputs are
+        model: The model from which samples are drawn.
+        bounds: Bounds of the search space. If the model inputs are
             normalized, the bounds should be normalized as well.
-        num_optima (int): The number of paths to be drawn and optimized.
-        raw_samples (int, optional): The number of candidates randomly sample.
+        num_optima: The number of paths to be drawn and optimized.
+        raw_samples: The number of candidates to sample randomly.
             Defaults to 1024.
-        num_restarts (int, optional): The number of candidates to do gradient-based
+        num_restarts: The number of candidates to do gradient-based
             optimization on. Defaults to 20.
-        maximize: Whether to maximize or minimize the samples.
+        posterior_transform: A ScalarizedPosteriorTransform (may e.g. be used to
+            scalarize multi-output models or negate the objective).
+        objective: An MCAcquisitionObjective, used to negate the objective or otherwise
+            transform sample outputs. Cannot be combined with `posterior_transform`.
+        return_transformed: If True, return the transformed samples.
+
     Returns:
-        Tuple[Tensor, Tensor]: The optimal input locations and corresponding
-        outputs, x* and f*.
+        The optimal input locations and corresponding outputs, x* and f*.
     """
-    paths = draw_matheron_paths(model, sample_shape=torch.Size([num_optima]))
+    if posterior_transform and not isinstance(
+        posterior_transform, ScalarizedPosteriorTransform
+    ):
+        raise ValueError(
+            "Only the ScalarizedPosteriorTransform is supported for "
+            "get_optimal_samples."
+        )
+    if posterior_transform and objective:
+        raise ValueError(
+            "Only one of `posterior_transform` and `objective` can be specified."
+        )
+
+    if posterior_transform:
+        sample_transform = posterior_transform.evaluate
+    elif objective:
+        sample_transform = objective
+    else:
+        sample_transform = None
+
+    paths = get_matheron_path_model(model=model, sample_shape=torch.Size([num_optima]))
     optimal_inputs, optimal_outputs = optimize_posterior_samples(
-        paths,
+        paths=paths,
         bounds=bounds,
         raw_samples=raw_samples,
         num_restarts=num_restarts,
-        maximize=maximize,
+        sample_transform=sample_transform,
+        return_transformed=return_transformed,
     )
     return optimal_inputs, optimal_outputs
 
diff --git a/botorch/utils/sampling.py b/botorch/utils/sampling.py
index da4df94849..52fe54fbb2 100644
--- a/botorch/utils/sampling.py
+++ b/botorch/utils/sampling.py
@@ -19,9 +19,9 @@
 
 import warnings
 from abc import ABC, abstractmethod
-from collections.abc import Generator, Iterable
+from collections.abc import Callable, Generator, Iterable
 from contextlib import contextmanager
-from typing import Any, TYPE_CHECKING
+from typing import TYPE_CHECKING
 
 import numpy as np
 import numpy.typing as npt
@@ -37,7 +37,9 @@
 
 
 if TYPE_CHECKING:
-    from botorch.sampling.pathwise.paths import SamplePath  # pragma: no cover
+    from botorch.models.deterministic import (  # pragma: no cover
+        GenericDeterministicModel,
+    )
 
 
 @contextmanager
@@ -989,45 +991,45 @@ def sparse_to_dense_constraints(
 
 
 def optimize_posterior_samples(
-    paths: SamplePath,
+    paths: GenericDeterministicModel,
     bounds: Tensor,
-    candidates: Tensor | None = None,
-    raw_samples: int | None = 1024,
+    raw_samples: int = 1024,
     num_restarts: int = 20,
-    maximize: bool = True,
-    **kwargs: Any,
+    sample_transform: Callable[[Tensor], Tensor] | None = None,
+    return_transformed: bool = False,
 ) -> tuple[Tensor, Tensor]:
-    r"""Cheaply maximizes posterior samples by random querying followed by vanilla
-    gradient descent on the best num_restarts points.
+    r"""Cheaply maximizes posterior samples by random querying followed by
+    gradient-based optimization using SciPy's L-BFGS-B routine.
 
     Args:
         paths: Random Fourier Feature-based sample paths from the GP
         bounds: The bounds on the search space.
-        candidates: A priori good candidates (typically previous design points)
-            which acts as extra initial guesses for the optimization routine.
         raw_samples: The number of samples with which to query the samples initially.
         num_restarts: The number of points selected for gradient-based optimization.
-        maximize: Boolean indicating whether to maimize or minimize
+        sample_transform: A callable transform of the sample outputs (e.g.
+            MCAcquisitionObjective or ScalarizedPosteriorTransform.evaluate) used to
+            negate the objective or otherwise transform the output.
+        return_transformed: A boolean indicating whether to return the transformed
+            or non-transformed samples.
 
     Returns:
         A two-element tuple containing:
        - X_opt: A `num_optima x [batch_size] x d`-dim tensor of optimal inputs x*.
-        - f_opt: A `num_optima x [batch_size] x 1`-dim tensor of optimal outputs f*.
+        - f_opt: A `num_optima x [batch_size] x m`-dim, optionally
+          `num_optima x [batch_size] x 1`-dim, tensor of optimal outputs f*.
     """
-    if maximize:
 
-        def path_func(x):
-            return paths(x)
-
-    else:
+    def path_func(x) -> Tensor:
+        res = paths(x)
+        if sample_transform:
+            res = sample_transform(res)
 
-        def path_func(x):
-            return -paths(x)
+        return res.squeeze(-1)
 
     candidate_set = unnormalize(
-        SobolEngine(dimension=bounds.shape[1], scramble=True).draw(raw_samples), bounds
+        SobolEngine(dimension=bounds.shape[1], scramble=True).draw(n=raw_samples),
+        bounds=bounds,
     )
-
     # queries all samples on all candidates - output shape
     # raw_samples * num_optima * num_models
     candidate_queries = path_func(candidate_set)
@@ -1035,10 +1037,13 @@ def path_func(x):
     X_top_k = candidate_set[argtop_k, :]
 
     # to avoid circular import, the import occurs here
-    from botorch.generation.gen import gen_candidates_torch
+    from botorch.generation.gen import gen_candidates_scipy
 
-    X_top_k, f_top_k = gen_candidates_torch(
-        X_top_k, path_func, lower_bounds=bounds[0], upper_bounds=bounds[1], **kwargs
+    X_top_k, f_top_k = gen_candidates_scipy(
+        X_top_k,
+        path_func,
+        lower_bounds=bounds[0],
+        upper_bounds=bounds[1],
     )
     f_opt, arg_opt = f_top_k.max(dim=-1, keepdim=True)
 
@@ -1046,11 +1051,16 @@ def path_func(x):
     # retrieves the argmax. We flatten, pick out the indices and then reshape to
     # the original batch shapes (so instead of pickig out the argmax of a
     # (3, 7, num_restarts, D)) along the num_restarts dim, we pick it out of a
-    # (21 , num_restarts, D)
+    # (21, num_restarts, D)
     final_shape = candidate_queries.shape[:-1]
     X_opt = X_top_k.reshape(final_shape.numel(), num_restarts, -1)[
         torch.arange(final_shape.numel()), arg_opt.flatten()
     ].reshape(*final_shape, -1)
-    if not maximize:
-        f_opt = -f_opt
+
+    # if we return transformed, we do not need to pass the samples through
+    # paths a second time but rather just return the transformed optimal values
+    if return_transformed:
+        return X_opt, f_opt
+
+    f_opt = paths(X_opt.unsqueeze(-2)).squeeze(-2)
     return X_opt, f_opt
diff --git a/test/acquisition/test_input_constructors.py b/test/acquisition/test_input_constructors.py
index 208637083b..968da03aab 100644
--- a/test/acquisition/test_input_constructors.py
+++ b/test/acquisition/test_input_constructors.py
@@ -1620,10 +1620,8 @@ def test_construct_inputs_jes(self) -> None:
             training_data=self.blockX_blockY,
             bounds=self.bounds,
             num_optima=17,
-            maximize=False,
         )
 
-        self.assertFalse(kwargs["maximize"])
         self.assertEqual(self.blockX_blockY[0].X.dtype, kwargs["optimal_inputs"].dtype)
         self.assertEqual(len(kwargs["optimal_inputs"]), 17)
         self.assertEqual(len(kwargs["optimal_outputs"]), 17)
diff --git a/test/acquisition/test_utils.py b/test/acquisition/test_utils.py
index 61845a387a..7e4718150e 100644
--- a/test/acquisition/test_utils.py
+++ b/test/acquisition/test_utils.py
@@ -10,7 +10,13 @@
 
 import torch
 
-from botorch.acquisition.objective import GenericMCObjective, LearnedObjective
+from botorch.acquisition.objective import (
+    ExpectationPosteriorTransform,
+    GenericMCObjective,
+    LearnedObjective,
+    LinearMCObjective,
+    ScalarizedPosteriorTransform,
+)
 from botorch.acquisition.utils import (
     compute_best_feasible_objective,
     expand_trace_observations,
@@ -412,32 +418,108 @@ class TestGetOptimalSamples(BotorchTestCase):
     def test_get_optimal_samples(self):
         dims = 3
         dtype = torch.float64
-        for_testing_speed_kwargs = {"raw_samples": 50, "num_restarts": 3}
+        for_testing_speed_kwargs = {"raw_samples": 20, "num_restarts": 2}
         num_optima = 7
         batch_shape = (3,)
 
         bounds = torch.tensor([[0, 1]] * dims, dtype=dtype).T
         X = torch.rand(*batch_shape, 4, dims, dtype=dtype)
-        Y = torch.sin(X).sum(dim=-1, keepdim=True).to(dtype)
-        model = SingleTaskGP(X, Y)
-        X_opt, f_opt = get_optimal_samples(
-            model, bounds, num_optima=num_optima, **for_testing_speed_kwargs
+        Y = torch.sin(2 * 3.1415 * X).sum(dim=-1, keepdim=True).to(dtype)
+        model = SingleTaskGP(train_X=X, train_Y=Y)
+        posterior_transform = ScalarizedPosteriorTransform(
+            weights=torch.ones(1, dtype=dtype)
         )
-        X_opt, f_opt_min = get_optimal_samples(
-            model,
-            bounds,
-            num_optima=num_optima,
-            maximize=False,
-            **for_testing_speed_kwargs,
+        posterior_transform_neg = ScalarizedPosteriorTransform(
+            weights=-torch.ones(1, dtype=dtype)
         )
-
+        with torch.random.fork_rng():
+            torch.manual_seed(0)
+            X_opt_def, f_opt_def = get_optimal_samples(
+                model=model,
+                bounds=bounds,
+                num_optima=num_optima,
+                **for_testing_speed_kwargs,
+            )
         correct_X_shape = (num_optima,) + batch_shape + (dims,)
         correct_f_shape = (num_optima,) + batch_shape + (1,)
-        self.assertEqual(X_opt.shape, correct_X_shape)
-        self.assertEqual(f_opt.shape, correct_f_shape)
-        # asserting that the solutions found by minimization the samples are smaller
-        # than those found by maximization
-        self.assertTrue(torch.all(f_opt_min < f_opt))
+        self.assertEqual(X_opt_def.shape, correct_X_shape)
+        self.assertEqual(f_opt_def.shape, correct_f_shape)
+        with torch.random.fork_rng():
+            torch.manual_seed(0)
+            X_opt_ps, f_opt_ps = get_optimal_samples(
+                model=model,
+                bounds=bounds,
+                num_optima=num_optima,
+                posterior_transform=posterior_transform,
+                **for_testing_speed_kwargs,
+            )
+        self.assertAllClose(X_opt_def, X_opt_ps)
+
+        with torch.random.fork_rng():
+            torch.manual_seed(0)
+            X_opt_ps_neg, f_opt_ps_neg = get_optimal_samples(
+                model=model,
+                bounds=bounds,
+                num_optima=num_optima,
+                posterior_transform=posterior_transform_neg,
+                **for_testing_speed_kwargs,
+            )
+        # maxima larger than minima when the seed is fixed
+        self.assertTrue(torch.all(f_opt_ps_neg < f_opt_ps))
+
+        obj = LinearMCObjective(weights=-torch.ones(1, dtype=dtype))
+        with torch.random.fork_rng():
+            torch.manual_seed(0)
+            X_opt_obj_neg, f_opt_obj_neg = get_optimal_samples(
+                model=model,
+                bounds=bounds,
+                num_optima=num_optima,
+                objective=obj,
+                **for_testing_speed_kwargs,
+            )
+        # check that the minimum is the same for negative objective and
+        # negative posterior transform
+        self.assertAllClose(X_opt_ps_neg, X_opt_obj_neg)
+
+        obj = LinearMCObjective(weights=-torch.ones(1, dtype=dtype))
+        with torch.random.fork_rng():
+            torch.manual_seed(0)
+            _, f_opt_obj_pos = get_optimal_samples(
+                model=model,
+                bounds=bounds,
+                num_optima=num_optima,
+                objective=obj,
+                return_transformed=True,
+                **for_testing_speed_kwargs,
+            )
+        # check that the transformed return value is the negation of the
+        # non-transformed return value
+        self.assertAllClose(f_opt_obj_pos, -f_opt_obj_neg)
+
+        with self.assertRaisesRegex(
+            ValueError,
+            "Only the ScalarizedPosteriorTransform is supported for "
+            "get_optimal_samples.",
+        ):
+            get_optimal_samples(
+                model=model,
+                bounds=bounds,
+                num_optima=num_optima,
+                posterior_transform=ExpectationPosteriorTransform(n_w=5),
+                **for_testing_speed_kwargs,
+            )
+        with self.assertRaisesRegex(
+            ValueError,
+            "Only one of `posterior_transform` and `objective` can be specified.",
+        ):
+            get_optimal_samples(
+                model=model,
+                bounds=bounds,
+                num_optima=num_optima,
+                posterior_transform=posterior_transform,
+                objective=obj,
+                **for_testing_speed_kwargs,
+            )
 
 
 class TestPreferenceUtils(BotorchTestCase):
diff --git a/test/utils/test_sampling.py b/test/utils/test_sampling.py
index 78418d2879..84e4f54a28 100644
--- a/test/utils/test_sampling.py
+++ b/test/utils/test_sampling.py
@@ -14,10 +14,14 @@
 
 import numpy as np
 import torch
+from botorch.acquisition.objective import (
+    LinearMCObjective,
+    ScalarizedPosteriorTransform,
+)
 from botorch.exceptions.errors import BotorchError
 from botorch.exceptions.warnings import UserInputWarning
 from botorch.models.gp_regression import SingleTaskGP
-from botorch.sampling.pathwise import draw_matheron_paths
+from botorch.sampling.pathwise.posterior_samplers import get_matheron_path_model
 from botorch.utils.sampling import (
     _convert_bounds_to_inequality_constraints,
     batched_multinomial,
@@ -552,23 +556,35 @@ def test_optimize_posterior_samples(self):
         torch.manual_seed(1)
         dims = 2
         dtype = torch.float64
-        eps = 1e-6
-        for_testing_speed_kwargs = {"raw_samples": 512, "num_restarts": 10}
+        eps = 1e-4
+        for_testing_speed_kwargs = {"raw_samples": 128, "num_restarts": 4}
         nums_optima = (1, 7)
-        batch_shapes = ((), (3,), (5, 2))
-        for num_optima, batch_shape in itertools.product(nums_optima, batch_shapes):
+        batch_shapes = ((), (2,), (3, 2))
+        posterior_transforms = (
+            None,
+            ScalarizedPosteriorTransform(weights=-torch.ones(1, dtype=dtype)),
+        )
+        for num_optima, batch_shape, posterior_transform in itertools.product(
+            nums_optima, batch_shapes, posterior_transforms
+        ):
             bounds = torch.tensor([[0, 1]] * dims, dtype=dtype).T
-            X = torch.rand(*batch_shape, 13, dims, dtype=dtype)
+            X = torch.rand(*batch_shape, 4, dims, dtype=dtype)
             Y = torch.pow(X - 0.5, 2).sum(dim=-1, keepdim=True)
 
             # having a noiseless model all but guarantees that the found optima
             # will be better than the observations
             model = SingleTaskGP(X, Y, torch.full_like(Y, eps))
-            paths = draw_matheron_paths(
+            model.covar_module.lengthscale = 0.5
+            paths = get_matheron_path_model(
                 model=model, sample_shape=torch.Size([num_optima])
             )
             X_opt, f_opt = optimize_posterior_samples(
-                paths, bounds, **for_testing_speed_kwargs
+                paths=paths,
+                bounds=bounds,
+                sample_transform=(
+                    posterior_transform.evaluate if posterior_transform else None
+                ),
+                **for_testing_speed_kwargs,
             )
 
             correct_X_shape = (num_optima,) + batch_shape + (dims,)
@@ -581,4 +597,72 @@ def test_optimize_posterior_samples(self):
 
             # Check that the all found optima are larger than the observations
             # This is not 100% deterministic, but just about.
-            self.assertTrue(torch.all(f_opt > Y.max(dim=-2).values))
+            Y_queries = paths(X)
+            # this is when we negate, so the values should be smaller
+            if posterior_transform:
+                self.assertTrue(torch.all(f_opt < Y_queries.min(dim=-2).values))
+
+            # otherwise, larger
+            else:
+                self.assertTrue(torch.all(f_opt > Y_queries.max(dim=-2).values))
+
+        obj = LinearMCObjective(weights=-torch.ones(1, dtype=dtype))
+        X_opt, f_opt = optimize_posterior_samples(
+            paths=paths,
+            bounds=bounds,
+            sample_transform=obj,
+            **for_testing_speed_kwargs,
+        )
+        self.assertTrue(torch.all(f_opt < Y_queries.max(dim=-2).values))
+
+    def test_optimize_posterior_samples_multi_objective(self):
+        # Fix the random seed to prevent flaky failures.
+        torch.manual_seed(1)
+        dims = 2
+        dtype = torch.float64
+        eps = 1e-4
+        for_testing_speed_kwargs = {"raw_samples": 128, "num_restarts": 4}
+        num_optima = 5
+        batch_shape = (3,)
+
+        # test that multi-output models are supported if there is an appropriate
+        # scalarization
+        bounds = torch.tensor([[0, 1]] * dims, dtype=dtype).T
+        X = torch.rand(*batch_shape, 4, dims, dtype=dtype)
+        Y1 = torch.pow(X - 0.5, 2).sum(dim=-1, keepdim=True)
+        Y2 = torch.cos(X * 3).sum(dim=-1, keepdim=True)
+        Y = torch.cat([Y1, Y2], dim=-1)
+        # having a noiseless model all but guarantees that the found optima
+        # will be better than the observations
+        model = SingleTaskGP(X, Y, torch.full_like(Y, eps))
+        model.covar_module.lengthscale = 0.5
+        posterior_transform = ScalarizedPosteriorTransform(
+            weights=torch.ones(2, dtype=dtype)
+        )
+        paths = get_matheron_path_model(
+            model=model,
+            sample_shape=torch.Size([num_optima]),
+        )
+        X_opt, f_opt = optimize_posterior_samples(
+            paths=paths,
+            bounds=bounds,
+            sample_transform=posterior_transform.evaluate,
+            **for_testing_speed_kwargs,
+        )
+
+        correct_X_shape = (num_optima,) + batch_shape + (dims,)
+        correct_f_shape = (num_optima,) + batch_shape + (2,)
+        self.assertEqual(X_opt.shape, correct_X_shape)
+        self.assertEqual(f_opt.shape, correct_f_shape)
+        self.assertTrue(torch.all(X_opt >= bounds[0]))
+        self.assertTrue(torch.all(X_opt <= bounds[1]))
+
+        X_opt, f_opt = optimize_posterior_samples(
+            paths=paths,
+            bounds=bounds,
+            sample_transform=posterior_transform.evaluate,
+            return_transformed=True,
+            **for_testing_speed_kwargs,
+        )
+        correct_f_shape = (num_optima,) + batch_shape + (1,)
+        self.assertEqual(f_opt.shape, correct_f_shape)
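
Usage sketch (illustrative, not part of the patch): with the `maximize` flag removed, minimization is now expressed by negating the output through a `ScalarizedPosteriorTransform` (or a `LinearMCObjective`), as the updated tests do. The toy model, data, and variable names below are hypothetical placeholders; only the `get_optimal_samples` signature, `return_transformed` semantics, and the transform classes come from this diff.

import torch
from botorch.acquisition.objective import ScalarizedPosteriorTransform
from botorch.acquisition.utils import get_optimal_samples
from botorch.models import SingleTaskGP

# Hypothetical toy data: a single-output GP on the unit cube.
train_X = torch.rand(20, 3, dtype=torch.float64)
train_Y = torch.sin(train_X).sum(dim=-1, keepdim=True)
model = SingleTaskGP(train_X=train_X, train_Y=train_Y)
bounds = torch.tensor([[0.0] * 3, [1.0] * 3], dtype=torch.float64)

# Maximization (the default, equivalent to the old maximize=True).
X_max, f_max = get_optimal_samples(model=model, bounds=bounds, num_optima=8)

# Minimization: negate the output instead of passing maximize=False.
neg = ScalarizedPosteriorTransform(weights=-torch.ones(1, dtype=torch.float64))
X_min, f_min = get_optimal_samples(
    model=model, bounds=bounds, num_optima=8, posterior_transform=neg
)
# f_min holds untransformed model outputs at the minimizers; pass
# return_transformed=True to get the negated (transformed) values instead.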