diff --git a/botorch/acquisition/thompson_sampling.py b/botorch/acquisition/thompson_sampling.py
index ad596bc2d5..04c0d8f934 100644
--- a/botorch/acquisition/thompson_sampling.py
+++ b/botorch/acquisition/thompson_sampling.py
@@ -6,10 +6,16 @@
 import torch
 from botorch.acquisition.analytic import AcquisitionFunction
-from botorch.acquisition.objective import PosteriorTransform
+from botorch.acquisition.objective import (
+    IdentityMCObjective,
+    MCAcquisitionObjective,
+    PosteriorTransform,
+)
+from botorch.exceptions.errors import UnsupportedError
+from botorch.models.deterministic import GenericDeterministicModel
 from botorch.models.model import Model
 from botorch.sampling.pathwise.posterior_samplers import get_matheron_path_model
-from botorch.utils.transforms import t_batch_mode_transform
+from botorch.utils.transforms import is_ensemble, t_batch_mode_transform
 from torch import Tensor
@@ -32,55 +38,151 @@ class PathwiseThompsonSampling(AcquisitionFunction):
     def __init__(
         self,
         model: Model,
+        objective: MCAcquisitionObjective | None = None,
         posterior_transform: PosteriorTransform | None = None,
     ) -> None:
         r"""Single-outcome TS.

+        If using a multi-output `model`, the acquisition function requires either an
+        `objective` or a `posterior_transform` that transforms the multi-output
+        posterior samples to single-output posterior samples.
+
         Args:
             model: A fitted GP model.
-            posterior_transform: A PosteriorTransform. If using a multi-output model,
-                a PosteriorTransform that transforms the multi-output posterior into a
-                single-output posterior is required.
+            objective: The MCAcquisitionObjective under which the samples are
+                evaluated. Defaults to `IdentityMCObjective()`.
+            posterior_transform: An optional PosteriorTransform.
         """
-        if model._is_fully_bayesian:
-            raise NotImplementedError(
-                "PathwiseThompsonSampling is not supported for fully Bayesian models",
-            )
         super().__init__(model=model)
         self.batch_size: int | None = None
-
-    def redraw(self) -> None:
+        self.samples: GenericDeterministicModel | None = None
+        self.ensemble_indices: Tensor | None = None
+
+        # NOTE: This conditional block is copied from MCAcquisitionFunction; we should
+        # consider inheriting from it to get, e.g., the X_pending logic as well.
+        if objective is None and model.num_outputs != 1:
+            if posterior_transform is None:
+                raise UnsupportedError(
+                    "Must specify an objective or a posterior transform when using "
+                    "a multi-output model."
+                )
+            elif not posterior_transform.scalarize:
+                raise UnsupportedError(
+                    "If using a multi-output model without an objective, "
+                    "posterior_transform must scalarize the output."
+                )
+        if objective is None:
+            objective = IdentityMCObjective()
+        self.objective = objective
+        self.posterior_transform = posterior_transform
+
+    def redraw(self, batch_size: int) -> None:
+        sample_shape = (batch_size,)
         self.samples = get_matheron_path_model(
-            model=self.model, sample_shape=torch.Size([self.batch_size])
+            model=self.model, sample_shape=torch.Size(sample_shape)
         )
+        if is_ensemble(self.model):
+            # the ensembling dimension is assumed to be part of the batch shape
+            model_batch_shape = self.model.batch_shape
+            if len(model_batch_shape) > 1:
+                raise NotImplementedError(
+                    "Ensemble models with more than one ensemble dimension are not "
+                    "yet supported."
+                )
+            num_ensemble = model_batch_shape[0]
+            # ensemble_indices is cached here to ensure that the acquisition function
+            # becomes deterministic for the same input and can be optimized with L-BFGS.
+            # ensemble_indices is used in select_from_ensemble_models.
+            self.ensemble_indices = torch.randint(
+                0,
+                num_ensemble,
+                (*sample_shape, 1, self.model.num_outputs),
+            )

     @t_batch_mode_transform()
     def forward(self, X: Tensor) -> Tensor:
         r"""Evaluate the pathwise posterior sample draws on the candidate set X.

         Args:
-            X: A `(b1 x ... bk) x 1 x d`-dim batched tensor of `d`-dim design points.
+            X: A `batch_shape x q x d`-dim batched tensor of `d`-dim design points.

         Returns:
-            A `(b1 x ... bk) x [num_models for fully bayesian]`-dim tensor of
-            evaluations on the posterior sample draws.
+            A `batch_shape`-dim tensor of evaluations on the posterior sample draws,
+            where the samples are summed over the q-batch dimension.
         """
-        batch_size = X.shape[-2]
-        q_dim = -2
+        objective_values = self._pathwise_forward(X)  # batch_shape x q
+        # NOTE: The current implementation sums over the q-batch dimension, which means
+        # that we are optimizing the sum of independent Thompson samples. In the future,
+        # we can leverage *batched* L-BFGS optimization, rather than summing over the q
+        # dimension, which will guarantee descent steps for all members of the batch
+        # through batch-member-specific learning rate selection.
+        return objective_values.sum(-1)  # batch_shape
+
+    def _pathwise_forward(self, X: Tensor) -> Tensor:
+        """Evaluate the pathwise posterior sample draws on the candidate set X.
+
+        Args:
+            X: A `batch_shape x q x d`-dim batched tensor of `d`-dim design points.
+
+        Returns:
+            A `batch_shape x q`-dim tensor of evaluations on the posterior sample draws.
+        """
+        batch_size = X.shape[-2]
         # batch_shape x q x 1 x d
         X = X.unsqueeze(-2)
-        if self.batch_size is None:
+        if self.samples is None:
             self.batch_size = batch_size
-            self.redraw()
-        elif self.batch_size != batch_size:
+            self.redraw(batch_size=batch_size)
+
+        if self.batch_size != batch_size:
             raise ValueError(
                 BATCH_SIZE_CHANGE_ERROR.format(self.batch_size, batch_size)
             )
+        # batch_shape x q [x num_ensembles] x 1 x m
+        posterior_values = self.samples(X)
+        # batch_shape x q [x num_ensembles] x m
+        posterior_values = posterior_values.squeeze(-2)

-        # posterior_values.shape post-squeeze:
-        # batch_shape x q x m
-        posterior_values = self.samples(X).squeeze(-2)
-        # sum over batch dim and squeeze num_objectives dim (-1)
-        return posterior_values.sum(q_dim).squeeze(-1)
+        posterior_values = self.select_from_ensemble_models(values=posterior_values)
+
+        if self.posterior_transform:
+            posterior_values = self.posterior_transform.evaluate(posterior_values)
+        # objective removes the `m` dimension
+        objective_values = self.objective(posterior_values)  # batch_shape x q
+        return objective_values
+
+    def select_from_ensemble_models(self, values: Tensor) -> Tensor:
+        """Subselects the value associated with a single sample of the ensemble for
+        each element of `values`, removing the ensemble dimension from the result.
+
+        NOTE: 1) uses `self.model` and `is_ensemble` to determine whether or not an
+        ensembling dimension is present. 2) uses `self.ensemble_indices` to select the
+        value associated with a single sample in the ensemble. `ensemble_indices`
+        contains uniformly sampled random indices for each element of the ensemble, and
+        is cached to make the evaluation of the acquisition function deterministic.
+
+        Args:
+            values: A `batch_shape x num_draws x q [x num_ensemble] x m`-dim Tensor.
+
+        Returns:
+            A `batch_shape x num_draws x q x m`-dim Tensor where each element contains
+            a single sample from the ensemble, selected with `self.ensemble_indices`.
+        """
+        if not is_ensemble(self.model):
+            return values
+
+        ensemble_dim = -2
+        # `ensemble_indices` are fixed so that the acquisition function becomes
+        # deterministic for the same input and can be optimized with L-BFGS.
+        # ensemble indices have shape num_paths x 1 x m
+        self.ensemble_indices = self.ensemble_indices.to(device=values.device)
+        index = self.ensemble_indices
+        input_batch_shape = values.shape[:-3]
+        index = index.expand(*input_batch_shape, *index.shape)
+        # values is batch_shape x q x num_ensemble x m
+        values_wo_ensemble = torch.gather(values, dim=ensemble_dim, index=index)
+        return values_wo_ensemble.squeeze(
+            ensemble_dim
+        )  # removing the ensemble dimension
diff --git a/botorch/acquisition/utils.py b/botorch/acquisition/utils.py
index a930488680..00fbe35291 100644
--- a/botorch/acquisition/utils.py
+++ b/botorch/acquisition/utils.py
@@ -575,7 +575,11 @@ def get_optimal_samples(
     else:
         sample_transform = None

-    paths = get_matheron_path_model(model=model, sample_shape=torch.Size([num_optima]))
+    paths = get_matheron_path_model(
+        model=model,
+        sample_shape=torch.Size([num_optima]),
+        ensemble_as_batch=True,
+    )
     optimal_inputs, optimal_outputs = optimize_posterior_samples(
         paths=paths,
         bounds=bounds,
diff --git a/botorch/models/deterministic.py b/botorch/models/deterministic.py
index 7ec568caed..4be43664d6 100644
--- a/botorch/models/deterministic.py
+++ b/botorch/models/deterministic.py
@@ -64,7 +64,12 @@ class GenericDeterministicModel(DeterministicModel):
         >>> model = GenericDeterministicModel(f)
     """

-    def __init__(self, f: Callable[[Tensor], Tensor], num_outputs: int = 1) -> None:
+    def __init__(
+        self,
+        f: Callable[[Tensor], Tensor],
+        num_outputs: int = 1,
+        batch_shape: torch.Size | None = None,
+    ) -> None:
         r"""
         Args:
             f: A callable mapping a `batch_shape x n x d`-dim input tensor `X`
@@ -75,6 +80,12 @@ def __init__(self, f: Callable[[Tensor], Tensor], num_outputs: int = 1) -> None:
         super().__init__()
         self._f = f
         self._num_outputs = num_outputs
+        self._batch_shape = batch_shape
+
+    @property
+    def batch_shape(self) -> torch.Size | None:
+        r"""The batch shape of the model."""
+        return self._batch_shape

     def subset_output(self, idcs: list[int]) -> GenericDeterministicModel:
         r"""Subset the model along the output dimension.
@@ -100,7 +111,19 @@ def forward(self, X: Tensor) -> Tensor:
         Returns:
             A `batch_shape x n x m`-dimensional output tensor.
         """
-        return self._f(X)
+        Y = self._f(X)
+        batch_shape = Y.shape[:-2]
+        # batch_shape is optional, to preserve the old behavior of not specifying it
+        if self.batch_shape is not None:
+            try:
+                torch.broadcast_shapes(self.batch_shape, batch_shape)
+            except RuntimeError:
+                raise ValueError(
+                    "GenericDeterministicModel was initialized with batch_shape="
+                    f"{self.batch_shape} but the output of f has a batch_shape="
+                    f"{batch_shape} that is not broadcastable with it."
+ ) + return Y class AffineDeterministicModel(DeterministicModel): diff --git a/botorch/sampling/pathwise/paths.py b/botorch/sampling/pathwise/paths.py index 0b64792502..175739112a 100644 --- a/botorch/sampling/pathwise/paths.py +++ b/botorch/sampling/pathwise/paths.py @@ -6,7 +6,7 @@ from __future__ import annotations -from abc import ABC +from abc import ABC, abstractmethod from collections.abc import Callable, Iterable, Iterator, Mapping from typing import Any @@ -24,6 +24,16 @@ class SamplePath(ABC, TransformedModuleMixin, Module): r"""Abstract base class for Botorch sample paths.""" + @abstractmethod + def set_ensemble_as_batch(self, ensemble_as_batch: bool) -> None: + """Sets whether the ensemble dimension is considered as a batch dimension. + + Args: + ensemble_as_batch: Whether the ensemble dimension is considered as a batch + dimension or not. + """ + pass # pragma: no cover + class PathDict(SamplePath): r"""A dictionary of SamplePaths.""" @@ -84,6 +94,16 @@ def __getitem__(self, key: str) -> SamplePath: def __setitem__(self, key: str, val: SamplePath) -> None: self.paths[key] = val + def set_ensemble_as_batch(self, ensemble_as_batch: bool) -> None: + """Sets whether the ensemble dimension is considered as a batch dimension. + + Args: + ensemble_as_batch: Whether the ensemble dimension is considered as a batch + dimension or not. + """ + for path in self.paths.values(): + path.set_ensemble_as_batch(ensemble_as_batch) + class PathList(SamplePath): r"""A list of SamplePaths.""" @@ -136,6 +156,16 @@ def __getitem__(self, key: int) -> SamplePath: def __setitem__(self, key: int, val: SamplePath) -> None: self.paths[key] = val + def set_ensemble_as_batch(self, ensemble_as_batch: bool) -> None: + """Sets whether the ensemble dimension is considered as a batch dimension. + + Args: + ensemble_as_batch: Whether the ensemble dimension is considered as a batch + dimension or not. + """ + for path in self.paths: + path.set_ensemble_as_batch(ensemble_as_batch) + class GeneralizedLinearPath(SamplePath): r"""A sample path in the form of a generalized linear model.""" @@ -147,6 +177,8 @@ def __init__( bias_module: Module | None = None, input_transform: TInputTransform | None = None, output_transform: TOutputTransform | None = None, + is_ensemble: bool = False, + ensemble_as_batch: bool = False, ): r"""Initializes a GeneralizedLinearPath instance. @@ -157,10 +189,17 @@ def __init__( Args: feature_map: A map used to featurize the module's inputs. - weight: A tensor of weights used to combine input features. + weight: A tensor of weights used to combine input features. When generated + with `draw_kernel_feature_paths`, `weight` is a Tensor with the shape + `sample_shape x batch_shape x num_outputs`. bias_module: An optional module used to define additive offsets. input_transform: An optional input transform for the module. output_transform: An optional output transform for the module. + is_ensemble: Whether the associated model is an ensemble model or not. + ensemble_as_batch: Whether the ensemble dimension is added as a batch + dimension or not. If `True`, the ensemble dimension is treated as a + batch dimension, which allows for the joint optimization of all members + of the ensemble. 
""" super().__init__() self.feature_map = feature_map @@ -170,8 +209,36 @@ def __init__( self.bias_module = bias_module self.input_transform = input_transform self.output_transform = output_transform + self.is_ensemble = is_ensemble + self.ensemble_as_batch = ensemble_as_batch def forward(self, x: Tensor, **kwargs) -> Tensor: + """Evaluates the path. + + Args: + x: The input tensor of shape `batch_shape x [num_ensemble x] q x d`, where + `num_ensemble` is the number of ensemble members and is required to + *only* be included if `is_ensemble=True` and `ensemble_as_batch=True`. + kwargs: Additional keyword arguments passed to the feature map. + + Returns: + A tensor of shape `batch_shape x [num_ensemble x] q x m`, where `m` is the + number of outputs, where `num_ensemble` is only included if `is_ensemble` + is `True`, and regardless of whether `ensemble_as_batch` is `True` or not. + """ + if self.is_ensemble and not self.ensemble_as_batch: + # assuming that the ensembling dimension is added after (n, d), but + # before the other batch dimensions, starting from the left. + x = x.unsqueeze(-3) feat = self.feature_map(x, **kwargs) out = (feat @ self.weight.unsqueeze(-1)).squeeze(-1) return out if self.bias_module is None else out + self.bias_module(x) + + def set_ensemble_as_batch(self, ensemble_as_batch: bool) -> None: + """Sets whether the ensemble dimension is considered as a batch dimension. + + Args: + ensemble_as_batch: Whether the ensemble dimension is considered as a batch + dimension or not. + """ + self.ensemble_as_batch = ensemble_as_batch diff --git a/botorch/sampling/pathwise/posterior_samplers.py b/botorch/sampling/pathwise/posterior_samplers.py index 33c8d5e029..09d14f114a 100644 --- a/botorch/sampling/pathwise/posterior_samplers.py +++ b/botorch/sampling/pathwise/posterior_samplers.py @@ -87,7 +87,7 @@ def __init__( def get_matheron_path_model( - model: GP, sample_shape: Size | None = None + model: GP, sample_shape: Size | None = None, ensemble_as_batch: bool = False ) -> GenericDeterministicModel: r"""Generates a deterministic model using a single Matheron path drawn from the model's posterior. @@ -102,12 +102,19 @@ def get_matheron_path_model( deterministic model will behave as if the `sample_shape` is prepended to the `batch_shape` of the model. The inputs used to evaluate the model must be adjusted to match. + ensemble_as_batch: If True, and model is an ensemble model, the resuling path + model will treat the ensemble dimension as a batch dimension, which means + that its inputs have to contain the ensemble dimension in the -3 position, + i.e. `batch_shape x ensemble_size x q x d`. This is used when optimizing the + paths of all members of an ensemble jointly, with distinct optima for each + member of the ensemble. Returns: A deterministic model that evaluates the Matheron path. """ sample_shape = Size() if sample_shape is None else sample_shape path = draw_matheron_paths(model, sample_shape=sample_shape) + path.set_ensemble_as_batch(ensemble_as_batch) num_outputs = model.num_outputs if isinstance(model, ModelList) and len(model.models) != num_outputs: raise UnsupportedError("A model-list of multi-output models is not supported.") @@ -121,7 +128,7 @@ def f(X: Tensor) -> Tensor: the model batch shape. Returns: - The output tensor of shape `batch_shape x q x m`. + The output tensor of shape `[sample_shape x] batch_shape x q x m`. """ if num_outputs == 1: # For single-output, we lack the output dimension. Add one. 
@@ -137,7 +144,11 @@ def f(X: Tensor) -> Tensor:
             res = path(X.unsqueeze(-3)).transpose(-1, -2)
         return res

-    path_model = GenericDeterministicModel(f=f, num_outputs=num_outputs)
+    path_model = GenericDeterministicModel(
+        f=f,
+        num_outputs=num_outputs,
+        batch_shape=sample_shape + model.batch_shape,
+    )
     path_model._is_ensemble = is_ensemble(model) or len(sample_shape) > 0
     return path_model
diff --git a/botorch/sampling/pathwise/prior_samplers.py b/botorch/sampling/pathwise/prior_samplers.py
index 9fe7bb46ba..37e152567c 100644
--- a/botorch/sampling/pathwise/prior_samplers.py
+++ b/botorch/sampling/pathwise/prior_samplers.py
@@ -24,6 +24,7 @@
 )
 from botorch.utils.dispatcher import Dispatcher
 from botorch.utils.sampling import draw_sobol_normal_samples
+from botorch.utils.transforms import is_ensemble
 from gpytorch.kernels import Kernel
 from gpytorch.models import ApproximateGP, ExactGP, GP
 from gpytorch.variational import _VariationalStrategy
@@ -61,6 +62,7 @@ def _draw_kernel_feature_paths_fallback(
     input_transform: TInputTransform | None = None,
     output_transform: TOutputTransform | None = None,
     weight_generator: Callable[[Size], Tensor] | None = None,
+    is_ensemble: bool = False,
 ) -> GeneralizedLinearPath:
     # Generate a kernel feature map
     feature_map = map_generator(
@@ -71,6 +73,7 @@ def _draw_kernel_feature_paths_fallback(

     # Sample random weights with which to combine kernel features
     if weight_generator is None:
+        # weight is sample_shape x batch_shape x num_outputs
         weight = draw_sobol_normal_samples(
             n=sample_shape.numel() * covar_module.batch_shape.numel(),
             d=feature_map.num_outputs,
@@ -89,6 +92,7 @@ def _draw_kernel_feature_paths_fallback(
         bias_module=mean_module,
         input_transform=input_transform,
         output_transform=output_transform,
+        is_ensemble=is_ensemble,
     )


@@ -103,6 +107,7 @@ def _draw_kernel_feature_paths_ExactGP(
         covar_module=model.covar_module,
         input_transform=get_input_transform(model),
         output_transform=get_output_transform(model),
+        is_ensemble=is_ensemble(model),
         **kwargs,
     )

@@ -150,5 +155,6 @@ def _draw_kernel_feature_paths_ApproximateGP_fallback(
         num_inputs=num_inputs,
         mean_module=model.mean_module,
         covar_module=model.covar_module,
+        is_ensemble=is_ensemble(model),
         **kwargs,
     )
diff --git a/botorch/sampling/pathwise/update_strategies.py b/botorch/sampling/pathwise/update_strategies.py
index 7d92e04a1a..f78cb5535f 100644
--- a/botorch/sampling/pathwise/update_strategies.py
+++ b/botorch/sampling/pathwise/update_strategies.py
@@ -13,6 +13,7 @@
 from typing import Any

 import torch
+
 from botorch.models.approximate_gp import ApproximateGPyTorchModel
 from botorch.models.transforms.input import InputTransform
 from botorch.sampling.pathwise.features import KernelEvaluationMap
@@ -24,6 +25,7 @@
     TInputTransform,
 )
 from botorch.utils.dispatcher import Dispatcher
+from botorch.utils.transforms import is_ensemble
 from botorch.utils.types import DEFAULT
 from gpytorch.kernels.kernel import Kernel
 from gpytorch.likelihoods import _GaussianLikelihoodBase, Likelihood
@@ -79,6 +81,7 @@ def _gaussian_update_exact(
     noise_covariance: Tensor | LinearOperator | None = None,
     scale_tril: Tensor | LinearOperator | None = None,
     input_transform: TInputTransform | None = None,
+    is_ensemble: bool = False,
 ) -> GeneralizedLinearPath:
     # Prepare Cholesky factor of `Cov(y, y)` and noise sample values as needed
     if isinstance(noise_covariance, (NoneType, ZeroLinearOperator)):
@@ -103,7 +106,9 @@ def _gaussian_update_exact(
         points=points,
         input_transform=input_transform,
     )
-    return GeneralizedLinearPath(feature_map=feature_map, weight=weight.squeeze(-1))
+    return GeneralizedLinearPath(
+        feature_map=feature_map, weight=weight.squeeze(-1), is_ensemble=is_ensemble
+    )


 @GaussianUpdate.register(ExactGP, _GaussianLikelihoodBase)
@@ -134,6 +139,7 @@ def _gaussian_update_ExactGP(
         noise_covariance=noise_covariance,
         scale_tril=scale_tril,
         input_transform=get_input_transform(model),
+        is_ensemble=is_ensemble(model),
     )


@@ -194,4 +200,5 @@ def _gaussian_update_ApproximateGP_VariationalStrategy(
         sample_values=sample_values,
         scale_tril=L,
         input_transform=input_transform,
+        is_ensemble=is_ensemble(model),
     )
diff --git a/botorch/utils/sampling.py b/botorch/utils/sampling.py
index 7066578b9d..80d85250b1 100644
--- a/botorch/utils/sampling.py
+++ b/botorch/utils/sampling.py
@@ -996,6 +996,11 @@ def sparse_to_dense_constraints(
     return A, b


+# This is only used in get_optimal_samples, which in turn is only used in the input
+# constructors of
+# 1) qJointEntropySearch,
+# 2) qSelfCorrectingBayesianOptimization, and
+# 3) qTestSetInformationGain.
 def optimize_posterior_samples(
     paths: GenericDeterministicModel,
     bounds: Tensor,
@@ -1037,7 +1042,7 @@ def path_func(x) -> Tensor:
         bounds=bounds,
     )
     # queries all samples on all candidates - output shape
-    # raw_samples * num_optima * num_models
+    # raw_samples x num_optima x num_models
     candidate_queries = path_func(candidate_set)
     argtop_k = torch.topk(candidate_queries, num_restarts, dim=-1).indices
     X_top_k = candidate_set[argtop_k, :]
@@ -1056,8 +1061,8 @@ def path_func(x) -> Tensor:
     # For each sample (and possibly for every model in the batch of models), this
     # retrieves the argmax. We flatten, pick out the indices and then reshape to
     # the original batch shapes (so instead of picking out the argmax of a
-    # (3, 7, num_restarts, D)) along the num_restarts dim, we pick it out of a
-    # (21, num_restarts, D)
+    # (num_optima, num_models, num_restarts, D)-shaped Tensor along the num_restarts
+    # dim, we pick it out of (num_optima * num_models, num_restarts, D)
     final_shape = candidate_queries.shape[:-1]
     X_opt = X_top_k.reshape(final_shape.numel(), num_restarts, -1)[
         torch.arange(final_shape.numel()), arg_opt.flatten()
diff --git a/botorch/utils/test_helpers.py b/botorch/utils/test_helpers.py
index 1d7ef928b6..c6d4be6e62 100644
--- a/botorch/utils/test_helpers.py
+++ b/botorch/utils/test_helpers.py
@@ -39,19 +39,6 @@
 from torch.nn.functional import pad


-def _get_mcmc_samples(num_samples: int, dim: int, infer_noise: bool, **tkwargs):
-    mcmc_samples = {
-        "lengthscale": 1 + torch.rand(num_samples, 1, dim, **tkwargs),
-        "outputscale": 1 + torch.rand(num_samples, **tkwargs),
-        "mean": torch.randn(num_samples, **tkwargs),
-    }
-    if infer_noise:
-        mcmc_samples["noise"] = torch.rand(num_samples, 1, **tkwargs)
-        mcmc_samples["lengthscale"] = mcmc_samples["lengthscale"]
-
-    return mcmc_samples
-
-
 def get_model(
     train_X: Tensor,
     train_Y: Tensor,
@@ -93,8 +80,8 @@ def get_fully_bayesian_model(
     train_X: Tensor,
     train_Y: Tensor,
     num_models: int,
-    standardize_model: bool,
-    infer_noise: bool,
+    standardize_model: bool = False,
+    infer_noise: bool = True,
     **tkwargs: Any,
 ) -> SaasFullyBayesianSingleTaskGP:
     num_objectives = train_Y.shape[-1]
@@ -122,6 +109,20 @@ def get_fully_bayesian_model(
     return model


+def _get_mcmc_samples(
+    num_samples: int, dim: int, infer_noise: bool, **tkwargs
+) -> dict[str, Tensor]:
+    mcmc_samples = {
+        "lengthscale": 1 + torch.rand(num_samples, 1, dim, **tkwargs),
+        "outputscale": 1 + torch.rand(num_samples, **tkwargs),
+        "mean": torch.randn(num_samples, **tkwargs),
+    }
+    if infer_noise:
+        mcmc_samples["noise"] = torch.rand(num_samples, 1, **tkwargs)
+
+    return mcmc_samples
+
+
 def get_fully_bayesian_model_list(
     train_X: Tensor,
     train_Y: Tensor,
diff --git a/test/acquisition/test_thompson_sampling.py b/test/acquisition/test_thompson_sampling.py
index 92ee6063e3..54619ce5b2 100644
--- a/test/acquisition/test_thompson_sampling.py
+++ b/test/acquisition/test_thompson_sampling.py
@@ -6,47 +6,23 @@

 from itertools import product

+from unittest import mock
+from unittest.mock import PropertyMock
+
 import torch
+from botorch.acquisition.objective import (
+    IdentityMCObjective,
+    ScalarizedPosteriorTransform,
+)
 from botorch.acquisition.thompson_sampling import PathwiseThompsonSampling
-from botorch.models.fully_bayesian import SaasFullyBayesianSingleTaskGP
+from botorch.exceptions.errors import UnsupportedError
 from botorch.models.model import Model
-from botorch.utils.test_helpers import get_model
+from botorch.utils.test_helpers import get_fully_bayesian_model, get_model
 from botorch.utils.testing import BotorchTestCase


-def _get_mcmc_samples(num_samples: int, dim: int, infer_noise: bool, **tkwargs):
-    mcmc_samples = {
-        "lengthscale": torch.rand(num_samples, 1, dim, **tkwargs),
-        "outputscale": torch.rand(num_samples, **tkwargs),
-        "mean": torch.randn(num_samples, **tkwargs),
-    }
-    if infer_noise:
-        mcmc_samples["noise"] = torch.rand(num_samples, 1, **tkwargs)
-    return mcmc_samples
-
-
-def get_fully_bayesian_model(
-    train_X,
-    train_Y,
-    num_models,
-    **tkwargs,
-):
-    model = SaasFullyBayesianSingleTaskGP(
-        train_X=train_X,
-        train_Y=train_Y,
-    )
-    mcmc_samples = _get_mcmc_samples(
-        num_samples=num_models,
-        dim=train_X.shape[-1],
-        infer_noise=True,
-        **tkwargs,
-    )
-    model.load_mcmc_samples(mcmc_samples)
-    return model
-
-
 class TestPathwiseThompsonSampling(BotorchTestCase):
-    def _test_thompson_sampling_base(self, model: Model):
+    def _test_thompson_sampling_base(self, model: Model) -> None:
         acq = PathwiseThompsonSampling(
             model=model,
         )
@@ -59,11 +35,43 @@ def _test_thompson_sampling_base(self, model: Model):
         acq_pass1 = acq(test_X)
         self.assertAllClose(acq_pass1, acq(test_X))

-        acq.redraw()
+        acq.redraw(batch_size=acq.batch_size)
         acq_pass2 = acq(test_X)
         self.assertFalse(torch.allclose(acq_pass1, acq_pass2))

-    def _test_thompson_sampling_batch(self, model: Model):
+    def _test_thompson_sampling_multi_output(self, model: Model) -> None:
+        # a multi-output model requires an objective or a posterior transform
+        with self.assertRaisesRegex(
+            UnsupportedError,
+            "Must specify an objective or a posterior transform when using ",
+        ):
+            PathwiseThompsonSampling(model=model)
+
+        X_observed = model.train_inputs[0]
+        input_dim = X_observed.shape[-1]
+        tkwargs = {"device": self.device, "dtype": X_observed.dtype}
+        test_X = torch.rand(4, 1, input_dim, **tkwargs)
+        weights = torch.ones(2, **tkwargs)
+        posterior_transform = ScalarizedPosteriorTransform(weights=weights)
+        acqf = PathwiseThompsonSampling(
+            model=model, posterior_transform=posterior_transform
+        )
+        self.assertIsInstance(acqf.objective, IdentityMCObjective)
+        # testing that the acquisition function is deterministic and executes
+        # with the posterior transform
+        acq_val = acqf(test_X)
+        acq_val_2 = acqf(test_X)
+        self.assertAllClose(acq_val, acq_val_2)
+
+        posterior_transform.scalarize = False
+        with self.assertRaisesRegex(
+            UnsupportedError, "posterior_transform must scalarize the output"
+        ):
+            PathwiseThompsonSampling(
+                model=model, posterior_transform=posterior_transform
            )
+
+    def _test_thompson_sampling_batch(self, model: Model) -> None:
         X_observed = model.train_inputs[0]
         input_dim = X_observed.shape[-1]
         batch_acq = PathwiseThompsonSampling(
             model=model,
         )
@@ -92,27 +100,48 @@ def _test_thompson_sampling_batch(self, model: Model):

     def test_thompson_sampling_single_task(self):
         input_dim = 2
-        num_objectives = 1
         for dtype, standardize_model in product(
             (torch.float32, torch.float64), (True, False)
         ):
             tkwargs = {"device": self.device, "dtype": dtype}
             train_X = torch.rand(4, input_dim, **tkwargs)
+            num_objectives = 1
             train_Y = 10 * torch.rand(4, num_objectives, **tkwargs)
             model = get_model(train_X, train_Y, standardize_model=standardize_model)
             self._test_thompson_sampling_base(model)
             self._test_thompson_sampling_batch(model)

+            # multi-output model
+            num_objectives = 2
+            train_Y = 10 * torch.rand(4, num_objectives, **tkwargs)
+            model = get_model(train_X, train_Y, standardize_model=standardize_model)
+            self._test_thompson_sampling_multi_output(model)
+
     def test_thompson_sampling_fully_bayesian(self):
         input_dim = 2
         num_objectives = 1
         tkwargs = {"device": self.device, "dtype": torch.float64}
         train_X = torch.rand(4, input_dim, **tkwargs)
         train_Y = 10 * torch.rand(4, num_objectives, **tkwargs)
         fb_model = get_fully_bayesian_model(train_X, train_Y, num_models=3, **tkwargs)
-        with self.assertRaisesRegex(
-            NotImplementedError,
-            "PathwiseThompsonSampling is not supported for fully Bayesian models",
-        ):
-            PathwiseThompsonSampling(model=fb_model)
+        acqf = PathwiseThompsonSampling(model=fb_model)
+        acqf_vals = acqf(train_X)
+        acqf_vals_2 = acqf(train_X)
+        self.assertAllClose(acqf_vals, acqf_vals_2)
+
+        batch_shape = (2, 5)
+        test_X = torch.randn(*batch_shape, *train_X.shape, **tkwargs)
+        batched_output = acqf(test_X)
+        self.assertEqual(batched_output.shape, batch_shape)
+        batched_output_2 = acqf(test_X)
+        self.assertAllClose(batched_output, batched_output_2)
+
+        with mock.patch.object(
+            type(acqf.model), "batch_shape", new_callable=PropertyMock
+        ) as mock_batch_shape:
+            mock_batch_shape.return_value = (2, 3)
+            with self.assertRaisesRegex(
+                NotImplementedError,
+                "Ensemble models with more than one ensemble dimension",
+            ):
+                acqf.redraw(batch_size=2)
diff --git a/test/models/test_deterministic.py b/test/models/test_deterministic.py
index 47e440e9d6..30c8986a3f 100644
--- a/test/models/test_deterministic.py
+++ b/test/models/test_deterministic.py
@@ -58,7 +58,8 @@ def f(X):

         model = GenericDeterministicModel(f)
         self.assertEqual(model.num_outputs, 1)
-        X = torch.rand(3, 2)
+        d = 2
+        X = torch.rand(3, d)
         # basic test
         p = model.posterior(X)
         self.assertIsInstance(p, EnsemblePosterior)
@@ -81,6 +82,25 @@ def f(X):
         p_sub = subset_model.posterior(X)
         self.assertTrue(torch.equal(p_sub.mean, X[..., [0]]))

+        # testing batched model
+        batch_shape = torch.Size([2, 4])
+        batch_coefficients = torch.rand(*batch_shape, 1, d)
+
+        def batched_f(X):
+            return (X * batch_coefficients).sum(dim=-1, keepdim=True)
+
+        model = GenericDeterministicModel(batched_f, batch_shape=batch_shape)
+        Y = model(X)
+        self.assertEqual(Y.shape, torch.Size([2, 4, 3, 1]))
+
+        # testing with wrong batch shape
+        model = GenericDeterministicModel(batched_f, batch_shape=torch.Size([2]))
+
+        with self.assertRaisesRegex(
+            ValueError, "GenericDeterministicModel was initialized with batch_shape="
+        ):
+            model(X)
+
     def test_AffineDeterministicModel(self):
         # test error on bad shape of a
         with self.assertRaises(ValueError):
diff --git a/test/sampling/pathwise/test_paths.py b/test/sampling/pathwise/test_paths.py
index 3b24430f53..207502ae04 100644
--- a/test/sampling/pathwise/test_paths.py
+++ b/test/sampling/pathwise/test_paths.py
@@ -14,9 +14,14 @@


 class IdentityPath(SamplePath):
+    ensemble_as_batch: bool = False
+
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         return x

+    def set_ensemble_as_batch(self, ensemble_as_batch: bool) -> None:
+        self.ensemble_as_batch = ensemble_as_batch
+

 class TestGenericPaths(BotorchTestCase):
     def test_path_dict(self):
@@ -48,6 +53,12 @@ def test_path_dict(self):
         self.assertEqual(output.shape, (2,) + x.shape)
         self.assertTrue(output.eq(x).all())

+        A.set_ensemble_as_batch(True)
+        self.assertTrue(A.ensemble_as_batch)
+
+        A.set_ensemble_as_batch(False)
+        self.assertFalse(A.ensemble_as_batch)
+
         # Test `dict` methods
         self.assertEqual(len(path_dict), 2)
         for key, val, (key_0, val_0), (key_1, val_1), key_2 in zip(
diff --git a/test/sampling/pathwise/test_posterior_samplers.py b/test/sampling/pathwise/test_posterior_samplers.py
index f0ff1a79ed..0fe8acf79c 100644
--- a/test/sampling/pathwise/test_posterior_samplers.py
+++ b/test/sampling/pathwise/test_posterior_samplers.py
@@ -18,8 +18,14 @@
 from botorch.sampling.pathwise import draw_matheron_paths, MatheronPath, PathList
 from botorch.sampling.pathwise.posterior_samplers import get_matheron_path_model
 from botorch.sampling.pathwise.utils import get_train_inputs
-from botorch.utils.test_helpers import get_sample_moments, standardize_moments
+from botorch.utils.test_helpers import (
+    get_fully_bayesian_model,
+    get_sample_moments,
+    standardize_moments,
+)
+
 from botorch.utils.testing import BotorchTestCase
+from botorch.utils.transforms import is_ensemble
 from gpytorch.kernels import MaternKernel, ScaleKernel
 from torch import Size
 from torch.nn.functional import pad
@@ -131,15 +137,16 @@ def _test_draw_matheron_paths(self, model, paths, sample_shape, atol=3):

     def test_get_matheron_path_model(self) -> None:
         model_list = ModelListGP(self.inferred_noise_gp, self.observed_noise_gp)
+        n, d, m = 5, 2, 3
         moo_model = SingleTaskGP(
-            train_X=torch.rand(5, 2, **self.tkwargs),
-            train_Y=torch.rand(5, 2, **self.tkwargs),
+            train_X=torch.rand(n, d, **self.tkwargs),
+            train_Y=torch.rand(n, m, **self.tkwargs),
         )

-        test_X = torch.rand(5, 2, **self.tkwargs)
-        batch_test_X = torch.rand(3, 5, 2, **self.tkwargs)
+        test_X = torch.rand(n, d, **self.tkwargs)
+        batch_test_X = torch.rand(3, n, d, **self.tkwargs)
         sample_shape = Size([2])
-        sample_shape_X = torch.rand(3, 2, 5, 2, **self.tkwargs)
+        sample_shape_X = torch.rand(3, 2, n, d, **self.tkwargs)
         for model in (self.inferred_noise_gp, moo_model, model_list):
             path_model = get_matheron_path_model(model=model)
             self.assertFalse(path_model._is_ensemble)
@@ -163,25 +170,45 @@ def test_get_matheron_path_model(self) -> None:
         )

     def test_get_matheron_path_model_batched(self) -> None:
+        n, d, m = 5, 2, 3
         model = SingleTaskGP(
-            train_X=torch.rand(4, 5, 2, **self.tkwargs),
-            train_Y=torch.rand(4, 5, 2, **self.tkwargs),
+            train_X=torch.rand(4, n, d, **self.tkwargs),
+            train_Y=torch.rand(4, n, m, **self.tkwargs),
         )
-        model._is_ensemble = True
         path_model = get_matheron_path_model(model=model)
-        self.assertTrue(path_model._is_ensemble)
-        test_X = torch.rand(5, 2, **self.tkwargs)
+        test_X = torch.rand(n, d, **self.tkwargs)
         # This mimics the behavior of the acquisition functions unsqueezing the
         # model batch dimension for ensemble models.
-        batch_test_X = torch.rand(3, 1, 5, 2, **self.tkwargs)
+        batch_test_X = torch.rand(3, 1, n, d, **self.tkwargs)
         # Explicitly matching X for completeness.
-        complete_test_X = torch.rand(3, 4, 5, 2, **self.tkwargs)
+        complete_test_X = torch.rand(3, 4, n, d, **self.tkwargs)
         for X in (test_X, batch_test_X, complete_test_X):
+            # shapes in each iteration of the loop are, respectively:
+            # torch.Size([4, 5, 2])
+            # torch.Size([3, 4, 5, 2])
+            # torch.Size([3, 4, 5, 2])
+            # irrespective of whether `is_ensemble` is true or false.
             self.assertEqual(
                 model.posterior(X).mean.shape, path_model.posterior(X).mean.shape
             )

         # Test with sample_shape.
         path_model = get_matheron_path_model(model=model, sample_shape=Size([2, 6]))
-        test_X = torch.rand(3, 2, 6, 4, 5, 2, **self.tkwargs)
-        self.assertEqual(path_model.posterior(test_X).mean.shape, test_X.shape)
+        test_X = torch.rand(3, 2, 6, 4, n, d, **self.tkwargs)
+        self.assertEqual(
+            path_model.posterior(test_X).mean.shape, torch.Size([*test_X.shape[:-1], m])
+        )
+        m = 1  # required by fully Bayesian model
+        fully_bayesian_model = get_fully_bayesian_model(
+            train_X=torch.randn(n, d, **self.tkwargs),
+            train_Y=torch.randn(n, m, **self.tkwargs),
+            num_models=3,
+            **self.tkwargs,
+        )
+        fully_bayesian_path_model = get_matheron_path_model(model=fully_bayesian_model)
+        self.assertTrue(is_ensemble(fully_bayesian_path_model))
+        for X in (test_X, batch_test_X, complete_test_X):
+            self.assertEqual(
+                fully_bayesian_model.posterior(X).mean.shape,
+                fully_bayesian_path_model.posterior(X).mean.shape,
+            )
diff --git a/test_community/acquisition/test_input_constructors.py b/test_community/acquisition/test_input_constructors.py
index 30a62703b3..92e523fba6 100644
--- a/test_community/acquisition/test_input_constructors.py
+++ b/test_community/acquisition/test_input_constructors.py
@@ -62,7 +62,10 @@ class TestFullyBayesianAcquisitionFunctionInputConstructors(
 ):
     def test_construct_inputs_scorebo(self) -> None:
         func = get_acqf_input_constructor(qSelfCorrectingBayesianOptimization)
-        num_samples, num_optima = 3, 7
+        # num_ensemble controls the ensemble size of the SAAS model
+        # num_optima controls the number of Thompson samples used to infer the
+        # distribution of optima
+        num_ensemble, num_optima = 4, 7
         model = SaasFullyBayesianSingleTaskGP(
             self.blockX_blockY[0].X, self.blockX_blockY[0].Y
         )
@@ -70,14 +73,14 @@ def test_construct_inputs_scorebo(self) -> None:
         model.load_mcmc_samples(
             {
                 "lengthscale": torch.rand(
-                    num_samples,
+                    num_ensemble,
                     1,
                     self.blockX_blockY[0].X.shape[-1],
                     dtype=torch.double,
                 ),
-                "outputscale": torch.rand(num_samples, dtype=torch.double),
-                "mean": torch.randn(num_samples, dtype=torch.double),
-                "noise": torch.rand(num_samples, 1, dtype=torch.double),
+                "outputscale": torch.rand(num_ensemble, dtype=torch.double),
+                "mean": torch.randn(num_ensemble, dtype=torch.double),
+                "noise": torch.rand(num_ensemble, 1, dtype=torch.double),
             }
         )

@@ -88,13 +91,15 @@ def test_construct_inputs_scorebo(self) -> None:
             num_optima=num_optima,
             distance_metric="kl_divergence",
         )
-        self.assertEqual(self.blockX_blockY[0].X.dtype, kwargs["optimal_inputs"].dtype)
-        self.assertEqual(len(kwargs["optimal_inputs"]), num_optima)
-        self.assertEqual(len(kwargs["optimal_outputs"]), num_optima)
+        optimal_inputs = kwargs["optimal_inputs"]
+        optimal_outputs = kwargs["optimal_outputs"]
+        self.assertEqual(self.blockX_blockY[0].X.dtype, optimal_inputs.dtype)
+        d = self.blockX_blockY[0].X.shape[-1]
+        self.assertEqual(optimal_inputs.shape, (num_optima, num_ensemble, d))
+        self.assertEqual(optimal_outputs.shape, (num_optima, num_ensemble, 1))
+
+        # asserting that, for the non-batch case, the optimal inputs are of shape
+        # num_optima x num_ensemble x d and outputs are num_optima x num_ensemble x 1
-        self.assertEqual(len(kwargs["optimal_inputs"].shape), 3)
-        self.assertEqual(len(kwargs["optimal_outputs"].shape), 3)
         self.assertEqual(kwargs["distance_metric"], "kl_divergence")
         qSelfCorrectingBayesianOptimization(**kwargs)
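Taken together, these changes let `PathwiseThompsonSampling` run on ensemble (fully Bayesian) models and on multi-output models. A condensed usage sketch, mirroring the tests in this diff (illustrative only; it reuses the `get_fully_bayesian_model` helper from `botorch.utils.test_helpers`, and the shapes in the comments follow the docstrings above):

```python
import torch

from botorch.acquisition.objective import ScalarizedPosteriorTransform
from botorch.acquisition.thompson_sampling import PathwiseThompsonSampling
from botorch.models import SingleTaskGP
from botorch.utils.test_helpers import get_fully_bayesian_model

tkwargs = {"dtype": torch.float64}
train_X = torch.rand(8, 2, **tkwargs)
test_X = torch.rand(5, 1, 2, **tkwargs)  # b=5 x q=1 x d=2

# Fully Bayesian (ensemble) model: previously raised NotImplementedError; now
# the cached ensemble_indices keep repeated evaluations deterministic.
fb_model = get_fully_bayesian_model(
    train_X, torch.rand(8, 1, **tkwargs), num_models=3, **tkwargs
)
acqf = PathwiseThompsonSampling(model=fb_model)
assert torch.allclose(acqf(test_X), acqf(test_X))

# Multi-output model: an objective or a scalarizing posterior transform is required.
moo_model = SingleTaskGP(train_X, torch.rand(8, 2, **tkwargs))
acqf = PathwiseThompsonSampling(
    model=moo_model,
    posterior_transform=ScalarizedPosteriorTransform(weights=torch.ones(2, **tkwargs)),
)
print(acqf(test_X).shape)  # expected: torch.Size([5])
```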