diff --git a/aepsych/acquisition/bvn.py b/aepsych/acquisition/bvn.py
index 9bb10f346..ae5be48ec 100644
--- a/aepsych/acquisition/bvn.py
+++ b/aepsych/acquisition/bvn.py
@@ -72,6 +72,7 @@ def _bvnu(
     hk = h * k
 
     x, w = _gauss_legendre20(dtype=dh.dtype)
+    x, w = x.to(dh), w.to(dh)
 
     asr = 0.5 * torch.asin(r)
     sn = torch.sin(asr[..., None] * x)
diff --git a/aepsych/acquisition/lookahead.py b/aepsych/acquisition/lookahead.py
index 08124f5b8..88445f1da 100644
--- a/aepsych/acquisition/lookahead.py
+++ b/aepsych/acquisition/lookahead.py
@@ -34,7 +34,7 @@ def Hb(p: Tensor) -> Tensor:
     Returns:
         Binary entropy for each probability.
     """
-    epsilon = torch.tensor(np.finfo(float).eps)
+    epsilon = torch.tensor(np.finfo(float).eps).to(p)
     p = torch.clamp(p, min=epsilon, max=1 - epsilon)
     return -torch.nan_to_num(p * torch.log2(p) + (1 - p) * torch.log2(1 - p))
 
@@ -78,6 +78,8 @@ def SUR_fn(Px: Tensor, P1: Tensor, P0: Tensor, py1: Tensor) -> Tensor:
     Returns:
         (b) tensor of SUR values.
     """
+    P1 = P1.to(Px)
+    py1 = py1.to(Px)
     sur = ClassErr(Px) - py1 * ClassErr(P1) - (1 - py1) * ClassErr(P0)
     return sur.sum(dim=-1)
 
diff --git a/aepsych/acquisition/lookahead_utils.py b/aepsych/acquisition/lookahead_utils.py
index 82592f4b5..f6c5aa8fa 100644
--- a/aepsych/acquisition/lookahead_utils.py
+++ b/aepsych/acquisition/lookahead_utils.py
@@ -144,7 +144,7 @@ def lookahead_inner(f_q: Normal) -> Tensor:
     pstar_marginal_0 = 1 - pstar_marginal_1
     pq_marginal_1 = probit(Mu_q / torch.sqrt(1 + Sigma2_q))
 
-    quad = GaussHermiteQuadrature1D()
+    quad = GaussHermiteQuadrature1D().to(Mu_q)
     fq_mvn = Normal(Mu_q, torch.sqrt(Sigma2_q))
     joint_ystar1_yq1 = quad(lookahead_inner, fq_mvn)
     joint_ystar0_yq1 = pq_marginal_1 - joint_ystar1_yq1
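The acquisition fixes above all use the tensor-argument overload of `Tensor.to`: passing a tensor (rather than a device or dtype) matches both its device and its dtype in one call, so constants built on CPU (quadrature nodes and weights, `epsilon`) follow wherever the model's tensors live. A minimal standalone sketch of the pattern; the names `weights` and `apply_weights` are illustrative, not aepsych code:

```python
import torch

# Module-level constants default to CPU/float32, while model tensors may be
# CUDA/float64. Tensor.to(other) returns a copy on other's device with
# other's dtype, so the arithmetic below never hits a mismatch error.
weights = torch.randn(20)

def apply_weights(p: torch.Tensor) -> torch.Tensor:
    w = weights.to(p)  # same device and dtype as p
    return p * w.sum()

x = torch.rand(5, dtype=torch.float64)  # works the same for CUDA inputs
print(apply_weights(x))
```

The same overload exists on `torch.nn.Module.to`, which is why `GaussHermiteQuadrature1D().to(Mu_q)` above moves the quadrature module to `Mu_q`'s device in one step.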
diff --git a/aepsych/strategy.py b/aepsych/strategy.py
index 22f5d1a40..093ce8e53 100644
--- a/aepsych/strategy.py
+++ b/aepsych/strategy.py
@@ -26,7 +26,12 @@
 
 import numpy as np
 import torch
-
+from aepsych.acquisition import (
+    MonotonicBernoulliMCMutualInformation,
+    MonotonicMCLSE,
+    MonotonicMCPosteriorVariance,
+)
+from aepsych.acquisition.monotonic_rejection import MonotonicMCAcquisition
 from aepsych.config import Config
 from aepsych.generators.base import AEPsychGenerator
 from aepsych.generators.sobol_generator import SobolGenerator
@@ -62,6 +67,13 @@ class Strategy(object):
 
     _n_eval_points: int = 1000
 
+    no_gpu_acqfs = (
+        MonotonicMCAcquisition,
+        MonotonicBernoulliMCMutualInformation,
+        MonotonicMCPosteriorVariance,
+        MonotonicMCLSE,
+    )
+
     def __init__(
         self,
         generator: AEPsychGenerator,
@@ -74,6 +86,7 @@ def __init__(
         min_asks: int = 0,
         model: Optional[AEPsychMixin] = None,
         model_gpu: bool = False,
+        generator_gpu: bool = False,
         refit_every: int = 1,
         min_total_outcome_occurrences: int = 1,
         max_asks: Optional[int] = None,
@@ -94,6 +107,7 @@ def __init__(
             min_asks (int): The minimum number of points that should be generated from this strategy.
             model (ModelProtocol, optional): The AEPsych model of the data.
             model_gpu (bool): Whether to move the model to GPU, defaults to False.
+            generator_gpu (bool): Whether to use the GPU for generating points, defaults to False.
             refit_every (int): How often to refit the model from scratch.
             min_total_outcome_occurrences (int): The minimum number of total observations needed for each outcome before the strategy will finish. Defaults to 1 (i.e., for binary outcomes, there must be at least one "yes" trial and one "no" trial).
@@ -143,6 +157,27 @@ def __init__(
 
         self.model_device = torch.device("cuda" if model_gpu else "cpu")
 
+        if generator_gpu:
+            if model is None:
+                logger.warning(
+                    f"GPU requested for generator {type(generator).__name__} but this generator has no model to move to GPU."
+                )
+                self.generator_device = torch.device("cpu")
+            else:
+                assert (
+                    torch.cuda.is_available()
+                ), f"GPU requested for generator {type(generator).__name__} but GPU is not found!"
+
+                if hasattr(generator, "acqf") and isinstance(
+                    generator.acqf, self.no_gpu_acqfs
+                ):
+                    logger.warning(f"{generator.acqf.__name__} does not support GPU")
+                    self.generator_device = torch.device("cpu")
+                else:
+                    self.generator_device = torch.device("cuda")
+        else:
+            self.generator_device = torch.device("cpu")
+
         self.run_indefinitely = run_indefinitely
         self.lb, self.ub, self.dim = _process_bounds(lb, ub, dim)
         self.min_total_outcome_occurrences = min_total_outcome_occurrences
@@ -237,8 +272,18 @@ def gen(self, num_points: int = 1) -> torch.Tensor:
         Returns:
             torch.Tensor: Next set of point(s) to evaluate, [num_points x dim].
         """
+        original_device = None
+        if self.model is not None and self.generator_device.type == "cuda":
+            original_device = self.model.device
+            self.model.to(self.generator_device)  # type: ignore
+
         self._count = self._count + num_points
-        return self.generator.gen(num_points, self.model)
+        points = self.generator.gen(num_points, self.model)
+
+        if original_device is not None:
+            self.model.to(original_device)  # type: ignore
+
+        return points
 
     @ensure_model_is_fresh
     def get_max(
@@ -452,6 +497,7 @@ def from_config(cls, config: Config, name: str) -> Strategy:
 
         gen_cls = config.getobj(name, "generator", fallback=SobolGenerator)
         generator = gen_cls.from_config(config)
+        generator_gpu = config.getboolean(gen_cls.__name__, "use_gpu", fallback=False)
 
         model_cls = config.getobj(name, "model", fallback=None)
         if model_cls is not None:
@@ -507,6 +553,7 @@ def from_config(cls, config: Config, name: str) -> Strategy:
             dim=dim,
             model=model,
             model_gpu=model_gpu,
+            generator_gpu=generator_gpu,
             generator=generator,
             min_asks=min_asks,
             refit_every=refit_every,
diff --git a/aepsych/utils.py b/aepsych/utils.py
index 931a59c83..aff39ee6c 100644
--- a/aepsych/utils.py
+++ b/aepsych/utils.py
@@ -22,7 +22,7 @@ def make_scaled_sobol(
     lb: torch.Tensor, ub: torch.Tensor, size: int, seed: Optional[int] = None
 ) -> torch.Tensor:
     lb, ub, ndim = _process_bounds(lb, ub, None)
-    grid = SobolEngine(dimension=ndim, scramble=True, seed=seed).draw(size)
+    grid = SobolEngine(dimension=ndim, scramble=True, seed=seed).draw(size).to(lb)
 
     # rescale from [0,1] to [lb, ub]
     grid = lb + (ub - lb) * grid
@@ -127,7 +127,6 @@ def interpolate_monotonic(x, y, z, min_x=-np.inf, max_x=np.inf):
     y1 = y[idx]
 
     x_star = x0 + (x1 - x0) * (z - y0) / (y1 - y0)
-
    return x_star
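The `Strategy.gen` change above follows a move-and-restore idiom: remember which device the model is on, move it to the generator device for point generation, then move it back so the rest of the strategy sees the model where it left it. A standalone sketch of the same idiom, here wrapped in a `try/finally` guard; `with_device` is a hypothetical helper, not the aepsych implementation (which restores without a guard):

```python
import torch

def with_device(model: torch.nn.Module, target: torch.device, fn):
    # Remember where the model lives, move it for the duration of fn,
    # and restore it even if fn raises, so callers see no device change.
    original = next(model.parameters()).device
    model.to(target)
    try:
        return fn(model)
    finally:
        model.to(original)
```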
diff --git a/tests/generators/test_optimize_acqf_generator.py b/tests/generators/test_optimize_acqf_generator.py
index fbd94779f..358e78588 100644
--- a/tests/generators/test_optimize_acqf_generator.py
+++ b/tests/generators/test_optimize_acqf_generator.py
@@ -10,16 +10,45 @@
 
 import numpy as np
 import torch
-from aepsych.acquisition import MCLevelSetEstimation
+from aepsych.acquisition import (
+    ApproxGlobalSUR,
+    EAVC,
+    GlobalMI,
+    GlobalSUR,
+    LocalMI,
+    LocalSUR,
+    MCLevelSetEstimation,
+    MCPosteriorVariance,
+)
+from aepsych.acquisition.lookahead import MOCU, SMOCU
+from aepsych.acquisition.mutual_information import BernoulliMCMutualInformation
 from aepsych.config import Config
 from aepsych.generators import OptimizeAcqfGenerator
-from aepsych.models import (
-    GPClassificationModel,
-    PairwiseProbitModel,
-)
+from aepsych.models import GPClassificationModel, PairwiseProbitModel
+from aepsych.strategy import Strategy
 from botorch.acquisition.preference import AnalyticExpectedUtilityOfBestOption
+from parameterized import parameterized
 from sklearn.datasets import make_classification
 
+acqf_kwargs_target = {"target": 0.75}
+acqf_kwargs_lookahead = {"target": 0.75, "lookahead_type": "posterior"}
+
+acqfs = [
+    (MCPosteriorVariance, {}),
+    (ApproxGlobalSUR, acqf_kwargs_target),
+    (MOCU, acqf_kwargs_target),
+    (SMOCU, acqf_kwargs_target),
+    (EAVC, acqf_kwargs_target),
+    (EAVC, acqf_kwargs_lookahead),
+    (GlobalMI, acqf_kwargs_target),
+    (GlobalMI, acqf_kwargs_lookahead),
+    (GlobalSUR, acqf_kwargs_target),
+    (LocalMI, acqf_kwargs_target),
+    (LocalSUR, acqf_kwargs_target),
+    (MCLevelSetEstimation, acqf_kwargs_target),
+    (BernoulliMCMutualInformation, {}),
+]
+
 
 class TestOptimizeAcqfGenerator(unittest.TestCase):
     def test_time_limits(self):
@@ -84,6 +113,33 @@ def test_instantiate_eubo(self):
         acqf = generator._instantiate_acquisition_fn(model=model)
         self.assertTrue(isinstance(acqf, AnalyticExpectedUtilityOfBestOption))
 
+    @unittest.skipUnless(torch.cuda.is_available(), "no gpu available")
+    @parameterized.expand(acqfs)
+    def test_gpu_smoketest(self, acqf, acqf_kwargs):
+        lb = torch.tensor([0])
+        ub = torch.tensor([1])
+        model = GPClassificationModel(
+            lb=lb, ub=ub, inducing_size=10, inducing_point_method="pivoted_chol"
+        )
+
+        generator = OptimizeAcqfGenerator(acqf=acqf, acqf_kwargs=acqf_kwargs)
+
+        strat = Strategy(
+            lb=torch.tensor([0]),
+            ub=torch.tensor([1]),
+            model=model,
+            generator=generator,
+            stimuli_per_trial=1,
+            outcome_types=["binary"],
+            min_asks=1,
+            model_gpu=True,
+            generator_gpu=True,
+        )
+
+        strat.add_data(x=torch.tensor([0.90]), y=torch.tensor([1.0]))
+
+        strat.gen(1)
+
 
 if __name__ == "__main__":
     unittest.main()
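Taken together, the new `generator_gpu` flag can be exercised end to end the same way the smoke test above does. A condensed sketch (CUDA required; the bounds, inducing settings, acquisition choice, and the single binary observation mirror the test and are illustrative, not prescribed values):

```python
import torch
from aepsych.acquisition import EAVC
from aepsych.generators import OptimizeAcqfGenerator
from aepsych.models import GPClassificationModel
from aepsych.strategy import Strategy

lb, ub = torch.tensor([0]), torch.tensor([1])
model = GPClassificationModel(
    lb=lb, ub=ub, inducing_size=10, inducing_point_method="pivoted_chol"
)
generator = OptimizeAcqfGenerator(acqf=EAVC, acqf_kwargs={"target": 0.75})

strat = Strategy(
    lb=lb,
    ub=ub,
    model=model,
    generator=generator,
    stimuli_per_trial=1,
    outcome_types=["binary"],
    min_asks=1,
    model_gpu=True,       # fit the model on GPU
    generator_gpu=True,   # optimize the acquisition on GPU too
)
strat.add_data(x=torch.tensor([0.90]), y=torch.tensor([1.0]))
next_point = strat.gen(1)  # model moves to the generator device, then back
```

When driving this through a config file instead, the `from_config` change reads a `use_gpu` boolean from the generator class's own section (e.g. `use_gpu = True` under `[OptimizeAcqfGenerator]`) and passes it through as `generator_gpu`.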