diff --git a/setup.py b/setup.py
index 47aab3fca0..1ee4d8c569 100644
--- a/setup.py
+++ b/setup.py
@@ -41,7 +41,7 @@
         "absl-py",
         "dill<0.3.6",
         "gpflow>=2.9.2",
-        "gpflux>=0.4.4",
+        "gpflux@ git+https://github.com/secondmind-labs/GPflux.git@khurram/rff_additive_kernels",
         "numpy",
         "tensorflow>=2.5,<2.17; platform_system!='Darwin' or platform_machine!='arm64'",
         "tensorflow-macos>=2.5,<2.17; platform_system=='Darwin' and platform_machine=='arm64'",
diff --git a/tests/unit/models/gpflow/test_sampler.py b/tests/unit/models/gpflow/test_sampler.py
index bad6a5ee57..d9a2fa2a2d 100644
--- a/tests/unit/models/gpflow/test_sampler.py
+++ b/tests/unit/models/gpflow/test_sampler.py
@@ -49,6 +49,7 @@
     feature_decomposition_trajectory,
 )
 from trieste.models.gpflow.sampler import (
+    FeatureDecompositionInternalDataModel,
     FeatureDecompositionTrajectorySamplerModel,
     qmc_normal_samples,
 )
@@ -66,7 +67,10 @@
 ]
 
 
-DecoupledSamplingModel = Callable[[Dataset], Tuple[int, FeatureDecompositionTrajectorySamplerModel]]
+RFFSamplingModel = Callable[[Dataset], Tuple[int, FeatureDecompositionInternalDataModel]]
+DecoupledSamplingModel = Callable[
+    [Dataset], Tuple[int, int, FeatureDecompositionTrajectorySamplerModel]
+]
 
 
 @pytest.fixture(name="sampling_dataset")
@@ -79,15 +83,39 @@ def _sampling_dataset() -> Dataset:
     return dataset
 
 
+@pytest.fixture(
+    name="rff_sampling_model",
+    params=[
+        pytest.param(
+            lambda dataset: (1, quadratic_mean_rbf_kernel_model(dataset)),
+            id="one_op_custom",
+        ),
+        pytest.param(
+            lambda dataset: (
+                2,
+                quadratic_mean_rbf_kernel_model(
+                    dataset,
+                    kernel=gpflow.kernels.Sum([gpflow.kernels.RBF(), gpflow.kernels.Matern52()]),
+                ),
+            ),
+            id="one_op_add_custom",
+        ),
+    ],
+)
+def _rff_sampling_model_fixture(request: Any) -> RFFSamplingModel:
+    return request.param
+
+
 @pytest.fixture(
     name="decoupled_sampling_model",
     params=[
         pytest.param(
-            lambda dataset: (1, quadratic_mean_rbf_kernel_model(dataset)), id="one_op_custom"
+            lambda dataset: (1, 1, quadratic_mean_rbf_kernel_model(dataset)), id="one_op_custom"
         ),
         # whiten testing is covered in tests/unit/models/gpflow/test_models.py
         pytest.param(
             lambda dataset: (
+                1,
                 1,
                 SparseVariational(svgp_model(dataset.query_points, dataset.observations)),
             ),
@@ -96,12 +124,29 @@ def _sampling_dataset() -> Dataset:
         pytest.param(
             lambda dataset: (
                 2,
+                1,
                 SparseVariational(
                     svgp_model_by_type(dataset.query_points, "separate+shared", whiten=False)
                 ),
             ),
             id="two_op_svgp",
         ),
+        pytest.param(
+            lambda dataset: (
+                1,
+                2,
+                SparseVariational(
+                    svgp_model(
+                        dataset.query_points,
+                        dataset.observations,
+                        kernel=gpflow.kernels.Sum(
+                            [gpflow.kernels.RBF(), gpflow.kernels.Matern52()]
+                        ),
+                    )
+                ),
+            ),
+            id="one_op_add_svgp",
+        ),
     ],
 )
 def _decoupled_sampling_model_fixture(request: Any) -> DecoupledSamplingModel:
@@ -483,11 +528,12 @@ def test_rff_trajectory_sampler_returns_trajectory_function_with_correct_shapes(
     num_evals: int,
     num_features: int,
     batch_size: int,
+    rff_sampling_model: RFFSamplingModel,
 ) -> None:
     dataset = Dataset(
         tf.constant([[-2.0]], dtype=tf.float64), tf.constant([[4.1]], dtype=tf.float64)
     )
-    model = quadratic_mean_rbf_kernel_model(dataset)
+    n_kernels, model = rff_sampling_model(dataset)
     sampler = RandomFourierFeatureTrajectorySampler(model, num_features=num_features)
 
     trajectory = sampler.get_trajectory()
@@ -502,7 +548,7 @@ def test_rff_trajectory_sampler_returns_trajectory_function_with_correct_shapes(
 
     tf.debugging.assert_shapes([(trajectory(xs_with_full_batch_dim), [num_evals, batch_size, 1])])
     tf.debugging.assert_shapes(
-        [(trajectory._feature_functions(xs), [num_evals, num_features])]  # type: ignore
+        [(trajectory._feature_functions(xs), [num_evals, num_features * n_kernels])]  # type: ignore
     )
     assert isinstance(trajectory, feature_decomposition_trajectory)
 
@@ -512,8 +558,9 @@ def test_rff_trajectory_sampler_returns_trajectory_function_with_correct_shapes(
 def test_rff_trajectory_sampler_returns_deterministic_trajectory(
     batch_size: int,
     sampling_dataset: Dataset,
+    rff_sampling_model: RFFSamplingModel,
 ) -> None:
-    model = quadratic_mean_rbf_kernel_model(sampling_dataset)
+    _, model = rff_sampling_model(sampling_dataset)
 
     sampler = RandomFourierFeatureTrajectorySampler(model, num_features=100)
     trajectory = sampler.get_trajectory()
@@ -529,8 +576,9 @@ def test_rff_trajectory_sampler_returns_deterministic_trajectory(
 
 def test_rff_trajectory_sampler_returns_same_posterior_from_each_calculation_method(
     sampling_dataset: Dataset,
+    rff_sampling_model: RFFSamplingModel,
 ) -> None:
-    model = quadratic_mean_rbf_kernel_model(sampling_dataset)
+    _, model = rff_sampling_model(sampling_dataset)
 
     sampler = RandomFourierFeatureTrajectorySampler(model, num_features=100)
     sampler.get_trajectory()
@@ -545,8 +593,9 @@ def test_rff_trajectory_sampler_returns_same_posterior_from_each_calculation_met
 @random_seed
 def test_rff_trajectory_sampler_samples_are_distinct_for_new_instances(
     sampling_dataset: Dataset,
+    rff_sampling_model: RFFSamplingModel,
 ) -> None:
-    model = quadratic_mean_rbf_kernel_model(sampling_dataset)
+    _, model = rff_sampling_model(sampling_dataset)
 
     sampler1 = RandomFourierFeatureTrajectorySampler(model, num_features=100)
     trajectory1 = sampler1.get_trajectory()
@@ -574,8 +623,9 @@ def test_rff_trajectory_sampler_samples_are_distinct_for_new_instances(
 def test_rff_trajectory_resample_trajectory_provides_new_samples_without_retracing(
     batch_size: int,
     sampling_dataset: Dataset,
+    rff_sampling_model: RFFSamplingModel,
 ) -> None:
-    model = quadratic_mean_rbf_kernel_model(sampling_dataset)
+    _, model = rff_sampling_model(sampling_dataset)
     xs = sampling_dataset.query_points
     xs = tf.expand_dims(xs, -2)  # [N, 1, d]
     xs = tf.tile(xs, [1, batch_size, 1])  # [N, B, D]
@@ -726,7 +776,7 @@ def test_decoupled_trajectory_sampler_returns_trajectory_function_with_correct_s
         tf.constant([[-2.0]], dtype=tf.float64), tf.constant([[4.1]], dtype=tf.float64)
     )
     N = len(dataset.query_points)
-    L, model = decoupled_sampling_model(dataset)
+    L, n_kernels, model = decoupled_sampling_model(dataset)
     sampler = DecoupledTrajectorySampler(model, num_features=num_features)
 
     trajectory = sampler.get_trajectory()
@@ -738,11 +788,21 @@ def test_decoupled_trajectory_sampler_returns_trajectory_function_with_correct_s
     tf.debugging.assert_shapes([(trajectory(xs_with_full_batch_dim), [num_evals, batch_size, L])])
     if L > 1:
         tf.debugging.assert_shapes(
-            [(trajectory._feature_functions(xs), [L, num_evals, num_features + N])]  # type: ignore
+            [
+                (
+                    trajectory._feature_functions(xs),  # type: ignore
+                    [L, num_evals, num_features * n_kernels + N],
+                )
+            ]
         )
     else:
         tf.debugging.assert_shapes(
-            [(trajectory._feature_functions(xs), [num_evals, num_features + N])]  # type: ignore
+            [
+                (
+                    trajectory._feature_functions(xs),  # type: ignore
+                    [num_evals, num_features * n_kernels + N],
+                )
+            ]
         )
     assert isinstance(trajectory, feature_decomposition_trajectory)
 
@@ -754,7 +814,7 @@ def test_decoupled_trajectory_sampler_returns_deterministic_trajectory(
     sampling_dataset: Dataset,
     decoupled_sampling_model: DecoupledSamplingModel,
 ) -> None:
-    _, model = decoupled_sampling_model(sampling_dataset)
+    _, _, model = decoupled_sampling_model(sampling_dataset)
     sampler = DecoupledTrajectorySampler(model, num_features=100)
     trajectory = sampler.get_trajectory()
 
@@ -800,7 +860,7 @@ def test_decoupled_trajectory_resample_trajectory_provides_new_samples_without_r
     sampling_dataset: Dataset,
     decoupled_sampling_model: DecoupledSamplingModel,
 ) -> None:
-    _, model = decoupled_sampling_model(sampling_dataset)
+    _, _, model = decoupled_sampling_model(sampling_dataset)
     xs = sampling_dataset.query_points
     xs = tf.expand_dims(xs, -2)  # [N, 1, d]
     xs = tf.tile(xs, [1, batch_size, 1])  # [N, B, D]
@@ -826,7 +886,7 @@ def test_decoupled_trajectory_update_trajectory_updates_and_doesnt_retrace(
     sampling_dataset: Dataset,
     decoupled_sampling_model: DecoupledSamplingModel,
 ) -> None:
-    L, model = decoupled_sampling_model(sampling_dataset)
+    L, n_kernels, model = decoupled_sampling_model(sampling_dataset)
 
     x_range = tf.random.uniform([5], 1.0, 2.0)  # sample test locations
     x_range = tf.cast(x_range, dtype=tf.float64)
@@ -841,7 +901,7 @@ def test_decoupled_trajectory_update_trajectory_updates_and_doesnt_retrace(
     eval_before = trajectory(xs_predict_with_batching)
     trace_count_before = trajectory.__call__._get_tracing_count()  # type: ignore
 
-    if L > 1:
+    if L > 1 or n_kernels > 1:
         # pick the first kernel to check
         _model_lengthscales = model.get_kernel().kernels[0].lengthscales
         _trajectory_sampler_lengthscales = trajectory_sampler._feature_functions.kernel.kernels[
@@ -894,9 +954,10 @@ def test_decoupled_trajectory_update_trajectory_updates_and_doesnt_retrace(
 def test_rff_and_decoupled_trajectory_give_similar_results(
     noise_var: float,
     sampling_dataset: Dataset,
+    rff_sampling_model: RFFSamplingModel,
 ) -> None:
-    model = quadratic_mean_rbf_kernel_model(sampling_dataset)
-    model._noise_variance = tf.constant(noise_var, dtype=tf.float64)
+    _, model = rff_sampling_model(sampling_dataset)
+    model._noise_variance = tf.constant(noise_var, dtype=tf.float64)  # type: ignore[attr-defined]
 
     x_range = tf.linspace(1.4, 1.8, 3)
     x_range = tf.cast(x_range, dtype=tf.float64)
@@ -916,7 +977,7 @@ def test_rff_and_decoupled_trajectory_give_similar_results(
     eval_2 = trajectory_2(xs_predict_with_batching)
 
     npt.assert_allclose(
-        tf.reduce_mean(eval_1, 1), tf.reduce_mean(eval_2, 1), rtol=0.01
+        tf.reduce_mean(eval_1, 1), tf.reduce_mean(eval_2, 1), rtol=0.1
     )  # means across samples should roughly agree for different samplers
     npt.assert_allclose(
         tf.math.reduce_variance(eval_1, 1), tf.math.reduce_variance(eval_2, 1), rtol=1.0
diff --git a/tests/util/models/gpflow/models.py b/tests/util/models/gpflow/models.py
index 650fc65f08..d3962e4dad 100644
--- a/tests/util/models/gpflow/models.py
+++ b/tests/util/models/gpflow/models.py
@@ -396,9 +396,14 @@ def sgpr_model(x: tf.Tensor, y: tf.Tensor, num_latent_gps: int = 1) -> SGPR:
     return SGPR((x, y), gpflow.kernels.Matern32(), x[:2], num_latent_gps=num_latent_gps)
 
 
-def svgp_model(x: tf.Tensor, y: tf.Tensor, num_latent_gps: int = 1) -> SVGP:
+def svgp_model(
+    x: tf.Tensor,
+    y: tf.Tensor,
+    num_latent_gps: int = 1,
+    kernel: gpflow.kernels.Kernel = gpflow.kernels.Matern32(),
+) -> SVGP:
     return SVGP(
-        gpflow.kernels.Matern32(),
+        kernel,
         gpflow.likelihoods.Gaussian(),
         x[:2],
         num_data=len(x),
@@ -406,13 +411,14 @@ def svgp_model(x: tf.Tensor, y: tf.Tensor, num_latent_gps: int = 1) -> SVGP:
     )
 
 
-def quadratic_mean_rbf_kernel_model(dataset: Dataset) -> QuadraticMeanAndRBFKernelWithSamplers:
+def quadratic_mean_rbf_kernel_model(
+    dataset: Dataset,
+    kernel: gpflow.kernels.Kernel = gpflow.kernels.RBF(),
+) -> QuadraticMeanAndRBFKernelWithSamplers:
     model = QuadraticMeanAndRBFKernelWithSamplers(
         noise_variance=tf.constant(0.9, dtype=tf.float64), dataset=dataset
     )
-    model.kernel = (
-        gpflow.kernels.RBF()
-    )  # need a gpflow kernel object for random feature decompositions
+    model.kernel = kernel  # need a gpflow kernel object for random feature decompositions
     return model
 
 
diff --git a/trieste/models/gpflow/sampler.py b/trieste/models/gpflow/sampler.py
index d1aa8f1ce7..b64f18a11f 100644
--- a/trieste/models/gpflow/sampler.py
+++ b/trieste/models/gpflow/sampler.py
@@ -504,8 +504,11 @@ def __init__(
             )
 
         tf.debugging.assert_positive(num_features)
-        self._num_features = num_features
-        feature_functions = ResampleableRandomFourierFeatureFunctions(model, self._num_features)
+        feature_functions = ResampleableRandomFourierFeatureFunctions(model, num_features)
+
+        dataset = model.get_internal_data()
+        self._num_features = feature_functions.compute_output_dim(tf.shape(dataset.query_points))
+
         super().__init__(model, feature_functions)
 
     def _prepare_weight_sampler(self) -> Callable[[int], TensorType]:  # [B] -> [B, F, 1]
@@ -653,8 +656,13 @@ def __init__(
             )
 
         tf.debugging.assert_positive(num_features)
-        self._num_features = num_features
-        feature_functions = ResampleableDecoupledFeatureFunctions(model, self._num_features)
+        feature_functions = ResampleableDecoupledFeatureFunctions(model, num_features)
+
+        if isinstance(model, FeatureDecompositionInducingPointModel):
+            points, _, _, _ = model.get_inducing_variables()
+        else:
+            points = model.get_internal_data().query_points
+        self._num_features = feature_functions.compute_output_dim(tf.shape(points))
 
         super().__init__(model, feature_functions)