diff --git a/ops/pipeline/test-python-wheel-impl.sh b/ops/pipeline/test-python-wheel-impl.sh
index 837ff03b24d7..f5c0c7971b97 100755
--- a/ops/pipeline/test-python-wheel-impl.sh
+++ b/ops/pipeline/test-python-wheel-impl.sh
@@ -45,6 +45,7 @@ case "$suite" in
   mgpu)
     echo "-- Run Python tests, using multiple GPUs"
     python -c 'from cupy.cuda import jitify; jitify._init_module()'
+    export NCCL_RAS_ENABLE=0
     pytest -v -s -rxXs --fulltrace --durations=0 -m 'mgpu' tests/python-gpu
     pytest -v -s -rxXs --fulltrace --durations=0 -m 'mgpu' \
       tests/test_distributed/test_gpu_with_dask
diff --git a/python-package/xgboost/dask/data.py b/python-package/xgboost/dask/data.py
index f92f1666499f..cff853748ae8 100644
--- a/python-package/xgboost/dask/data.py
+++ b/python-package/xgboost/dask/data.py
@@ -1,5 +1,5 @@
 # pylint: disable=too-many-arguments
-"""Copyright 2019-2024, XGBoost contributors"""
+"""Copyright 2019-2025, XGBoost contributors"""
 
 import logging
 from collections.abc import Sequence
@@ -283,6 +283,25 @@ def append(i: int, name: str) -> None:
     return result
 
 
+def _get_is_cuda(parts: Optional[_DataParts]) -> bool:
+    """Check the first partition's data for CUDA and MAX-allreduce the flag."""
+    # A caller without partitions contributes False to the reduction.
+    local_flag = parts is not None and is_on_cuda(parts[0].get("data"))
+    votes = np.array([local_flag], dtype=np.int32)
+    # NOTE(review): presumably Op.MAX lets any truthy contribution win - confirm.
+    reduced = coll.allreduce(votes, coll.Op.MAX)
+    return bool(reduced[0])
+
+
+def _make_empty(is_cuda: bool) -> np.ndarray:
+    """Return an empty (0, 0) array from cupy when `is_cuda`, else numpy."""
+    if not is_cuda:
+        return np.empty((0, 0))
+    # cupy is obtained through import_cupy() only on the CUDA path.
+    xp = import_cupy()
+    return xp.empty((0, 0))
+
+
 def _create_quantile_dmatrix(
     *,
     feature_names: Optional[FeatureNames],
@@ -297,12 +316,11 @@ def _create_quantile_dmatrix(
     ref: Optional[DMatrix] = None,
 ) -> QuantileDMatrix:
     worker = distributed.get_worker()
+    is_cuda = _get_is_cuda(parts)
     if parts is None:
-        msg = f"Worker {worker.address} has an empty DMatrix."
-        LOGGER.warning(msg)
-
-        Xy = QuantileDMatrix(
-            np.empty((0, 0)),
+        LOGGER.warning("Worker %s has an empty DMatrix.", worker.address)
+        return QuantileDMatrix(
+            _make_empty(is_cuda),
             feature_names=feature_names,
             feature_types=feature_types,
             max_bin=max_bin,
@@ -310,16 +328,14 @@ def _create_quantile_dmatrix(
             enable_categorical=enable_categorical,
             max_quantile_batches=max_quantile_batches,
         )
-        return Xy
 
-    unzipped_dict = _get_worker_parts(parts)
     it = DaskPartitionIter(
-        **unzipped_dict,
+        **_get_worker_parts(parts),
         feature_types=feature_types,
         feature_names=feature_names,
         feature_weights=feature_weights,
     )
-    Xy = QuantileDMatrix(
+    return QuantileDMatrix(
         it,
         missing=missing,
         nthread=nthread,
@@ -328,7 +344,6 @@ def _create_quantile_dmatrix(
         enable_categorical=enable_categorical,
         max_quantile_batches=max_quantile_batches,
     )
-    return Xy
 
 
 def _create_dmatrix(  # pylint: disable=too-many-locals
@@ -350,11 +365,13 @@ def _create_dmatrix(  # pylint: disable=too-many-locals
     """
     worker = distributed.get_worker()
     list_of_parts = parts
+    is_cuda = _get_is_cuda(parts)
+
     if list_of_parts is None:
         msg = f"Worker {worker.address} has an empty DMatrix."
         LOGGER.warning(msg)
         Xy = DMatrix(
-            np.empty((0, 0)),
+            _make_empty(is_cuda),
             feature_names=feature_names,
             feature_types=feature_types,
             enable_categorical=enable_categorical,
diff --git a/python-package/xgboost/testing/dask.py b/python-package/xgboost/testing/dask.py
index af0fc8bf0397..21c0560fe7e8 100644
--- a/python-package/xgboost/testing/dask.py
+++ b/python-package/xgboost/testing/dask.py
@@ -239,7 +239,7 @@ def check_no_group_split(client: Client, device: str) -> None:
         client, 1024, 128, n_query_groups=4, max_rel=5, device=device
     )
 
-    ltr = dxgb.DaskXGBRanker(allow_group_split=False, n_estimators=32, device=device)
+    ltr = dxgb.DaskXGBRanker(allow_group_split=False, n_estimators=36, device=device)
     ltr.fit(
         X_tr,
         y_tr,
diff --git a/src/common/threading_utils.cuh b/src/common/threading_utils.cuh
index 1a4e29f38645..5408f3e07669 100644
--- a/src/common/threading_utils.cuh
+++ b/src/common/threading_utils.cuh
@@ -1,5 +1,5 @@
 /**
- * Copyright 2021-2024, XGBoost Contributors
+ * Copyright 2021-2025, XGBoost Contributors
  */
 #ifndef XGBOOST_COMMON_THREADING_UTILS_CUH_
 #define XGBOOST_COMMON_THREADING_UTILS_CUH_
@@ -20,6 +20,9 @@ namespace xgboost::common {
  * \param h hight
  */
 XGBOOST_DEVICE inline std::size_t DiscreteTrapezoidArea(std::size_t n, std::size_t h) {
+  if (n == 0 || h == 0) {
+    return 0;
+  }
   n -= 1;              // without diagonal entries
   h = std::min(n, h);  // Used for ranking, h <= n
   std::size_t total = ((n - (h - 1)) + n) * h / 2;
diff --git a/tests/cpp/objective/test_lambdarank_obj.cc b/tests/cpp/objective/test_lambdarank_obj.cc
index 82441ea310ec..db8472a2a7dd 100644
--- a/tests/cpp/objective/test_lambdarank_obj.cc
+++ b/tests/cpp/objective/test_lambdarank_obj.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2023 by XGBoost Contributors
+ * Copyright 2023-2025, XGBoost Contributors
  */
 #include "test_lambdarank_obj.h"
 
@@ -8,7 +8,6 @@
 #include <algorithm>                            // for sort
 #include <cstddef>                              // for size_t
 #include <initializer_list>                     // for initializer_list
-#include <map>                                  // for map
 #include <memory>                               // for unique_ptr, shared_ptr, make_shared
 #include <numeric>                              // for iota
 #include <string>                               // for char_traits, basic_string, string
@@ -106,6 +105,20 @@ void TestNDCGGPair(Context const* ctx) {
     }
   }
 
+  {
+    // Test empty input
+    std::unique_ptr<xgboost::ObjFunction> obj{xgboost::ObjFunction::Create("rank:ndcg", ctx)};
+    obj->Configure(Args{{"lambdarank_pair_method", "topk"}});
+
+    HostDeviceVector<float> predts;
+    MetaInfo info;
+    info.labels = linalg::Tensor<float, 2>{{}, {0, 1}, ctx->Device()};
+    info.group_ptr_ = {0, 0};
+    info.num_row_ = 0;
+    linalg::Matrix<GradientPair> gpairs;
+    obj->GetGradient(predts, info, 0, &gpairs);
+    ASSERT_EQ(gpairs.Size(), 0);
+  }
   ASSERT_NO_THROW(obj->DefaultEvalMetric());
 }
 
diff --git a/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py b/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py
index fa7d5f1c76e7..0cae314c685e 100644
--- a/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py
+++ b/tests/test_distributed/test_gpu_with_dask/test_gpu_with_dask.py
@@ -36,7 +36,7 @@
 pytestmark = [
     pytest.mark.skipif(**tm.no_dask()),
     pytest.mark.skipif(**tm.no_dask_cuda()),
-    tm.timeout(120),
+    tm.timeout(180),
 ]
 
 try: