Add pylibcudf.gpumemoryview support for len()/nbytes (#18133)

Add support for `len()` and `nbytes` in `pylibcudf.gpumemoryview`. Having those methods is helpful to ensure proper serialization in Dask/Distributed, as utility methods that serialize objects, in this case used by cudf-polars, may use the appropriate method or property to determine the size of the object being transferred. Authors: - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - Matthew Murray (https://github.com/Matt711) - Richard (Rick) Zamora (https://github.com/rjzamora) URL: #18133
rapidsai · Mar 4, 2025 · d9e64b2 · d9e64b2
1 parent 1420ef2
commit d9e64b2
Show file tree

Hide file tree

Showing 3 changed files with 80 additions and 1 deletion.
diff --git a/python/pylibcudf/pylibcudf/gpumemoryview.pyi b/python/pylibcudf/pylibcudf/gpumemoryview.pyi
@@ -7,3 +7,6 @@ class gpumemoryview:
     def __init__(self, data: Any): ...
     @property
     def __cuda_array_interface__(self) -> Mapping[str, Any]: ...
+    def __len__(self) -> int: ...  # size of the first dimension
+    @property
+    def nbytes(self) -> int: ...  # element count times element size in bytes
diff --git a/python/pylibcudf/pylibcudf/gpumemoryview.pyx b/python/pylibcudf/pylibcudf/gpumemoryview.pyx
@@ -1,4 +1,7 @@
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+# Copyright (c) 2023-2025, NVIDIA CORPORATION.
+
+import functools
+import operator
 
 __all__ = ["gpumemoryview"]
 
@@ -27,4 +30,19 @@ cdef class gpumemoryview:
     def __cuda_array_interface__(self):
         return self.obj.__cuda_array_interface__
 
+    def __len__(self):  # extent of the first dimension of the wrapped object
+        return self.obj.__cuda_array_interface__["shape"][0]
+
+    @property
+    def nbytes(self):
+        """Byte size implied by the object's __cuda_array_interface__."""
+        cai = self.obj.__cuda_array_interface__
+        shape, typestr = cai["shape"], cai["typestr"]
+        # typestr is a byte-order character, a type character, then the
+        # element size in bytes, e.g. '<f4' for a 32-bit (4-byte) float.
+        element_size = int(typestr[2:])
+        # Start from 1 so a zero-dimensional shape () counts one element
+        # instead of making reduce() raise TypeError on an empty sequence.
+        return functools.reduce(operator.mul, shape, 1) * element_size
+
     __hash__ = None
diff --git a/python/pylibcudf/pylibcudf/tests/test_gpumemoryview.py b/python/pylibcudf/pylibcudf/tests/test_gpumemoryview.py
@@ -0,0 +1,58 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+
+import itertools
+
+import numpy as np
+import pytest
+
+import rmm
+
+import pylibcudf as plc
+
+DTYPES = [  # NumPy dtype strings: type character followed by item size
+    "u1",
+    "i2",
+    "f4",
+    "f8",
+    "f16",
+]
+SIZES = [  # element counts for the 1-D test arrays, including empty
+    0,
+    1,
+    1000,
+    1024,
+    10000,
+]
+
+
+@pytest.fixture(params=tuple(itertools.product(SIZES, DTYPES)), ids=repr)
+def np_array(request):
+    """Provide a 1-D host array for every (size, dtype) combination."""
+    return np.empty((request.param[0],), dtype=request.param[1])
+
+
+def test_cuda_array_interface(np_array):
+    """CAI shape/strides/typestr match the host array viewed as bytes."""
+    host_ptr = np_array.__array_interface__["data"][0]
+    device_buf = rmm.DeviceBuffer(ptr=host_ptr, size=np_array.nbytes)
+    view = plc.gpumemoryview(device_buf)
+
+    # Compare against the host array reinterpreted as unsigned bytes.
+    expected = np_array.view("u1").__array_interface__
+    actual = view.__cuda_array_interface__
+
+    assert actual["shape"] == expected["shape"]
+    assert actual["strides"] == expected["strides"]
+    assert actual["typestr"] == expected["typestr"]
+
+
+def test_len(np_array):
+    """len() and nbytes of the view agree with the source NumPy array."""
+    host_ptr = np_array.__array_interface__["data"][0]
+    device_buf = rmm.DeviceBuffer(ptr=host_ptr, size=np_array.nbytes)
+    view = plc.gpumemoryview(device_buf)
+
+    # Compare len() against the host array reinterpreted as unsigned bytes.
+    as_bytes = np_array.view("u1")
+    assert len(view) == len(as_bytes)
+    assert view.nbytes == np_array.nbytes