From b44ebb66abd3ae06e7425bca719271e2afd8d18e Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 28 Feb 2025 19:07:29 -0600 Subject: [PATCH] Add basic example. (#1800) This is a skeleton for adding examples, requested in issue #1784. I plan to merge some minimal form of this, and then add a few examples that answer common questions about RMM, such as how to use specific memory resource adaptors or how to use RMM for managing multi-thread, multi-stream work. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Jake Awe (https://github.com/AyodeAwe) - Mark Harris (https://github.com/harrism) - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/rmm/pull/1800 --- .../cuda12.8-conda/devcontainer.json | 2 +- .devcontainer/cuda12.8-pip/devcontainer.json | 2 +- README.md | 2 +- ci/release/update-version.sh | 3 + conda/recipes/librmm/recipe.yaml | 56 +++++++++++++++++- examples/README.md | 7 +++ examples/basic/CMakeLists.txt | 25 ++++++++ examples/basic/README.md | 19 ++++++ examples/basic/src/basic.cpp | 41 +++++++++++++ examples/build.sh | 58 +++++++++++++++++++ examples/fetch_dependencies.cmake | 30 ++++++++++ examples/set_cuda_architecture.cmake | 27 +++++++++ examples/versions.cmake | 15 +++++ include/rmm/device_buffer.hpp | 3 +- .../rmm/mr/device/device_memory_resource.hpp | 2 +- 15 files changed, 284 insertions(+), 8 deletions(-) create mode 100644 examples/README.md create mode 100644 examples/basic/CMakeLists.txt create mode 100644 examples/basic/README.md create mode 100644 examples/basic/src/basic.cpp create mode 100755 examples/build.sh create mode 100644 examples/fetch_dependencies.cmake create mode 100644 examples/set_cuda_architecture.cmake create mode 100644 examples/versions.cmake diff --git a/.devcontainer/cuda12.8-conda/devcontainer.json b/.devcontainer/cuda12.8-conda/devcontainer.json index f8ccb3a59..fcf20229d 100644 --- a/.devcontainer/cuda12.8-conda/devcontainer.json +++ b/.devcontainer/cuda12.8-conda/devcontainer.json @@ -15,7 +15,7 @@ ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.4": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda12.8-pip/devcontainer.json b/.devcontainer/cuda12.8-pip/devcontainer.json index 3a890397d..6620e32a1 100644 --- a/.devcontainer/cuda12.8-pip/devcontainer.json +++ b/.devcontainer/cuda12.8-pip/devcontainer.json @@ -15,7 +15,7 @@ ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.4": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/README.md b/README.md index 54bab2eab..6e6fdc43a 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ Compiler requirements: * `gcc` version 9.3+ * `nvcc` version 11.4+ -* `cmake` version 3.26.4+ +* `cmake` version 3.30.4+ CUDA/GPU requirements: diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index ef409d68e..21d76a117 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -32,6 +32,9 @@ function sed_runner() { # Centralized version file update echo "${NEXT_FULL_TAG}" > VERSION +# Examples update +sed_runner "s/RMM_TAG branch-[0-9.]*/RMM_TAG branch-${NEXT_SHORT_TAG}/" examples/versions.cmake + # 
CI files for FILE in .github/workflows/*.yaml; do sed_runner "/shared-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}" diff --git a/conda/recipes/librmm/recipe.yaml b/conda/recipes/librmm/recipe.yaml index a59f67122..5003180e5 100644 --- a/conda/recipes/librmm/recipe.yaml +++ b/conda/recipes/librmm/recipe.yaml @@ -18,8 +18,8 @@ cache: build: script: - content: - - ./build.sh -n -v clean librmm tests benchmarks --cmake-args=\"-DCMAKE_INSTALL_LIBDIR=lib\" + content: | + ./build.sh -n -v clean librmm tests benchmarks --cmake-args=\"-DCMAKE_INSTALL_LIBDIR=lib\" secrets: - AWS_ACCESS_KEY_ID - AWS_SECRET_ACCESS_KEY @@ -121,3 +121,55 @@ outputs: homepage: ${{ load_from_file("python/librmm/pyproject.toml").project.urls.Homepage }} license: ${{ load_from_file("python/librmm/pyproject.toml").project.license.text | replace(" ", "-") }} summary: librmm test & benchmark executables + + - package: + name: librmm-example + version: ${{ version }} + build: + string: cuda${{ cuda_major }}_${{ date_string }}_${{ head_rev }} + script: + content: | + ./examples/build.sh --install + env: + CMAKE_C_COMPILER_LAUNCHER: ${{ env.get("CMAKE_C_COMPILER_LAUNCHER") }} + CMAKE_CUDA_COMPILER_LAUNCHER: ${{ env.get("CMAKE_CUDA_COMPILER_LAUNCHER") }} + CMAKE_CXX_COMPILER_LAUNCHER: ${{ env.get("CMAKE_CXX_COMPILER_LAUNCHER") }} + CMAKE_GENERATOR: ${{ env.get("CMAKE_GENERATOR") }} + PARALLEL_LEVEL: ${{ env.get("PARALLEL_LEVEL") }} + SCCACHE_BUCKET: ${{ env.get("SCCACHE_BUCKET") }} + SCCACHE_IDLE_TIMEOUT: ${{ env.get("SCCACHE_IDLE_TIMEOUT") }} + SCCACHE_REGION: ${{ env.get("SCCACHE_REGION") }} + SCCACHE_S3_USE_SSL: ${{ env.get("SCCACHE_S3_USE_SSL") }} + SCCACHE_S3_NO_CREDENTIALS: ${{ env.get("SCCACHE_S3_NO_CREDENTIALS") }} + SCCACHE_S3_KEY_PREFIX: librmm-${{ env.get("RAPIDS_CONDA_ARCH") }} + requirements: + build: + - cmake ${{ cmake_version }} + - ninja + - ${{ compiler("c") }} + - ${{ compiler("cxx") }} + - ${{ compiler("cuda") }} + - cuda-version =${{ cuda_version }} + host: + - cuda-version =${{ cuda_version }} + - if: cuda_major == "11" + then: cudatoolkit + else: cuda-cudart-dev + - ${{ pin_subpackage("librmm", exact=True) }} + run: + - if: cuda_major == "11" + then: cudatoolkit + else: cuda-cudart + - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }} + - ${{ pin_subpackage("librmm", exact=True) }} + ignore_run_exports: + from_package: + - if: cuda_major != "11" + then: cuda-cudart-dev + by_name: + - cuda-version + - librmm + about: + homepage: ${{ load_from_file("python/librmm/pyproject.toml").project.urls.Homepage }} + license: ${{ load_from_file("python/librmm/pyproject.toml").project.license.text | replace(" ", "-") }} + summary: librmm example executables diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 000000000..138f3ac0e --- /dev/null +++ b/examples/README.md @@ -0,0 +1,7 @@ +# RMM C++ Examples + +This folder contains examples to demonstrate librmm use cases. Running `build.sh` builds all examples. + +Current examples: + +- Basic: demonstrates memory resource construction and allocating a `device_uvector` on a stream. diff --git a/examples/basic/CMakeLists.txt b/examples/basic/CMakeLists.txt new file mode 100644 index 000000000..d74a934a5 --- /dev/null +++ b/examples/basic/CMakeLists.txt @@ -0,0 +1,25 @@ +# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
+
+cmake_minimum_required(VERSION 3.30.4)
+
+include(../set_cuda_architecture.cmake)
+
+# initialize cuda architecture
+rapids_cuda_init_architectures(basic_example)
+
+project(
+  basic_example
+  VERSION 0.0.1
+  LANGUAGES CXX CUDA)
+
+include(../fetch_dependencies.cmake)
+
+include(rapids-cmake)
+rapids_cmake_build_type("Release")
+
+# Configure your project here
+add_executable(basic_example src/basic.cpp)
+target_link_libraries(basic_example PRIVATE rmm::rmm)
+target_compile_features(basic_example PRIVATE cxx_std_17)
+
+install(TARGETS basic_example DESTINATION bin/examples/librmm)
diff --git a/examples/basic/README.md b/examples/basic/README.md
new file mode 100644
index 000000000..5d85fdcbc
--- /dev/null
+++ b/examples/basic/README.md
@@ -0,0 +1,19 @@
+# Basic Standalone librmm CUDA C++ application
+
+This C++ example demonstrates a basic librmm use case and provides a minimal
+example of building your own application based on librmm using CMake.
+
+The example source code creates a CUDA async memory resource and a CUDA
+stream, then uses them to allocate a `device_uvector`. The vector's device
+memory is freed automatically when the vector goes out of scope.
+
+## Compile and execute
+
+```bash
+# Configure project
+cmake -S . -B build/
+# Build
+cmake --build build/ --parallel $PARALLEL_LEVEL
+# Execute
+build/basic_example
+```
diff --git a/examples/basic/src/basic.cpp b/examples/basic/src/basic.cpp
new file mode 100644
index 000000000..1f31ccfef
--- /dev/null
+++ b/examples/basic/src/basic.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <rmm/cuda_device.hpp>
+#include <rmm/cuda_stream.hpp>
+#include <rmm/device_uvector.hpp>
+#include <rmm/mr/device/cuda_async_memory_resource.hpp>
+#include <iostream>
+
+int main(int argc, char** argv)
+{
+  // Construct a CUDA async memory resource using RAPIDS Memory Manager (RMM).
+  // This uses a memory pool managed by the CUDA driver, with an initial pool
+  // size of half of the currently free GPU memory.
+  rmm::mr::cuda_async_memory_resource mr{rmm::percent_of_free_device_memory(50)};
+
+  // Create a CUDA stream for asynchronous allocations
+  auto stream = rmm::cuda_stream{};
+
+  // Create a device_uvector with this stream and memory resource
+  auto const size{12345};
+  rmm::device_uvector<int> vec(size, stream, mr);
+  std::cout << "vec size: " << vec.size() << std::endl;
+
+  // Synchronize the stream
+  stream.synchronize();
+
+  return 0;
+}
diff --git a/examples/build.sh b/examples/build.sh
new file mode 100755
index 000000000..318e57f73
--- /dev/null
+++ b/examples/build.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+# Copyright (c) 2025, NVIDIA CORPORATION.
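+#
+# Options: -i / --install  install the built example binaries after building.
+# Environment: PARALLEL_LEVEL sets the number of parallel build jobs (default: 4);
+#   LIB_BUILD_DIR points at an existing librmm build directory.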
+ +# librmm examples build script + +set -euo pipefail + +# Parallelism control +PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} +# Installation disabled by default +INSTALL_EXAMPLES=false + +# Check for -i or --install flags to enable installation +ARGS=$(getopt -o i --long install -- "$@") +eval set -- "$ARGS" +while [ : ]; do + case "$1" in + -i | --install) + INSTALL_EXAMPLES=true + shift + ;; + --) shift; + break + ;; + esac +done + +# Root of examples +EXAMPLES_DIR=$(dirname "$(realpath "$0")") + +# Set up default librmm build directory and install prefix if conda build +if [ "${CONDA_BUILD:-"0"}" == "1" ]; then + LIB_BUILD_DIR="${LIB_BUILD_DIR:-${SRC_DIR/cpp/build}}" + INSTALL_PREFIX="${INSTALL_PREFIX:-${PREFIX}}" +fi + +# librmm build directory +LIB_BUILD_DIR=${LIB_BUILD_DIR:-$(readlink -f "${EXAMPLES_DIR}/../build")} + +################################################################################ +# Add individual librmm examples build scripts down below + +build_example() { + example_dir=${1} + example_dir="${EXAMPLES_DIR}/${example_dir}" + build_dir="${example_dir}/build" + + # Configure + cmake -S ${example_dir} -B ${build_dir} -Drmm_ROOT="${LIB_BUILD_DIR}" + # Build + cmake --build ${build_dir} -j${PARALLEL_LEVEL} + # Install if needed + if [ "$INSTALL_EXAMPLES" = true ]; then + cmake --install ${build_dir} --prefix ${INSTALL_PREFIX:-${example_dir}/install} + fi +} + +build_example basic diff --git a/examples/fetch_dependencies.cmake b/examples/fetch_dependencies.cmake new file mode 100644 index 000000000..0c949769b --- /dev/null +++ b/examples/fetch_dependencies.cmake @@ -0,0 +1,30 @@ +# ============================================================================= +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +include(${CMAKE_CURRENT_LIST_DIR}/versions.cmake) + +set(CPM_DOWNLOAD_VERSION v0.40.5) +file( + DOWNLOAD + https://github.com/cpm-cmake/CPM.cmake/releases/download/${CPM_DOWNLOAD_VERSION}/get_cpm.cmake + ${CMAKE_BINARY_DIR}/cmake/get_cpm.cmake) +include(${CMAKE_BINARY_DIR}/cmake/get_cpm.cmake) + +# find or build it via CPM +CPMFindPackage( + NAME rmm + FIND_PACKAGE_ARGUMENTS "PATHS ${rmm_ROOT} ${rmm_ROOT}/latest" GIT_REPOSITORY + https://github.com/rapidsai/rmm + GIT_TAG ${RMM_TAG} + GIT_SHALLOW TRUE) diff --git a/examples/set_cuda_architecture.cmake b/examples/set_cuda_architecture.cmake new file mode 100644 index 000000000..e8b3d9b56 --- /dev/null +++ b/examples/set_cuda_architecture.cmake @@ -0,0 +1,27 @@ +# ============================================================================= +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +include(${CMAKE_CURRENT_LIST_DIR}/versions.cmake) + +if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/librmm_cpp_examples_RAPIDS.cmake) + file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/${RMM_TAG}/RAPIDS.cmake + ${CMAKE_CURRENT_BINARY_DIR}/librmm_cpp_examples_RAPIDS.cmake) +endif() +include(${CMAKE_CURRENT_BINARY_DIR}/librmm_cpp_examples_RAPIDS.cmake) + +include(rapids-cmake) +include(rapids-cpm) +include(rapids-cuda) +include(rapids-export) +include(rapids-find) diff --git a/examples/versions.cmake b/examples/versions.cmake new file mode 100644 index 000000000..9969c695c --- /dev/null +++ b/examples/versions.cmake @@ -0,0 +1,15 @@ +# ============================================================================= +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +set(RMM_TAG branch-25.04) diff --git a/include/rmm/device_buffer.hpp b/include/rmm/device_buffer.hpp index cbb2853c3..25b921614 100644 --- a/include/rmm/device_buffer.hpp +++ b/include/rmm/device_buffer.hpp @@ -92,8 +92,7 @@ class device_buffer { */ // Note: we cannot use `device_buffer() = default;` because nvcc implicitly adds // `__host__ __device__` specifiers to the defaulted constructor when it is called within the - // context of both host and device functions. Specifically, the `cudf::type_dispatcher` is a host- - // device function. This causes warnings/errors because this ctor invokes host-only functions. + // context of both host and device functions. device_buffer() : _mr{rmm::mr::get_current_device_resource_ref()} {} /** diff --git a/include/rmm/mr/device/device_memory_resource.hpp b/include/rmm/mr/device/device_memory_resource.hpp index 9ae390d8c..b1c5ff7d3 100644 --- a/include/rmm/mr/device/device_memory_resource.hpp +++ b/include/rmm/mr/device/device_memory_resource.hpp @@ -33,7 +33,7 @@ namespace mr { */ /** - * @brief Base class for all libcudf device memory allocation. + * @brief Base class for all librmm device memory allocation. * * This class serves as the interface that all custom device memory * implementations must satisfy.