From b44ebb66abd3ae06e7425bca719271e2afd8d18e Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 28 Feb 2025 19:07:29 -0600 Subject: [PATCH] Add basic example. (#1800) This is a skeleton for adding examples, requested in issue #1784. I plan to merge some minimal form of this, and then add a few examples that answer common questions about RMM, such as how to use specific memory resource adaptors or how to use RMM for managing multi-thread, multi-stream work. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Jake Awe (https://github.com/AyodeAwe) - Mark Harris (https://github.com/harrism) - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/rmm/pull/1800 --- .../cuda12.8-conda/devcontainer.json | 2 +- .devcontainer/cuda12.8-pip/devcontainer.json | 2 +- README.md | 2 +- ci/release/update-version.sh | 3 + conda/recipes/librmm/recipe.yaml | 56 +++++++++++++++++- examples/README.md | 7 +++ examples/basic/CMakeLists.txt | 25 ++++++++ examples/basic/README.md | 19 ++++++ examples/basic/src/basic.cpp | 41 +++++++++++++ examples/build.sh | 58 +++++++++++++++++++ examples/fetch_dependencies.cmake | 30 ++++++++++ examples/set_cuda_architecture.cmake | 27 +++++++++ examples/versions.cmake | 15 +++++ include/rmm/device_buffer.hpp | 3 +- .../rmm/mr/device/device_memory_resource.hpp | 2 +- 15 files changed, 284 insertions(+), 8 deletions(-) create mode 100644 examples/README.md create mode 100644 examples/basic/CMakeLists.txt create mode 100644 examples/basic/README.md create mode 100644 examples/basic/src/basic.cpp create mode 100755 examples/build.sh create mode 100644 examples/fetch_dependencies.cmake create mode 100644 examples/set_cuda_architecture.cmake create mode 100644 examples/versions.cmake diff --git a/.devcontainer/cuda12.8-conda/devcontainer.json b/.devcontainer/cuda12.8-conda/devcontainer.json index f8ccb3a59..fcf20229d 100644 --- a/.devcontainer/cuda12.8-conda/devcontainer.json +++ b/.devcontainer/cuda12.8-conda/devcontainer.json @@ -15,7 +15,7 @@ ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.4": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda12.8-pip/devcontainer.json b/.devcontainer/cuda12.8-pip/devcontainer.json index 3a890397d..6620e32a1 100644 --- a/.devcontainer/cuda12.8-pip/devcontainer.json +++ b/.devcontainer/cuda12.8-pip/devcontainer.json @@ -15,7 +15,7 @@ ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.4": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/README.md b/README.md index 54bab2eab..6e6fdc43a 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ Compiler requirements: * `gcc` version 9.3+ * `nvcc` version 11.4+ -* `cmake` version 3.26.4+ +* `cmake` version 3.30.4+ CUDA/GPU requirements: diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index ef409d68e..21d76a117 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -32,6 +32,9 @@ function sed_runner() { # Centralized version file update echo "${NEXT_FULL_TAG}" > VERSION +# Examples update +sed_runner "s/RMM_TAG branch-[0-9.]*/RMM_TAG branch-${NEXT_SHORT_TAG}/" examples/versions.cmake + # 
CI files for FILE in .github/workflows/*.yaml; do sed_runner "/shared-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}" diff --git a/conda/recipes/librmm/recipe.yaml b/conda/recipes/librmm/recipe.yaml index a59f67122..5003180e5 100644 --- a/conda/recipes/librmm/recipe.yaml +++ b/conda/recipes/librmm/recipe.yaml @@ -18,8 +18,8 @@ cache: build: script: - content: - - ./build.sh -n -v clean librmm tests benchmarks --cmake-args=\"-DCMAKE_INSTALL_LIBDIR=lib\" + content: | + ./build.sh -n -v clean librmm tests benchmarks --cmake-args=\"-DCMAKE_INSTALL_LIBDIR=lib\" secrets: - AWS_ACCESS_KEY_ID - AWS_SECRET_ACCESS_KEY @@ -121,3 +121,55 @@ outputs: homepage: ${{ load_from_file("python/librmm/pyproject.toml").project.urls.Homepage }} license: ${{ load_from_file("python/librmm/pyproject.toml").project.license.text | replace(" ", "-") }} summary: librmm test & benchmark executables + + - package: + name: librmm-example + version: ${{ version }} + build: + string: cuda${{ cuda_major }}_${{ date_string }}_${{ head_rev }} + script: + content: | + ./examples/build.sh --install + env: + CMAKE_C_COMPILER_LAUNCHER: ${{ env.get("CMAKE_C_COMPILER_LAUNCHER") }} + CMAKE_CUDA_COMPILER_LAUNCHER: ${{ env.get("CMAKE_CUDA_COMPILER_LAUNCHER") }} + CMAKE_CXX_COMPILER_LAUNCHER: ${{ env.get("CMAKE_CXX_COMPILER_LAUNCHER") }} + CMAKE_GENERATOR: ${{ env.get("CMAKE_GENERATOR") }} + PARALLEL_LEVEL: ${{ env.get("PARALLEL_LEVEL") }} + SCCACHE_BUCKET: ${{ env.get("SCCACHE_BUCKET") }} + SCCACHE_IDLE_TIMEOUT: ${{ env.get("SCCACHE_IDLE_TIMEOUT") }} + SCCACHE_REGION: ${{ env.get("SCCACHE_REGION") }} + SCCACHE_S3_USE_SSL: ${{ env.get("SCCACHE_S3_USE_SSL") }} + SCCACHE_S3_NO_CREDENTIALS: ${{ env.get("SCCACHE_S3_NO_CREDENTIALS") }} + SCCACHE_S3_KEY_PREFIX: librmm-${{ env.get("RAPIDS_CONDA_ARCH") }} + requirements: + build: + - cmake ${{ cmake_version }} + - ninja + - ${{ compiler("c") }} + - ${{ compiler("cxx") }} + - ${{ compiler("cuda") }} + - cuda-version =${{ cuda_version }} + host: + - cuda-version =${{ cuda_version }} + - if: cuda_major == "11" + then: cudatoolkit + else: cuda-cudart-dev + - ${{ pin_subpackage("librmm", exact=True) }} + run: + - if: cuda_major == "11" + then: cudatoolkit + else: cuda-cudart + - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }} + - ${{ pin_subpackage("librmm", exact=True) }} + ignore_run_exports: + from_package: + - if: cuda_major != "11" + then: cuda-cudart-dev + by_name: + - cuda-version + - librmm + about: + homepage: ${{ load_from_file("python/librmm/pyproject.toml").project.urls.Homepage }} + license: ${{ load_from_file("python/librmm/pyproject.toml").project.license.text | replace(" ", "-") }} + summary: librmm example executables diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 000000000..138f3ac0e --- /dev/null +++ b/examples/README.md @@ -0,0 +1,7 @@ +# RMM C++ Examples + +This folder contains examples to demonstrate librmm use cases. Running `build.sh` builds all examples. + +Current examples: + +- Basic: demonstrates memory resource construction and allocating a `device_uvector` on a stream. diff --git a/examples/basic/CMakeLists.txt b/examples/basic/CMakeLists.txt new file mode 100644 index 000000000..d74a934a5 --- /dev/null +++ b/examples/basic/CMakeLists.txt @@ -0,0 +1,25 @@ +# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
+
+cmake_minimum_required(VERSION 3.30.4)
+
+include(../set_cuda_architecture.cmake)
+
+# initialize cuda architecture
+rapids_cuda_init_architectures(basic_example)
+
+project(
+  basic_example
+  VERSION 0.0.1
+  LANGUAGES CXX CUDA)
+
+include(../fetch_dependencies.cmake)
+
+include(rapids-cmake)
+rapids_cmake_build_type("Release")
+
+# Configure your project here
+add_executable(basic_example src/basic.cpp)
+target_link_libraries(basic_example PRIVATE rmm::rmm)
+target_compile_features(basic_example PRIVATE cxx_std_17)
+
+install(TARGETS basic_example DESTINATION bin/examples/librmm)
diff --git a/examples/basic/README.md b/examples/basic/README.md
new file mode 100644
index 000000000..5d85fdcbc
--- /dev/null
+++ b/examples/basic/README.md
@@ -0,0 +1,19 @@
+# Basic Standalone librmm CUDA C++ application
+
+This C++ example demonstrates a basic librmm use case and provides a minimal
+example of building your own application based on librmm using CMake.
+
+The example source code creates a CUDA async memory resource and a CUDA
+stream, then uses them to allocate a `device_uvector`. The vector's device
+memory is freed automatically when the vector goes out of scope.
+
+## Compile and execute
+
+```bash
+# Configure project
+cmake -S . -B build/
+# Build
+cmake --build build/ --parallel $PARALLEL_LEVEL
+# Execute
+build/basic_example
+```
diff --git a/examples/basic/src/basic.cpp b/examples/basic/src/basic.cpp
new file mode 100644
index 000000000..1f31ccfef
--- /dev/null
+++ b/examples/basic/src/basic.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <rmm/cuda_device.hpp>
+#include <rmm/cuda_stream.hpp>
+#include <rmm/device_uvector.hpp>
+#include <rmm/mr/device/cuda_async_memory_resource.hpp>
+#include <iostream>
+
+int main(int argc, char** argv)
+{
+  // Construct a CUDA async memory resource using RAPIDS Memory Manager (RMM).
+  // This uses a memory pool managed by the CUDA driver, with an initial pool
+  // size of half of the currently free GPU memory.
+  rmm::mr::cuda_async_memory_resource mr{rmm::percent_of_free_device_memory(50)};
+
+  // Create a CUDA stream for asynchronous allocations
+  auto stream = rmm::cuda_stream{};
+
+  // Create a device_uvector with this stream and memory resource
+  auto const size{12345};
+  rmm::device_uvector<int> vec(size, stream, mr);
+  std::cout << "vec size: " << vec.size() << std::endl;
+
+  // Synchronize the stream
+  stream.synchronize();
+
+  return 0;
+}
diff --git a/examples/build.sh b/examples/build.sh
new file mode 100755
index 000000000..318e57f73
--- /dev/null
+++ b/examples/build.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+# Copyright (c) 2025, NVIDIA CORPORATION.
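+#
+# Options: -i / --install  install the built example binaries after building.
+# Environment: PARALLEL_LEVEL sets the number of parallel build jobs (default: 4);
+#   LIB_BUILD_DIR points at an existing librmm build directory.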
+ +# librmm examples build script + +set -euo pipefail + +# Parallelism control +PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} +# Installation disabled by default +INSTALL_EXAMPLES=false + +# Check for -i or --install flags to enable installation +ARGS=$(getopt -o i --long install -- "$@") +eval set -- "$ARGS" +while [ : ]; do + case "$1" in + -i | --install) + INSTALL_EXAMPLES=true + shift + ;; + --) shift; + break + ;; + esac +done + +# Root of examples +EXAMPLES_DIR=$(dirname "$(realpath "$0")") + +# Set up default librmm build directory and install prefix if conda build +if [ "${CONDA_BUILD:-"0"}" == "1" ]; then + LIB_BUILD_DIR="${LIB_BUILD_DIR:-${SRC_DIR/cpp/build}}" + INSTALL_PREFIX="${INSTALL_PREFIX:-${PREFIX}}" +fi + +# librmm build directory +LIB_BUILD_DIR=${LIB_BUILD_DIR:-$(readlink -f "${EXAMPLES_DIR}/../build")} + +################################################################################ +# Add individual librmm examples build scripts down below + +build_example() { + example_dir=${1} + example_dir="${EXAMPLES_DIR}/${example_dir}" + build_dir="${example_dir}/build" + + # Configure + cmake -S ${example_dir} -B ${build_dir} -Drmm_ROOT="${LIB_BUILD_DIR}" + # Build + cmake --build ${build_dir} -j${PARALLEL_LEVEL} + # Install if needed + if [ "$INSTALL_EXAMPLES" = true ]; then + cmake --install ${build_dir} --prefix ${INSTALL_PREFIX:-${example_dir}/install} + fi +} + +build_example basic diff --git a/examples/fetch_dependencies.cmake b/examples/fetch_dependencies.cmake new file mode 100644 index 000000000..0c949769b --- /dev/null +++ b/examples/fetch_dependencies.cmake @@ -0,0 +1,30 @@ +# ============================================================================= +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +include(${CMAKE_CURRENT_LIST_DIR}/versions.cmake) + +set(CPM_DOWNLOAD_VERSION v0.40.5) +file( + DOWNLOAD + https://github.com/cpm-cmake/CPM.cmake/releases/download/${CPM_DOWNLOAD_VERSION}/get_cpm.cmake + ${CMAKE_BINARY_DIR}/cmake/get_cpm.cmake) +include(${CMAKE_BINARY_DIR}/cmake/get_cpm.cmake) + +# find or build it via CPM +CPMFindPackage( + NAME rmm + FIND_PACKAGE_ARGUMENTS "PATHS ${rmm_ROOT} ${rmm_ROOT}/latest" GIT_REPOSITORY + https://github.com/rapidsai/rmm + GIT_TAG ${RMM_TAG} + GIT_SHALLOW TRUE) diff --git a/examples/set_cuda_architecture.cmake b/examples/set_cuda_architecture.cmake new file mode 100644 index 000000000..e8b3d9b56 --- /dev/null +++ b/examples/set_cuda_architecture.cmake @@ -0,0 +1,27 @@ +# ============================================================================= +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +include(${CMAKE_CURRENT_LIST_DIR}/versions.cmake) + +if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/librmm_cpp_examples_RAPIDS.cmake) + file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/${RMM_TAG}/RAPIDS.cmake + ${CMAKE_CURRENT_BINARY_DIR}/librmm_cpp_examples_RAPIDS.cmake) +endif() +include(${CMAKE_CURRENT_BINARY_DIR}/librmm_cpp_examples_RAPIDS.cmake) + +include(rapids-cmake) +include(rapids-cpm) +include(rapids-cuda) +include(rapids-export) +include(rapids-find) diff --git a/examples/versions.cmake b/examples/versions.cmake new file mode 100644 index 000000000..9969c695c --- /dev/null +++ b/examples/versions.cmake @@ -0,0 +1,15 @@ +# ============================================================================= +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +set(RMM_TAG branch-25.04) diff --git a/include/rmm/device_buffer.hpp b/include/rmm/device_buffer.hpp index cbb2853c3..25b921614 100644 --- a/include/rmm/device_buffer.hpp +++ b/include/rmm/device_buffer.hpp @@ -92,8 +92,7 @@ class device_buffer { */ // Note: we cannot use `device_buffer() = default;` because nvcc implicitly adds // `__host__ __device__` specifiers to the defaulted constructor when it is called within the - // context of both host and device functions. Specifically, the `cudf::type_dispatcher` is a host- - // device function. This causes warnings/errors because this ctor invokes host-only functions. + // context of both host and device functions. device_buffer() : _mr{rmm::mr::get_current_device_resource_ref()} {} /** diff --git a/include/rmm/mr/device/device_memory_resource.hpp b/include/rmm/mr/device/device_memory_resource.hpp index 9ae390d8c..b1c5ff7d3 100644 --- a/include/rmm/mr/device/device_memory_resource.hpp +++ b/include/rmm/mr/device/device_memory_resource.hpp @@ -33,7 +33,7 @@ namespace mr { */ /** - * @brief Base class for all libcudf device memory allocation. + * @brief Base class for all librmm device memory allocation. * * This class serves as the interface that all custom device memory * implementations must satisfy.