diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 594ba8c3c..77b90fa20 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -13,6 +13,7 @@ RUN apt update -y \ && rm -rf /tmp/* /var/tmp/* /var/cache/apt/* /var/lib/apt/lists/*; ENV DEFAULT_VIRTUAL_ENV=rapids +ENV RAPIDS_LIBUCX_PREFER_SYSTEM_LIBRARY=true FROM ${BASE} as conda-base diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json index 05f11c005..f03ec7b19 100644 --- a/.devcontainer/cuda11.8-conda/devcontainer.json +++ b/.devcontainer/cuda11.8-conda/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.12-cpp-cuda11.8-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda11.8-mambaforge-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda11.8-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda11.8-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index b4c507f86..a59c499d3 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -5,24 +5,24 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.12-cpp-cuda11.8-ucx1.17.0-openmpi-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda11.8-ucx1.17.0-openmpi-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda11.8-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda11.8-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/cuda:24.12": { + "ghcr.io/rapidsai/devcontainers/features/cuda:25.2": { "version": "11.8", "installcuBLAS": true, "installcuSOLVER": true, "installcuRAND": true, "installcuSPARSE": true }, - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/ucx", diff --git a/.devcontainer/cuda12.5-conda/devcontainer.json b/.devcontainer/cuda12.5-conda/devcontainer.json index 4f8d628c2..39852cec1 100644 --- a/.devcontainer/cuda12.5-conda/devcontainer.json +++ b/.devcontainer/cuda12.5-conda/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "12.5", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.12-cpp-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-mambaforge-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda12.5-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.5-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda12.5-pip/devcontainer.json b/.devcontainer/cuda12.5-pip/devcontainer.json index 8e6ba4de8..d84966656 100644 --- a/.devcontainer/cuda12.5-pip/devcontainer.json +++ b/.devcontainer/cuda12.5-pip/devcontainer.json @@ -5,24 +5,24 @@ "args": { "CUDA": "12.5", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.12-cpp-cuda12.5-ucx1.17.0-openmpi-ubuntu22.04" + "BASE": "rapidsai/devcontainers:25.02-cpp-cuda12.5-ucx1.17.0-openmpi-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda12.5-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.02-cuda12.5-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/cuda:24.12": { + "ghcr.io/rapidsai/devcontainers/features/cuda:25.2": { "version": "12.5", "installcuBLAS": true, "installcuSOLVER": true, "installcuRAND": true, "installcuSPARSE": true }, - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.2": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/ucx", diff --git a/.github/copy-pr-bot.yaml b/.github/copy-pr-bot.yaml index 895ba83ee..e0ea775aa 100644 --- a/.github/copy-pr-bot.yaml +++ b/.github/copy-pr-bot.yaml @@ -2,3 +2,4 @@ # https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/ enabled: true +auto_sync_draft: false diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 7ac02e365..e93b7a694 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: rust-build: needs: cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -50,7 +50,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -59,7 +59,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -70,7 +70,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -82,7 +82,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-cuvs: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -92,7 +92,7 @@ jobs: wheel-publish-cuvs: needs: wheel-build-cuvs secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-25.02 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 78648235f..91f51bd90 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -12,6 +12,7 @@ concurrency: jobs: pr-builder: needs: + - check-nightly-ci - changed-files - checks - conda-cpp-build @@ -25,13 +26,25 @@ jobs: - wheel-tests-cuvs - devcontainer secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.02 if: always() with: needs: ${{ toJSON(needs) }} + check-nightly-ci: + # Switch to ubuntu-latest once it defaults to a version of Ubuntu that + # provides at least Python 3.11 (see + # https://docs.python.org/3/library/datetime.html#datetime.date.fromisoformat) + runs-on: ubuntu-24.04 + env: + RAPIDS_GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + steps: + - name: Check if nightly CI is passing + uses: rapidsai/shared-actions/check_nightly_success/dispatch@main + with: + repo: cuvs changed-files: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.02 with: files_yaml: | test_cpp: @@ -64,27 +77,27 @@ jobs: - '!thirdparty/LICENSES/**' checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.02 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.02 with: build_type: pull-request node_type: cpu16 conda-cpp-tests: needs: [conda-cpp-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp with: build_type: pull-request conda-cpp-checks: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.02 with: build_type: pull-request enable_check_symbols: true @@ -92,20 +105,20 @@ jobs: conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.02 with: build_type: pull-request conda-python-tests: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -115,7 +128,7 @@ jobs: rust-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -125,21 +138,21 @@ jobs: wheel-build-cuvs: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.02 with: build_type: pull-request script: ci/build_wheel_cuvs.sh wheel-tests-cuvs: needs: [wheel-build-cuvs, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request script: ci/test_wheel_cuvs.sh devcontainer: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.02 with: arch: '["amd64"]' cuda: '["12.5"]' diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 27dc99a11..e3bf5d16f 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -26,7 +26,7 @@ jobs: symbol_exclusions: (void (thrust::|cub::)) conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -34,7 +34,7 @@ jobs: sha: ${{ inputs.sha }} conda-python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} @@ -42,7 +42,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests-cuvs: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.02 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/.github/workflows/trigger-breaking-change-alert.yaml b/.github/workflows/trigger-breaking-change-alert.yaml new file mode 100644 index 000000000..01dd2436b --- /dev/null +++ b/.github/workflows/trigger-breaking-change-alert.yaml @@ -0,0 +1,26 @@ +name: Trigger Breaking Change Notifications + +on: + pull_request_target: + types: + - closed + - reopened + - labeled + - unlabeled + +jobs: + trigger-notifier: + if: contains(github.event.pull_request.labels.*.name, 'breaking') + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-25.02 + with: + sender_login: ${{ github.event.sender.login }} + sender_avatar: ${{ github.event.sender.avatar_url }} + repo: ${{ github.repository }} + pr_number: ${{ github.event.pull_request.number }} + pr_title: "${{ github.event.pull_request.title }}" + pr_body: "${{ github.event.pull_request.body || '_Empty PR description_' }}" + pr_base_ref: ${{ github.event.pull_request.base.ref }} + pr_author: ${{ github.event.pull_request.user.login }} + event_action: ${{ github.event.action }} + pr_merged: ${{ github.event.pull_request.merged }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5e53abd92..fcfc7e1fa 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -108,8 +108,7 @@ repos: [.](cmake|cpp|cu|cuh|h|hpp|sh|pxd|py|pyx|rs)$| CMakeLists[.]txt$| CMakeLists_standalone[.]txt$| - meta[.]yaml$| - setup[.]cfg$ + meta[.]yaml$ exclude: | (?x) docs/source/sphinxext/github_link\.py| diff --git a/README.md b/README.md index 23759f598..dac71c881 100755 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ There are several benefits to using cuVS and GPUs for vector search, including 6. Multiple language support 7. Building blocks for composing new or accelerating existing algorithms -In addition to the items above, cuVS takes on the burden of keeping non-trivial accelerated code up to date as new NVIDIA architectures and CUDA versions are released. This provides a deslightful development experimence, guaranteeing that any libraries, databases, or applications built on top of it will always be getting the best performance and scale. +In addition to the items above, cuVS takes on the burden of keeping non-trivial accelerated code up to date as new NVIDIA architectures and CUDA versions are released. This provides a delightful development experience, guaranteeing that any libraries, databases, or applications built on top of it will always be getting the best performance and scale. ## cuVS Technology Stack @@ -109,7 +109,7 @@ pip install cuvs-cu12 --extra-index-url=https://pypi.nvidia.com If installing a version that has not yet been released, the `rapidsai` channel can be replaced with `rapidsai-nightly`: ```bash -conda install -c conda-forge -c nvidia -c rapidsai-nightly cuvs=24.12 +conda install -c conda-forge -c nvidia -c rapidsai-nightly cuvs=25.02 ``` cuVS also has `pip` wheel packages that can be installed. Please see the [Build and Install Guide](https://docs.rapids.ai/api/cuvs/nightly/build/) for more information on installing the available cuVS packages and building from source. diff --git a/VERSION b/VERSION index af28c42b5..72eefaf7c 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -24.12.00 +25.02.00 diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index 80bfb0c24..01853da84 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -7,7 +7,7 @@ channels: - conda-forge - nvidia dependencies: -- breathe +- breathe>=4.35.0 - c-compiler - clang - clang-tools=16.0.6 @@ -15,7 +15,7 @@ dependencies: - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 -- cuda-python>=11.7.1,<12.0a0,<=11.8.3 +- cuda-python>=11.8.5,<12.0a0 - cuda-version=11.8 - cudatoolkit - cupy>=12.0.0 @@ -35,7 +35,7 @@ dependencies: - libcusolver=11.4.1.48 - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 -- librmm==24.12.*,>=0.0.0a0 +- librmm==25.2.*,>=0.0.0a0 - make - nccl>=2.19 - ninja @@ -44,8 +44,7 @@ dependencies: - nvcc_linux-aarch64=11.8 - openblas - pre-commit -- pydata-sphinx-theme -- pylibraft==24.12.*,>=0.0.0a0 +- pylibraft==25.2.*,>=0.0.0a0 - pytest-cov - pytest==7.* - rapids-build-backend>=0.3.0,<0.4.0.dev0 @@ -55,5 +54,8 @@ dependencies: - scikit-learn - sphinx-copybutton - sphinx-markdown-tables +- sphinx>=8.0.0 - sysroot_linux-aarch64==2.17 +- pip: + - nvidia-sphinx-theme name: all_cuda-118_arch-aarch64 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 07937726c..a1ad68d7f 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -7,7 +7,7 @@ channels: - conda-forge - nvidia dependencies: -- breathe +- breathe>=4.35.0 - c-compiler - clang - clang-tools=16.0.6 @@ -15,7 +15,7 @@ dependencies: - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 -- cuda-python>=11.7.1,<12.0a0,<=11.8.3 +- cuda-python>=11.8.5,<12.0a0 - cuda-version=11.8 - cudatoolkit - cupy>=12.0.0 @@ -35,7 +35,7 @@ dependencies: - libcusolver=11.4.1.48 - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 -- librmm==24.12.*,>=0.0.0a0 +- librmm==25.2.*,>=0.0.0a0 - make - nccl>=2.19 - ninja @@ -44,8 +44,7 @@ dependencies: - nvcc_linux-64=11.8 - openblas - pre-commit -- pydata-sphinx-theme -- pylibraft==24.12.*,>=0.0.0a0 +- pylibraft==25.2.*,>=0.0.0a0 - pytest-cov - pytest==7.* - rapids-build-backend>=0.3.0,<0.4.0.dev0 @@ -55,5 +54,8 @@ dependencies: - scikit-learn - sphinx-copybutton - sphinx-markdown-tables +- sphinx>=8.0.0 - sysroot_linux-64==2.17 +- pip: + - nvidia-sphinx-theme name: all_cuda-118_arch-x86_64 diff --git a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-125_arch-aarch64.yaml index b7fd6fcfa..ee0213fff 100644 --- a/conda/environments/all_cuda-125_arch-aarch64.yaml +++ b/conda/environments/all_cuda-125_arch-aarch64.yaml @@ -7,7 +7,7 @@ channels: - conda-forge - nvidia dependencies: -- breathe +- breathe>=4.35.0 - c-compiler - clang - clang-tools=16.0.6 @@ -17,7 +17,7 @@ dependencies: - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api -- cuda-python>=12.0,<13.0a0,<=12.6.0 +- cuda-python>=12.6.2,<13.0a0 - cuda-version=12.5 - cupy>=12.0.0 - cxx-compiler @@ -32,7 +32,7 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- librmm==24.12.*,>=0.0.0a0 +- librmm==25.2.*,>=0.0.0a0 - make - nccl>=2.19 - ninja @@ -40,8 +40,7 @@ dependencies: - numpydoc - openblas - pre-commit -- pydata-sphinx-theme -- pylibraft==24.12.*,>=0.0.0a0 +- pylibraft==25.2.*,>=0.0.0a0 - pytest-cov - pytest==7.* - rapids-build-backend>=0.3.0,<0.4.0.dev0 @@ -51,5 +50,8 @@ dependencies: - scikit-learn - sphinx-copybutton - sphinx-markdown-tables +- sphinx>=8.0.0 - sysroot_linux-aarch64==2.17 +- pip: + - nvidia-sphinx-theme name: all_cuda-125_arch-aarch64 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 83a457465..d93dcaf7a 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -7,7 +7,7 @@ channels: - conda-forge - nvidia dependencies: -- breathe +- breathe>=4.35.0 - c-compiler - clang - clang-tools=16.0.6 @@ -17,7 +17,7 @@ dependencies: - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api -- cuda-python>=12.0,<13.0a0,<=12.6.0 +- cuda-python>=12.6.2,<13.0a0 - cuda-version=12.5 - cupy>=12.0.0 - cxx-compiler @@ -32,7 +32,7 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- librmm==24.12.*,>=0.0.0a0 +- librmm==25.2.*,>=0.0.0a0 - make - nccl>=2.19 - ninja @@ -40,8 +40,7 @@ dependencies: - numpydoc - openblas - pre-commit -- pydata-sphinx-theme -- pylibraft==24.12.*,>=0.0.0a0 +- pylibraft==25.2.*,>=0.0.0a0 - pytest-cov - pytest==7.* - rapids-build-backend>=0.3.0,<0.4.0.dev0 @@ -51,5 +50,8 @@ dependencies: - scikit-learn - sphinx-copybutton - sphinx-markdown-tables +- sphinx>=8.0.0 - sysroot_linux-64==2.17 +- pip: + - nvidia-sphinx-theme name: all_cuda-125_arch-x86_64 diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml index 59d471bda..a90dc03e7 100644 --- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml @@ -15,11 +15,11 @@ dependencies: - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 -- cuda-python>=11.7.1,<12.0a0,<=11.8.3 +- cuda-python>=11.8.5,<12.0a0 - cuda-version=11.8 - cudatoolkit - cupy>=12.0.0 -- cuvs==24.12.*,>=0.0.0a0 +- cuvs==25.2.*,>=0.0.0a0 - cxx-compiler - cython>=3.0.0 - dlpack>=0.8,<1.0 @@ -34,8 +34,8 @@ dependencies: - libcusolver=11.4.1.48 - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 -- libcuvs==24.12.*,>=0.0.0a0 -- librmm==24.12.*,>=0.0.0a0 +- libcuvs==25.2.*,>=0.0.0a0 +- librmm==25.2.*,>=0.0.0a0 - matplotlib - nccl>=2.19 - ninja @@ -43,7 +43,7 @@ dependencies: - nvcc_linux-aarch64=11.8 - openblas - pandas -- pylibraft==24.12.*,>=0.0.0a0 +- pylibraft==25.2.*,>=0.0.0a0 - pyyaml - rapids-build-backend>=0.3.0,<0.4.0.dev0 - setuptools diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 31a416eb5..b7344c822 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -15,11 +15,11 @@ dependencies: - cmake>=3.26.4,!=3.30.0 - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 -- cuda-python>=11.7.1,<12.0a0,<=11.8.3 +- cuda-python>=11.8.5,<12.0a0 - cuda-version=11.8 - cudatoolkit - cupy>=12.0.0 -- cuvs==24.12.*,>=0.0.0a0 +- cuvs==25.2.*,>=0.0.0a0 - cxx-compiler - cython>=3.0.0 - dlpack>=0.8,<1.0 @@ -34,8 +34,8 @@ dependencies: - libcusolver=11.4.1.48 - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 -- libcuvs==24.12.*,>=0.0.0a0 -- librmm==24.12.*,>=0.0.0a0 +- libcuvs==25.2.*,>=0.0.0a0 +- librmm==25.2.*,>=0.0.0a0 - matplotlib - nccl>=2.19 - ninja @@ -43,7 +43,7 @@ dependencies: - nvcc_linux-64=11.8 - openblas - pandas -- pylibraft==24.12.*,>=0.0.0a0 +- pylibraft==25.2.*,>=0.0.0a0 - pyyaml - rapids-build-backend>=0.3.0,<0.4.0.dev0 - setuptools diff --git a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml index 3efe9ebde..da7229004 100644 --- a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml @@ -17,10 +17,10 @@ dependencies: - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api -- cuda-python>=12.0,<13.0a0,<=12.6.0 +- cuda-python>=12.6.2,<13.0a0 - cuda-version=12.5 - cupy>=12.0.0 -- cuvs==24.12.*,>=0.0.0a0 +- cuvs==25.2.*,>=0.0.0a0 - cxx-compiler - cython>=3.0.0 - dlpack>=0.8,<1.0 @@ -31,15 +31,15 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==24.12.*,>=0.0.0a0 -- librmm==24.12.*,>=0.0.0a0 +- libcuvs==25.2.*,>=0.0.0a0 +- librmm==25.2.*,>=0.0.0a0 - matplotlib - nccl>=2.19 - ninja - nlohmann_json>=3.11.2 - openblas - pandas -- pylibraft==24.12.*,>=0.0.0a0 +- pylibraft==25.2.*,>=0.0.0a0 - pyyaml - rapids-build-backend>=0.3.0,<0.4.0.dev0 - setuptools diff --git a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml index 7fbd77368..5d1dd8fc7 100644 --- a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml @@ -17,10 +17,10 @@ dependencies: - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api -- cuda-python>=12.0,<13.0a0,<=12.6.0 +- cuda-python>=12.6.2,<13.0a0 - cuda-version=12.5 - cupy>=12.0.0 -- cuvs==24.12.*,>=0.0.0a0 +- cuvs==25.2.*,>=0.0.0a0 - cxx-compiler - cython>=3.0.0 - dlpack>=0.8,<1.0 @@ -31,15 +31,15 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==24.12.*,>=0.0.0a0 -- librmm==24.12.*,>=0.0.0a0 +- libcuvs==25.2.*,>=0.0.0a0 +- librmm==25.2.*,>=0.0.0a0 - matplotlib - nccl>=2.19 - ninja - nlohmann_json>=3.11.2 - openblas - pandas -- pylibraft==24.12.*,>=0.0.0a0 +- pylibraft==25.2.*,>=0.0.0a0 - pyyaml - rapids-build-backend>=0.3.0,<0.4.0.dev0 - setuptools diff --git a/conda/recipes/cuvs-bench/meta.yaml b/conda/recipes/cuvs-bench/meta.yaml index 0681a1038..d77aee8ce 100644 --- a/conda/recipes/cuvs-bench/meta.yaml +++ b/conda/recipes/cuvs-bench/meta.yaml @@ -79,6 +79,7 @@ requirements: - python - rapids-build-backend>=0.3.0,<0.4.0.dev0 - rmm ={{ minor_version }} + - setuptools>=64.0.0 run: - benchmark diff --git a/conda/recipes/cuvs/meta.yaml b/conda/recipes/cuvs/meta.yaml index 560c95feb..ad7ffe756 100644 --- a/conda/recipes/cuvs/meta.yaml +++ b/conda/recipes/cuvs/meta.yaml @@ -43,10 +43,10 @@ requirements: - {{ stdlib("c") }} host: {% if cuda_major == "11" %} - - cuda-python >=11.7.1,<12.0a0,<=11.8.3 + - cuda-python >=11.8.5,<12.0a0 - cudatoolkit {% else %} - - cuda-python >=12.0,<13.0a0,<=12.6.0 + - cuda-python >=12.6.2,<13.0a0 - cuda-cudart-dev {% endif %} - cuda-version ={{ cuda_version }} @@ -61,10 +61,10 @@ requirements: - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} {% if cuda_major == "11" %} - cudatoolkit - - cuda-python >=11.7.1,<12.0a0,<=11.8.3 + - cuda-python >=11.8.5,<12.0a0 {% else %} - cuda-cudart - - cuda-python >=12.0,<13.0a0,<=12.6.0 + - cuda-python >=12.6.2,<13.0a0 {% endif %} - pylibraft {{ minor_version }} - libcuvs {{ version }} diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 328d643b4..78862cb33 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -485,12 +485,14 @@ if(BUILD_SHARED_LIBS) "$<$:${CUVS_CUDA_FLAGS}>" ) target_link_libraries( - cuvs_objs PUBLIC raft::raft rmm::rmm ${CUVS_CTK_MATH_DEPENDENCIES} - $ + cuvs_objs + PUBLIC raft::raft rmm::rmm rmm::rmm_logger ${CUVS_CTK_MATH_DEPENDENCIES} + $ + PRIVATE rmm::rmm_logger_impl raft::raft_logger_impl ) - add_library(cuvs SHARED $) - add_library(cuvs_static STATIC $) + add_library(cuvs SHARED $,EXCLUDE,rmm.*logger>) + add_library(cuvs_static STATIC $,EXCLUDE,rmm.*logger>) target_compile_options( cuvs INTERFACE $<$:--expt-extended-lambda @@ -702,7 +704,7 @@ target_compile_definitions(cuvs::cuvs INTERFACE $<$:NVTX_ENAB target_link_libraries( cuvs_c PUBLIC cuvs::cuvs ${CUVS_CTK_MATH_DEPENDENCIES} - PRIVATE raft::raft + PRIVATE raft::raft rmm::rmm_logger_impl raft::raft_logger_impl ) # ensure CUDA symbols aren't relocated to the middle of the debug build binaries diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index c161a68bc..200b52ab3 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -126,9 +126,11 @@ function(ConfigureAnnBench) PRIVATE ${ConfigureAnnBench_LINKS} nlohmann_json::nlohmann_json Threads::Threads + $ $<$:CUDA::cudart_static> $ $ + $ ) set_target_properties( @@ -174,6 +176,13 @@ function(ConfigureAnnBench) add_dependencies(CUVS_ANN_BENCH_ALL ${BENCH_NAME}) endfunction() +if(CUVS_FAISS_ENABLE_GPU OR CUVS_ANN_BENCH_SINGLE_EXE) + add_library(cuvs_bench_logger OBJECT) + target_link_libraries( + cuvs_bench_logger PRIVATE rmm::rmm_logger_impl $ + ) +endif() + # ################################################################################################## # * Configure benchmark targets ------------------------------------------------------------- @@ -297,8 +306,14 @@ if(CUVS_ANN_BENCH_SINGLE_EXE) target_link_libraries( ANN_BENCH - PRIVATE raft::raft nlohmann_json::nlohmann_json benchmark::benchmark dl fmt::fmt-header-only - spdlog::spdlog_header_only $<$:CUDA::nvtx3> + PRIVATE raft::raft + nlohmann_json::nlohmann_json + benchmark::benchmark + dl + fmt::fmt-header-only + spdlog::spdlog_header_only + $<$:CUDA::nvtx3> + cuvs_bench_logger ) set_target_properties( ANN_BENCH diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp index 06e1e27af..49be78673 100644 --- a/cpp/bench/ann/src/common/benchmark.hpp +++ b/cpp/bench/ann/src/common/benchmark.hpp @@ -597,18 +597,16 @@ inline auto parse_string_flag(const char* arg, const char* pat, std::string& res inline auto run_main(int argc, char** argv) -> int { - bool force_overwrite = false; - bool build_mode = false; - bool search_mode = false; - bool no_lap_sync = false; - std::string data_prefix = "data"; - std::string index_prefix = "index"; - std::string new_override_kv = ""; - std::string mode = "latency"; - std::string threads_arg_txt = ""; - std::vector threads = {1, -1}; // min_thread, max_thread - std::string log_level_str = ""; - [[maybe_unused]] int raft_log_level = 0; // raft::logger::get(RAFT_NAME).get_level(); + bool force_overwrite = false; + bool build_mode = false; + bool search_mode = false; + bool no_lap_sync = false; + std::string data_prefix = "data"; + std::string index_prefix = "index"; + std::string new_override_kv = ""; + std::string mode = "latency"; + std::string threads_arg_txt = ""; + std::vector threads = {1, -1}; // min_thread, max_thread kv_series override_kv{}; char arg0_default[] = "benchmark"; // NOLINT @@ -639,12 +637,7 @@ inline auto run_main(int argc, char** argv) -> int parse_string_flag(argv[i], "--index_prefix", index_prefix) || parse_string_flag(argv[i], "--mode", mode) || parse_string_flag(argv[i], "--override_kv", new_override_kv) || - parse_string_flag(argv[i], "--threads", threads_arg_txt) || - parse_string_flag(argv[i], "--raft_log_level", log_level_str)) { - if (!log_level_str.empty()) { - raft_log_level = std::stoi(log_level_str); - log_level_str = ""; - } + parse_string_flag(argv[i], "--threads", threads_arg_txt)) { if (!threads_arg_txt.empty()) { auto threads_arg = split(threads_arg_txt, ':'); threads[0] = std::stoi(threads_arg[0]); @@ -673,8 +666,6 @@ inline auto run_main(int argc, char** argv) -> int } } - // raft::logger::get(RAFT_NAME).set_level(raft_log_level); - Mode metric_objective = Mode::kLatency; if (mode == "throughput") { metric_objective = Mode::kThroughput; } diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake index 7640fbfa6..2e57df84e 100644 --- a/cpp/cmake/thirdparty/get_raft.cmake +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -39,7 +39,7 @@ function(find_and_configure_raft) # Invoke CPM find_package() #----------------------------------------------------- rapids_cpm_find(raft ${PKG_VERSION} - GLOBAL_TARGETS raft::raft + GLOBAL_TARGETS raft::raft raft::raft_logger raft::raft_logger_impl BUILD_EXPORT_SET cuvs-exports INSTALL_EXPORT_SET cuvs-exports COMPONENTS ${RAFT_COMPONENTS} @@ -50,7 +50,6 @@ function(find_and_configure_raft) OPTIONS "BUILD_TESTS OFF" "BUILD_PRIMS_BENCH OFF" - "BUILD_ANN_BENCH OFF" "RAFT_NVTX ${PKG_ENABLE_NVTX}" "RAFT_COMPILE_LIBRARY OFF" ) diff --git a/cpp/include/cuvs/cluster/kmeans.hpp b/cpp/include/cuvs/cluster/kmeans.hpp index 89b3acc24..cb8d36b10 100644 --- a/cpp/include/cuvs/cluster/kmeans.hpp +++ b/cpp/include/cuvs/cluster/kmeans.hpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -85,7 +86,7 @@ struct params : base_params { /** * verbosity level. */ - int verbosity = RAFT_LEVEL_INFO; + raft::level_enum verbosity = raft::level_enum::info; /** * Seed to the random number generator. diff --git a/cpp/include/cuvs/core/detail/interop.hpp b/cpp/include/cuvs/core/detail/interop.hpp index 2ed0b330d..19e4a922c 100644 --- a/cpp/include/cuvs/core/detail/interop.hpp +++ b/cpp/include/cuvs/core/detail/interop.hpp @@ -86,7 +86,6 @@ inline MdspanType from_dlpack(DLManagedTensor* managed_tensor) RAFT_EXPECTS(to_data_type.lanes == tensor.dtype.lanes, "lanes mismatch between return mdspan and DLTensor"); RAFT_EXPECTS(tensor.dtype.lanes == 1, "More than 1 DLTensor lanes not supported"); - RAFT_EXPECTS(tensor.strides == nullptr, "Strided memory layout for DLTensor not supported"); auto to_device = accessor_type_to_DLDevice(); if (to_device.device_type == kDLCUDA) { @@ -110,4 +109,36 @@ inline MdspanType from_dlpack(DLManagedTensor* managed_tensor) return MdspanType{reinterpret_cast(tensor.data), exts}; } +inline bool is_f_contiguous(DLManagedTensor* managed_tensor) +{ + auto tensor = managed_tensor->dl_tensor; + + if (!tensor.strides) { return false; } + int64_t expected_stride = 1; + for (int64_t i = 0; i < tensor.ndim; ++i) { + if (tensor.strides[i] != expected_stride) { return false; } + expected_stride *= tensor.shape[i]; + } + + return true; +} + +inline bool is_c_contiguous(DLManagedTensor* managed_tensor) +{ + auto tensor = managed_tensor->dl_tensor; + + if (!tensor.strides) { + // no stride information indicates a row-major tensor according to the dlpack spec + return true; + } + + int64_t expected_stride = 1; + for (int64_t i = tensor.ndim - 1; i >= 0; --i) { + if (tensor.strides[i] != expected_stride) { return false; } + expected_stride *= tensor.shape[i]; + } + + return true; +} + } // namespace cuvs::core::detail diff --git a/cpp/include/cuvs/core/interop.hpp b/cpp/include/cuvs/core/interop.hpp index 2462f02ec..096885f2f 100644 --- a/cpp/include/cuvs/core/interop.hpp +++ b/cpp/include/cuvs/core/interop.hpp @@ -51,9 +51,25 @@ inline bool is_dlpack_host_compatible(DLTensor tensor) return detail::is_dlpack_host_compatible(tensor); } +/** + * @brief Check if DLManagedTensor has a row-major (c-contiguous) layout + * + * @param tensor DLManagedTensor object to check + * @return bool + */ +inline bool is_c_contiguous(DLManagedTensor* tensor) { return detail::is_c_contiguous(tensor); } + +/** + * @brief Check if DLManagedTensor has a col-major (f-contiguous) layout + * + * @param tensor DLManagedTensor object to check + * @return bool + */ +inline bool is_f_contiguous(DLManagedTensor* tensor) { return detail::is_f_contiguous(tensor); } + /** * @brief Convert a DLManagedTensor to an mdspan - * NOTE: This function only supports compact row-major layouts. + * NOTE: This function only supports compact row-major and col-major layouts. * * @code {.cpp} * #include diff --git a/cpp/include/cuvs/neighbors/brute_force.hpp b/cpp/include/cuvs/neighbors/brute_force.hpp index d040e03db..99581469f 100644 --- a/cpp/include/cuvs/neighbors/brute_force.hpp +++ b/cpp/include/cuvs/neighbors/brute_force.hpp @@ -16,7 +16,6 @@ #pragma once -#include "common.hpp" #include #include #include @@ -28,6 +27,10 @@ namespace cuvs::neighbors::brute_force { +struct index_params : cuvs::neighbors::index_params {}; + +struct search_params : cuvs::neighbors::search_params {}; + /** * @defgroup bruteforce_cpp_index Bruteforce index * @{ @@ -41,6 +44,11 @@ namespace cuvs::neighbors::brute_force { */ template struct index : cuvs::neighbors::index { + using index_params_type = brute_force::index_params; + using search_params_type = brute_force::search_params; + using index_type = int64_t; + using value_type = T; + public: index(const index&) = delete; index(index&&) = default; @@ -181,20 +189,40 @@ struct index : cuvs::neighbors::index { * @code{.cpp} * using namespace cuvs::neighbors; * // create and fill the index from a [N, D] dataset - * auto index = brute_force::build(handle, dataset, metric); + * brute_force::index_params index_params; + * auto index = brute_force::build(handle, index_params, dataset); * @endcode * * @param[in] handle + * @param[in] index_params parameters such as the distance metric to use * @param[in] dataset a device pointer to a row-major matrix [n_rows, dim] - * @param[in] metric cuvs::distance::DistanceType - * @param[in] metric_arg metric argument * * @return the constructed brute-force index */ auto build(raft::resources const& handle, - raft::device_matrix_view dataset, - cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded, - float metric_arg = 0) -> cuvs::neighbors::brute_force::index; + const cuvs::neighbors::brute_force::index_params& index_params, + raft::device_matrix_view dataset) + -> cuvs::neighbors::brute_force::index; + +/** + * @brief Build the index from the dataset for efficient search. + * + * @param[in] handle + * @param[in] index_params parameters such as the distance metric to use + * @param[in] dataset a host pointer to a row-major matrix [n_rows, dim] + * + * @return the constructed brute-force index + */ +auto build(raft::resources const& handle, + const cuvs::neighbors::brute_force::index_params& index_params, + raft::host_matrix_view dataset) + -> cuvs::neighbors::brute_force::index; + +[[deprecated]] auto build( + raft::resources const& handle, + raft::device_matrix_view dataset, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded, + float metric_arg = 0) -> cuvs::neighbors::brute_force::index; /** * @brief Build the index from the dataset for efficient search. * @@ -202,62 +230,92 @@ auto build(raft::resources const& handle, * @code{.cpp} * using namespace cuvs::neighbors; * // create and fill the index from a [N, D] dataset - * auto index = brute_force::build(handle, dataset, metric); + * brute_force::index_params index_params; + * auto index = brute_force::build(handle, index_params, dataset); * @endcode * * @param[in] handle + * @param[in] index_params parameters such as the distance metric to use * @param[in] dataset a device pointer to a row-major matrix [n_rows, dim] - * @param[in] metric cuvs::distance::DistanceType - * @param[in] metric_arg metric argument * - * @return the constructed ivf-flat index + * @return the constructed brute force index */ auto build(raft::resources const& handle, - raft::device_matrix_view dataset, - cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded, - float metric_arg = 0) -> cuvs::neighbors::brute_force::index; + const cuvs::neighbors::brute_force::index_params& index_params, + raft::device_matrix_view dataset) + -> cuvs::neighbors::brute_force::index; + +/** + * @brief Build the index from the dataset for efficient search. + * + * @param[in] handle + * @param[in] index_params parameters such as the distance metric to use + * @param[in] dataset a host pointer to a row-major matrix [n_rows, dim] + * + * @return the constructed brute-force index + */ +auto build(raft::resources const& handle, + const cuvs::neighbors::brute_force::index_params& index_params, + raft::host_matrix_view dataset) + -> cuvs::neighbors::brute_force::index; + +[[deprecated]] auto build( + raft::resources const& handle, + raft::device_matrix_view dataset, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded, + float metric_arg = 0) -> cuvs::neighbors::brute_force::index; + /** * @brief Build the index from the dataset for efficient search. * * Usage example: * @code{.cpp} - * using namespace cuvs::neighbors; - * // create and fill the index from a [N, D] dataset - * auto index = brute_force::build(handle, dataset, metric); + * brute_force::index_params index_params; + * auto index = brute_force::build(handle, index_params, dataset); * @endcode * * @param[in] handle - * @param[in] dataset a device pointer to a col-major matrix [n_rows, dim] - * @param[in] metric cuvs::distance::DistanceType - * @param[in] metric_arg metric argument + * @param[in] index_params parameters such as the distance metric to use + * @param[in] dataset a device pointer to a row-major matrix [n_rows, dim] * - * @return the constructed bruteforce index + * @return the constructed brute force index */ auto build(raft::resources const& handle, - raft::device_matrix_view dataset, - cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded, - float metric_arg = 0) -> cuvs::neighbors::brute_force::index; + const cuvs::neighbors::brute_force::index_params& index_params, + raft::device_matrix_view dataset) + -> cuvs::neighbors::brute_force::index; + +[[deprecated]] auto build( + raft::resources const& handle, + raft::device_matrix_view dataset, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded, + float metric_arg = 0) -> cuvs::neighbors::brute_force::index; + /** * @brief Build the index from the dataset for efficient search. * * Usage example: * @code{.cpp} - * using namespace cuvs::neighbors; - * // create and fill the index from a [N, D] dataset - * auto index = brute_force::build(handle, dataset, metric); + * brute_force::index_params index_params; + * auto index = brute_force::build(handle, index_params, dataset); * @endcode * * @param[in] handle - * @param[in] dataset a device pointer to a col-major matrix [n_rows, dim] - * @param[in] metric cuvs::distance::DistanceType - * @param[in] metric_arg metric argument + * @param[in] index_params parameters such as the distance metric to use + * @param[in] dataset a device pointer to a row-major matrix [n_rows, dim] * - * @return the constructed bruteforce index + * @return the constructed brute force index */ auto build(raft::resources const& handle, - raft::device_matrix_view dataset, - cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded, - float metric_arg = 0) -> cuvs::neighbors::brute_force::index; + const cuvs::neighbors::brute_force::index_params& index_params, + raft::device_matrix_view dataset) + -> cuvs::neighbors::brute_force::index; + +[[deprecated]] auto build( + raft::resources const& handle, + raft::device_matrix_view dataset, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded, + float metric_arg = 0) -> cuvs::neighbors::brute_force::index; /** * @} */ @@ -286,6 +344,7 @@ auto build(raft::resources const& handle, * @endcode * * @param[in] handle + * @param[in] params parameters configuring the search * @param[in] index brute-force constructed index * @param[in] queries a device pointer to a row-major matrix [n_queries, index->dim()] * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset @@ -296,6 +355,7 @@ auto build(raft::resources const& handle, * `index->size()` bits to indicate whether queries[0] should compute the distance with dataset. */ void search(raft::resources const& handle, + const cuvs::neighbors::brute_force::search_params& params, const cuvs::neighbors::brute_force::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, @@ -303,6 +363,14 @@ void search(raft::resources const& handle, const cuvs::neighbors::filtering::base_filter& sample_filter = cuvs::neighbors::filtering::none_sample_filter{}); +[[deprecated]] void search(raft::resources const& handle, + const cuvs::neighbors::brute_force::index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter = + cuvs::neighbors::filtering::none_sample_filter{}); + /** * @brief Search ANN using the constructed index. * @@ -323,6 +391,7 @@ void search(raft::resources const& handle, * @endcode * * @param[in] handle + * @param[in] params parameters configuring the search * @param[in] index ivf-flat constructed index * @param[in] queries a device pointer to a row-major matrix [n_queries, index->dim()] * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset @@ -332,18 +401,28 @@ void search(raft::resources const& handle, * given */ void search(raft::resources const& handle, + const cuvs::neighbors::brute_force::search_params& params, const cuvs::neighbors::brute_force::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, const cuvs::neighbors::filtering::base_filter& sample_filter = cuvs::neighbors::filtering::none_sample_filter{}); + +[[deprecated]] void search(raft::resources const& handle, + const cuvs::neighbors::brute_force::index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter = + cuvs::neighbors::filtering::none_sample_filter{}); /** * @brief Search ANN using the constructed index. * * See the [brute_force::build](#brute_force::build) documentation for a usage example. * * @param[in] handle + * @param[in] params parameters configuring the search * @param[in] index bruteforce constructed index * @param[in] queries a device pointer to a col-major matrix [n_queries, index->dim()] * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset @@ -353,18 +432,28 @@ void search(raft::resources const& handle, * given query */ void search(raft::resources const& handle, + const cuvs::neighbors::brute_force::search_params& params, const cuvs::neighbors::brute_force::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, const cuvs::neighbors::filtering::base_filter& sample_filter = cuvs::neighbors::filtering::none_sample_filter{}); + +[[deprecated]] void search(raft::resources const& handle, + const cuvs::neighbors::brute_force::index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter = + cuvs::neighbors::filtering::none_sample_filter{}); /** * @brief Search ANN using the constructed index. * * See the [brute_force::build](#brute_force::build) documentation for a usage example. * * @param[in] handle + * @param[in] params parameters configuring the search * @param[in] index bruteforce constructed index * @param[in] queries a device pointer to a col-major matrix [n_queries, index->dim()] * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset @@ -374,12 +463,21 @@ void search(raft::resources const& handle, * given query */ void search(raft::resources const& handle, + const cuvs::neighbors::brute_force::search_params& params, const cuvs::neighbors::brute_force::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, raft::device_matrix_view distances, const cuvs::neighbors::filtering::base_filter& sample_filter = cuvs::neighbors::filtering::none_sample_filter{}); + +[[deprecated]] void search(raft::resources const& handle, + const cuvs::neighbors::brute_force::index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + const cuvs::neighbors::filtering::base_filter& sample_filter = + cuvs::neighbors::filtering::none_sample_filter{}); /** * @} */ @@ -472,6 +570,7 @@ struct sparse_search_params { * @brief Search the sparse bruteforce index for nearest neighbors * * @param[in] handle + * @param[in] params parameters configuring the search * @param[in] index Sparse brute-force constructed index * @param[in] queries a sparse CSR matrix on the device to query * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset diff --git a/cpp/include/cuvs/neighbors/cagra.h b/cpp/include/cuvs/neighbors/cagra.h index 14331ebbc..f7f58a19c 100644 --- a/cpp/include/cuvs/neighbors/cagra.h +++ b/cpp/include/cuvs/neighbors/cagra.h @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include #include @@ -87,6 +88,8 @@ typedef struct cuvsCagraCompressionParams* cuvsCagraCompressionParams_t; * */ struct cuvsCagraIndexParams { + /** Distance type. */ + cuvsDistanceType metric; /** Degree of input graph for pruning. */ size_t intermediate_graph_degree; /** Degree of output graph. */ diff --git a/cpp/include/cuvs/neighbors/common.hpp b/cpp/include/cuvs/neighbors/common.hpp index 60b8cc122..bd9ea4834 100644 --- a/cpp/include/cuvs/neighbors/common.hpp +++ b/cpp/include/cuvs/neighbors/common.hpp @@ -264,6 +264,77 @@ auto make_strided_dataset(const raft::resources& res, const SrcT& src, uint32_t return std::make_unique(std::move(out_array), out_layout); } +/** + * @brief Contstruct a strided matrix from any mdarray. + * + * This function constructs an owning device matrix and copies the data. + * When the data is copied, padding elements are filled with zeroes. + * + * @tparam DataT + * @tparam IdxT + * @tparam LayoutPolicy + * @tparam ContainerPolicy + * + * @param[in] res raft resources handle + * @param[in] src the source mdarray or mdspan + * @param[in] required_stride the leading dimension (in elements) + * @return owning current-device-accessible strided matrix + */ +template +auto make_strided_dataset( + const raft::resources& res, + raft::mdarray, LayoutPolicy, ContainerPolicy>&& src, + uint32_t required_stride) -> std::unique_ptr> +{ + using value_type = DataT; + using index_type = IdxT; + using layout_type = LayoutPolicy; + using container_policy_type = ContainerPolicy; + static_assert(std::is_same_v || + std::is_same_v> || + std::is_same_v, + "The input must be row-major"); + RAFT_EXPECTS(src.extent(1) <= required_stride, + "The input row length must be not larger than the desired stride."); + const uint32_t src_stride = src.stride(0) > 0 ? src.stride(0) : src.extent(1); + const bool stride_matches = required_stride == src_stride; + + auto out_layout = + raft::make_strided_layout(src.extents(), std::array{required_stride, 1}); + + using out_mdarray_type = raft::device_matrix; + using out_layout_type = typename out_mdarray_type::layout_type; + using out_container_policy_type = typename out_mdarray_type::container_policy_type; + using out_owning_type = + owning_dataset; + + if constexpr (std::is_same_v && + std::is_same_v) { + if (stride_matches) { + // Everything matches, we can own the mdarray + return std::make_unique(std::move(src), out_layout); + } + } + // Something is wrong: have to make a copy and produce an owning dataset + auto out_array = + raft::make_device_matrix(res, src.extent(0), required_stride); + + RAFT_CUDA_TRY(cudaMemsetAsync(out_array.data_handle(), + 0, + out_array.size() * sizeof(value_type), + raft::resource::get_cuda_stream(res))); + RAFT_CUDA_TRY(cudaMemcpy2DAsync(out_array.data_handle(), + sizeof(value_type) * required_stride, + src.data_handle(), + sizeof(value_type) * src_stride, + sizeof(value_type) * src.extent(1), + src.extent(0), + cudaMemcpyDefault, + raft::resource::get_cuda_stream(res))); + + return std::make_unique(std::move(out_array), out_layout); +} + /** * @brief Contstruct a strided matrix from any mdarray or mdspan. * @@ -278,14 +349,15 @@ auto make_strided_dataset(const raft::resources& res, const SrcT& src, uint32_t * @return maybe owning current-device-accessible strided matrix */ template -auto make_aligned_dataset(const raft::resources& res, const SrcT& src, uint32_t align_bytes = 16) +auto make_aligned_dataset(const raft::resources& res, SrcT src, uint32_t align_bytes = 16) -> std::unique_ptr> { - using value_type = typename SrcT::value_type; + using source_type = std::remove_cv_t>; + using value_type = typename source_type::value_type; constexpr size_t kSize = sizeof(value_type); uint32_t required_stride = raft::round_up_safe(src.extent(1) * kSize, std::lcm(align_bytes, kSize)) / kSize; - return make_strided_dataset(res, src, required_stride); + return make_strided_dataset(res, std::forward(src), required_stride); } /** * @brief VPQ compressed dataset. diff --git a/cpp/include/cuvs/neighbors/refine.hpp b/cpp/include/cuvs/neighbors/refine.hpp index 19fbd30bb..5e60ff537 100644 --- a/cpp/include/cuvs/neighbors/refine.hpp +++ b/cpp/include/cuvs/neighbors/refine.hpp @@ -76,6 +76,51 @@ void refine(raft::resources const& handle, raft::device_matrix_view distances, cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded); +/** + * @brief Refine nearest neighbor search. + * + * Refinement is an operation that follows an approximate NN search. The approximate search has + * already selected n_candidates neighbor candidates for each query. We narrow it down to k + * neighbors. For each query, we calculate the exact distance between the query and its + * n_candidates neighbor candidate, and select the k nearest ones. + * + * The k nearest neighbors and distances are returned. + * + * Example usage + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_pq::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = ivf_pq::build(handle, index_params, dataset); + * // use default search parameters + * ivf_pq::search_params search_params; + * // search m = 4 * k nearest neighbours for each of the N queries + * ivf_pq::search(handle, search_params, index, queries, neighbor_candidates, + * out_dists_tmp); + * // refine it to the k nearest one + * refine(handle, dataset, queries, neighbor_candidates, out_indices, out_dists, + * index.metric()); + * @endcode + * + * + * @param[in] handle the raft handle + * @param[in] dataset device matrix that stores the dataset [n_rows, dims] + * @param[in] queries device matrix of the queries [n_queris, dims] + * @param[in] neighbor_candidates indices of candidate vectors [n_queries, n_candidates], where + * n_candidates >= k + * @param[out] indices device matrix that stores the refined indices [n_queries, k] + * @param[out] distances device matrix that stores the refined distances [n_queries, k] + * @param[in] metric distance metric to use. Euclidean (L2) is used by default + */ +void refine(raft::resources const& handle, + raft::device_matrix_view dataset, + raft::device_matrix_view queries, + raft::device_matrix_view neighbor_candidates, + raft::device_matrix_view indices, + raft::device_matrix_view distances, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded); + /** * @brief Refine nearest neighbor search. * diff --git a/cpp/src/cluster/detail/kmeans.cuh b/cpp/src/cluster/detail/kmeans.cuh index 3d054f0fd..e943b8afc 100644 --- a/cpp/src/cluster/detail/kmeans.cuh +++ b/cpp/src/cluster/detail/kmeans.cuh @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include @@ -56,8 +56,6 @@ namespace cuvs::cluster::kmeans::detail { -// TODO(cjnolet): RAFT_NAME needs to be removed and the raft::logger fixed to not require it -static const std::string RAFT_NAME = "raft"; static const std::string CUVS_NAME = "cuvs"; // ========================================================= @@ -373,7 +371,7 @@ void kmeans_fit_main(raft::resources const& handle, rmm::device_uvector& workspace) { raft::common::nvtx::range fun_scope("kmeans_fit_main"); - raft::logger::get(RAFT_NAME).set_level(params.verbosity); + raft::default_logger().set_level(params.verbosity); cudaStream_t stream = raft::resource::get_cuda_stream(handle); auto n_samples = X.extent(0); auto n_features = X.extent(1); @@ -879,7 +877,7 @@ void kmeans_fit(raft::resources const& handle, pams.n_clusters); } - raft::logger::get(RAFT_NAME).set_level(pams.verbosity); + raft::default_logger().set_level(pams.verbosity); // Allocate memory rmm::device_uvector workspace(0, stream); @@ -1025,7 +1023,7 @@ void kmeans_predict(raft::resources const& handle, RAFT_EXPECTS(centroids.extent(1) == n_features, "invalid parameter (centroids.extent(1) != n_features)"); - raft::logger::get(RAFT_NAME).set_level(pams.verbosity); + raft::default_logger().set_level(pams.verbosity); auto metric = pams.metric; // Allocate memory @@ -1218,7 +1216,7 @@ void kmeans_transform(raft::resources const& handle, raft::device_matrix_view X_new) { raft::common::nvtx::range fun_scope("kmeans_transform"); - raft::logger::get(RAFT_NAME).set_level(pams.verbosity); + raft::default_logger().set_level(pams.verbosity); cudaStream_t stream = raft::resource::get_cuda_stream(handle); auto n_samples = X.extent(0); auto n_features = X.extent(1); diff --git a/cpp/src/cluster/detail/kmeans_auto_find_k.cuh b/cpp/src/cluster/detail/kmeans_auto_find_k.cuh index 6441f7ad5..797b33bca 100644 --- a/cpp/src/cluster/detail/kmeans_auto_find_k.cuh +++ b/cpp/src/cluster/detail/kmeans_auto_find_k.cuh @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include @@ -230,4 +230,4 @@ void find_k(raft::resources const& handle, n_iter); } } -} // namespace cuvs::cluster::kmeans::detail \ No newline at end of file +} // namespace cuvs::cluster::kmeans::detail diff --git a/cpp/src/cluster/detail/kmeans_balanced.cuh b/cpp/src/cluster/detail/kmeans_balanced.cuh index 3f1ad2334..ba4cabbde 100644 --- a/cpp/src/cluster/detail/kmeans_balanced.cuh +++ b/cpp/src/cluster/detail/kmeans_balanced.cuh @@ -25,7 +25,8 @@ #include #include -#include +#include +#include #include #include #include @@ -59,7 +60,6 @@ namespace cuvs::cluster::kmeans::detail { -static const std::string RAFT_NAME = "raft"; constexpr static inline float kAdjustCentersWeight = 7.0f; /** diff --git a/cpp/src/cluster/detail/kmeans_common.cuh b/cpp/src/cluster/detail/kmeans_common.cuh index eec71b5d2..03db08bd1 100644 --- a/cpp/src/cluster/detail/kmeans_common.cuh +++ b/cpp/src/cluster/detail/kmeans_common.cuh @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/cpp/src/distance/detail/sparse/coo_spmv_kernel.cuh b/cpp/src/distance/detail/sparse/coo_spmv_kernel.cuh index 1f4b19af4..e44edc68a 100644 --- a/cpp/src/distance/detail/sparse/coo_spmv_kernel.cuh +++ b/cpp/src/distance/detail/sparse/coo_spmv_kernel.cuh @@ -16,6 +16,8 @@ #pragma once +#include + #include #include #include diff --git a/cpp/src/distance/pairwise_distance_c.cpp b/cpp/src/distance/pairwise_distance_c.cpp index d457198a2..121574880 100644 --- a/cpp/src/distance/pairwise_distance_c.cpp +++ b/cpp/src/distance/pairwise_distance_c.cpp @@ -29,7 +29,7 @@ namespace { -template +template void _pairwise_distance(cuvsResources_t res, DLManagedTensor* x_tensor, DLManagedTensor* y_tensor, @@ -39,8 +39,8 @@ void _pairwise_distance(cuvsResources_t res, { auto res_ptr = reinterpret_cast(res); - using mdspan_type = raft::device_matrix_view; - using distances_mdspan_type = raft::device_matrix_view; + using mdspan_type = raft::device_matrix_view; + using distances_mdspan_type = raft::device_matrix_view; auto x_mds = cuvs::core::from_dlpack(x_tensor); auto y_mds = cuvs::core::from_dlpack(y_tensor); @@ -70,12 +70,64 @@ extern "C" cuvsError_t cuvsPairwiseDistance(cuvsResources_t res, RAFT_FAIL("Inputs to cuvsPairwiseDistance must all have the same dtype"); } - if (x_dt.bits == 32) { - _pairwise_distance(res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); - } else if (x_dt.bits == 64) { - _pairwise_distance(res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); + bool x_row_major; + if (cuvs::core::is_c_contiguous(x_tensor)) { + x_row_major = true; + } else if (cuvs::core::is_f_contiguous(x_tensor)) { + x_row_major = false; } else { - RAFT_FAIL("Unsupported DLtensor dtype: %d and bits: %d", x_dt.code, x_dt.bits); + RAFT_FAIL("X input to cuvsPairwiseDistance must be contiguous (non-strided)"); + } + + bool y_row_major; + if (cuvs::core::is_c_contiguous(y_tensor)) { + y_row_major = true; + } else if (cuvs::core::is_f_contiguous(y_tensor)) { + y_row_major = false; + } else { + RAFT_FAIL("Y input to cuvsPairwiseDistance must be contiguous (non-strided)"); + } + + bool distances_row_major; + if (cuvs::core::is_c_contiguous(distances_tensor)) { + distances_row_major = true; + } else if (cuvs::core::is_f_contiguous(distances_tensor)) { + distances_row_major = false; + } else { + RAFT_FAIL("distances input to cuvsPairwiseDistance must be contiguous (non-strided)"); + } + + if ((x_row_major != y_row_major) || (x_row_major != distances_row_major)) { + RAFT_FAIL( + "Inputs to cuvsPairwiseDistance must all have the same layout (row-major or col-major)"); + } + + if (x_row_major) { + if (x_dt.bits == 32) { + _pairwise_distance( + res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); + } else if (x_dt.bits == 16) { + _pairwise_distance( + res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); + } else if (x_dt.bits == 64) { + _pairwise_distance( + res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); + } else { + RAFT_FAIL("Unsupported DLtensor dtype: %d and bits: %d", x_dt.code, x_dt.bits); + } + } else { + if (x_dt.bits == 32) { + _pairwise_distance( + res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); + } else if (x_dt.bits == 16) { + _pairwise_distance( + res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); + } else if (x_dt.bits == 64) { + _pairwise_distance( + res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); + } else { + RAFT_FAIL("Unsupported DLtensor dtype: %d and bits: %d", x_dt.code, x_dt.bits); + } } }); } diff --git a/cpp/src/neighbors/brute_force.cu b/cpp/src/neighbors/brute_force.cu index d534676e3..d54a75879 100644 --- a/cpp/src/neighbors/brute_force.cu +++ b/cpp/src/neighbors/brute_force.cu @@ -160,45 +160,88 @@ void index::update_dataset( dataset_view_ = raft::make_const_mdspan(dataset_.view()); } -#define CUVS_INST_BFKNN(T, DistT) \ - auto build(raft::resources const& res, \ - raft::device_matrix_view dataset, \ - cuvs::distance::DistanceType metric, \ - DistT metric_arg) \ - ->cuvs::neighbors::brute_force::index \ - { \ - return detail::build(res, dataset, metric, metric_arg); \ - } \ - auto build(raft::resources const& res, \ - raft::device_matrix_view dataset, \ - cuvs::distance::DistanceType metric, \ - DistT metric_arg) \ - ->cuvs::neighbors::brute_force::index \ - { \ - return detail::build(res, dataset, metric, metric_arg); \ - } \ - \ - void search(raft::resources const& res, \ - const cuvs::neighbors::brute_force::index& idx, \ - raft::device_matrix_view queries, \ - raft::device_matrix_view neighbors, \ - raft::device_matrix_view distances, \ - const cuvs::neighbors::filtering::base_filter& sample_filter) \ - { \ - detail::search( \ - res, idx, queries, neighbors, distances, sample_filter); \ - } \ - void search(raft::resources const& res, \ - const cuvs::neighbors::brute_force::index& idx, \ - raft::device_matrix_view queries, \ - raft::device_matrix_view neighbors, \ - raft::device_matrix_view distances, \ - const cuvs::neighbors::filtering::base_filter& sample_filter) \ - { \ - detail::search( \ - res, idx, queries, neighbors, distances, sample_filter); \ - } \ - \ +#define CUVS_INST_BFKNN(T, DistT) \ + auto build(raft::resources const& res, \ + const cuvs::neighbors::brute_force::index_params& index_params, \ + raft::device_matrix_view dataset) \ + ->cuvs::neighbors::brute_force::index \ + { \ + return detail::build(res, dataset, index_params.metric, index_params.metric_arg); \ + } \ + auto build(raft::resources const& res, \ + const cuvs::neighbors::brute_force::index_params& index_params, \ + raft::host_matrix_view dataset) \ + ->cuvs::neighbors::brute_force::index \ + { \ + return detail::build(res, dataset, index_params.metric, index_params.metric_arg); \ + } \ + auto build(raft::resources const& res, \ + raft::device_matrix_view dataset, \ + cuvs::distance::DistanceType metric, \ + DistT metric_arg) \ + ->cuvs::neighbors::brute_force::index \ + { \ + return detail::build(res, dataset, metric, metric_arg); \ + } \ + auto build(raft::resources const& res, \ + const cuvs::neighbors::brute_force::index_params& index_params, \ + raft::device_matrix_view dataset) \ + ->cuvs::neighbors::brute_force::index \ + { \ + return detail::build(res, dataset, index_params.metric, index_params.metric_arg); \ + } \ + auto build(raft::resources const& res, \ + raft::device_matrix_view dataset, \ + cuvs::distance::DistanceType metric, \ + DistT metric_arg) \ + ->cuvs::neighbors::brute_force::index \ + { \ + return detail::build(res, dataset, metric, metric_arg); \ + } \ + \ + void search(raft::resources const& res, \ + const cuvs::neighbors::brute_force::search_params& params, \ + const cuvs::neighbors::brute_force::index& idx, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances, \ + const cuvs::neighbors::filtering::base_filter& sample_filter) \ + { \ + detail::search( \ + res, idx, queries, neighbors, distances, sample_filter); \ + } \ + void search(raft::resources const& res, \ + const cuvs::neighbors::brute_force::index& idx, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances, \ + const cuvs::neighbors::filtering::base_filter& sample_filter) \ + { \ + detail::search( \ + res, idx, queries, neighbors, distances, sample_filter); \ + } \ + void search(raft::resources const& res, \ + const cuvs::neighbors::brute_force::search_params& params, \ + const cuvs::neighbors::brute_force::index& idx, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances, \ + const cuvs::neighbors::filtering::base_filter& sample_filter) \ + { \ + detail::search( \ + res, idx, queries, neighbors, distances, sample_filter); \ + } \ + void search(raft::resources const& res, \ + const cuvs::neighbors::brute_force::index& idx, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances, \ + const cuvs::neighbors::filtering::base_filter& sample_filter) \ + { \ + detail::search( \ + res, idx, queries, neighbors, distances, sample_filter); \ + } \ + \ template struct cuvs::neighbors::brute_force::index; CUVS_INST_BFKNN(float, float); @@ -206,4 +249,4 @@ CUVS_INST_BFKNN(half, float); #undef CUVS_INST_BFKNN -} // namespace cuvs::neighbors::brute_force \ No newline at end of file +} // namespace cuvs::neighbors::brute_force diff --git a/cpp/src/neighbors/brute_force_c.cpp b/cpp/src/neighbors/brute_force_c.cpp index f1a8c995d..1693ac930 100644 --- a/cpp/src/neighbors/brute_force_c.cpp +++ b/cpp/src/neighbors/brute_force_c.cpp @@ -33,7 +33,7 @@ namespace { -template +template void* _build(cuvsResources_t res, DLManagedTensor* dataset_tensor, cuvsDistanceType metric, @@ -41,17 +41,19 @@ void* _build(cuvsResources_t res, { auto res_ptr = reinterpret_cast(res); - using mdspan_type = raft::device_matrix_view; + using mdspan_type = raft::device_matrix_view; auto mds = cuvs::core::from_dlpack(dataset_tensor); - auto index_on_stack = cuvs::neighbors::brute_force::build( - *res_ptr, mds, static_cast((int)metric), metric_arg); - auto index_on_heap = new cuvs::neighbors::brute_force::index(std::move(index_on_stack)); + cuvs::neighbors::brute_force::index_params params; + params.metric = metric; + params.metric_arg = metric_arg; + auto index_on_stack = cuvs::neighbors::brute_force::build(*res_ptr, params, mds); + auto index_on_heap = new cuvs::neighbors::brute_force::index(std::move(index_on_stack)); return index_on_heap; } -template +template void _search(cuvsResources_t res, cuvsBruteForceIndex index, DLManagedTensor* queries_tensor, @@ -62,7 +64,7 @@ void _search(cuvsResources_t res, auto res_ptr = reinterpret_cast(res); auto index_ptr = reinterpret_cast*>(index.addr); - using queries_mdspan_type = raft::device_matrix_view; + using queries_mdspan_type = raft::device_matrix_view; using neighbors_mdspan_type = raft::device_matrix_view; using distances_mdspan_type = raft::device_matrix_view; using prefilter_mds_type = raft::device_vector_view; @@ -72,8 +74,11 @@ void _search(cuvsResources_t res, auto neighbors_mds = cuvs::core::from_dlpack(neighbors_tensor); auto distances_mds = cuvs::core::from_dlpack(distances_tensor); + cuvs::neighbors::brute_force::search_params params; + if (prefilter.type == NO_FILTER) { cuvs::neighbors::brute_force::search(*res_ptr, + params, *index_ptr, queries_mds, neighbors_mds, @@ -87,7 +92,7 @@ void _search(cuvsResources_t res, queries_mds.extent(0), index_ptr->dataset().extent(0))); cuvs::neighbors::brute_force::search( - *res_ptr, *index_ptr, queries_mds, neighbors_mds, distances_mds, prefilter_view); + *res_ptr, params, *index_ptr, queries_mds, neighbors_mds, distances_mds, prefilter_view); } else { RAFT_FAIL("Unsupported prefilter type: BITSET"); } @@ -145,8 +150,15 @@ extern "C" cuvsError_t cuvsBruteForceBuild(cuvsResources_t res, auto dataset = dataset_tensor->dl_tensor; if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 32) { - index->addr = - reinterpret_cast(_build(res, dataset_tensor, metric, metric_arg)); + if (cuvs::core::is_c_contiguous(dataset_tensor)) { + index->addr = + reinterpret_cast(_build(res, dataset_tensor, metric, metric_arg)); + } else if (cuvs::core::is_f_contiguous(dataset_tensor)) { + index->addr = reinterpret_cast( + _build(res, dataset_tensor, metric, metric_arg)); + } else { + RAFT_FAIL("dataset input to cuvsBruteForceBuild must be contiguous (non-strided)"); + } index->dtype = dataset.dtype; } else { RAFT_FAIL("Unsupported dataset DLtensor dtype: %d and bits: %d", @@ -184,7 +196,14 @@ extern "C" cuvsError_t cuvsBruteForceSearch(cuvsResources_t res, RAFT_EXPECTS(queries.dtype.code == index.dtype.code, "type mismatch between index and queries"); if (queries.dtype.code == kDLFloat && queries.dtype.bits == 32) { - _search(res, index, queries_tensor, neighbors_tensor, distances_tensor, prefilter); + if (cuvs::core::is_c_contiguous(queries_tensor)) { + _search(res, index, queries_tensor, neighbors_tensor, distances_tensor, prefilter); + } else if (cuvs::core::is_f_contiguous(queries_tensor)) { + _search( + res, index, queries_tensor, neighbors_tensor, distances_tensor, prefilter); + } else { + RAFT_FAIL("queries input to cuvsBruteForceSearch must be contiguous (non-strided)"); + } } else { RAFT_FAIL("Unsupported queries DLtensor dtype: %d and bits: %d", queries.dtype.code, @@ -226,4 +245,4 @@ extern "C" cuvsError_t cuvsBruteForceSerialize(cuvsResources_t res, RAFT_FAIL("Unsupported index dtype: %d and bits: %d", index->dtype.code, index->dtype.bits); } }); -} \ No newline at end of file +} diff --git a/cpp/src/neighbors/cagra_c.cpp b/cpp/src/neighbors/cagra_c.cpp index 326a89665..02b7a566e 100644 --- a/cpp/src/neighbors/cagra_c.cpp +++ b/cpp/src/neighbors/cagra_c.cpp @@ -41,7 +41,8 @@ void* _build(cuvsResources_t res, cuvsCagraIndexParams params, DLManagedTensor* auto res_ptr = reinterpret_cast(res); auto index = new cuvs::neighbors::cagra::index(*res_ptr); - auto index_params = cuvs::neighbors::cagra::index_params(); + auto index_params = cuvs::neighbors::cagra::index_params(); + index_params.metric = static_cast((int)params.metric), index_params.intermediate_graph_degree = params.intermediate_graph_degree; index_params.graph_degree = params.graph_degree; @@ -252,7 +253,8 @@ extern "C" cuvsError_t cuvsCagraSearch(cuvsResources_t res, extern "C" cuvsError_t cuvsCagraIndexParamsCreate(cuvsCagraIndexParams_t* params) { return cuvs::core::translate_exceptions([=] { - *params = new cuvsCagraIndexParams{.intermediate_graph_degree = 128, + *params = new cuvsCagraIndexParams{.metric = L2Expanded, + .intermediate_graph_degree = 128, .graph_degree = 64, .build_algo = IVF_PQ, .nn_descent_niter = 20}; diff --git a/cpp/src/neighbors/detail/ann_utils.cuh b/cpp/src/neighbors/detail/ann_utils.cuh index 652d41c85..149eea3f1 100644 --- a/cpp/src/neighbors/detail/ann_utils.cuh +++ b/cpp/src/neighbors/detail/ann_utils.cuh @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include #include @@ -403,6 +403,17 @@ struct batch_load_iterator { /** A single batch of data residing in device memory. */ struct batch { + ~batch() noexcept + { + /* + If there's no copy, there's no allocation owned by the batch. + If there's no allocation, there's no guarantee that the device pointer is stream-ordered. + If there's no stream order guarantee, we must synchronize with the stream before the batch is + destroyed to make sure all GPU operations in that stream finish earlier. + */ + if (!does_copy()) { RAFT_CUDA_TRY_NO_THROW(cudaStreamSynchronize(stream_)); } + } + /** Logical width of a single row in a batch, in elements of type `T`. */ [[nodiscard]] auto row_width() const -> size_type { return row_width_; } /** Logical offset of the batch, in rows (`row_width()`) */ diff --git a/cpp/src/neighbors/detail/cagra/add_nodes.cuh b/cpp/src/neighbors/detail/cagra/add_nodes.cuh index b03b8214b..358b7643e 100644 --- a/cpp/src/neighbors/detail/cagra/add_nodes.cuh +++ b/cpp/src/neighbors/detail/cagra/add_nodes.cuh @@ -31,8 +31,6 @@ namespace cuvs::neighbors::cagra { -static const std::string RAFT_NAME = "raft"; - template void add_node_core( raft::resources const& handle, @@ -432,8 +430,14 @@ void extend_core( } else { index.update_graph(handle, raft::make_const_mdspan(updated_graph.view())); } + } else if (dynamic_cast*>(&index.data()) != + nullptr) { + RAFT_FAIL( + "cagra::extend only supports an index to which the dataset is attached. Please check if the " + "index was built with index_param.attach_dataset_on_build = true, or if a dataset was " + "attached after the build."); } else { - RAFT_FAIL("Only uncompressed dataset is supported"); + RAFT_FAIL("cagra::extend only supports an uncompressed dataset index"); } } } // namespace cuvs::neighbors::cagra diff --git a/cpp/src/neighbors/detail/cagra/cagra_build.cuh b/cpp/src/neighbors/detail/cagra/cagra_build.cuh index b7fec724b..340986448 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_build.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_build.cuh @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include @@ -46,8 +46,6 @@ namespace cuvs::neighbors::cagra::detail { -static const std::string RAFT_NAME = "raft"; - template void write_to_graph(raft::host_matrix_view knn_graph, raft::host_matrix_view neighbors_host_view, diff --git a/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh index 0f6cf852f..c83da7bb1 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include #include @@ -34,8 +34,6 @@ namespace cuvs::neighbors::cagra::detail { -static const std::string RAFT_NAME = "raft"; - constexpr int serialization_version = 4; /** diff --git a/cpp/src/neighbors/detail/cagra/compute_distance.hpp b/cpp/src/neighbors/detail/cagra/compute_distance.hpp index 7eb798459..2227e4f9e 100644 --- a/cpp/src/neighbors/detail/cagra/compute_distance.hpp +++ b/cpp/src/neighbors/detail/cagra/compute_distance.hpp @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include // TODO: This shouldn't be invoking spatial/knn diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh index ecfd856f1..9cb432bcb 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include #include diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh index 9fa9d5894..7535ff217 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh @@ -26,7 +26,7 @@ #include "utils.hpp" #include -#include +#include #include #include #include diff --git a/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh index c6fe21642..469c80a08 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh @@ -23,7 +23,7 @@ #include "utils.hpp" #include -#include +#include #include #include diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta.cuh index fa71dbaf9..161aa8c4a 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta.cuh +++ b/cpp/src/neighbors/detail/cagra/search_single_cta.cuh @@ -26,7 +26,7 @@ #include "utils.hpp" #include -#include +#include #include #include #include diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh index 678ed0cb4..188862fbb 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh @@ -28,7 +28,7 @@ #include #include -#include +#include #include #include #include @@ -64,7 +64,6 @@ namespace cuvs::neighbors::cagra::detail { namespace single_cta_search { -using raft::RAFT_NAME; // TODO: this is required for RAFT_LOG_XXX messages. // #define _CLK_BREAKDOWN diff --git a/cpp/src/neighbors/detail/cagra/utils.hpp b/cpp/src/neighbors/detail/cagra/utils.hpp index 0f8309328..9f95c5b1c 100644 --- a/cpp/src/neighbors/detail/cagra/utils.hpp +++ b/cpp/src/neighbors/detail/cagra/utils.hpp @@ -179,7 +179,7 @@ class device_matrix_view_from_host { public: device_matrix_view_from_host(raft::resources const& res, raft::host_matrix_view host_view) - : host_view_(host_view) + : res_(res), host_view_(host_view) { cudaPointerAttributes attr; RAFT_CUDA_TRY(cudaPointerGetAttributes(&attr, host_view.data_handle())); @@ -199,6 +199,17 @@ class device_matrix_view_from_host { } } + ~device_matrix_view_from_host() noexcept + { + /* + If there's no copy, there's no allocation owned by this struct. + If there's no allocation, there's no guarantee that the device pointer is stream-ordered. + If there's no stream order guarantee, we must synchronize with the stream before the struct is + destroyed to make sure all GPU operations in that stream finish earlier. + */ + if (!allocated_memory()) { raft::resource::sync_stream(res_); } + } + raft::device_matrix_view view() { return raft::make_device_matrix_view( @@ -207,9 +218,10 @@ class device_matrix_view_from_host { T* data_handle() { return device_ptr; } - bool allocated_memory() const { return device_mem_.has_value(); } + [[nodiscard]] bool allocated_memory() const { return device_mem_.has_value(); } private: + const raft::resources& res_; std::optional> device_mem_; raft::host_matrix_view host_view_; T* device_ptr; diff --git a/cpp/src/neighbors/detail/dataset_serialize.hpp b/cpp/src/neighbors/detail/dataset_serialize.hpp index 40d9df930..ba3090b59 100644 --- a/cpp/src/neighbors/detail/dataset_serialize.hpp +++ b/cpp/src/neighbors/detail/dataset_serialize.hpp @@ -21,7 +21,7 @@ #include #include -#include +#include #include @@ -140,7 +140,7 @@ auto deserialize_strided(raft::resources const& res, std::istream& is) auto stride = raft::deserialize_scalar(res, is); auto host_array = raft::make_host_matrix(n_rows, dim); raft::deserialize_mdspan(res, is, host_array.view()); - return make_strided_dataset(res, host_array, stride); + return make_strided_dataset(res, std::move(host_array), stride); } template diff --git a/cpp/src/neighbors/detail/dynamic_batching.cuh b/cpp/src/neighbors/detail/dynamic_batching.cuh index 5c6b1654e..23c5c07f6 100644 --- a/cpp/src/neighbors/detail/dynamic_batching.cuh +++ b/cpp/src/neighbors/detail/dynamic_batching.cuh @@ -50,8 +50,6 @@ namespace cuvs::neighbors::dynamic_batching::detail { -using raft::RAFT_NAME; // TODO: a workaround for RAFT_LOG_XXX macros - /** * A helper to make the requester threads more cooperative when busy-spinning. * It is used in the wait loops across this file to reduce the CPU usage. @@ -240,8 +238,8 @@ enum struct slot_state : int32_t { struct batch_token { uint64_t value = 0; - constexpr inline batch_token() {} - explicit constexpr inline batch_token(uint32_t buffer_id) { id() = buffer_id; } + constexpr inline batch_token() = default; + RAFT_INLINE_FUNCTION explicit batch_token(uint32_t buffer_id) { id() = buffer_id; } /** * Sequential id of the batch in the array of batches. @@ -494,7 +492,7 @@ struct batch_queue_t { * NB: "round" is the number of times the queue counters went over the whole ring buffer. * It's used to avoid the ABA problem for atomic token updates. */ - static constexpr inline auto make_empty_token(seq_order_id seq_id) noexcept -> batch_token + static inline auto make_empty_token(seq_order_id seq_id) noexcept -> batch_token { // Modify the seq_id to identify that the token slot is empty auto empty_round = static_cast(slot_state::kEmptyPast) * kSize; diff --git a/cpp/src/neighbors/detail/knn_brute_force.cuh b/cpp/src/neighbors/detail/knn_brute_force.cuh index e5eeecbc9..f1976e002 100644 --- a/cpp/src/neighbors/detail/knn_brute_force.cuh +++ b/cpp/src/neighbors/detail/knn_brute_force.cuh @@ -28,6 +28,7 @@ #include "./knn_utils.cuh" #include +#include #include #include #include @@ -750,10 +751,10 @@ void search(raft::resources const& res, } } -template +template cuvs::neighbors::brute_force::index build( raft::resources const& res, - raft::device_matrix_view dataset, + mdspan, LayoutT, AccessorT> dataset, cuvs::distance::DistanceType metric, DistT metric_arg) { @@ -764,18 +765,31 @@ cuvs::neighbors::brute_force::index build( if (metric == cuvs::distance::DistanceType::L2Expanded || metric == cuvs::distance::DistanceType::L2SqrtExpanded || metric == cuvs::distance::DistanceType::CosineExpanded) { + auto dataset_storage = std::optional>{}; + auto dataset_view = [&res, &dataset_storage, dataset]() { + if constexpr (std::is_same_v>) { + return dataset; + } else { + dataset_storage = + make_device_matrix(res, dataset.extent(0), dataset.extent(1)); + raft::copy(res, dataset_storage->view(), dataset); + return raft::make_const_mdspan(dataset_storage->view()); + } + }(); + norms = raft::make_device_vector(res, dataset.extent(0)); // cosine needs the l2norm, where as l2 distances needs the squared norm if (metric == cuvs::distance::DistanceType::CosineExpanded) { raft::linalg::norm(res, - dataset, + dataset_view, norms->view(), raft::linalg::NormType::L2Norm, raft::linalg::Apply::ALONG_ROWS, raft::sqrt_op{}); } else { raft::linalg::norm(res, - dataset, + dataset_view, norms->view(), raft::linalg::NormType::L2Norm, raft::linalg::Apply::ALONG_ROWS); diff --git a/cpp/src/neighbors/detail/vamana/vamana_build.cuh b/cpp/src/neighbors/detail/vamana/vamana_build.cuh index da24decb3..ec75c99c1 100644 --- a/cpp/src/neighbors/detail/vamana/vamana_build.cuh +++ b/cpp/src/neighbors/detail/vamana/vamana_build.cuh @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include @@ -52,8 +52,6 @@ namespace cuvs::neighbors::experimental::vamana::detail { * @{ */ -static const std::string RAFT_NAME = "raft"; - static const int blockD = 32; static const int maxBlocks = 10000; diff --git a/cpp/src/neighbors/detail/vamana/vamana_serialize.cuh b/cpp/src/neighbors/detail/vamana/vamana_serialize.cuh index a554464f6..c360ae19a 100644 --- a/cpp/src/neighbors/detail/vamana/vamana_serialize.cuh +++ b/cpp/src/neighbors/detail/vamana/vamana_serialize.cuh @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include #include diff --git a/cpp/src/neighbors/detail/vamana/vamana_structs.cuh b/cpp/src/neighbors/detail/vamana/vamana_structs.cuh index 86cb4e1f8..f6f0279f7 100644 --- a/cpp/src/neighbors/detail/vamana/vamana_structs.cuh +++ b/cpp/src/neighbors/detail/vamana/vamana_structs.cuh @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include diff --git a/cpp/src/neighbors/detail/vpq_dataset.cuh b/cpp/src/neighbors/detail/vpq_dataset.cuh index d85bad920..0d7882b4b 100644 --- a/cpp/src/neighbors/detail/vpq_dataset.cuh +++ b/cpp/src/neighbors/detail/vpq_dataset.cuh @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include #include diff --git a/cpp/src/neighbors/dynamic_batching.cu b/cpp/src/neighbors/dynamic_batching.cu index 6be70353b..84c8a2cf1 100644 --- a/cpp/src/neighbors/dynamic_batching.cu +++ b/cpp/src/neighbors/dynamic_batching.cu @@ -16,6 +16,7 @@ #include "detail/dynamic_batching.cuh" +#include #include #include #include @@ -53,6 +54,8 @@ namespace cuvs::neighbors::dynamic_batching { return index.runner->search(res, params, queries, neighbors, distances); \ } +CUVS_INST_DYNAMIC_BATCHING_INDEX(float, int64_t, cuvs::neighbors::brute_force, index); + CUVS_INST_DYNAMIC_BATCHING_INDEX(float, uint32_t, cuvs::neighbors::cagra, index); CUVS_INST_DYNAMIC_BATCHING_INDEX(half, uint32_t, cuvs::neighbors::cagra, index); CUVS_INST_DYNAMIC_BATCHING_INDEX(int8_t, uint32_t, cuvs::neighbors::cagra, index); diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh index d6ffc1218..f594343c7 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh @@ -27,7 +27,8 @@ #include "../../cluster/kmeans_balanced.cuh" #include "../detail/ann_utils.cuh" #include -#include +#include +#include #include #include #include diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh index f5a4267cd..79b4f1a18 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh @@ -23,7 +23,7 @@ #include "../detail/ann_utils.cuh" #include -#include // RAFT_LOG_TRACE +#include #include #include #include // RAFT_CUDA_TRY diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh index 032b6a8ff..2df6f4f0e 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh @@ -27,7 +27,8 @@ #include // is_min_close, DistanceType #include // cuvs::selection::select_k #include -#include // RAFT_LOG_TRACE +#include +#include #include #include // raft::resources #include // raft::linalg::gemm diff --git a/cpp/src/neighbors/ivf_flat_index.cpp b/cpp/src/neighbors/ivf_flat_index.cpp index 6f7d11e50..c16dc47aa 100644 --- a/cpp/src/neighbors/ivf_flat_index.cpp +++ b/cpp/src/neighbors/ivf_flat_index.cpp @@ -226,6 +226,7 @@ void index::check_consistency() "inconsistent number of lists (clusters)"); } +template struct index; // Used for refine function template struct index; template struct index; template struct index; diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh index 1d4acea1e..44a1b11fa 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh @@ -30,7 +30,7 @@ #include "../../cluster/kmeans_balanced.cuh" #include -#include +#include #include #include #include @@ -68,7 +68,6 @@ #include namespace cuvs::neighbors::ivf_pq::detail { -using raft::RAFT_NAME; // TODO: this is required for RAFT_LOG_XXX messages. using namespace cuvs::spatial::knn::detail; // NOLINT using internal_extents_t = int64_t; // The default mdspan extent type used internally. diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_fp_8bit.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_fp_8bit.cuh index 5b41e5f3d..1b098ac5c 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_fp_8bit.cuh +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_fp_8bit.cuh @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include #include diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh index db8f9fbd3..05bb99353 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_serialize.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_serialize.cuh index 5eaebe69d..4af9dbb8e 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_serialize.cuh +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_serialize.cuh @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/cpp/src/neighbors/mg/omp_checks.cpp b/cpp/src/neighbors/mg/omp_checks.cpp index e09182dfe..c8cc27414 100644 --- a/cpp/src/neighbors/mg/omp_checks.cpp +++ b/cpp/src/neighbors/mg/omp_checks.cpp @@ -18,7 +18,6 @@ #include namespace cuvs::neighbors::mg { -using raft::RAFT_NAME; void check_omp_threads(const int requirements) { diff --git a/cpp/src/neighbors/refine/detail/refine_device_float_float.cu b/cpp/src/neighbors/refine/detail/refine_device_float_float.cu index 25bad201b..76b792d1c 100644 --- a/cpp/src/neighbors/refine/detail/refine_device_float_float.cu +++ b/cpp/src/neighbors/refine/detail/refine_device_float_float.cu @@ -43,5 +43,6 @@ } instantiate_cuvs_neighbors_refine_d(int64_t, float, float, int64_t); +instantiate_cuvs_neighbors_refine_d(uint32_t, float, float, int64_t); #undef instantiate_cuvs_neighbors_refine_d diff --git a/cpp/src/neighbors/refine/refine_device.cuh b/cpp/src/neighbors/refine/refine_device.cuh index 6184e540b..a5491be0d 100644 --- a/cpp/src/neighbors/refine/refine_device.cuh +++ b/cpp/src/neighbors/refine/refine_device.cuh @@ -84,12 +84,13 @@ void refine_device( cuvs::neighbors::ivf_flat::index refinement_index( handle, cuvs::distance::DistanceType(metric), n_queries, false, true, dim); - cuvs::neighbors::ivf_flat::detail::fill_refinement_index(handle, - &refinement_index, - dataset.data_handle(), - neighbor_candidates.data_handle(), - n_queries, - n_candidates); + cuvs::neighbors::ivf_flat::detail::fill_refinement_index( + handle, + &refinement_index, + dataset.data_handle(), + neighbor_candidates.data_handle(), + static_cast(n_queries), + static_cast(n_candidates)); uint32_t grid_dim_x = 1; // the neighbor ids will be computed in uint32_t as offset diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 9224e88d8..9aa596a6e 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -49,6 +49,7 @@ function(ConfigureTest) PRIVATE cuvs cuvs::cuvs raft::raft + test_rmm_logger GTest::gtest GTest::gtest_main Threads::Threads @@ -87,6 +88,9 @@ function(ConfigureTest) ) endfunction() +add_library(test_rmm_logger OBJECT) +target_link_libraries(test_rmm_logger PRIVATE rmm::rmm_logger_impl raft::raft_logger_impl) + # ################################################################################################## # test sources ################################################################################## # ################################################################################################## @@ -179,6 +183,7 @@ if(BUILD_TESTS) NAME NEIGHBORS_DYNAMIC_BATCHING_TEST PATH + neighbors/dynamic_batching/test_brute_force.cu neighbors/dynamic_batching/test_cagra.cu neighbors/dynamic_batching/test_ivf_flat.cu neighbors/dynamic_batching/test_ivf_pq.cu @@ -232,7 +237,7 @@ if(BUILD_TESTS) NAME SPARSE_TEST PATH sparse/cluster/cluster_solvers.cu sparse/cluster/eigen_solvers.cu sparse/cluster/spectral.cu GPUS 1 PERCENT 100 ) - + ConfigureTest( NAME PREPROCESSING_TEST PATH preprocessing/scalar_quantization.cu GPUS 1 PERCENT 100 ) diff --git a/cpp/test/neighbors/ann_cagra.cuh b/cpp/test/neighbors/ann_cagra.cuh index 8d5701439..c1cd3ca09 100644 --- a/cpp/test/neighbors/ann_cagra.cuh +++ b/cpp/test/neighbors/ann_cagra.cuh @@ -389,12 +389,13 @@ class AnnCagraTest : public ::testing::TestWithParam { (const DataT*)database.data(), ps.n_rows, ps.dim); { + std::optional> database_host{std::nullopt}; cagra::index index(handle_, index_params.metric); if (ps.host_dataset) { - auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); - raft::copy(database_host.data_handle(), database.data(), database.size(), stream_); + database_host = raft::make_host_matrix(ps.n_rows, ps.dim); + raft::copy(database_host->data_handle(), database.data(), database.size(), stream_); auto database_host_view = raft::make_host_matrix_view( - (const DataT*)database_host.data_handle(), ps.n_rows, ps.dim); + (const DataT*)database_host->data_handle(), ps.n_rows, ps.dim); index = cagra::build(handle_, index_params, database_host_view); } else { @@ -567,13 +568,16 @@ class AnnCagraAddNodesTest : public ::testing::TestWithParam { auto initial_database_view = raft::make_device_matrix_view( (const DataT*)database.data(), initial_database_size, ps.dim); + std::optional> database_host{std::nullopt}; cagra::index index(handle_); if (ps.host_dataset) { - auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); + database_host = raft::make_host_matrix(ps.n_rows, ps.dim); raft::copy( - database_host.data_handle(), database.data(), initial_database_view.size(), stream_); + database_host->data_handle(), database.data(), initial_database_view.size(), stream_); auto database_host_view = raft::make_host_matrix_view( - (const DataT*)database_host.data_handle(), initial_database_size, ps.dim); + (const DataT*)database_host->data_handle(), initial_database_size, ps.dim); + // NB: database_host must live no less than the index, because the index _may_be_ + // non-onwning index = cagra::build(handle_, index_params, database_host_view); } else { index = cagra::build(handle_, index_params, initial_database_view); @@ -763,12 +767,13 @@ class AnnCagraFilterTest : public ::testing::TestWithParam { auto database_view = raft::make_device_matrix_view( (const DataT*)database.data(), ps.n_rows, ps.dim); + std::optional> database_host{std::nullopt}; cagra::index index(handle_); if (ps.host_dataset) { - auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); - raft::copy(database_host.data_handle(), database.data(), database.size(), stream_); + database_host = raft::make_host_matrix(ps.n_rows, ps.dim); + raft::copy(database_host->data_handle(), database.data(), database.size(), stream_); auto database_host_view = raft::make_host_matrix_view( - (const DataT*)database_host.data_handle(), ps.n_rows, ps.dim); + (const DataT*)database_host->data_handle(), ps.n_rows, ps.dim); index = cagra::build(handle_, index_params, database_host_view); } else { index = cagra::build(handle_, index_params, database_view); diff --git a/cpp/test/neighbors/ann_ivf_pq.cuh b/cpp/test/neighbors/ann_ivf_pq.cuh index 3a92b5e3d..6c0fdc608 100644 --- a/cpp/test/neighbors/ann_ivf_pq.cuh +++ b/cpp/test/neighbors/ann_ivf_pq.cuh @@ -31,8 +31,6 @@ namespace cuvs::neighbors::ivf_pq { -using raft::RAFT_NAME; // For logging - struct test_ivf_sample_filter { static constexpr unsigned offset = 300; }; @@ -881,7 +879,7 @@ inline auto enum_variety_ip() -> test_cases_t // InnerProduct score is signed, // thus we're forced to used signed 8-bit representation, // thus we have one bit less precision - y.min_recall = y.min_recall.value() * 0.90; + y.min_recall = y.min_recall.value() * 0.88; } else { // In other cases it seems to perform a little bit better, still worse than L2 y.min_recall = y.min_recall.value() * 0.94; diff --git a/cpp/test/neighbors/ann_utils.cuh b/cpp/test/neighbors/ann_utils.cuh index 94bccade2..ded8cb5af 100644 --- a/cpp/test/neighbors/ann_utils.cuh +++ b/cpp/test/neighbors/ann_utils.cuh @@ -38,8 +38,6 @@ namespace cuvs::neighbors { -using raft::RAFT_NAME; // For logging - struct print_dtype { cudaDataType_t value; }; diff --git a/cpp/test/neighbors/brute_force.cu b/cpp/test/neighbors/brute_force.cu index 8c354baa9..a9ad4bf1c 100644 --- a/cpp/test/neighbors/brute_force.cu +++ b/cpp/test/neighbors/brute_force.cu @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -76,11 +77,9 @@ class KNNTest : public ::testing::TestWithParam> { protected: void testBruteForce() { - // #if (RAFT_ACTIVE_LEVEL >= RAFT_LEVEL_DEBUG) raft::print_device_vector("Input array: ", input_.data(), rows_ * cols_, std::cout); std::cout << "K: " << k_ << std::endl; raft::print_device_vector("Labels array: ", search_labels_.data(), rows_, std::cout); - // #endif auto index = raft::make_device_matrix_view( (const T*)(input_.data()), rows_, cols_); @@ -91,10 +90,18 @@ class KNNTest : public ::testing::TestWithParam> { auto distances = raft::make_device_matrix_view(distances_.data(), rows_, k_); - auto metric = cuvs::distance::DistanceType::L2Unexpanded; - auto idx = cuvs::neighbors::brute_force::build(handle, index, metric); - cuvs::neighbors::brute_force::search( - handle, idx, search, indices, distances, cuvs::neighbors::filtering::none_sample_filter{}); + cuvs::neighbors::brute_force::index_params index_params; + index_params.metric = cuvs::distance::DistanceType::L2Unexpanded; + + auto idx = cuvs::neighbors::brute_force::build(handle, index_params, index); + cuvs::neighbors::brute_force::search_params search_params; + cuvs::neighbors::brute_force::search(handle, + search_params, + idx, + search, + indices, + distances, + cuvs::neighbors::filtering::none_sample_filter{}); build_actual_output<<>>( actual_labels_.data(), rows_, k_, search_labels_.data(), indices_.data()); @@ -204,6 +211,7 @@ struct RandomKNNInputs { int k; cuvs::distance::DistanceType metric; bool row_major; + bool host_dataset; }; std::ostream& operator<<(std::ostream& os, const RandomKNNInputs& input) @@ -211,7 +219,7 @@ std::ostream& operator<<(std::ostream& os, const RandomKNNInputs& input) return os << "num_queries:" << input.num_queries << " num_vecs:" << input.num_db_vecs << " dim:" << input.dim << " k:" << input.k << " metric:" << cuvs::neighbors::print_metric{input.metric} - << " row_major:" << input.row_major; + << " row_major:" << input.row_major << " host_dataset:" << input.host_dataset; } template @@ -387,16 +395,25 @@ class RandomBruteForceKNNTest : public ::testing::TestWithParam auto distances = raft::make_device_matrix_view( cuvs_distances_.data(), params_.num_queries, params_.k); - if (params_.row_major) { - auto idx = - cuvs::neighbors::brute_force::build(handle_, - raft::make_device_matrix_view( - database.data(), params_.num_db_vecs, params_.dim), - metric, - metric_arg); + cuvs::neighbors::brute_force::index_params index_params; + index_params.metric = metric; + index_params.metric_arg = metric_arg; + + cuvs::neighbors::brute_force::search_params search_params; + + if (params_.host_dataset) { + // test building from a dataset in host memory + auto host_database = + raft::make_host_matrix(params_.num_db_vecs, params_.dim); + raft::copy( + host_database.data_handle(), database.data(), params_.num_db_vecs * params_.dim, stream_); + + auto idx = cuvs::neighbors::brute_force::build( + handle_, index_params, raft::make_const_mdspan(host_database.view())); cuvs::neighbors::brute_force::search( handle_, + search_params, idx, raft::make_device_matrix_view( search_queries.data(), params_.num_queries, params_.dim), @@ -404,21 +421,39 @@ class RandomBruteForceKNNTest : public ::testing::TestWithParam distances, cuvs::neighbors::filtering::none_sample_filter{}); } else { - auto idx = cuvs::neighbors::brute_force::build( - handle_, - raft::make_device_matrix_view( - database.data(), params_.num_db_vecs, params_.dim), - metric, - metric_arg); + if (params_.row_major) { + auto idx = + cuvs::neighbors::brute_force::build(handle_, + index_params, + raft::make_device_matrix_view( + database.data(), params_.num_db_vecs, params_.dim)); - cuvs::neighbors::brute_force::search( - handle_, - idx, - raft::make_device_matrix_view( - search_queries.data(), params_.num_queries, params_.dim), - indices, - distances, - cuvs::neighbors::filtering::none_sample_filter{}); + cuvs::neighbors::brute_force::search( + handle_, + search_params, + idx, + raft::make_device_matrix_view( + search_queries.data(), params_.num_queries, params_.dim), + indices, + distances, + cuvs::neighbors::filtering::none_sample_filter{}); + } else { + auto idx = cuvs::neighbors::brute_force::build( + handle_, + index_params, + raft::make_device_matrix_view( + database.data(), params_.num_db_vecs, params_.dim)); + + cuvs::neighbors::brute_force::search( + handle_, + search_params, + idx, + raft::make_device_matrix_view( + search_queries.data(), params_.num_queries, params_.dim), + indices, + distances, + cuvs::neighbors::filtering::none_sample_filter{}); + } } ASSERT_TRUE(cuvs::neighbors::devArrMatchKnnPair(ref_indices_.data(), @@ -468,42 +503,51 @@ class RandomBruteForceKNNTest : public ::testing::TestWithParam const std::vector random_inputs = { // test each distance metric on a small-ish input, with row-major inputs - {100, 256, 2, 65, cuvs::distance::DistanceType::L2Expanded, true}, - {256, 512, 16, 8, cuvs::distance::DistanceType::L2Unexpanded, true}, - {256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtExpanded, true}, - {256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtUnexpanded, true}, - {256, 512, 16, 8, cuvs::distance::DistanceType::L1, true}, - {256, 512, 16, 8, cuvs::distance::DistanceType::Linf, true}, - {256, 512, 16, 8, cuvs::distance::DistanceType::InnerProduct, true}, - {256, 512, 16, 8, cuvs::distance::DistanceType::CorrelationExpanded, true}, - {256, 512, 16, 8, cuvs::distance::DistanceType::CosineExpanded, true}, - {256, 512, 16, 8, cuvs::distance::DistanceType::LpUnexpanded, true}, - {256, 512, 16, 8, cuvs::distance::DistanceType::JensenShannon, true}, - {256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtExpanded, true}, - {256, 512, 16, 8, cuvs::distance::DistanceType::Canberra, true}, + {100, 256, 2, 65, cuvs::distance::DistanceType::L2Expanded, true, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::L2Unexpanded, true, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtExpanded, true, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtUnexpanded, true, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::L1, true, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::Linf, true, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::InnerProduct, true, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::CorrelationExpanded, true, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::CosineExpanded, true, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::LpUnexpanded, true, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::JensenShannon, true, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtExpanded, true, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::Canberra, true, false}, // test each distance metric with col-major inputs - {256, 512, 16, 7, cuvs::distance::DistanceType::L2Expanded, false}, - {256, 512, 16, 8, cuvs::distance::DistanceType::L2Unexpanded, false}, - {256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtExpanded, false}, - {256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtUnexpanded, false}, - {256, 512, 16, 8, cuvs::distance::DistanceType::L1, false}, - {256, 512, 16, 8, cuvs::distance::DistanceType::Linf, false}, - {256, 512, 16, 8, cuvs::distance::DistanceType::InnerProduct, false}, - {256, 512, 16, 8, cuvs::distance::DistanceType::CorrelationExpanded, false}, - {256, 512, 16, 8, cuvs::distance::DistanceType::CosineExpanded, false}, - {256, 512, 16, 8, cuvs::distance::DistanceType::LpUnexpanded, false}, - {256, 512, 16, 8, cuvs::distance::DistanceType::JensenShannon, false}, - {256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtExpanded, false}, - {256, 512, 16, 8, cuvs::distance::DistanceType::Canberra, false}, + {256, 512, 16, 7, cuvs::distance::DistanceType::L2Expanded, false, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::L2Unexpanded, false, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtExpanded, false, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtUnexpanded, false, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::L1, false, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::Linf, false, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::InnerProduct, false, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::CorrelationExpanded, false, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::CosineExpanded, false, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::LpUnexpanded, false, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::JensenShannon, false, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::L2SqrtExpanded, false, false}, + {256, 512, 16, 8, cuvs::distance::DistanceType::Canberra, false, false}, // larger tests on different sized data / k values - {10000, 40000, 32, 30, cuvs::distance::DistanceType::L2Expanded, false}, - {345, 1023, 16, 128, cuvs::distance::DistanceType::CosineExpanded, true}, - {789, 20516, 64, 256, cuvs::distance::DistanceType::L2SqrtExpanded, false}, - {1000, 200000, 128, 128, cuvs::distance::DistanceType::L2Expanded, true}, - {1000, 200000, 128, 128, cuvs::distance::DistanceType::L2Expanded, false}, - {1000, 5000, 128, 128, cuvs::distance::DistanceType::LpUnexpanded, true}, - {1000, 5000, 128, 128, cuvs::distance::DistanceType::L2SqrtExpanded, false}, - {1000, 5000, 128, 128, cuvs::distance::DistanceType::InnerProduct, false}}; + {10000, 40000, 32, 30, cuvs::distance::DistanceType::L2Expanded, false, false}, + {345, 1023, 16, 128, cuvs::distance::DistanceType::CosineExpanded, true, false}, + {789, 20516, 64, 256, cuvs::distance::DistanceType::L2SqrtExpanded, false, false}, + {1000, 200000, 128, 128, cuvs::distance::DistanceType::L2Expanded, true, false}, + {1000, 200000, 128, 128, cuvs::distance::DistanceType::L2Expanded, false, false}, + {1000, 5000, 128, 128, cuvs::distance::DistanceType::LpUnexpanded, true, false}, + {1000, 5000, 128, 128, cuvs::distance::DistanceType::L2SqrtExpanded, false, false}, + {1000, 5000, 128, 128, cuvs::distance::DistanceType::InnerProduct, false, false}, + // test with datasets on host memory + {256, 512, 16, 8, cuvs::distance::DistanceType::L2Expanded, true, true}, + {256, 512, 32, 16, cuvs::distance::DistanceType::L2Unexpanded, true, true}, + {256, 512, 8, 8, cuvs::distance::DistanceType::L2SqrtExpanded, true, true}, + {256, 128, 32, 8, cuvs::distance::DistanceType::L2SqrtUnexpanded, true, true}, + {256, 512, 16, 8, cuvs::distance::DistanceType::L1, true, true}, + {256, 512, 16, 8, cuvs::distance::DistanceType::Linf, true, true}, + {256, 512, 16, 8, cuvs::distance::DistanceType::InnerProduct, true, true}, + {256, 512, 16, 7, cuvs::distance::DistanceType::L2Expanded, true, true}}; typedef RandomBruteForceKNNTest RandomBruteForceKNNTestF; TEST_P(RandomBruteForceKNNTestF, BruteForce) { this->testBruteForce(); } diff --git a/cpp/test/neighbors/dynamic_batching/test_brute_force.cu b/cpp/test/neighbors/dynamic_batching/test_brute_force.cu new file mode 100644 index 000000000..11f468374 --- /dev/null +++ b/cpp/test/neighbors/dynamic_batching/test_brute_force.cu @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "../dynamic_batching.cuh" + +#include + +namespace cuvs::neighbors::dynamic_batching { + +using brute_force_float32 = dynamic_batching_test, + brute_force::build, + brute_force::search>; + +TEST_P(brute_force_float32, defaults) +{ + build_all(); + search_all(); + check_neighbors(); +} + +INSTANTIATE_TEST_CASE_P(dynamic_batching, brute_force_float32, ::testing::ValuesIn(inputs)); + +} // namespace cuvs::neighbors::dynamic_batching diff --git a/dependencies.yaml b/dependencies.yaml index 98cac5300..fbd1d8372 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -7,39 +7,39 @@ files: arch: [x86_64, aarch64] includes: - build - - rapids_build - build_py_cuvs + - build_wheels + - checks - cuda - cuda_version - - depends_on_pylibraft + - depends_on_cupy - depends_on_librmm + - depends_on_pylibraft - develop - - checks - - build_wheels - - test_libcuvs - docs + - rapids_build - run_py_cuvs + - rust + - test_libcuvs - test_python_common - test_py_cuvs - - cupy - - rust bench_ann: output: conda matrix: cuda: ["11.8", "12.5"] arch: [x86_64, aarch64] includes: - - rapids_build + - bench + - bench_python - build_py_cuvs - cuda - cuda_version + - depends_on_cupy - depends_on_pylibraft - depends_on_librmm - develop - - bench - - bench_python + - rapids_build - rapids_build_setuptools - - cupy test_cpp: output: none includes: @@ -49,10 +49,10 @@ files: output: none includes: - cuda_version + - depends_on_cupy - py_version - test_python_common - test_py_cuvs - - cupy checks: output: none includes: @@ -61,19 +61,19 @@ files: docs: output: none includes: + - cuda - cuda_version - - cupy + - depends_on_cupy - docs - py_version - - rust - rapids_build - - cuda + - rust rust: output: none includes: + - cuda - cuda_version - rapids_build - - cuda - rust py_build_cuvs: output: pyproject @@ -89,8 +89,8 @@ files: table: tool.rapids-build-backend key: requires includes: - - rapids_build - build_py_cuvs + - rapids_build py_run_cuvs: output: pyproject pyproject_dir: python/cuvs @@ -98,8 +98,8 @@ files: table: project includes: - cuda_wheels - - run_py_cuvs - depends_on_pylibraft + - run_py_cuvs py_test_cuvs: output: pyproject pyproject_dir: python/cuvs @@ -107,9 +107,9 @@ files: table: project.optional-dependencies key: test includes: + - depends_on_cupy - test_python_common - test_py_cuvs - - cupy py_build_cuvs_bench: output: pyproject pyproject_dir: python/cuvs_bench @@ -214,11 +214,11 @@ dependencies: - matrix: cuda: "12.*" packages: - - &cuda_python12 cuda-python>=12.0,<13.0a0,<=12.6.0 + - &cuda_python12 cuda-python>=12.6.2,<13.0a0 - matrix: cuda: "11.*" packages: - - &cuda_python11 cuda-python>=11.7.1,<12.0a0,<=11.8.3 + - &cuda_python11 cuda-python>=11.8.5,<12.0a0 - matrix: packages: - &cuda_python cuda-python @@ -368,7 +368,7 @@ dependencies: - nvidia-cusolver - nvidia-cusparse - cupy: + depends_on_cupy: common: - output_types: conda packages: @@ -394,22 +394,24 @@ dependencies: common: - output_types: [conda] packages: - - breathe + - breathe>=4.35.0 - doxygen>=1.8.20 - graphviz - ipython - numpydoc - - pydata-sphinx-theme - recommonmark + - sphinx>=8.0.0 - sphinx-copybutton - sphinx-markdown-tables + - pip: + - nvidia-sphinx-theme rust: common: - output_types: [conda] packages: - make - rust - # clang/liblclang only needed for bindgen support + # clang/libclang only needed for bindgen support - clang - libclang build_wheels: @@ -476,13 +478,13 @@ dependencies: - h5py>=3.8.0 - benchmark>=1.8.2 - openblas - - libcuvs==24.12.*,>=0.0.0a0 + - libcuvs==25.2.*,>=0.0.0a0 bench_python: common: - output_types: [conda, pyproject, requirements] packages: - click - - cuvs==24.12.*,>=0.0.0a0 + - cuvs==25.2.*,>=0.0.0a0 - matplotlib - pandas - pyyaml @@ -490,7 +492,7 @@ dependencies: common: - output_types: conda packages: - - &librmm_unsuffixed librmm==24.12.*,>=0.0.0a0 + - &librmm_unsuffixed librmm==25.2.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -503,18 +505,18 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - librmm-cu12==24.12.*,>=0.0.0a0 + - librmm-cu12==25.2.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - librmm-cu11==24.12.*,>=0.0.0a0 + - librmm-cu11==25.2.*,>=0.0.0a0 - {matrix: null, packages: [*librmm_unsuffixed]} depends_on_pylibraft: common: - output_types: conda packages: - - &pylibraft_unsuffixed pylibraft==24.12.*,>=0.0.0a0 + - &pylibraft_unsuffixed pylibraft==25.2.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -527,10 +529,10 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - pylibraft-cu12==24.12.*,>=0.0.0a0 + - pylibraft-cu12==25.2.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - pylibraft-cu11==24.12.*,>=0.0.0a0 + - pylibraft-cu11==25.2.*,>=0.0.0a0 - {matrix: null, packages: [*pylibraft_unsuffixed]} diff --git a/docs/source/conf.py b/docs/source/conf.py index 0d667833a..ca7330279 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -99,7 +99,7 @@ # a list of builtin themes. # -html_theme = "pydata_sphinx_theme" +html_theme = "nvidia_sphinx_theme" # Theme options are theme-specific and customize the look and feel of a theme @@ -198,7 +198,7 @@ def setup(app): linkcode_resolve = make_linkcode_resolve( "cuvs", "https://github.com/rapidsai/cuvs/" - "blob/{revision}/python/cuvs/cuvs/" + "blob/{revision}/python/cuvs/" "{package}/{path}#L{lineno}", ) diff --git a/docs/source/cuvs_bench/index.rst b/docs/source/cuvs_bench/index.rst index 81fb7537c..820c44c4f 100644 --- a/docs/source/cuvs_bench/index.rst +++ b/docs/source/cuvs_bench/index.rst @@ -24,7 +24,7 @@ This tool offers several benefits, including * `Docker`_ -- `How to run the benchmarks`_ +- `How benchmarks are run`_ * `Step 1: Prepare the dataset`_ @@ -93,32 +93,36 @@ We provide images for GPU enabled systems, as well as systems without a GPU. The - `cuvs-bench-datasets`: Contains the GPU and CPU benchmarks with million-scale datasets already included in the container. Best suited for users that want to run multiple million scale datasets already included in the image. - `cuvs-bench-cpu`: Contains only CPU benchmarks with minimal size. Best suited for users that want the smallest containers to reproduce benchmarks on systems without a GPU. -Nightly images are located in `dockerhub `_, meanwhile release (stable) versions are located in `NGC `_, starting with release 24.10. +Nightly images are located in `dockerhub `_. -The following command pulls the nightly container for Python version 3.10, CUDA version 12.0, and cuVS version 24.10: +The following command pulls the nightly container for Python version 3.10, CUDA version 12.5, and cuVS version 24.12: .. code-block:: bash - docker pull rapidsai/cuvs-bench:24.10a-cuda12.0-py3.10 #substitute cuvs-bench for the exact desired container. + docker pull rapidsai/cuvs-bench:24.12a-cuda12.5-py3.10 #substitute cuvs-bench for the exact desired container. The CUDA and python versions can be changed for the supported values: -- Supported CUDA versions: 11.4 and 12.x -- Supported Python versions: 3.9 and 3.10. +- Supported CUDA versions: 11.8 and 12.5 +- Supported Python versions: 3.10 and 3.11. You can see the exact versions as well in the dockerhub site: - `cuVS bench images `_ -- `cuVS bench with datasets preloaded images `_ +- `cuVS bench with pre-loaded million-scale datasets images `_ - `cuVS bench CPU only images `_ **Note:** GPU containers use the CUDA toolkit from inside the container, the only requirement is a driver installed on the host machine that supports that version. So, for example, CUDA 11.8 containers can run in systems with a CUDA 12.x capable driver. Please also note that the Nvidia-Docker runtime from the `Nvidia Container Toolkit `_ is required to use GPUs inside docker containers. -How to run the benchmarks -========================= +How benchmarks are run +====================== + +The `cuvs-bench` package contains lightweight Python scripts to run the benchmarks. There are 4 general steps to running the benchmarks and visualizing the results. -We provide a collection of lightweight Python scripts to run the benchmarks. There are 4 general steps to running the benchmarks and visualizing the results. #. Prepare Dataset + #. Build Index and Search Index + #. Data Export + #. Plot Results Step 1: Prepare the dataset diff --git a/docs/source/developer_guide.md b/docs/source/developer_guide.md index 7702f80b3..4fdd6405e 100644 --- a/docs/source/developer_guide.md +++ b/docs/source/developer_guide.md @@ -187,7 +187,7 @@ RAFT relies on `clang-format` to enforce code style across all C++ and CUDA sour 1. Do not split empty functions/records/namespaces. 2. Two-space indentation everywhere, including the line continuations. 3. Disable reflowing of comments. - The reasons behind these deviations from the Google style guide are given in comments [here](https://github.com/rapidsai/raft/blob/branch-24.12/cpp/.clang-format). + The reasons behind these deviations from the Google style guide are given in comments [here](https://github.com/rapidsai/raft/blob/branch-25.02/cpp/.clang-format). [`doxygen`](https://doxygen.nl/) is used as documentation generator and also as a documentation linter. In order to run doxygen as a linter on C++/CUDA code, run @@ -205,7 +205,7 @@ you can run `codespell -i 3 -w .` from the repository root directory. This will bring up an interactive prompt to select which spelling fixes to apply. ### #include style -[include_checker.py](https://github.com/rapidsai/raft/blob/branch-24.12/cpp/scripts/include_checker.py) is used to enforce the include style as follows: +[include_checker.py](https://github.com/rapidsai/raft/blob/branch-25.02/cpp/scripts/include_checker.py) is used to enforce the include style as follows: 1. `#include "..."` should be used for referencing local files only. It is acceptable to be used for referencing files in a sub-folder/parent-folder of the same algorithm, but should never be used to include files in other algorithms or between algorithms and the primitives or other dependencies. 2. `#include <...>` should be used for referencing everything else @@ -230,7 +230,7 @@ Call CUDA APIs via the provided helper macros `RAFT_CUDA_TRY`, `RAFT_CUBLAS_TRY` ## Logging ### Introduction -Anything and everything about logging is defined inside [logger.hpp](https://github.com/rapidsai/raft/blob/branch-24.12/cpp/include/raft/core/logger.hpp). It uses [spdlog](https://github.com/gabime/spdlog) underneath, but this information is transparent to all. +Anything and everything about logging is defined inside [logger.hpp](https://github.com/rapidsai/raft/blob/branch-25.02/cpp/include/raft/core/logger.hpp). It uses [spdlog](https://github.com/gabime/spdlog) underneath, but this information is transparent to all. ### Usage ```cpp diff --git a/docs/source/sphinxext/github_link.py b/docs/source/sphinxext/github_link.py index 2c52488ca..75acfbd6e 100644 --- a/docs/source/sphinxext/github_link.py +++ b/docs/source/sphinxext/github_link.py @@ -1,5 +1,20 @@ # This contains code with copyright by the scikit-learn project, subject to the # license in /thirdparty/LICENSES/LICENSE.scikit_learn +# +# Copyright (c) 2024-2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# import inspect import os @@ -101,10 +116,9 @@ def _linkcode_resolve(domain, info, package, url_fmt, revision): else: return else: - # Test if we are absolute or not (pyx are relative) - if (not os.path.isabs(fn)): - # Should be relative to docs right now - fn = os.path.abspath(os.path.join("..", "python", fn)) + if fn.endswith(".pyx"): + sp_path = next(x for x in sys.path if re.match(".*site-packages$", x)) + fn = fn.replace("/opt/conda/conda-bld/work/python/cuvs", sp_path) # Convert to relative from module root fn = os.path.relpath(fn, diff --git a/examples/cmake/thirdparty/fetch_rapids.cmake b/examples/cmake/thirdparty/fetch_rapids.cmake index 6f4c627ed..3c5510b8b 100644 --- a/examples/cmake/thirdparty/fetch_rapids.cmake +++ b/examples/cmake/thirdparty/fetch_rapids.cmake @@ -11,11 +11,11 @@ # or implied. See the License for the specific language governing permissions and limitations under # the License. -# Use this variable to update RAPIDS and RAFT versions -set(RAPIDS_VERSION "24.12") +# Use this variable to update RAPIDS and cuVS versions +set(RAPIDS_VERSION "25.02") -if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake) +if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/CUVS_RAPIDS.cmake) file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION}/RAPIDS.cmake - ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake) + ${CMAKE_CURRENT_BINARY_DIR}/CUVS_RAPIDS.cmake) endif() -include(${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake) +include(${CMAKE_CURRENT_BINARY_DIR}/CUVS_RAPIDS.cmake) diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt index 951e0ad0c..9554207bb 100644 --- a/examples/cpp/CMakeLists.txt +++ b/examples/cpp/CMakeLists.txt @@ -45,13 +45,16 @@ add_executable(VAMANA_EXAMPLE src/vamana_example.cu) # `$` is a generator expression that ensures that targets are # installed in a conda environment, if one exists -target_link_libraries(CAGRA_EXAMPLE PRIVATE cuvs::cuvs $) +add_library(rmm_logger OBJECT) +target_link_libraries(rmm_logger PRIVATE rmm::rmm_logger_impl) + +target_link_libraries(CAGRA_EXAMPLE PRIVATE cuvs::cuvs $ rmm_logger) target_link_libraries( - CAGRA_PERSISTENT_EXAMPLE PRIVATE cuvs::cuvs $ Threads::Threads + CAGRA_PERSISTENT_EXAMPLE PRIVATE cuvs::cuvs $ Threads::Threads rmm_logger ) target_link_libraries( - DYNAMIC_BATCHING_EXAMPLE PRIVATE cuvs::cuvs $ Threads::Threads + DYNAMIC_BATCHING_EXAMPLE PRIVATE cuvs::cuvs $ Threads::Threads rmm_logger ) -target_link_libraries(IVF_PQ_EXAMPLE PRIVATE cuvs::cuvs $) -target_link_libraries(IVF_FLAT_EXAMPLE PRIVATE cuvs::cuvs $) -target_link_libraries(VAMANA_EXAMPLE PRIVATE cuvs::cuvs $) +target_link_libraries(IVF_PQ_EXAMPLE PRIVATE cuvs::cuvs $ rmm_logger) +target_link_libraries(IVF_FLAT_EXAMPLE PRIVATE cuvs::cuvs $ rmm_logger) +target_link_libraries(VAMANA_EXAMPLE PRIVATE cuvs::cuvs $ rmm_logger) diff --git a/pyproject.toml b/pyproject.toml index fbf4cf41f..417514466 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ force-exclude = ''' # unlike the match option above this match-dir will have no effect when # pydocstyle is invoked from pre-commit. Therefore this exclusion list must # also be maintained in the pre-commit config file. -match-dir = "^(?!(ci|cpp|conda|docs)).*$" +match-dir = "^(?!(ci|cpp|conda|docs|notebooks)).*$" select = "D201, D204, D206, D207, D208, D209, D210, D211, D214, D215, D300, D301, D302, D403, D405, D406, D407, D408, D409, D410, D411, D412, D414, D418" # Would like to enable the following rules in the future: # D200, D202, D205, D400 @@ -42,6 +42,6 @@ follow_imports = "skip" skip = "./.git,./.github,./cpp/build,.*egg-info.*,./.mypy_cache,.*_skbuild" # ignore short words, and typename parameters like OffsetT ignore-regex = "\\b(.{1,4}|[A-Z]\\w*T)\\b" -ignore-words-list = "inout,numer" +ignore-words-list = "inout,unparseable,numer" builtin = "clear" quiet-level = 3 diff --git a/python/cuvs/CMakeLists.txt b/python/cuvs/CMakeLists.txt index feb3bd58c..c0990995f 100644 --- a/python/cuvs/CMakeLists.txt +++ b/python/cuvs/CMakeLists.txt @@ -110,6 +110,9 @@ endif() rapids_cython_init() +add_library(cuvs_rmm_logger OBJECT) +target_link_libraries(cuvs_rmm_logger PRIVATE rmm::rmm_logger_impl) + add_subdirectory(cuvs/common) add_subdirectory(cuvs/distance) add_subdirectory(cuvs/neighbors) diff --git a/python/cuvs/cuvs/common/CMakeLists.txt b/python/cuvs/cuvs/common/CMakeLists.txt index 202919e01..361f2fafc 100644 --- a/python/cuvs/cuvs/common/CMakeLists.txt +++ b/python/cuvs/cuvs/common/CMakeLists.txt @@ -22,3 +22,7 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX common_ ) + +foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger) +endforeach() diff --git a/python/cuvs/cuvs/common/cydlpack.pyx b/python/cuvs/cuvs/common/cydlpack.pyx index 79f88cddc..bee8d9afa 100644 --- a/python/cuvs/cuvs/common/cydlpack.pyx +++ b/python/cuvs/cuvs/common/cydlpack.pyx @@ -25,6 +25,8 @@ cdef void deleter(DLManagedTensor* tensor) noexcept: if tensor.manager_ctx is NULL: return stdlib.free(tensor.dl_tensor.shape) + if tensor.dl_tensor.strides is not NULL: + stdlib.free(tensor.dl_tensor.strides) tensor.manager_ctx = NULL stdlib.free(tensor) @@ -95,11 +97,20 @@ cdef DLManagedTensor* dlpack_c(ary): tensor.data = tensor_ptr tensor.device = dev tensor.dtype = dtype - tensor.strides = NULL tensor.ndim = ndim tensor.shape = shape tensor.byte_offset = 0 + if ary.c_contiguous: + tensor.strides = NULL + elif ary.f_contiguous: + tensor.strides = stdlib.malloc(ndim * sizeof(int64_t)) + tensor.strides[0] = 1 + for i in range(1, ndim): + tensor.strides[i] = tensor.strides[i-1] * tensor.shape[i-1] + else: + raise ValueError("Input data must be contiguous") + dlm.dl_tensor = tensor dlm.manager_ctx = NULL dlm.deleter = deleter diff --git a/python/cuvs/cuvs/distance/CMakeLists.txt b/python/cuvs/cuvs/distance/CMakeLists.txt index 363778a9c..514b08c43 100644 --- a/python/cuvs/cuvs/distance/CMakeLists.txt +++ b/python/cuvs/cuvs/distance/CMakeLists.txt @@ -22,3 +22,7 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX distance_ ) + +foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger) +endforeach() diff --git a/python/cuvs/cuvs/distance/distance.pyx b/python/cuvs/cuvs/distance/distance.pyx index eb34366e4..d50fc152f 100644 --- a/python/cuvs/cuvs/distance/distance.pyx +++ b/python/cuvs/cuvs/distance/distance.pyx @@ -56,7 +56,7 @@ SUPPORTED_DISTANCES = ["euclidean", "l1", "cityblock", "l2", "inner_product", @auto_sync_resources @auto_convert_output -def pairwise_distance(X, Y, out=None, metric="euclidean", metric_arg=2.0, +def pairwise_distance(X, Y, out=None, metric="euclidean", p=2.0, resources=None): """ Compute pairwise distances between X and Y @@ -74,7 +74,7 @@ def pairwise_distance(X, Y, out=None, metric="euclidean", metric_arg=2.0, Y : CUDA array interface compliant matrix shape (n, k) out : Optional writable CUDA array interface matrix shape (m, n) metric : string denoting the metric type (default="euclidean") - metric_arg : metric parameter (currently used only for "minkowski") + p : metric parameter (currently used only for "minkowski") {resources_docstring} Examples @@ -100,7 +100,12 @@ def pairwise_distance(X, Y, out=None, metric="euclidean", metric_arg=2.0, n = y_cai.shape[0] if out is None: - out = device_ndarray.empty((m, n), dtype=y_cai.dtype) + output_dtype = y_cai.dtype + if np.issubdtype(y_cai.dtype, np.float16): + output_dtype = np.float32 + + order = "C" if getattr(X, "flags", X).c_contiguous else "F" + out = device_ndarray.empty((m, n), dtype=output_dtype, order=order) out_cai = wrap_array(out) x_k = x_cai.shape[1] @@ -119,7 +124,7 @@ def pairwise_distance(X, Y, out=None, metric="euclidean", metric_arg=2.0, y_dt = y_cai.dtype d_dt = out_cai.dtype - if x_dt != y_dt or x_dt != d_dt: + if x_dt != y_dt: raise ValueError("Inputs must have the same dtypes") cdef cydlpack.DLManagedTensor* x_dlpack = \ @@ -134,6 +139,6 @@ def pairwise_distance(X, Y, out=None, metric="euclidean", metric_arg=2.0, y_dlpack, out_dlpack, distance_type, - metric_arg)) + p)) return out diff --git a/python/cuvs/cuvs/neighbors/CMakeLists.txt b/python/cuvs/cuvs/neighbors/CMakeLists.txt index f68bbea53..031fd485e 100644 --- a/python/cuvs/cuvs/neighbors/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/CMakeLists.txt @@ -29,3 +29,7 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_refine_ ) + +foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger) +endforeach() diff --git a/python/cuvs/cuvs/neighbors/brute_force/CMakeLists.txt b/python/cuvs/cuvs/neighbors/brute_force/CMakeLists.txt index 4806fb9fc..61eda649c 100644 --- a/python/cuvs/cuvs/neighbors/brute_force/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/brute_force/CMakeLists.txt @@ -23,3 +23,7 @@ rapids_cython_create_modules( LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_brute_force_ ) + +foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger) +endforeach() diff --git a/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx b/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx index 9d43bfb29..f71acd086 100644 --- a/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx +++ b/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx @@ -102,7 +102,7 @@ def build(dataset, metric="sqeuclidean", metric_arg=2.0, resources=None): """ dataset_ai = wrap_array(dataset) - _check_input_array(dataset_ai, [np.dtype('float32')]) + _check_input_array(dataset_ai, [np.dtype('float32')], exp_row_major=False) cdef cuvsResources_t res = resources.get_c_obj() @@ -218,7 +218,7 @@ def search(Index index, cdef cuvsResources_t res = resources.get_c_obj() queries_cai = wrap_array(queries) - _check_input_array(queries_cai, [np.dtype('float32')]) + _check_input_array(queries_cai, [np.dtype('float32')], exp_row_major=False) cdef uint32_t n_queries = queries_cai.shape[0] diff --git a/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt b/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt index 87e6597fe..1f40daab2 100644 --- a/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt @@ -22,3 +22,7 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_cagra_ ) + +foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger) +endforeach() diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pxd b/python/cuvs/cuvs/neighbors/cagra/cagra.pxd index bba5a91a8..a0f811480 100644 --- a/python/cuvs/cuvs/neighbors/cagra/cagra.pxd +++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pxd @@ -28,6 +28,7 @@ from libcpp cimport bool from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor +from cuvs.distance_type cimport cuvsDistanceType cdef extern from "cuvs/neighbors/cagra.h" nogil: @@ -47,6 +48,7 @@ cdef extern from "cuvs/neighbors/cagra.h" nogil: ctypedef cuvsCagraCompressionParams* cuvsCagraCompressionParams_t ctypedef struct cuvsCagraIndexParams: + cuvsDistanceType metric size_t intermediate_graph_degree size_t graph_degree cuvsCagraGraphBuildAlgo build_algo diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx index 752aef741..fd55905cf 100644 --- a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx +++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx @@ -28,11 +28,13 @@ from libcpp cimport bool, cast from libcpp.string cimport string from cuvs.common cimport cydlpack +from cuvs.distance_type cimport cuvsDistanceType from pylibraft.common import auto_convert_output, cai_wrapper, device_ndarray from pylibraft.common.cai_wrapper import wrap_array from pylibraft.common.interruptible import cuda_interruptible +from cuvs.distance import DISTANCE_TYPES from cuvs.neighbors.common import _check_input_array from libc.stdint cimport ( @@ -131,9 +133,11 @@ cdef class IndexParams: Parameters ---------- metric : string denoting the metric type, default="sqeuclidean" - Valid values for metric: ["sqeuclidean"], where + Valid values for metric: ["sqeuclidean", "inner_product"], where - sqeuclidean is the euclidean distance without the square root operation, i.e.: distance(a,b) = \\sum_i (a_i - b_i)^2 + - inner_product distance is defined as + distance(a, b) = \\sum_i a_i * b_i. intermediate_graph_degree : int, default = 128 graph_degree : int, default = 64 @@ -151,6 +155,7 @@ cdef class IndexParams: """ cdef cuvsCagraIndexParams* params + cdef object _metric # hold on to a reference to the compression, to keep from being GC'ed cdef public object compression @@ -170,10 +175,8 @@ cdef class IndexParams: nn_descent_niter=20, compression=None): - # todo (dgd): enable once other metrics are present - # and exposed in cuVS C API - # self.params.metric = _get_metric(metric) - # self.params.metric_arg = 0 + self._metric = metric + self.params.metric = DISTANCE_TYPES[metric] self.params.intermediate_graph_degree = intermediate_graph_degree self.params.graph_degree = graph_degree if build_algo == "ivf_pq": @@ -186,9 +189,9 @@ cdef class IndexParams: self.params.compression = \ compression.get_handle() - # @property - # def metric(self): - # return self.params.metric + @property + def metric(self): + return self._metric @property def intermediate_graph_degree(self): @@ -247,6 +250,7 @@ def build(IndexParams index_params, dataset, resources=None): The following distance metrics are supported: - L2 + - InnerProduct Parameters ---------- diff --git a/python/cuvs/cuvs/neighbors/common.py b/python/cuvs/cuvs/neighbors/common.py index c14b9f8c9..f49d9eb1f 100644 --- a/python/cuvs/cuvs/neighbors/common.py +++ b/python/cuvs/cuvs/neighbors/common.py @@ -14,11 +14,13 @@ # limitations under the License. -def _check_input_array(cai, exp_dt, exp_rows=None, exp_cols=None): +def _check_input_array( + cai, exp_dt, exp_rows=None, exp_cols=None, exp_row_major=True +): if cai.dtype not in exp_dt: raise TypeError("dtype %s not supported" % cai.dtype) - if not cai.c_contiguous: + if exp_row_major and not cai.c_contiguous: raise ValueError("Row major input is expected") if exp_cols is not None and cai.shape[1] != exp_cols: diff --git a/python/cuvs/cuvs/neighbors/filters/CMakeLists.txt b/python/cuvs/cuvs/neighbors/filters/CMakeLists.txt index c90615feb..a678852d9 100644 --- a/python/cuvs/cuvs/neighbors/filters/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/filters/CMakeLists.txt @@ -22,3 +22,7 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_prefilter_ ) + +foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger) +endforeach() diff --git a/python/cuvs/cuvs/neighbors/hnsw/CMakeLists.txt b/python/cuvs/cuvs/neighbors/hnsw/CMakeLists.txt index 1f9c422ca..8351916e6 100644 --- a/python/cuvs/cuvs/neighbors/hnsw/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/hnsw/CMakeLists.txt @@ -22,3 +22,7 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_hnsw_ ) + +foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger) +endforeach() diff --git a/python/cuvs/cuvs/neighbors/ivf_flat/CMakeLists.txt b/python/cuvs/cuvs/neighbors/ivf_flat/CMakeLists.txt index 09bd8f422..f5663cdaa 100644 --- a/python/cuvs/cuvs/neighbors/ivf_flat/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/ivf_flat/CMakeLists.txt @@ -22,3 +22,7 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_ivf_flat_ ) + +foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger) +endforeach() diff --git a/python/cuvs/cuvs/neighbors/ivf_pq/CMakeLists.txt b/python/cuvs/cuvs/neighbors/ivf_pq/CMakeLists.txt index 97c3a1824..a24320ded 100644 --- a/python/cuvs/cuvs/neighbors/ivf_pq/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/ivf_pq/CMakeLists.txt @@ -22,3 +22,7 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_pq_ ) + +foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + target_link_libraries(${tgt} PRIVATE cuvs_rmm_logger) +endforeach() diff --git a/python/cuvs/cuvs/test/conftest.py b/python/cuvs/cuvs/test/conftest.py new file mode 100644 index 000000000..d84de5d21 --- /dev/null +++ b/python/cuvs/cuvs/test/conftest.py @@ -0,0 +1,5 @@ +# arm tests sporadically run into +# https://bugzilla.redhat.com/show_bug.cgi?id=1722181. +# This is a workaround to ensure that OpenMP gets the TLS that it needs. + +import sklearn # noqa: F401 diff --git a/python/cuvs/cuvs/test/test_brute_force.py b/python/cuvs/cuvs/test/test_brute_force.py index acf347ec3..0b37ad885 100644 --- a/python/cuvs/cuvs/test/test_brute_force.py +++ b/python/cuvs/cuvs/test/test_brute_force.py @@ -40,12 +40,15 @@ ], ) @pytest.mark.parametrize("inplace", [True, False]) +@pytest.mark.parametrize("order", ["F", "C"]) @pytest.mark.parametrize("dtype", [np.float32]) def test_brute_force_knn( - n_index_rows, n_query_rows, n_cols, k, inplace, metric, dtype + n_index_rows, n_query_rows, n_cols, k, inplace, order, metric, dtype ): - index = np.random.random_sample((n_index_rows, n_cols)).astype(dtype) - queries = np.random.random_sample((n_query_rows, n_cols)).astype(dtype) + index = np.random.random_sample((n_index_rows, n_cols)) + index = np.asarray(index, order=order).astype(dtype) + queries = np.random.random_sample((n_query_rows, n_cols)) + queries = np.asarray(queries, order=order).astype(dtype) # RussellRao expects boolean arrays if metric == "russellrao": diff --git a/python/cuvs/cuvs/test/test_cagra.py b/python/cuvs/cuvs/test/test_cagra.py index 56e132c23..d3b03a5d0 100644 --- a/python/cuvs/cuvs/test/test_cagra.py +++ b/python/cuvs/cuvs/test/test_cagra.py @@ -29,7 +29,7 @@ def run_cagra_build_search_test( n_queries=100, k=10, dtype=np.float32, - metric="euclidean", + metric="sqeuclidean", intermediate_graph_degree=128, graph_degree=64, build_algo="ivf_pq", @@ -42,6 +42,8 @@ def run_cagra_build_search_test( ): dataset = generate_data((n_rows, n_cols), dtype) if metric == "inner_product": + if dtype in [np.int8, np.uint8]: + pytest.skip("skip normalization for int8/uint8 data") dataset = normalize(dataset, norm="l2", axis=1) dataset_device = device_ndarray(dataset) @@ -122,7 +124,7 @@ def run_cagra_build_search_test( @pytest.mark.parametrize("dtype", [np.float32, np.int8, np.uint8]) @pytest.mark.parametrize("array_type", ["device", "host"]) @pytest.mark.parametrize("build_algo", ["ivf_pq", "nn_descent"]) -@pytest.mark.parametrize("metric", ["euclidean"]) +@pytest.mark.parametrize("metric", ["sqeuclidean", "inner_product"]) def test_cagra_dataset_dtype_host_device( dtype, array_type, inplace, build_algo, metric ): @@ -145,7 +147,7 @@ def test_cagra_dataset_dtype_host_device( "graph_degree": 32, "add_data_on_build": True, "k": 1, - "metric": "euclidean", + "metric": "sqeuclidean", "build_algo": "ivf_pq", }, { diff --git a/python/cuvs/cuvs/test/test_distance.py b/python/cuvs/cuvs/test/test_distance.py index 681217fc8..483d5d201 100644 --- a/python/cuvs/cuvs/test/test_distance.py +++ b/python/cuvs/cuvs/test/test_distance.py @@ -35,15 +35,17 @@ "jensenshannon", "russellrao", "cosine", + "minkowski", "sqeuclidean", "inner_product", ], ) @pytest.mark.parametrize("inplace", [True, False]) -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) -def test_distance(n_rows, n_cols, inplace, metric, dtype): +@pytest.mark.parametrize("order", ["F", "C"]) +@pytest.mark.parametrize("dtype", [np.float32, np.float64, np.float16]) +def test_distance(n_rows, n_cols, inplace, order, metric, dtype): input1 = np.random.random_sample((n_rows, n_cols)) - input1 = np.asarray(input1).astype(dtype) + input1 = np.asarray(input1, order=order).astype(dtype) # RussellRao expects boolean arrays if metric == "russellrao": @@ -55,7 +57,10 @@ def test_distance(n_rows, n_cols, inplace, metric, dtype): norm = np.sum(input1, axis=1) input1 = (input1.T / norm).T - output = np.zeros((n_rows, n_rows), dtype=dtype) + output_dtype = dtype + if np.issubdtype(dtype, np.float16): + output_dtype = np.float32 + output = np.zeros((n_rows, n_rows), dtype=output_dtype, order=order) if metric == "inner_product": expected = np.matmul(input1, input1.T) @@ -66,14 +71,15 @@ def test_distance(n_rows, n_cols, inplace, metric, dtype): output_device = device_ndarray(output) if inplace else None ret_output = pairwise_distance( - input1_device, - input1_device, - output_device, - metric, + input1_device, input1_device, output_device, metric, p=2.0 ) output_device = ret_output if not inplace else output_device actual = output_device.copy_to_host() - assert np.allclose(expected, actual, atol=1e-3, rtol=1e-3) + tol = 1e-3 + if np.issubdtype(dtype, np.float16): + tol = 1e-1 + + assert np.allclose(expected, actual, atol=tol, rtol=tol) diff --git a/python/cuvs/pyproject.toml b/python/cuvs/pyproject.toml index 92e4993c7..155e454a8 100644 --- a/python/cuvs/pyproject.toml +++ b/python/cuvs/pyproject.toml @@ -37,7 +37,7 @@ dependencies = [ "nvidia-curand", "nvidia-cusolver", "nvidia-cusparse", - "pylibraft==24.12.*,>=0.0.0a0", + "pylibraft==25.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", diff --git a/python/cuvs/setup.cfg b/python/cuvs/setup.cfg deleted file mode 100644 index 57b4954bc..000000000 --- a/python/cuvs/setup.cfg +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. - -[isort] -line_length=79 -multi_line_output=3 -include_trailing_comma=True -force_grid_wrap=0 -combine_as_imports=True -order_by_type=True -known_dask= - dask - distributed - dask_cuda -known_rapids= - cuvs - nvtext - cudf - cuml - raft - cugraph - dask_cudf - rmm -known_first_party= - cuvs -default_section=THIRDPARTY -sections=FUTURE,STDLIB,THIRDPARTY,DASK,RAPIDS,FIRSTPARTY,LOCALFOLDER -skip= - thirdparty - .eggs - .git - .hg - .mypy_cache - .tox - .venv - _build - buck-out - build - dist - __init__.py diff --git a/python/cuvs_bench/pyproject.toml b/python/cuvs_bench/pyproject.toml index 5b17f7228..75e5406d4 100644 --- a/python/cuvs_bench/pyproject.toml +++ b/python/cuvs_bench/pyproject.toml @@ -19,7 +19,7 @@ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ "click", - "cuvs==24.12.*,>=0.0.0a0", + "cuvs==25.2.*,>=0.0.0a0", "matplotlib", "pandas", "pyyaml", diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 79aa5756a..ddb8b32cd 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -6,7 +6,7 @@ members = [ resolver = "2" [workspace.package] -version = "24.12.0" +version = "25.2.0" edition = "2021" repository = "https://github.com/rapidsai/cuvs" homepage = "https://github.com/rapidsai/cuvs" diff --git a/rust/cuvs/Cargo.toml b/rust/cuvs/Cargo.toml index 13cc658e3..1095b1fea 100644 --- a/rust/cuvs/Cargo.toml +++ b/rust/cuvs/Cargo.toml @@ -9,7 +9,7 @@ authors.workspace = true license.workspace = true [dependencies] -ffi = { package = "cuvs-sys", path = "../cuvs-sys", version = "24.12.0" } +ffi = { package = "cuvs-sys", path = "../cuvs-sys", version = "25.2.0" } ndarray = "0.15" [dev-dependencies] diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index e64641d05..000000000 --- a/setup.cfg +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. - -[flake8] -filename = *.py, *.pyx, *.pxd, *.pxi -exclude = __init__.py, *.egg, build, docs, .git -force-check = True -ignore = - # line break before binary operator - W503, - # whitespace before : - E203 -per-file-ignores = - # Rules ignored only in Cython: - # E211: whitespace before '(' (used in multi-line imports) - # E225: Missing whitespace around operators (breaks cython casting syntax like ) - # E226: Missing whitespace around arithmetic operators (breaks cython pointer syntax like int*) - # E227: Missing whitespace around bitwise or shift operator (Can also break casting syntax) - # E275: Missing whitespace after keyword (Doesn't work with Cython except?) - # E402: invalid syntax (works for Python, not Cython) - # E999: invalid syntax (works for Python, not Cython) - # W504: line break after binary operator (breaks lines that end with a pointer) - *.pyx: E211, E225, E226, E227, E275, E402, E999, W504 - *.pxd: E211, E225, E226, E227, E275, E402, E999, W504 - *.pxi: E211, E225, E226, E227, E275, E402, E999, W504 - -[pydocstyle] -# Due to https://github.com/PyCQA/pydocstyle/issues/363, we must exclude rather -# than include using match-dir. Note that as discussed in -# https://stackoverflow.com/questions/65478393/how-to-filter-directories-using-the-match-dir-flag-for-pydocstyle, -# unlike the match option above this match-dir will have no effect when -# pydocstyle is invoked from pre-commit. Therefore this exclusion list must -# also be maintained in the pre-commit config file. -match-dir = ^(?!(ci|cpp|conda|docs|java|notebooks)).*$ -# Allow missing docstrings for docutils -ignore-decorators = .*(docutils|doc_apply|copy_docstring).* -select = - D201, D204, D206, D207, D208, D209, D210, D211, D214, D215, D300, D301, D302, D403, D405, D406, D407, D408, D409, D410, D411, D412, D414, D418 - # Would like to enable the following rules in the future: - # D200, D202, D205, D400 - -[mypy] -ignore_missing_imports = True -# If we don't specify this, then mypy will check excluded files if -# they are imported by a checked file. -follow_imports = skip - -[codespell] -# note: pre-commit passes explicit lists of files here, which this skip file list doesn't override - -# this is only to allow you to run codespell interactively -skip = ./.git,./.github,./cpp/build,.*egg-info.*,./.mypy_cache,.*_skbuild -# ignore short words, and typename parameters like OffsetT -ignore-regex = \b(.{1,4}|[A-Z]\w*T)\b -ignore-words-list = inout,unparseable,numer -builtin = clear -quiet-level = 3