From 0fbd919b97d76011cdbb9dd3a448d382e6eac036 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 15 Aug 2024 13:12:09 -0500 Subject: [PATCH 1/6] Improve update-version.sh (#6014) A few small tweaks to `update-version.sh` for alignment across RAPIDS. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/cuml/pull/6014 --- ci/release/update-version.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 52ed710f34..c953ad0317 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -18,7 +18,7 @@ CURRENT_MINOR=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[2]}') CURRENT_PATCH=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[3]}') CURRENT_SHORT_TAG=${CURRENT_MAJOR}.${CURRENT_MINOR} -#Get . for next version
+# Get . for next version
NEXT_MAJOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[1]}') NEXT_MINOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[2]}') NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR} @@ -58,7 +58,7 @@ for DEP in "${DEPENDENCIES[@]}"; do sed_runner "/-.* ${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*/==${NEXT_SHORT_TAG_PEP440}.*,>=0.0.0a0/g" "${FILE}" done for FILE in python/*/pyproject.toml; do
-    sed_runner "/\"${DEP}==/ s/==.*\"/==${NEXT_SHORT_TAG_PEP440}.*,>=0.0.0a0\"/g" ${FILE}
+    sed_runner "/\"${DEP}==/ s/==.*\"/==${NEXT_SHORT_TAG_PEP440}.*,>=0.0.0a0\"/g" "${FILE}"
 done done

From f677791ad80f77244799dc8084728150787b2670 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Thu, 15 Aug 2024 15:32:51 -0400 Subject: [PATCH 2/6] Stop exporting fill_k kernel as that causes ODR violations (#6021) Removes the usage of `fill_k` in `cpp/src/fil/fil.cu` as that breaks the ODR requirements of CUDA whole compilation. To allow setting the shared memory of the kernel, we move the logic over to `cpp/src/fil/infer.cu` and provide a C++ interface.
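For readers unfamiliar with the underlying issue: under CUDA whole compilation, a `__global__` template kernel that is declared in a header and instantiated from several translation units can end up defined in more than one object file, violating the one-definition rule. The sketch below (hypothetical names such as `example_kernel` and `set_max_dynamic_shmem`, not the actual cuML sources) illustrates the pattern this patch adopts: the kernel definition stays private to a single `.cu` file, and only a plain host-side function performing the `cudaFuncSetAttribute` opt-in is exported.

```cpp
#include <cuda_runtime.h>

// impl.cu -- the only translation unit that defines the kernel. The header
// declares just the host entry point set_max_dynamic_shmem<T>(int), so no
// other TU ever instantiates (and re-exports) the kernel symbol.

template <typename T>
__attribute__((visibility("hidden"))) __global__ void example_kernel(T* data)
{
  // ... kernel body elided ...
}

template <typename T>
void set_max_dynamic_shmem(int max_shm)
{
  // Opt the kernel into architecture-dependent dynamic shared memory sizes,
  // mirroring the cudaFuncSetAttribute call in the patch below.
  cudaFuncSetAttribute(
    example_kernel<T>, cudaFuncAttributeMaxDynamicSharedMemorySize, max_shm);
}

// Explicit instantiations keep every kernel symbol inside this one TU.
template void set_max_dynamic_shmem<float>(int);
template void set_max_dynamic_shmem<double>(int);
```

This mirrors the `infer_shared_mem_size` entry point added in `cpp/src/fil/infer.cu` below.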
Authors: - Robert Maynard (https://github.com/robertmaynard) Approvers: - Dante Gama Dessavre (https://github.com/dantegd) URL: https://github.com/rapidsai/cuml/pull/6021 --- cpp/src/fil/common.cuh | 11 +++-------- cpp/src/fil/fil.cu | 29 +++++------------------------ cpp/src/fil/infer.cu | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 32 deletions(-) diff --git a/cpp/src/fil/common.cuh b/cpp/src/fil/common.cuh index e62df3e21f..fe5ba32496 100644 --- a/cpp/src/fil/common.cuh +++ b/cpp/src/fil/common.cuh @@ -309,17 +309,12 @@ struct compute_smem_footprint : dispatch_functor { int run(predict_params); }; -template -__attribute__((visibility("hidden"))) __global__ void infer_k(storage_type forest, - predict_params params); - // infer() calls the inference kernel with the parameters on the stream template void infer(storage_type forest, predict_params params, cudaStream_t stream); +template +void infer_shared_mem_size(predict_params params, int max_shm); + } // namespace fil } // namespace ML diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index e0d2f8baaf..69b0320e1e 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -349,26 +349,6 @@ struct forest { cat_sets_device_owner cat_sets_; }; -template -struct opt_into_arch_dependent_shmem : dispatch_functor { - const int max_shm; - opt_into_arch_dependent_shmem(int max_shm_) : max_shm(max_shm_) {} - - template > - void run(predict_params p) - { - auto kernel = infer_k; - // p.shm_sz might be > max_shm or < MAX_SHM_STD, but we should not check for either, because - // we don't run on both proba_ssp_ and class_ssp_ (only class_ssp_). This should be quick. - RAFT_CUDA_TRY( - cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, max_shm)); - } -}; - template struct dense_forest> : forest { using node_t = dense_node; @@ -427,8 +407,9 @@ struct dense_forest> : forest { h.get_stream())); // predict_proba is a runtime parameter, and opt-in is unconditional - dispatch_on_fil_template_params(opt_into_arch_dependent_shmem>(this->max_shm_), - static_cast(this->class_ssp_)); + fil::infer_shared_mem_size>(static_cast(this->class_ssp_), + this->max_shm_); + // copy must be finished before freeing the host data h.sync_stream(); h_nodes_.clear(); @@ -491,8 +472,8 @@ struct sparse_forest : forest { nodes_.data(), nodes, sizeof(node_t) * num_nodes_, cudaMemcpyHostToDevice, h.get_stream())); // predict_proba is a runtime parameter, and opt-in is unconditional - dispatch_on_fil_template_params(opt_into_arch_dependent_shmem>(this->max_shm_), - static_cast(this->class_ssp_)); + fil::infer_shared_mem_size>(static_cast(this->class_ssp_), + this->max_shm_); } virtual void infer(predict_params params, cudaStream_t stream) override diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index 574a0a37e3..c3bdd1b810 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -908,12 +908,38 @@ struct infer_k_storage_template : dispatch_functor { } }; +template +struct opt_into_arch_dependent_shmem : dispatch_functor { + const int max_shm; + opt_into_arch_dependent_shmem(int max_shm_) : max_shm(max_shm_) {} + + template > + void run(predict_params p) + { + auto kernel = infer_k; + // p.shm_sz might be > max_shm or < MAX_SHM_STD, but we should not check for either, because + // we don't run on both proba_ssp_ and class_ssp_ (only class_ssp_). This should be quick. 
+ RAFT_CUDA_TRY( + cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, max_shm)); + } +}; + template void infer(storage_type forest, predict_params params, cudaStream_t stream) { dispatch_on_fil_template_params(infer_k_storage_template(forest, stream), params); } +template +void infer_shared_mem_size(predict_params params, int max_shm) +{ + dispatch_on_fil_template_params(opt_into_arch_dependent_shmem(max_shm), params); +} + template void infer(dense_storage_f32 forest, predict_params params, cudaStream_t stream); @@ -930,5 +956,11 @@ template void infer(sparse_storage8 forest, predict_params params, cudaStream_t stream); +template void infer_shared_mem_size(predict_params params, int max_shm); +template void infer_shared_mem_size(predict_params params, int max_shm); +template void infer_shared_mem_size(predict_params params, int max_shm); +template void infer_shared_mem_size(predict_params params, int max_shm); +template void infer_shared_mem_size(predict_params params, int max_shm); + } // namespace fil } // namespace ML From 7df3bbd7a0eb3442e4f7b85596b244f2944992f4 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Thu, 15 Aug 2024 17:52:08 -0400 Subject: [PATCH 3/6] Use CUDA math wheels (#5966) Use CUDA math wheels to reduce wheel size by not statically linking CUDA math libraries. Contributes to https://github.com/rapidsai/build-planning/issues/35 Authors: - Kyle Edwards (https://github.com/KyleFromNVIDIA) Approvers: - Bradley Dice (https://github.com/bdice) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cuml/pull/5966 --- ci/build_wheel.sh | 23 +++++++++++++++++++++-- dependencies.yaml | 21 +++++++++++++++++++++ python/cuml/CMakeLists.txt | 26 ++++++++++++++++++++++++-- python/cuml/pyproject.toml | 5 +++++ 4 files changed, 71 insertions(+), 4 deletions(-) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index db8393edeb..af3a4c124b 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -18,7 +18,26 @@ rapids-generate-version > ./VERSION cd ${package_dir} -SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/" \ +case "${RAPIDS_CUDA_VERSION}" in + 12.*) + EXCLUDE_ARGS=( + --exclude "libcublas.so.12" + --exclude "libcublasLt.so.12" + --exclude "libcufft.so.11" + --exclude "libcurand.so.10" + --exclude "libcusolver.so.11" + --exclude "libcusparse.so.12" + --exclude "libnvJitLink.so.12" + ) + EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=ON" + ;; + 11.*) + EXCLUDE_ARGS=() + EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=OFF" + ;; +esac + +SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/${EXTRA_CMAKE_ARGS}" \ python -m pip wheel . 
\ -w dist \ -vvv \ @@ -26,6 +45,6 @@ SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DDISABLE_DEPRECATION_WARNINGS=ON;-DC --disable-pip-version-check mkdir -p final_dist -python -m auditwheel repair -w final_dist dist/* +python -m auditwheel repair -w final_dist "${EXCLUDE_ARGS[@]}" dist/* RAPIDS_PY_WHEEL_NAME="cuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 final_dist diff --git a/dependencies.yaml b/dependencies.yaml index 2146264618..234bd4481d 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -88,6 +88,7 @@ files: extras: table: project includes: + - cuda - py_run py_test: output: pyproject @@ -406,6 +407,26 @@ dependencies: - *libcusolver114 - *libcusparse_dev114 - *libcusparse114 + - output_types: pyproject + matrices: + - matrix: + cuda: "12.*" + packages: + - nvidia-cublas-cu12 + - nvidia-cufft-cu12 + - nvidia-curand-cu12 + - nvidia-cusparse-cu12 + - nvidia-cusolver-cu12 + - matrix: + cuda: "11.*" + packages: + - matrix: + packages: + - nvidia-cublas + - nvidia-cufft + - nvidia-curand + - nvidia-cusparse + - nvidia-cusolver docs: common: - output_types: [conda, requirements] diff --git a/python/cuml/CMakeLists.txt b/python/cuml/CMakeLists.txt index f2541f7f04..224525ee58 100644 --- a/python/cuml/CMakeLists.txt +++ b/python/cuml/CMakeLists.txt @@ -38,6 +38,7 @@ project( option(CUML_UNIVERSAL "Build all cuML Python components." ON) option(FIND_CUML_CPP "Search for existing CUML C++ installations before defaulting to local files" OFF) option(SINGLEGPU "Disable all mnmg components and comms libraries" OFF) +option(USE_CUDA_MATH_WHEELS "Use the CUDA math wheels instead of the system libraries" OFF) set(CUML_RAFT_CLONE_ON_PIN OFF) @@ -72,8 +73,10 @@ include(rapids-cython-core) set(CUML_PYTHON_TREELITE_TARGET treelite::treelite) -if(NOT ${CUML_CPU}) +if(NOT CUML_CPU) if(NOT cuml_FOUND) + find_package(CUDAToolkit REQUIRED) + set(BUILD_CUML_TESTS OFF) set(BUILD_PRIMS_TESTS OFF) set(BUILD_CUML_C_LIBRARY OFF) @@ -85,11 +88,19 @@ if(NOT ${CUML_CPU}) # Statically link dependencies if building wheels set(CUDA_STATIC_RUNTIME ON) - set(CUDA_STATIC_MATH_LIBRARIES ON) set(CUML_USE_RAFT_STATIC ON) set(CUML_USE_FAISS_STATIC ON) set(CUML_USE_TREELITE_STATIC ON) set(CUML_USE_CUMLPRIMS_MG_STATIC ON) + # Link to the CUDA wheels with shared libraries for CUDA 12+ + if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.0) + set(CUDA_STATIC_MATH_LIBRARIES OFF) + else() + if(USE_CUDA_MATH_WHEELS) + message(FATAL_ERROR "Cannot use CUDA math wheels with CUDA < 12.0") + endif() + set(CUDA_STATIC_MATH_LIBRARIES ON) + endif() # Don't install the static libs into wheels set(CUML_EXCLUDE_RAFT_FROM_ALL ON) set(RAFT_EXCLUDE_FAISS_FROM_ALL ON) @@ -98,6 +109,17 @@ if(NOT ${CUML_CPU}) add_subdirectory(${CUML_CPP_SRC} cuml-cpp EXCLUDE_FROM_ALL) + if(NOT CUDA_STATIC_MATH_LIBRARIES AND USE_CUDA_MATH_WHEELS) + set_property(TARGET ${CUML_CPP_TARGET} PROPERTY INSTALL_RPATH + "$ORIGIN/../nvidia/cublas/lib" + "$ORIGIN/../nvidia/cufft/lib" + "$ORIGIN/../nvidia/curand/lib" + "$ORIGIN/../nvidia/cusolver/lib" + "$ORIGIN/../nvidia/cusparse/lib" + "$ORIGIN/../nvidia/nvjitlink/lib" + ) + endif() + set(cython_lib_dir cuml) install(TARGETS ${CUML_CPP_TARGET} DESTINATION ${cython_lib_dir}) endif() diff --git a/python/cuml/pyproject.toml b/python/cuml/pyproject.toml index 2073a5060e..d990fb5032 100644 --- a/python/cuml/pyproject.toml +++ b/python/cuml/pyproject.toml @@ -86,6 +86,11 @@ dependencies = [ "dask-cudf==24.10.*,>=0.0.0a0", "joblib>=0.11", "numba>=0.57", + "nvidia-cublas", + "nvidia-cufft", + "nvidia-curand", + 
"nvidia-cusolver", + "nvidia-cusparse", "packaging", "pylibraft==24.10.*,>=0.0.0a0", "raft-dask==24.10.*,>=0.0.0a0", From c7f53ef92d80f604b04829406b1c0e16ba563823 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Thu, 15 Aug 2024 18:10:49 -0400 Subject: [PATCH 4/6] Use HDBSCAN package pin to `0.8.38` (#5906) HBSCAN has a [breaking change](https://github.com/scikit-learn-contrib/hdbscan/commit/aa99a71daa070b64b45018378c9d4bdd9c28f686) in `main` branch, which cause wheel nightly tests to fail as cuML picks up HDBSCAN from `main` branch HEAD. As HDBSCAN also releases on pypi now, we can install release packages instead of installing HDBSCAN from git source. This PR also updates HBDSCAN to v0.8.38. Authors: - Divye Gala (https://github.com/divyegala) Approvers: - Jake Awe (https://github.com/AyodeAwe) - Dante Gama Dessavre (https://github.com/dantegd) - https://github.com/jakirkham URL: https://github.com/rapidsai/cuml/pull/5906 --- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +- conda/recipes/cuml-cpu/meta.yaml | 2 +- dependencies.yaml | 9 +++------ python/cuml/pyproject.toml | 3 ++- 5 files changed, 8 insertions(+), 10 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index e17902c787..e8a39e250e 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -22,7 +22,7 @@ dependencies: - doxygen=1.9.1 - gcc_linux-64=11.* - graphviz -- hdbscan<=0.8.30 +- hdbscan>=0.8.38,<0.8.39 - hypothesis>=6.0,<7 - ipykernel - ipython diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index b4d3324501..293028cdb1 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -24,7 +24,7 @@ dependencies: - doxygen=1.9.1 - gcc_linux-64=11.* - graphviz -- hdbscan<=0.8.30 +- hdbscan>=0.8.38,<0.8.39 - hypothesis>=6.0,<7 - ipykernel - ipython diff --git a/conda/recipes/cuml-cpu/meta.yaml b/conda/recipes/cuml-cpu/meta.yaml index e3813db79a..09686ff9dd 100644 --- a/conda/recipes/cuml-cpu/meta.yaml +++ b/conda/recipes/cuml-cpu/meta.yaml @@ -34,7 +34,7 @@ requirements: - numpy>=1.23,<2.0a0 - pandas - scikit-learn=1.2 - - hdbscan<=0.8.30 + - hdbscan>=0.8.38,<0.8.39 - umap-learn=0.5.3 - nvtx diff --git a/dependencies.yaml b/dependencies.yaml index 234bd4481d..e22aec7034 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -177,7 +177,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - cython>=3.0.0 + - &cython cython>=3.0.0 - &treelite treelite==4.3.0 - output_types: conda packages: @@ -486,7 +486,9 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: + - *cython - dask-ml + - hdbscan>=0.8.38,<0.8.39 - hypothesis>=6.0,<7 - nltk - numpydoc @@ -505,14 +507,9 @@ dependencies: - pip - pip: - dask-glm==0.3.0 - # TODO: remove pin once a release that includes fixes for the error - # is released: https://github.com/rapidsai/cuml/issues/5514 - - hdbscan<=0.8.30 - output_types: pyproject packages: - dask-glm==0.3.0 - # TODO: Can we stop pulling from the master branch now that there was a release in October? 
- - hdbscan @ git+https://github.com/scikit-learn-contrib/hdbscan.git@master
 test_notebooks: common: - output_types: [conda, requirements] diff --git a/python/cuml/pyproject.toml b/python/cuml/pyproject.toml index d990fb5032..0b5c650703 100644 --- a/python/cuml/pyproject.toml +++ b/python/cuml/pyproject.toml @@ -109,9 +109,10 @@ classifiers = [ [project.optional-dependencies] test = [
+    "cython>=3.0.0",
     "dask-glm==0.3.0",
     "dask-ml",
-    "hdbscan @ git+https://github.com/scikit-learn-contrib/hdbscan.git@master",
+    "hdbscan>=0.8.38,<0.8.39",
     "hypothesis>=6.0,<7",
     "nltk",
     "numpydoc",

From e33dc73424a8e9c1d439da4d65834452975f349f Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Tue, 20 Aug 2024 05:09:58 +0200 Subject: [PATCH 5/6] MAINT: Allow for error message to contain ``np.float32(1.0)`` (#6030) This is one more small fixup for NumPy 2. The other error I found in the test PR should be fixed via a cudf PR. Authors: - Sebastian Berg (https://github.com/seberg) Approvers: - Dante Gama Dessavre (https://github.com/dantegd) URL: https://github.com/rapidsai/cuml/pull/6030 --- python/cuml/cuml/tests/dask/test_dask_logistic_regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/cuml/tests/dask/test_dask_logistic_regression.py b/python/cuml/cuml/tests/dask/test_dask_logistic_regression.py index a78d507c75..dc6b63428d 100644 --- a/python/cuml/cuml/tests/dask/test_dask_logistic_regression.py +++ b/python/cuml/cuml/tests/dask/test_dask_logistic_regression.py @@ -682,7 +682,7 @@ def test_exception_one_label(fit_intercept, client): y = np.array([1.0, 1.0, 1.0, 1.0], datatype) X_df, y_df = _prep_training_data(client, X, y, n_parts)
-    err_msg = "This solver needs samples of at least 2 classes in the data, but the data contains only one class: 1.0"
+    err_msg = "This solver needs samples of at least 2 classes in the data, but the data contains only one class:.*1.0"
 from cuml.dask.linear_model import LogisticRegression as cumlLBFGS_dask

From d09e713b194dbf8a0308aa85bdf41efcaf06de26 Mon Sep 17 00:00:00 2001 From: Victor Lafargue Date: Wed, 21 Aug 2024 16:53:16 +0200 Subject: [PATCH 6/6] Enabling CPU/GPU interop for SVM, DBSCAN and KMeans (#6020) Authors: - Victor Lafargue (https://github.com/viclafargue) - Dante Gama Dessavre (https://github.com/dantegd) Approvers: - Dante Gama Dessavre (https://github.com/dantegd) URL: https://github.com/rapidsai/cuml/pull/6020 --- python/cuml/cuml/cluster/dbscan.pyx | 21 ++++--- python/cuml/cuml/cluster/kmeans.pyx | 57 +++++++++++-------- .../cuml/cuml/tests/test_device_selection.py | 51 ++++++++++++++++- 3 files changed, 96 insertions(+), 33 deletions(-) diff --git a/python/cuml/cuml/cluster/dbscan.pyx b/python/cuml/cuml/cluster/dbscan.pyx index b1a8dd5ae8..fff1eef3f9 100644 --- a/python/cuml/cuml/cluster/dbscan.pyx +++ b/python/cuml/cuml/cluster/dbscan.pyx @@ -22,7 +22,7 @@ from cuml.internals.safe_imports import gpu_only_import cp = gpu_only_import('cupy') from cuml.internals.array import CumlArray
-from cuml.internals.base import Base
+from cuml.internals.base import UniversalBase
 from cuml.common.doc_utils import generate_docstring from cuml.common.array_descriptor import CumlArrayDescriptor from cuml.internals.mixins import ClusterMixin @@ -106,7 +106,7 @@ IF GPUBUILD == 1: bool opg) except +
-class DBSCAN(Base,
+class DBSCAN(UniversalBase,
 ClusterMixin, CMajorInputTagMixin): """ @@ -222,8 +222,8 @@ class DBSCAN(Base, """ _cpu_estimator_import_path = 'sklearn.cluster.DBSCAN'
-    labels_ = CumlArrayDescriptor()
-    core_sample_indices_
= CumlArrayDescriptor() + core_sample_indices_ = CumlArrayDescriptor(order="C") + labels_ = CumlArrayDescriptor(order="C") @device_interop_preparation def __init__(self, *, @@ -268,7 +268,7 @@ class DBSCAN(Base, "np.int32, np.int64}") IF GPUBUILD == 1: - X_m, n_rows, n_cols, self.dtype = \ + X_m, n_rows, self.n_features_in_, self.dtype = \ input_to_cuml_array( X, order='C', @@ -338,7 +338,7 @@ class DBSCAN(Base, fit(handle_[0], input_ptr, n_rows, - n_cols, + self.n_features_in_, self.eps, self.min_samples, metric, @@ -353,7 +353,7 @@ class DBSCAN(Base, fit(handle_[0], input_ptr, n_rows, - n_cols, + self.n_features_in_, self.eps, self.min_samples, metric, @@ -370,7 +370,7 @@ class DBSCAN(Base, fit(handle_[0], input_ptr, n_rows, - n_cols, + self.n_features_in_, self.eps, self.min_samples, metric, @@ -385,7 +385,7 @@ class DBSCAN(Base, fit(handle_[0], input_ptr, n_rows, - n_cols, + self.n_features_in_, self.eps, self.min_samples, metric, @@ -475,3 +475,6 @@ class DBSCAN(Base, "metric", "algorithm", ] + + def get_attr_names(self): + return ["core_sample_indices_", "labels_", "n_features_in_"] diff --git a/python/cuml/cuml/cluster/kmeans.pyx b/python/cuml/cuml/cluster/kmeans.pyx index 760df6306b..e8ab51e4dd 100644 --- a/python/cuml/cuml/cluster/kmeans.pyx +++ b/python/cuml/cuml/cluster/kmeans.pyx @@ -38,7 +38,7 @@ IF GPUBUILD == 1: from cuml.internals.array import CumlArray from cuml.common.array_descriptor import CumlArrayDescriptor -from cuml.internals.base import Base +from cuml.internals.base import UniversalBase from cuml.common.doc_utils import generate_docstring from cuml.internals.mixins import ClusterMixin from cuml.internals.mixins import CMajorInputTagMixin @@ -46,8 +46,10 @@ from cuml.common import input_to_cuml_array from cuml.internals.api_decorators import device_interop_preparation from cuml.internals.api_decorators import enable_device_interop +from sklearn.utils._openmp_helpers import _openmp_effective_n_threads -class KMeans(Base, + +class KMeans(UniversalBase, ClusterMixin, CMajorInputTagMixin): @@ -188,8 +190,8 @@ class KMeans(Base, """ _cpu_estimator_import_path = 'sklearn.cluster.KMeans' - labels_ = CumlArrayDescriptor() - cluster_centers_ = CumlArrayDescriptor() + labels_ = CumlArrayDescriptor(order='C') + cluster_centers_ = CumlArrayDescriptor(order='C') def _get_kmeans_params(self): IF GPUBUILD == 1: @@ -232,6 +234,9 @@ class KMeans(Base, self.labels_ = None self.cluster_centers_ = None + # For sklearn interoperability + self._n_threads = _openmp_effective_n_threads() + # cuPy does not allow comparing with string. 
See issue #2372 init_str = init if isinstance(init, str) else None @@ -258,7 +263,7 @@ class KMeans(Base, IF GPUBUILD == 1: self._params_init = Array - self.cluster_centers_, _n_rows, self.n_cols, self.dtype = \ + self.cluster_centers_, _n_rows, self.n_features_in_, self.dtype = \ input_to_cuml_array( init, order='C', convert_to_dtype=(np.float32 if convert_dtype @@ -274,7 +279,7 @@ class KMeans(Base, """ if self.init == 'preset': - check_cols = self.n_cols + check_cols = self.n_features_in_ check_dtype = self.dtype target_dtype = self.dtype else: @@ -282,7 +287,7 @@ class KMeans(Base, check_dtype = [np.float32, np.float64] target_dtype = np.float32 - _X_m, _n_rows, self.n_cols, self.dtype = \ + _X_m, _n_rows, self.n_features_in_, self.dtype = \ input_to_cuml_array(X, order='C', check_cols=check_cols, @@ -306,14 +311,14 @@ class KMeans(Base, cdef uintptr_t sample_weight_ptr = sample_weight_m.ptr - int_dtype = np.int32 if np.int64(_n_rows) * np.int64(self.n_cols) < 2**31-1 else np.int64 + int_dtype = np.int32 if np.int64(_n_rows) * np.int64(self.n_features_in_) < 2**31-1 else np.int64 self.labels_ = CumlArray.zeros(shape=_n_rows, dtype=int_dtype) cdef uintptr_t labels_ptr = self.labels_.ptr if (self.init in ['scalable-k-means++', 'k-means||', 'random']): self.cluster_centers_ = \ - CumlArray.zeros(shape=(self.n_clusters, self.n_cols), + CumlArray.zeros(shape=(self.n_clusters, self.n_features_in_), dtype=self.dtype, order='C') cdef uintptr_t cluster_centers_ptr = self.cluster_centers_.ptr @@ -334,7 +339,7 @@ class KMeans(Base, deref(params), input_ptr, _n_rows, - self.n_cols, + self.n_features_in_, sample_weight_ptr, cluster_centers_ptr, labels_ptr, @@ -347,7 +352,7 @@ class KMeans(Base, deref(params), input_ptr, _n_rows, - self.n_cols, + self.n_features_in_, sample_weight_ptr, cluster_centers_ptr, labels_ptr, @@ -364,7 +369,7 @@ class KMeans(Base, deref(params), input_ptr, _n_rows, - self.n_cols, + self.n_features_in_, sample_weight_ptr, cluster_centers_ptr, labels_ptr, @@ -378,7 +383,7 @@ class KMeans(Base, deref(params), input_ptr, _n_rows, - self.n_cols, + self.n_features_in_, sample_weight_ptr, cluster_centers_ptr, labels_ptr, @@ -442,11 +447,13 @@ class KMeans(Base, Sum of squared distances of samples to their closest cluster center. 
""" + self.dtype = self.cluster_centers_.dtype + _X_m, _n_rows, _n_cols, _ = \ input_to_cuml_array(X, order='C', check_dtype=self.dtype, convert_to_dtype=(self.dtype if convert_dtype else None), - check_cols=self.n_cols) + check_cols=self.n_features_in_) IF GPUBUILD == 1: cdef uintptr_t input_ptr = _X_m.ptr @@ -486,7 +493,7 @@ class KMeans(Base, cluster_centers_ptr, input_ptr, _n_rows, - self.n_cols, + self.n_features_in_, sample_weight_ptr, normalize_weights, labels_ptr, @@ -498,7 +505,7 @@ class KMeans(Base, cluster_centers_ptr, input_ptr, _n_rows, - self.n_cols, + self.n_features_in_, sample_weight_ptr, normalize_weights, labels_ptr, @@ -513,7 +520,7 @@ class KMeans(Base, cluster_centers_ptr, input_ptr, _n_rows, - self.n_cols, + self.n_features_in_, sample_weight_ptr, normalize_weights, labels_ptr, @@ -525,7 +532,7 @@ class KMeans(Base, cluster_centers_ptr, input_ptr, _n_rows, - self.n_cols, + self.n_features_in_, sample_weight_ptr, normalize_weights, labels_ptr, @@ -578,7 +585,7 @@ class KMeans(Base, input_to_cuml_array(X, order='C', check_dtype=self.dtype, convert_to_dtype=(self.dtype if convert_dtype else None), - check_cols=self.n_cols) + check_cols=self.n_features_in_) IF GPUBUILD == 1: cdef uintptr_t input_ptr = _X_m.ptr @@ -607,7 +614,7 @@ class KMeans(Base, cluster_centers_ptr, input_ptr, _n_rows, - self.n_cols, + self.n_features_in_, preds_ptr) else: cpp_transform( @@ -616,7 +623,7 @@ class KMeans(Base, cluster_centers_ptr, input_ptr, _n_rows, - self.n_cols, + self.n_features_in_, preds_ptr) elif self.dtype == np.float64: @@ -627,7 +634,7 @@ class KMeans(Base, cluster_centers_ptr, input_ptr, _n_rows, - self.n_cols, + self.n_features_in_, preds_ptr) else: cpp_transform( @@ -636,7 +643,7 @@ class KMeans(Base, cluster_centers_ptr, input_ptr, _n_rows, - self.n_cols, + self.n_features_in_, preds_ptr) else: @@ -685,3 +692,7 @@ class KMeans(Base, ['n_init', 'oversampling_factor', 'max_samples_per_batch', 'init', 'max_iter', 'n_clusters', 'random_state', 'tol', "convert_dtype"] + + def get_attr_names(self): + return ['cluster_centers_', 'labels_', 'inertia_', + 'n_iter_', 'n_features_in_', '_n_threads'] diff --git a/python/cuml/cuml/tests/test_device_selection.py b/python/cuml/cuml/tests/test_device_selection.py index e5c2d9ce1a..1da3b0738e 100644 --- a/python/cuml/cuml/tests/test_device_selection.py +++ b/python/cuml/cuml/tests/test_device_selection.py @@ -32,7 +32,10 @@ from cuml.internals.memory_utils import using_memory_type from cuml.internals.mem_type import MemoryType from cuml.decomposition import PCA, TruncatedSVD +from cuml.cluster import KMeans +from cuml.cluster import DBSCAN from cuml.common.device_selection import DeviceType, using_device_type +from cuml.testing.utils import assert_dbscan_equal from hdbscan import HDBSCAN as refHDBSCAN from sklearn.neighbors import NearestNeighbors as skNearestNeighbors from sklearn.linear_model import Ridge as skRidge @@ -42,6 +45,8 @@ from sklearn.linear_model import LinearRegression as skLinearRegression from sklearn.decomposition import PCA as skPCA from sklearn.decomposition import TruncatedSVD as skTruncatedSVD +from sklearn.cluster import KMeans as skKMeans +from sklearn.cluster import DBSCAN as skDBSCAN from sklearn.datasets import make_regression, make_blobs from pytest_cases import fixture_union, fixture from importlib import import_module @@ -136,7 +141,11 @@ def make_reg_dataset(): def make_blob_dataset(): X, y = make_blobs( - n_samples=2000, n_features=20, centers=20, random_state=0 + n_samples=2000, + n_features=20, + 
centers=20,
+        random_state=0,
+        cluster_std=1.0,
     )
     X_train, X_test = X[:1800], X[1800:]
     y_train, _ = y[:1800], y[1800:]
@@ -948,3 +957,43 @@ def test_hdbscan_methods(train_device, infer_device):
         assert_membership_vectors(membership, ref_membership)
     assert adjusted_rand_score(labels, ref_labels) >= 0.98
     assert array_equal(probs, ref_probs, unit_tol=0.001, total_tol=0.006)
+
+
+@pytest.mark.parametrize("train_device", ["cpu", "gpu"])
+@pytest.mark.parametrize("infer_device", ["cpu", "gpu"])
+def test_kmeans_methods(train_device, infer_device):
+    n_clusters = 20
+    ref_model = skKMeans(n_clusters=n_clusters)
+    ref_model.fit(X_train_blob)
+    ref_output = ref_model.predict(X_test_blob)
+
+    model = KMeans(n_clusters=n_clusters)
+    with using_device_type(train_device):
+        model.fit(X_train_blob)
+    with using_device_type(infer_device):
+        output = model.predict(X_test_blob)
+
+    assert adjusted_rand_score(ref_output, output) >= 0.9
+
+
+@pytest.mark.parametrize("train_device", ["cpu", "gpu"])
+@pytest.mark.parametrize("infer_device", ["cpu", "gpu"])
+def test_dbscan_methods(train_device, infer_device):
+    eps = 8.0
+    ref_model = skDBSCAN(eps=eps)
+    ref_model.fit(X_train_blob)
+    ref_output = ref_model.fit_predict(X_train_blob)
+
+    model = DBSCAN(eps=eps)
+    with using_device_type(train_device):
+        model.fit(X_train_blob)
+    with using_device_type(infer_device):
+        output = model.fit_predict(X_train_blob)
+
+    assert array_equal(
+        ref_model.core_sample_indices_, model.core_sample_indices_
+    )
+    assert adjusted_rand_score(ref_output, output) >= 0.95
+    assert_dbscan_equal(
+        ref_output, output, X_train_blob, model.core_sample_indices_, eps
+    )
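For context on how the interoperability exercised by these tests is used outside the test suite: the same `using_device_type` context manager lets a user train an estimator on one device and run inference on the other. A minimal usage sketch (same imports as the tests above; the dataset parameters are illustrative):

```python
from sklearn.datasets import make_blobs

from cuml.cluster import KMeans
from cuml.common.device_selection import using_device_type

X, _ = make_blobs(n_samples=2000, n_features=20, centers=20, random_state=0)

model = KMeans(n_clusters=20)
with using_device_type("cpu"):   # fit on the host, no GPU required
    model.fit(X)
with using_device_type("gpu"):   # then run inference on the GPU
    labels = model.predict(X)
```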