
Commit b9b9dd6

Merge branch 'rapidsai:branch-24.10' into umap-batch-nnd
jinsolp committed Aug 21, 2024
2 parents 2cb4623 + d09e713 commit b9b9dd6
Showing 15 changed files with 218 additions and 82 deletions.
23 changes: 21 additions & 2 deletions ci/build_wheel.sh
@@ -18,14 +18,33 @@ rapids-generate-version > ./VERSION

cd ${package_dir}

SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/" \
case "${RAPIDS_CUDA_VERSION}" in
12.*)
EXCLUDE_ARGS=(
--exclude "libcublas.so.12"
--exclude "libcublasLt.so.12"
--exclude "libcufft.so.11"
--exclude "libcurand.so.10"
--exclude "libcusolver.so.11"
--exclude "libcusparse.so.12"
--exclude "libnvJitLink.so.12"
)
EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=ON"
;;
11.*)
EXCLUDE_ARGS=()
EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=OFF"
;;
esac

SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DDISABLE_DEPRECATION_WARNINGS=ON;-DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/${EXTRA_CMAKE_ARGS}" \
python -m pip wheel . \
-w dist \
-vvv \
--no-deps \
--disable-pip-version-check

mkdir -p final_dist
python -m auditwheel repair -w final_dist dist/*
python -m auditwheel repair -w final_dist "${EXCLUDE_ARGS[@]}" dist/*

RAPIDS_PY_WHEEL_NAME="cuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 final_dist
4 changes: 2 additions & 2 deletions ci/release/update-version.sh
@@ -18,7 +18,7 @@ CURRENT_MINOR=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[2]}')
CURRENT_PATCH=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[3]}')
CURRENT_SHORT_TAG=${CURRENT_MAJOR}.${CURRENT_MINOR}

#Get <major>.<minor> for next version
# Get <major>.<minor> for next version
NEXT_MAJOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[1]}')
NEXT_MINOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[2]}')
NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR}
@@ -58,7 +58,7 @@ for DEP in "${DEPENDENCIES[@]}"; do
sed_runner "/-.* ${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*/==${NEXT_SHORT_TAG_PEP440}.*,>=0.0.0a0/g" "${FILE}"
done
for FILE in python/*/pyproject.toml; do
sed_runner "/\"${DEP}==/ s/==.*\"/==${NEXT_SHORT_TAG_PEP440}.*,>=0.0.0a0\"/g" ${FILE}
sed_runner "/\"${DEP}==/ s/==.*\"/==${NEXT_SHORT_TAG_PEP440}.*,>=0.0.0a0\"/g" "${FILE}"
done
done

2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -22,7 +22,7 @@ dependencies:
- doxygen=1.9.1
- gcc_linux-64=11.*
- graphviz
- hdbscan<=0.8.30
- hdbscan>=0.8.38,<0.8.39
- hypothesis>=6.0,<7
- ipykernel
- ipython
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -24,7 +24,7 @@ dependencies:
- doxygen=1.9.1
- gcc_linux-64=11.*
- graphviz
- hdbscan<=0.8.30
- hdbscan>=0.8.38,<0.8.39
- hypothesis>=6.0,<7
- ipykernel
- ipython
2 changes: 1 addition & 1 deletion conda/recipes/cuml-cpu/meta.yaml
@@ -34,7 +34,7 @@ requirements:
- numpy>=1.23,<2.0a0
- pandas
- scikit-learn=1.2
- hdbscan<=0.8.30
- hdbscan>=0.8.38,<0.8.39
- umap-learn=0.5.3
- nvtx

11 changes: 3 additions & 8 deletions cpp/src/fil/common.cuh
@@ -309,17 +309,12 @@ struct compute_smem_footprint : dispatch_functor<int> {
int run(predict_params);
};

template <int NITEMS,
leaf_algo_t leaf_algo,
bool cols_in_shmem,
bool CATS_SUPPORTED,
class storage_type>
__attribute__((visibility("hidden"))) __global__ void infer_k(storage_type forest,
predict_params params);

// infer() calls the inference kernel with the parameters on the stream
template <typename storage_type>
void infer(storage_type forest, predict_params params, cudaStream_t stream);

template <typename storage_type>
void infer_shared_mem_size(predict_params params, int max_shm);

} // namespace fil
} // namespace ML
29 changes: 5 additions & 24 deletions cpp/src/fil/fil.cu
@@ -349,26 +349,6 @@ struct forest {
cat_sets_device_owner cat_sets_;
};

template <typename storage_type>
struct opt_into_arch_dependent_shmem : dispatch_functor<void> {
const int max_shm;
opt_into_arch_dependent_shmem(int max_shm_) : max_shm(max_shm_) {}

template <typename KernelParams = KernelTemplateParams<>>
void run(predict_params p)
{
auto kernel = infer_k<KernelParams::N_ITEMS,
KernelParams::LEAF_ALGO,
KernelParams::COLS_IN_SHMEM,
KernelParams::CATS_SUPPORTED,
storage_type>;
// p.shm_sz might be > max_shm or < MAX_SHM_STD, but we should not check for either, because
// we don't run on both proba_ssp_ and class_ssp_ (only class_ssp_). This should be quick.
RAFT_CUDA_TRY(
cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, max_shm));
}
};

template <typename real_t>
struct dense_forest<dense_node<real_t>> : forest<real_t> {
using node_t = dense_node<real_t>;
@@ -427,8 +407,9 @@ struct dense_forest<dense_node<real_t>> : forest<real_t> {
h.get_stream()));

// predict_proba is a runtime parameter, and opt-in is unconditional
dispatch_on_fil_template_params(opt_into_arch_dependent_shmem<storage<node_t>>(this->max_shm_),
static_cast<predict_params>(this->class_ssp_));
fil::infer_shared_mem_size<storage<node_t>>(static_cast<predict_params>(this->class_ssp_),
this->max_shm_);

// copy must be finished before freeing the host data
h.sync_stream();
h_nodes_.clear();
@@ -491,8 +472,8 @@ struct sparse_forest : forest<typename node_t::real_type> {
nodes_.data(), nodes, sizeof(node_t) * num_nodes_, cudaMemcpyHostToDevice, h.get_stream()));

// predict_proba is a runtime parameter, and opt-in is unconditional
dispatch_on_fil_template_params(opt_into_arch_dependent_shmem<storage<node_t>>(this->max_shm_),
static_cast<predict_params>(this->class_ssp_));
fil::infer_shared_mem_size<storage<node_t>>(static_cast<predict_params>(this->class_ssp_),
this->max_shm_);
}

virtual void infer(predict_params params, cudaStream_t stream) override
32 changes: 32 additions & 0 deletions cpp/src/fil/infer.cu
@@ -908,12 +908,38 @@ struct infer_k_storage_template : dispatch_functor<void> {
}
};

template <typename storage_type>
struct opt_into_arch_dependent_shmem : dispatch_functor<void> {
const int max_shm;
opt_into_arch_dependent_shmem(int max_shm_) : max_shm(max_shm_) {}

template <typename KernelParams = KernelTemplateParams<>>
void run(predict_params p)
{
auto kernel = infer_k<KernelParams::N_ITEMS,
KernelParams::LEAF_ALGO,
KernelParams::COLS_IN_SHMEM,
KernelParams::CATS_SUPPORTED,
storage_type>;
// p.shm_sz might be > max_shm or < MAX_SHM_STD, but we should not check for either, because
// we don't run on both proba_ssp_ and class_ssp_ (only class_ssp_). This should be quick.
RAFT_CUDA_TRY(
cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, max_shm));
}
};

template <typename storage_type>
void infer(storage_type forest, predict_params params, cudaStream_t stream)
{
dispatch_on_fil_template_params(infer_k_storage_template<storage_type>(forest, stream), params);
}

template <typename storage_type>
void infer_shared_mem_size(predict_params params, int max_shm)
{
dispatch_on_fil_template_params(opt_into_arch_dependent_shmem<storage_type>(max_shm), params);
}

template void infer<dense_storage_f32>(dense_storage_f32 forest,
predict_params params,
cudaStream_t stream);
@@ -930,5 +956,11 @@ template void infer<sparse_storage8>(sparse_storage8 forest,
predict_params params,
cudaStream_t stream);

template void infer_shared_mem_size<dense_storage_f32>(predict_params params, int max_shm);
template void infer_shared_mem_size<dense_storage_f64>(predict_params params, int max_shm);
template void infer_shared_mem_size<sparse_storage16_f32>(predict_params params, int max_shm);
template void infer_shared_mem_size<sparse_storage16_f64>(predict_params params, int max_shm);
template void infer_shared_mem_size<sparse_storage8>(predict_params params, int max_shm);

} // namespace fil
} // namespace ML
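
For context on the new infer_shared_mem_size entry point: CUDA limits dynamic shared memory to 48 KiB per block by default, and a kernel must be opted in with cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, bytes) before a launch may request more — the call made inside opt_into_arch_dependent_shmem above. A minimal standalone sketch of that opt-in pattern, using illustrative names that are not cuML symbols:

#include <cuda_runtime.h>
#include <cstdio>

// Kernel that uses dynamically sized shared memory as scratch space.
__global__ void scratch_kernel(float* out)
{
  extern __shared__ float scratch[];
  scratch[threadIdx.x] = static_cast<float>(threadIdx.x);
  __syncthreads();
  out[threadIdx.x] = scratch[threadIdx.x];
}

int main()
{
  int max_shm = 0;
  // Per-block opt-in limit on device 0; larger than the 48 KiB default on Volta and newer.
  cudaDeviceGetAttribute(&max_shm, cudaDevAttrMaxSharedMemoryPerBlockOptin, 0);

  // Without this attribute, a launch requesting more than 48 KiB of dynamic
  // shared memory fails; with it, the kernel may use up to max_shm bytes.
  cudaFuncSetAttribute(scratch_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, max_shm);

  float* out = nullptr;
  cudaMalloc(&out, 256 * sizeof(float));
  scratch_kernel<<<1, 256, max_shm>>>(out);
  cudaDeviceSynchronize();

  std::printf("opted in to %d bytes of dynamic shared memory\n", max_shm);
  cudaFree(out);
  return 0;
}
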
30 changes: 24 additions & 6 deletions dependencies.yaml
@@ -88,6 +88,7 @@ files:
extras:
table: project
includes:
- cuda
- py_run
py_test:
output: pyproject
@@ -176,7 +177,7 @@ dependencies:
common:
- output_types: [conda, requirements, pyproject]
packages:
- cython>=3.0.0
- &cython cython>=3.0.0
- &treelite treelite==4.3.0
- output_types: conda
packages:
@@ -406,6 +407,26 @@ dependencies:
- *libcusolver114
- *libcusparse_dev114
- *libcusparse114
- output_types: pyproject
matrices:
- matrix:
cuda: "12.*"
packages:
- nvidia-cublas-cu12
- nvidia-cufft-cu12
- nvidia-curand-cu12
- nvidia-cusparse-cu12
- nvidia-cusolver-cu12
- matrix:
cuda: "11.*"
packages:
- matrix:
packages:
- nvidia-cublas
- nvidia-cufft
- nvidia-curand
- nvidia-cusparse
- nvidia-cusolver
docs:
common:
- output_types: [conda, requirements]
@@ -465,7 +486,9 @@ dependencies:
common:
- output_types: [conda, requirements, pyproject]
packages:
- *cython
- dask-ml
- hdbscan>=0.8.38,<0.8.39
- hypothesis>=6.0,<7
- nltk
- numpydoc
@@ -484,14 +507,9 @@
- pip
- pip:
- dask-glm==0.3.0
# TODO: remove pin once a release that includes fixes for the error
# is released: https://github.com/rapidsai/cuml/issues/5514
- hdbscan<=0.8.30
- output_types: pyproject
packages:
- dask-glm==0.3.0
# TODO: Can we stop pulling from the master branch now that there was a release in October?
- hdbscan @ git+https://github.com/scikit-learn-contrib/hdbscan.git@master
test_notebooks:
common:
- output_types: [conda, requirements]
26 changes: 24 additions & 2 deletions python/cuml/CMakeLists.txt
@@ -38,6 +38,7 @@ project(
option(CUML_UNIVERSAL "Build all cuML Python components." ON)
option(FIND_CUML_CPP "Search for existing CUML C++ installations before defaulting to local files" OFF)
option(SINGLEGPU "Disable all mnmg components and comms libraries" OFF)
option(USE_CUDA_MATH_WHEELS "Use the CUDA math wheels instead of the system libraries" OFF)
set(CUML_RAFT_CLONE_ON_PIN OFF)


@@ -72,8 +73,10 @@ include(rapids-cython-core)

set(CUML_PYTHON_TREELITE_TARGET treelite::treelite)

if(NOT ${CUML_CPU})
if(NOT CUML_CPU)
if(NOT cuml_FOUND)
find_package(CUDAToolkit REQUIRED)

set(BUILD_CUML_TESTS OFF)
set(BUILD_PRIMS_TESTS OFF)
set(BUILD_CUML_C_LIBRARY OFF)
@@ -85,11 +88,19 @@ if(NOT ${CUML_CPU})

# Statically link dependencies if building wheels
set(CUDA_STATIC_RUNTIME ON)
set(CUDA_STATIC_MATH_LIBRARIES ON)
set(CUML_USE_RAFT_STATIC ON)
set(CUML_USE_FAISS_STATIC ON)
set(CUML_USE_TREELITE_STATIC ON)
set(CUML_USE_CUMLPRIMS_MG_STATIC ON)
# Link to the CUDA wheels with shared libraries for CUDA 12+
if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.0)
set(CUDA_STATIC_MATH_LIBRARIES OFF)
else()
if(USE_CUDA_MATH_WHEELS)
message(FATAL_ERROR "Cannot use CUDA math wheels with CUDA < 12.0")
endif()
set(CUDA_STATIC_MATH_LIBRARIES ON)
endif()
# Don't install the static libs into wheels
set(CUML_EXCLUDE_RAFT_FROM_ALL ON)
set(RAFT_EXCLUDE_FAISS_FROM_ALL ON)
@@ -98,6 +109,17 @@ if(NOT ${CUML_CPU})

add_subdirectory(${CUML_CPP_SRC} cuml-cpp EXCLUDE_FROM_ALL)

if(NOT CUDA_STATIC_MATH_LIBRARIES AND USE_CUDA_MATH_WHEELS)
set_property(TARGET ${CUML_CPP_TARGET} PROPERTY INSTALL_RPATH
"$ORIGIN/../nvidia/cublas/lib"
"$ORIGIN/../nvidia/cufft/lib"
"$ORIGIN/../nvidia/curand/lib"
"$ORIGIN/../nvidia/cusolver/lib"
"$ORIGIN/../nvidia/cusparse/lib"
"$ORIGIN/../nvidia/nvjitlink/lib"
)
endif()

set(cython_lib_dir cuml)
install(TARGETS ${CUML_CPP_TARGET} DESTINATION ${cython_lib_dir})
endif()