Skip to content

Commit

Permalink
Merge branch 'new-normalizer-apis' of github.com:davidwendt/cudf into…
Browse files Browse the repository at this point in the history
… new-normalizer-apis
  • Loading branch information
davidwendt committed Feb 24, 2025
2 parents bec02e3 + 79e5c26 commit 8b0507f
Show file tree
Hide file tree
Showing 10 changed files with 128 additions and 24 deletions.
15 changes: 15 additions & 0 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ jobs:
- unit-tests-cudf-pandas
- pandas-tests
- pandas-tests-diff
- narwhals-tests
- telemetry-setup
- third-party-integration-tests-cudf-pandas
secrets: inherit
Expand Down Expand Up @@ -358,6 +359,20 @@ jobs:
node_type: "cpu4"
build_type: pull-request
run_script: "ci/cudf_pandas_scripts/pandas-tests/diff.sh"
narwhals-tests:
needs: [conda-python-build, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.04
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
with:
build_type: pull-request
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
node_type: "gpu-l4-latest-1"
continue-on-error: true
container_image: "rapidsai/ci-conda:latest"
run_script: ci/test_narwhals.sh
spark-rapids-jni:
needs: changed-files
uses: ./.github/workflows/spark-rapids-jni.yaml
Expand Down
44 changes: 44 additions & 0 deletions ci/test_narwhals.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/bin/bash
# Copyright (c) 2025, NVIDIA CORPORATION.
#
# Clones the narwhals repository and runs its pytest suite twice: once with
# the cudf constructor and once with the lazy polars constructor while
# NARWHALS_POLARS_GPU=1 is set. The script exits non-zero if any command
# after the setup phase fails.

# Support invoking test_narwhals.sh outside the script directory
cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ || exit 1

# Common setup steps shared by Python test jobs.
# NOTE(review): the argument presumably selects the `test_python_narwhals`
# file key in dependencies.yaml -- confirm against test_python_common.sh.
source ./ci/test_python_common.sh test_python_narwhals

rapids-logger "Check GPU usage"
nvidia-smi
rapids-print-env

# From here on, failures are recorded rather than fatal: the ERR trap flips
# EXITCODE to 1 and `set +e` lets the remaining steps run, so both pytest
# invocations execute even when the first one fails.
EXITCODE=0
trap "EXITCODE=1" ERR
set +e

rapids-logger "pytest narwhals"
git clone https://github.com/narwhals-dev/narwhals --depth=1
pushd narwhals || exit 1
rapids-pip-retry install -U -e ".[dev]"

rapids-logger "Check narwhals versions"
python -c "import narwhals; print(narwhals.show_versions())"

rapids-logger "Run narwhals tests for cuDF"
python -m pytest \
    --cache-clear \
    --junitxml="${RAPIDS_TESTS_DIR}/junit-cudf-narwhals.xml" \
    --numprocesses=8 \
    --dist=worksteal \
    --constructors=cudf

rapids-logger "Run narwhals tests for cuDF Polars"
# The constructor name contains `[lazy]`, which the shell would otherwise
# treat as a glob bracket expression; quote it so pytest always receives the
# literal text regardless of directory contents or nullglob/failglob.
NARWHALS_POLARS_GPU=1 python -m pytest \
    --cache-clear \
    --junitxml="${RAPIDS_TESTS_DIR}/junit-cudf-polars-narwhals.xml" \
    --numprocesses=8 \
    --dist=worksteal \
    "--constructors=polars[lazy]"

popd || exit 1

rapids-logger "Test script exiting with value: $EXITCODE"
exit ${EXITCODE}
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ dependencies:
- numpy>=1.23,<3.0a0
- numpydoc
- nvcc_linux-64=11.8
- nvcomp==4.1.0.6
- nvcomp==4.2.0.11
- nvtx>=0.2.1
- openpyxl
- packaging
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-128_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ dependencies:
- numba>=0.59.1,<0.61.0a0
- numpy>=1.23,<3.0a0
- numpydoc
- nvcomp==4.1.0.6
- nvcomp==4.2.0.11
- nvtx>=0.2.1
- openpyxl
- packaging
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/libcudf/conda_build_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ flatbuffers_version:
- "=24.3.25"

nvcomp_version:
- "=4.1.0.6"
- "=4.2.0.11"

zlib_version:
- ">=1.2.13"
Expand Down
51 changes: 36 additions & 15 deletions cpp/include/cudf/utilities/type_dispatcher.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,14 @@ namespace CUDF_EXPORT cudf {
* For example:
*
* ```
* return cudf::type_to_id<int32_t>(); // Returns INT32
* return cudf::base_type_to_id<int32_t>(); // Returns INT32
* ```
*
* @tparam T The type to map to a `cudf::type_id`
* @tparam T The non-cv type to map to a `cudf::type_id`
* @return The `cudf::type_id` corresponding to the specified type
*/
template <typename T>
CUDF_HOST_DEVICE inline constexpr type_id type_to_id()
CUDF_HOST_DEVICE inline constexpr type_id base_type_to_id()
{
return type_id::EMPTY;
};
Expand Down Expand Up @@ -114,20 +114,24 @@ using device_storage_type_t =
// clang-format on

/**
* @brief Checks if `fixed_point`-like types have template type `T` matching the column's
* stored type id
* @brief Maps a C++ type to its corresponding `cudf::type_id`
*
* @tparam T The type that is stored on the device
* @param id The `data_type::id` of the column
* @return `true` If T matches the stored column `type_id`
* @return `false` If T does not match the stored column `type_id`
* When explicitly passed a template argument of a given type, returns the
* appropriate `type_id` enum for the specified C++ type.
*
* For example:
*
* ```
* return cudf::type_to_id<int32_t>(); // Returns INT32
* ```
*
* @tparam T The type to map to a `cudf::type_id`
* @return The `cudf::type_id` corresponding to the specified type
*/
template <typename T>
constexpr bool type_id_matches_device_storage_type(type_id id)
constexpr inline type_id type_to_id()
{
return (id == type_id::DECIMAL32 && std::is_same_v<T, int32_t>) ||
(id == type_id::DECIMAL64 && std::is_same_v<T, int64_t>) ||
(id == type_id::DECIMAL128 && std::is_same_v<T, __int128_t>) || id == type_to_id<T>();
return base_type_to_id<std::remove_cv_t<T>>();
}

/**
Expand All @@ -140,7 +144,7 @@ constexpr bool type_id_matches_device_storage_type(type_id id)
#ifndef CUDF_TYPE_MAPPING
#define CUDF_TYPE_MAPPING(Type, Id) \
template <> \
constexpr inline type_id type_to_id<Type>() \
constexpr inline type_id base_type_to_id<Type>() \
{ \
return Id; \
} \
Expand Down Expand Up @@ -194,11 +198,28 @@ CUDF_TYPE_MAPPING(cudf::struct_view, type_id::STRUCT)
* @return id for 'char' type
*/
template <> // CUDF_TYPE_MAPPING(char,INT8) causes duplicate id_to_type_impl definition
constexpr inline type_id type_to_id<char>()
constexpr inline type_id base_type_to_id<char>()
{
return type_id::INT8;
}

/**
 * @brief Reports whether device storage type `T` is acceptable for a column whose
 * `data_type::id` is `id`
 *
 * The answer is `true` when `id` equals `type_to_id<T>()`, or when `id` is one of the
 * decimal ids and `T` is the paired integer type: `DECIMAL32` with `int32_t`,
 * `DECIMAL64` with `int64_t`, `DECIMAL128` with `__int128_t`.
 *
 * @tparam T The type that is stored on the device
 * @param id The `data_type::id` of the column
 * @return `true` If T matches the stored column `type_id`
 * @return `false` If T does not match the stored column `type_id`
 */
template <typename T>
constexpr bool type_id_matches_device_storage_type(type_id id)
{
  // Direct mapping from T to its own type_id.
  if (id == type_to_id<T>()) { return true; }

  // Otherwise only the decimal ids can match, via their paired integer type.
  switch (id) {
    case type_id::DECIMAL32: return std::is_same_v<T, int32_t>;
    case type_id::DECIMAL64: return std::is_same_v<T, int64_t>;
    case type_id::DECIMAL128: return std::is_same_v<T, __int128_t>;
    default: return false;
  }
}

/**
* @brief Use this specialization on `type_dispatcher` whenever you only need to operate on the
* underlying stored type.
Expand Down
8 changes: 7 additions & 1 deletion cpp/tests/types/type_dispatcher_test.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -50,6 +50,12 @@ TYPED_TEST(TypedDispatcherTest, TypeToId)
{
EXPECT_TRUE(cudf::type_dispatcher(cudf::data_type{cudf::type_to_id<TypeParam>()},
type_tester<TypeParam>{}));
EXPECT_TRUE(cudf::type_dispatcher(cudf::data_type{cudf::type_to_id<TypeParam const>()},
type_tester<TypeParam>{}));
EXPECT_TRUE(cudf::type_dispatcher(cudf::data_type{cudf::type_to_id<TypeParam volatile>()},
type_tester<TypeParam>{}));
EXPECT_TRUE(cudf::type_dispatcher(cudf::data_type{cudf::type_to_id<TypeParam const volatile>()},
type_tester<TypeParam>{}));
}

namespace {
Expand Down
18 changes: 14 additions & 4 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,16 @@ files:
includes:
- test_python_common
- test_python_cudf_common
test_python_narwhals:
output: none
includes:
- cuda_version
- py_version
- test_python_common
- test_python_cudf_common
- test_python_cudf
- depends_on_cudf
- depends_on_cudf_polars
channels:
- rapidsai
- rapidsai-nightly
Expand Down Expand Up @@ -454,20 +464,20 @@ dependencies:
- output_types: conda
packages:
# Align nvcomp version with rapids-cmake
- nvcomp==4.1.0.6
- nvcomp==4.2.0.11
specific:
- output_types: [requirements, pyproject]
matrices:
- matrix:
cuda: "12.*"
use_cuda_wheels: "true"
packages:
- nvidia-nvcomp-cu12==4.1.0.6
- nvidia-nvcomp-cu12==4.2.0.11
- matrix:
cuda: "11.*"
use_cuda_wheels: "true"
packages:
- nvidia-nvcomp-cu11==4.1.0.6
- nvidia-nvcomp-cu11==4.2.0.11
# if use_cuda_wheels=false is provided, do not add dependencies on any CUDA wheels
# (e.g. for DLFW and pip devcontainers)
- matrix:
Expand All @@ -477,7 +487,7 @@ dependencies:
# (just as a source of documentation, as this populates pyproject.toml in source control)
- matrix:
packages:
- nvidia-nvcomp==4.1.0.6
- nvidia-nvcomp==4.2.0.11
rapids_build_skbuild:
common:
- output_types: [conda, requirements, pyproject]
Expand Down
8 changes: 8 additions & 0 deletions python/cudf/cudf/core/column/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -4679,6 +4679,9 @@ def normalize_characters(self, do_lower: bool = True) -> SeriesOrIndex:
r"""
Normalizes strings characters for tokenizing.
.. deprecated:: 25.04
Use `CharacterNormalizer` instead.
The normalizer function includes:
- adding padding around punctuation (unicode category starts with
Expand Down Expand Up @@ -4719,6 +4722,11 @@ def normalize_characters(self, do_lower: bool = True) -> SeriesOrIndex:
2 $ 99
dtype: object
"""
warnings.warn(
"normalize_characters is deprecated and will be removed in a future "
"version. Use CharacterNormalizer instead.",
FutureWarning,
)
return self._return_or_inplace(
self._column.characters_normalize(do_lower)
)
Expand Down
2 changes: 1 addition & 1 deletion python/libcudf/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ classifiers = [
dependencies = [
"libkvikio==25.4.*,>=0.0.0a0",
"librmm==25.4.*,>=0.0.0a0",
"nvidia-nvcomp==4.1.0.6",
"nvidia-nvcomp==4.2.0.11",
"rapids-logger==0.1.*,>=0.0.0a0",
] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.

Expand Down

0 comments on commit 8b0507f

Please sign in to comment.