Skip to content

Commit

Permalink
Merge branch 'branch-25.04' into more_cccl_fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
bdice authored Feb 6, 2025
2 parents 68fb2fd + e2af6c9 commit 20b73ae
Show file tree
Hide file tree
Showing 89 changed files with 1,616 additions and 1,032 deletions.
1 change: 1 addition & 0 deletions .github/workflows/auto-assign.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ on:
jobs:
add_assignees:
runs-on: ubuntu-latest
if: ${{ ! github.event.pull_request.merged }}
steps:
- uses: actions-ecosystem/action-add-assignees@v1
with:
Expand Down
6 changes: 5 additions & 1 deletion .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ jobs:
uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@nvks-runners
with:
enable_check_generated_files: false
ignored_pr_jobs: "telemetry-summarize"
ignored_pr_jobs: "telemetry-summarize spark-rapids-jni"
conda-cpp-build:
needs: checks
secrets: inherit
Expand Down Expand Up @@ -342,6 +342,10 @@ jobs:
node_type: "cpu4"
build_type: pull-request
run_script: "ci/cudf_pandas_scripts/pandas-tests/diff.sh"
spark-rapids-jni:
needs: changed-files
uses: ./.github/workflows/spark-rapids-jni.yaml
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_java

telemetry-summarize:
# This job must use a self-hosted runner to record telemetry traces.
Expand Down
22 changes: 22 additions & 0 deletions .github/workflows/spark-rapids-jni.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
name: spark-rapids-jni

on:
workflow_call:

jobs:
spark-rapids-jni-build:
runs-on: linux-amd64-cpu8
container:
image: rapidsai/ci-spark-rapids-jni:rockylinux8-cuda12.2.0
steps:
- uses: actions/checkout@v4
with:
repository: NVIDIA/spark-rapids-jni
submodules: recursive
- uses: actions/checkout@v4
with:
path: thirdparty/cudf
- name: "Build spark-rapids-jni"
run: |
mkdir target
GPU_ARCHS=90 LIBCUDF_DEPENDENCY_MODE=latest USE_GDS=on scl enable gcc-toolset-11 build/buildcpp.sh
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ repos:
args: ["--fix"]
- id: ruff-format
- repo: https://github.com/rapidsai/pre-commit-hooks
rev: v0.5.0
rev: v0.6.0
hooks:
- id: verify-copyright
exclude: |
Expand Down
4 changes: 2 additions & 2 deletions ci/build_wheel.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Copyright (c) 2023-2025, NVIDIA CORPORATION.

set -euo pipefail

Expand All @@ -16,7 +16,7 @@ cd "${package_dir}"
sccache --zero-stats

rapids-logger "Building '${package_name}' wheel"
python -m pip wheel \
rapids-pip-retry wheel \
-w dist \
-v \
--no-deps \
Expand Down
2 changes: 1 addition & 1 deletion ci/build_wheel_libcudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ rapids-dependency-file-generator \
| tee /tmp/requirements-build.txt

rapids-logger "Installing build requirements"
python -m pip install \
rapids-pip-retry install \
-v \
--prefer-binary \
-r /tmp/requirements-build.txt
Expand Down
4 changes: 2 additions & 2 deletions ci/configure_cpp_static.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.

set -euo pipefail

Expand All @@ -15,7 +15,7 @@ rapids-dependency-file-generator \
--file-key test_static_build \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch)" | tee "${REQUIREMENTS_FILE}"

python -m pip install -r "${REQUIREMENTS_FILE}"
rapids-pip-retry install -r "${REQUIREMENTS_FILE}"
pyenv rehash

cmake -S cpp -B build_static -GNinja -DBUILD_SHARED_LIBS=OFF -DCUDF_USE_ARROW_STATIC=ON -DBUILD_TESTS=OFF
11 changes: 11 additions & 0 deletions ci/run_pylibcudf_pytests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash
# Copyright (c) 2025, NVIDIA CORPORATION.

# Runs pytest on the pylibcudf `tests` directory. All arguments given to this
# script are forwarded to pytest unchanged.

# Exit on any command failure, on use of an unset variable, and on a failure
# in any stage of a pipeline.
set -euo pipefail

# It is essential to cd into python/pylibcudf/pylibcudf as `pytest-xdist` + `coverage` seem to work only at this directory level.

# Support invoking run_pylibcudf_pytests.sh outside the script directory
cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/pylibcudf/pylibcudf/

# Caller-supplied options ("$@") are placed before the `tests` path argument.
pytest --cache-clear --ignore="benchmarks" "$@" tests
4 changes: 2 additions & 2 deletions ci/test_cudf_polars_polars_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-f
RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./local-pylibcudf-dep

rapids-logger "Install libcudf, pylibcudf and cudf_polars"
python -m pip install \
rapids-pip-retry install \
-v \
"$(echo ./dist/cudf_polars_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test]" \
"$(echo ./local-libcudf-dep/libcudf_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" \
Expand All @@ -26,7 +26,7 @@ git clone https://github.com/pola-rs/polars.git --branch "${TAG}" --depth 1

# Install requirements for running polars tests
rapids-logger "Install polars test requirements"
python -m pip install -r polars/py-polars/requirements-dev.txt -r polars/py-polars/requirements-ci.txt
rapids-pip-retry install -r polars/py-polars/requirements-dev.txt -r polars/py-polars/requirements-ci.txt

# shellcheck disable=SC2317
function set_exitcode()
Expand Down
1 change: 1 addition & 0 deletions ci/test_python_common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,5 @@ rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
"cudf=${RAPIDS_VERSION}" \
"pylibcudf=${RAPIDS_VERSION}" \
"libcudf=${RAPIDS_VERSION}"
14 changes: 8 additions & 6 deletions ci/test_python_cudf.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Copyright (c) 2022-2025, NVIDIA CORPORATION.

# Support invoking test_python_cudf.sh outside the script directory
cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../;
Expand All @@ -15,12 +15,14 @@ trap "EXITCODE=1" ERR
set +e

rapids-logger "pytest pylibcudf"
pushd python/pylibcudf/pylibcudf/tests
python -m pytest \
--cache-clear \
./ci/run_pylibcudf_pytests.sh \
--junitxml="${RAPIDS_TESTS_DIR}/junit-pylibcudf.xml" \
--numprocesses=8 \
--dist=worksteal \
.
popd
--cov-config=../.coveragerc \
--cov=pylibcudf \
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/pylibcudf-coverage.xml" \
--cov-report=term

rapids-logger "pytest cudf"
./ci/run_cudf_pytests.sh \
Expand Down
2 changes: 1 addition & 1 deletion ci/test_wheel_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ rapids-logger "Install cudf, pylibcudf, and test requirements"
rapids-generate-pip-constraints py_test_cudf ./constraints.txt

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install \
rapids-pip-retry install \
-v \
--constraint ./constraints.txt \
"$(echo ./dist/cudf_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test]" \
Expand Down
2 changes: 1 addition & 1 deletion ci/test_wheel_cudf_polars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ rapids-logger "Installing cudf_polars and its dependencies"
rapids-generate-pip-constraints py_test_cudf_polars ./constraints.txt

# echo to expand wildcard before adding `[test,experimental]` requires for pip
python -m pip install \
rapids-pip-retry install \
-v \
--constraint ./constraints.txt \
"$(echo ./dist/cudf_polars_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)[test,experimental]" \
Expand Down
2 changes: 1 addition & 1 deletion ci/test_wheel_dask_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ rapids-logger "Install dask_cudf, cudf, pylibcudf, and test requirements"
rapids-generate-pip-constraints py_test_dask_cudf ./constraints.txt

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install \
rapids-pip-retry install \
-v \
--constraint ./constraints.txt \
"$(echo ./dist/cudf_"${RAPIDS_PY_CUDA_SUFFIX}"*.whl)" \
Expand Down
3 changes: 0 additions & 3 deletions conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,6 @@ build:
- SCCACHE_S3_KEY_PREFIX=cudf-linux64 # [linux64]
- SCCACHE_S3_USE_SSL
- SCCACHE_S3_NO_CREDENTIALS
ignore_run_exports:
# libcudf's run_exports pinning is looser than we would like
- libcudf
ignore_run_exports_from:
- {{ compiler('cuda') }}
{% if cuda_major != "11" %}
Expand Down
4 changes: 1 addition & 3 deletions conda/recipes/pylibcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,6 @@ build:
- SCCACHE_S3_KEY_PREFIX=pylibcudf-linux64 # [linux64]
- SCCACHE_S3_USE_SSL
- SCCACHE_S3_NO_CREDENTIALS
ignore_run_exports:
# libcudf's run_exports pinning is looser than we would like
- libcudf
ignore_run_exports_from:
- {{ compiler('cuda') }}
{% if cuda_major != "11" %}
Expand Down Expand Up @@ -78,6 +75,7 @@ requirements:
- pandas >=2.0,<2.2.4dev0
- numpy >=1.23,<3.0a0
- pyarrow>=14.0.0,<20.0.0a0
- libcudf ={{ version }}
- {{ pin_compatible('rmm', max_pin='x.x') }}
- fsspec >=0.6.0
{% if cuda_major == "11" %}
Expand Down
10 changes: 5 additions & 5 deletions cpp/benchmarks/json/json.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -190,10 +190,10 @@ static void bench_query(nvbench::state& state)
{
srand(5236);

auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const desired_bytes = static_cast<cudf::size_type>(state.get_int64("bytes"));
auto const query = state.get_int64("query");
auto const json_path = queries[query];
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const desired_bytes = static_cast<cudf::size_type>(state.get_int64("bytes"));
auto const query = state.get_int64("query");
std::string_view const json_path = queries[query];

auto const stream = cudf::get_default_stream();
auto input = build_json_string_column(desired_bytes, num_rows);
Expand Down
6 changes: 3 additions & 3 deletions cpp/benchmarks/string/join_strings.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -41,8 +41,8 @@ static void bench_join(nvbench::state& state)
state.add_global_memory_reads<nvbench::int8_t>(chars_size); // all bytes are read;
state.add_global_memory_writes<nvbench::int8_t>(chars_size); // all bytes are written

std::string separator(":");
std::string narep("null");
std::string_view separator(":");
std::string_view narep("null");
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
auto result = cudf::strings::join_strings(input, separator, narep);
});
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/string/like.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
* Copyright (c) 2022-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -34,7 +34,7 @@ static void bench_like(nvbench::state& state)
auto input = cudf::strings_column_view(col->view());

// This pattern forces reading the entire target string (when a match is expected)
auto pattern = std::string("% 5W4_"); // regex equivalent: ".* 5W4.$"
auto pattern = std::string_view("% 5W4_"); // regex equivalent: ".* 5W4.$"

state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
// gather some throughput statistics as well
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/string/replace_re.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -49,7 +49,7 @@ static void bench_replace(nvbench::state& state)
cudf::strings::replace_with_backrefs(input, *program, replacement);
});
} else {
auto replacement = std::string("77");
auto replacement = std::string_view("77");
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
cudf::strings::replace_re(input, *program, replacement);
});
Expand Down
4 changes: 2 additions & 2 deletions cpp/examples/strings/libcudf_apis.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -53,7 +53,7 @@ std::unique_ptr<cudf::column> redact_strings(cudf::column_view const& names,

auto const last_initial_first = cudf::table_view({last_initial->view(), first});

auto result = cudf::strings::concatenate(last_initial_first, std::string(" "));
auto result = cudf::strings::concatenate(last_initial_first, std::string_view(" "));

cudaStreamSynchronize(0);

Expand Down
20 changes: 10 additions & 10 deletions cpp/include/cudf/io/csv.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
* Copyright (c) 2020-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -916,7 +916,7 @@ class csv_reader_options_builder {
*/
csv_reader_options_builder& prefix(std::string pfx)
{
options._prefix = pfx;
options._prefix = std::move(pfx);
return *this;
}

Expand Down Expand Up @@ -1450,7 +1450,7 @@ class csv_writer_options {
*
* @return string used for null entries
*/
[[nodiscard]] std::string get_na_rep() const { return _na_rep; }
[[nodiscard]] std::string const& get_na_rep() const { return _na_rep; }

/**
* @brief Whether to write headers to csv.
Expand All @@ -1471,7 +1471,7 @@ class csv_writer_options {
*
* @return Character used for separating lines
*/
[[nodiscard]] std::string get_line_terminator() const { return _line_terminator; }
[[nodiscard]] std::string const& get_line_terminator() const { return _line_terminator; }

/**
* @brief Returns character used for separating column values.
Expand All @@ -1485,14 +1485,14 @@ class csv_writer_options {
*
* @return string used for values != 0 in INT8 types
*/
[[nodiscard]] std::string get_true_value() const { return _true_value; }
[[nodiscard]] std::string const& get_true_value() const { return _true_value; }

/**
* @brief Returns string used for values == 0 in INT8 types.
*
* @return string used for values == 0 in INT8 types
*/
[[nodiscard]] std::string get_false_value() const { return _false_value; }
[[nodiscard]] std::string const& get_false_value() const { return _false_value; }

/**
* @brief Returns the quote style for the writer.
Expand All @@ -1519,7 +1519,7 @@ class csv_writer_options {
*
* @param val String to represent null value
*/
void set_na_rep(std::string val) { _na_rep = val; }
void set_na_rep(std::string val) { _na_rep = std::move(val); }

/**
* @brief Enables/Disables headers being written to csv.
Expand All @@ -1540,7 +1540,7 @@ class csv_writer_options {
*
* @param term Character to represent line termination
*/
void set_line_terminator(std::string term) { _line_terminator = term; }
void set_line_terminator(std::string term) { _line_terminator = std::move(term); }

/**
* @brief Sets character used for separating column values.
Expand All @@ -1554,14 +1554,14 @@ class csv_writer_options {
*
* @param val String to represent values != 0 in INT8 types
*/
void set_true_value(std::string val) { _true_value = val; }
void set_true_value(std::string val) { _true_value = std::move(val); }

/**
* @brief Sets string used for values == 0 in INT8 types.
*
* @param val String to represent values == 0 in INT8 types
*/
void set_false_value(std::string val) { _false_value = val; }
void set_false_value(std::string val) { _false_value = std::move(val); }

/**
* @brief (Re)sets the table being written.
Expand Down
Loading

0 comments on commit 20b73ae

Please sign in to comment.