Skip to content

Commit

Permalink
Merge branch 'branch-25.04' into build-libzstd
Browse files Browse the repository at this point in the history
  • Loading branch information
shrshi committed Feb 28, 2025
2 parents ff7dda3 + cf8938b commit 8fadd5f
Show file tree
Hide file tree
Showing 57 changed files with 1,245 additions and 244 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ jobs:
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
node_type: "cpu16"
python-build:
needs: [cpp-build]
secrets: inherit
Expand Down Expand Up @@ -77,6 +78,7 @@ jobs:
branch: ${{ inputs.branch }}
sha: ${{ inputs.sha }}
date: ${{ inputs.date }}
node_type: "cpu16"
script: ci/build_wheel_libcudf.sh
wheel-publish-libcudf:
needs: wheel-build-libcudf
Expand Down
12 changes: 0 additions & 12 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,18 +46,6 @@ jobs:
arch: "amd64"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/test_cpp_memcheck.sh"
static-configure:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
build_type: ${{ inputs.build_type }}
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
# Use the wheel container so we can skip conda solves and since our
# primary static consumers (Spark) are not in conda anyway.
container_image: "rapidsai/ci-wheel:latest"
run_script: "ci/configure_cpp_static.sh"
cpp-linters:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
Expand Down
2 changes: 2 additions & 0 deletions ci/test_cudf_polars_polars_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ git clone https://github.com/pola-rs/polars.git --branch "${TAG}" --depth 1

# Install requirements for running polars tests
rapids-logger "Install polars test requirements"
# TODO: Remove sed command when polars-cloud supports 1.23
sed -i '/^polars-cloud$/d' polars/py-polars/requirements-dev.txt
rapids-pip-retry install -r polars/py-polars/requirements-dev.txt -r polars/py-polars/requirements-ci.txt

# shellcheck disable=SC2317
Expand Down
1 change: 1 addition & 0 deletions ci/test_narwhals.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ rapids-logger "Run narwhals tests for cuDF"
python -m pytest \
--cache-clear \
--junitxml="${RAPIDS_TESTS_DIR}/junit-cudf-narwhals.xml" \
-p cudf.testing.narwhals_test_plugin \
--numprocesses=8 \
--dist=worksteal \
--constructors=cudf
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ dependencies:
- pandas
- pandas>=2.0,<2.2.4dev0
- pandoc
- polars>=1.20,<1.23
- polars>=1.20,<1.24
- pre-commit
- ptxcompiler
- pyarrow>=14.0.0,<20.0.0a0
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-128_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ dependencies:
- pandas
- pandas>=2.0,<2.2.4dev0
- pandoc
- polars>=1.20,<1.23
- polars>=1.20,<1.24
- pre-commit
- pyarrow>=14.0.0,<20.0.0a0
- pydata-sphinx-theme>=0.15.4
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/cudf-polars/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ requirements:
run:
- python
- pylibcudf ={{ version }}
- polars >=1.20,<1.23
- polars >=1.20,<1.24
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}

test:
Expand Down
3 changes: 2 additions & 1 deletion cpp/benchmarks/common/random_distribution_factory.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
* Copyright (c) 2020-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -29,6 +29,7 @@
#include <thrust/tabulate.h>

#include <algorithm>
#include <functional>
#include <memory>
#include <type_traits>

Expand Down
94 changes: 94 additions & 0 deletions cpp/include/nvtext/minhash.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,5 +125,99 @@ std::unique_ptr<cudf::column> minhash64(
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Returns the minhash values for each input row
*
* This function uses MurmurHash3_x86_32 for the hash algorithm.
*
* Each input row is a list of strings. The row is first hashed using the
* given `seed` over a sliding window of `ngrams` strings. These hash values
* are then combined with the `a` and `b` parameter values (`parameter_a` and
* `parameter_b`) using the following formula:
* ```
* max_hash = max of uint32
* mp = (1 << 61) - 1
* hv[i] = hash value of the ngram at i
* pv[i] = ((hv[i] * a[i] + b[i]) % mp) & max_hash
* ```
*
* This calculation is performed on each set of ngrams and the minimum value
* is computed as follows:
* ```
* mh[j,i] = min(pv[i]) for all ngrams in row j
* and where i=[0,a.size())
* ```
*
* NOTE(review): `i` indexes the ngram in `hv[i]` but the parameter pair in
* `a[i]`, `b[i]` and `mh[j,i]`; confirm the intended indexing against the
* implementation before relying on the formulas above.
*
* Any null row entries result in corresponding null output rows.
*
* @throw std::invalid_argument if the ngrams < 2
* @throw std::invalid_argument if parameter_a is empty
* @throw std::invalid_argument if `parameter_b.size() != parameter_a.size()`
* @throw std::overflow_error if `parameter_a.size() * input.size()` exceeds the column size limit
*
* @param input Lists column of strings to compute minhash
* @param ngrams The number of strings to hash within each row
* @param seed Seed value used for the hash algorithm
* @param parameter_a Values used for the permuted calculation (`a` in the formula above)
* @param parameter_b Values used for the permuted calculation (`b` in the formula above)
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return List column of minhash values for each input row, with one value
*         per `parameter_a`/`parameter_b` pair
*/
std::unique_ptr<cudf::column> minhash_ngrams(
cudf::lists_column_view const& input,
cudf::size_type ngrams,
uint32_t seed,
cudf::device_span<uint32_t const> parameter_a,
cudf::device_span<uint32_t const> parameter_b,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Returns the minhash values for each input row
*
* This function uses MurmurHash3_x64_128 for the hash algorithm.
*
* Each input row is a list of strings. The row is first hashed using the
* given `seed` over a sliding window of `ngrams` strings. These hash values
* are then combined with the `a` and `b` parameter values (`parameter_a` and
* `parameter_b`) using the following formula:
* ```
* max_hash = max of uint64
* mp = (1 << 61) - 1
* hv[i] = hash value of the ngram at i
* pv[i] = ((hv[i] * a[i] + b[i]) % mp) & max_hash
* ```
*
* This calculation is performed on each set of ngrams and the minimum value
* is computed as follows:
* ```
* mh[j,i] = min(pv[i]) for all ngrams in row j
* and where i=[0,a.size())
* ```
*
* NOTE(review): `i` indexes the ngram in `hv[i]` but the parameter pair in
* `a[i]`, `b[i]` and `mh[j,i]`; confirm the intended indexing against the
* implementation before relying on the formulas above.
*
* Any null row entries result in corresponding null output rows.
*
* @throw std::invalid_argument if the ngrams < 2
* @throw std::invalid_argument if parameter_a is empty
* @throw std::invalid_argument if `parameter_b.size() != parameter_a.size()`
* @throw std::overflow_error if `parameter_a.size() * input.size()` exceeds the column size limit
*
* @param input Lists column of strings to compute minhash
* @param ngrams The number of strings to hash within each row
* @param seed Seed value used for the hash algorithm
* @param parameter_a Values used for the permuted calculation (`a` in the formula above)
* @param parameter_b Values used for the permuted calculation (`b` in the formula above)
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return List column of minhash values for each input row, with one value
*         per `parameter_a`/`parameter_b` pair
*/
std::unique_ptr<cudf::column> minhash64_ngrams(
cudf::lists_column_view const& input,
cudf::size_type ngrams,
uint64_t seed,
cudf::device_span<uint64_t const> parameter_a,
cudf::device_span<uint64_t const> parameter_b,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/** @} */ // end of group
} // namespace CUDF_EXPORT nvtext
3 changes: 2 additions & 1 deletion cpp/src/column/column_device_view.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -25,6 +25,7 @@
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>

#include <functional>
#include <numeric>

namespace cudf {
Expand Down
1 change: 1 addition & 0 deletions cpp/src/io/functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <cudf/utilities/error.hpp>

#include <algorithm>
#include <functional>
#include <utility>

namespace cudf::io {
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/io/json/host_tree_algorithms.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -46,6 +46,7 @@

#include <algorithm>
#include <deque>
#include <functional>

namespace cudf::io::json::detail {

Expand Down
1 change: 1 addition & 0 deletions cpp/src/io/json/read_json.cu
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include <BS_thread_pool.hpp>
#include <BS_thread_pool_utils.hpp>

#include <functional>
#include <numeric>

namespace cudf::io::json::detail {
Expand Down
1 change: 1 addition & 0 deletions cpp/src/io/orc/aggregate_orc_metadata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "io/utilities/row_selection.hpp"

#include <algorithm>
#include <functional>
#include <numeric>

namespace cudf::io::orc::detail {
Expand Down
1 change: 1 addition & 0 deletions cpp/src/io/orc/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@

#include <algorithm>
#include <cstring>
#include <functional>
#include <numeric>
#include <tuple>
#include <utility>
Expand Down
1 change: 1 addition & 0 deletions cpp/src/io/parquet/reader_impl_chunking.cu
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#include <thrust/transform_scan.h>
#include <thrust/unique.h>

#include <functional>
#include <numeric>

namespace cudf::io::parquet::detail {
Expand Down
1 change: 1 addition & 0 deletions cpp/src/io/parquet/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@

#include <algorithm>
#include <cstring>
#include <functional>
#include <iterator>
#include <numeric>
#include <utility>
Expand Down
4 changes: 3 additions & 1 deletion cpp/src/lists/dremel.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
* Copyright (c) 2022-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -36,6 +36,8 @@
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/discard_iterator.h>

#include <functional>

namespace cudf::detail {
namespace {
/**
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/strings/regex/regex.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -27,6 +27,7 @@
#include <cuda_runtime.h>
#include <thrust/pair.h>

#include <functional>
#include <memory>

namespace cudf {
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/strings/replace/multi_re.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -39,6 +39,7 @@
#include <thrust/pair.h>

#include <algorithm>
#include <functional>

namespace cudf {
namespace strings {
Expand Down
4 changes: 3 additions & 1 deletion cpp/src/table/row_operators.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
* Copyright (c) 2022-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -33,6 +33,8 @@

#include <thrust/iterator/transform_iterator.h>

#include <functional>

namespace cudf {
namespace experimental {

Expand Down
3 changes: 2 additions & 1 deletion cpp/src/text/bpe/load_merge_pairs.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
* Copyright (c) 2022-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -33,6 +33,7 @@
#include <cuda/functional>

#include <fstream>
#include <functional>
#include <iostream>
#include <vector>

Expand Down
Loading

0 comments on commit 8fadd5f

Please sign in to comment.