Skip to content

Commit

Permalink
Merge branch 'branch-25.02' into cudf/_lib/round
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke authored Nov 26, 2024
2 parents b2aab3a + f05e89d commit b36faec
Show file tree
Hide file tree
Showing 17 changed files with 535 additions and 216 deletions.
4 changes: 4 additions & 0 deletions ci/run_cudf_polars_pytests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,8 @@ set -euo pipefail
# Support invoking run_cudf_polars_pytests.sh outside the script directory
cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cudf_polars/

# Test the default "cudf" executor
python -m pytest --cache-clear "$@" tests

# Test the "dask-experimental" executor
python -m pytest --cache-clear "$@" tests --executor dask-experimental
175 changes: 0 additions & 175 deletions cpp/include/cudf/detail/utilities/int_fastdiv.h

This file was deleted.

20 changes: 10 additions & 10 deletions cpp/include/cudf/groupby.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,23 +178,15 @@ class groupby {
*
* @param requests The set of columns to aggregate and the aggregations to
* perform
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned table and columns' device memory
* @return Pair containing the table with each group's unique key and
* a vector of aggregation_results for each request in the same order as
* specified in `requests`.
*/
std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> aggregate(
host_span<aggregation_request const> requests,
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @copydoc aggregate(host_span<aggregation_request const>, rmm::device_async_resource_ref)
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> aggregate(
host_span<aggregation_request const> requests,
rmm::cuda_stream_view stream,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
/**
* @brief Performs grouped scans on the specified values.
Expand Down Expand Up @@ -242,13 +234,15 @@ class groupby {
* ```
*
* @param requests The set of columns to scan and the scans to perform
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned table and columns' device memory
* @return Pair containing the table with each group's key and
* a vector of aggregation_results for each request in the same order as
* specified in `requests`.
*/
std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> scan(
host_span<scan_request const> requests,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
Expand Down Expand Up @@ -295,6 +289,7 @@ class groupby {
* @param values Table whose columns to be shifted
* @param offsets The offsets by which to shift the input
* @param fill_values Fill values for indeterminable outputs
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned table and columns' device memory
* @return Pair containing the tables with each group's key and the columns shifted
*
Expand All @@ -305,6 +300,7 @@ class groupby {
table_view const& values,
host_span<size_type const> offsets,
std::vector<std::reference_wrapper<scalar const>> const& fill_values,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
Expand All @@ -329,11 +325,13 @@ class groupby {
* and the `values` of the `groups` object will be `nullptr`.
*
* @param values Table representing values on which a groupby operation is to be performed
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned tables' device memory in the
* returned groups
* @return A `groups` object representing grouped keys and values
*/
groups get_groups(cudf::table_view values = {},
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
Expand Down Expand Up @@ -367,13 +365,15 @@ class groupby {
* @param[in] values A table whose column null values will be replaced
* @param[in] replace_policies Specify the position of replacement values relative to null values,
* one for each column
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
* @param[in] mr Device memory resource used to allocate device memory of the returned column
*
* @return Pair that contains a table with the sorted keys and a table with the result columns
*/
std::pair<std::unique_ptr<table>, std::unique_ptr<table>> replace_nulls(
table_view const& values,
host_span<cudf::replace_policy const> replace_policies,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

private:
Expand Down
6 changes: 6 additions & 0 deletions cpp/include/cudf/strings/attributes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,13 @@ namespace strings {
* Any null string will result in a null entry for that row in the output column.
*
* @param input Strings instance for this operation
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New column with lengths for each string
*/
std::unique_ptr<column> count_characters(
strings_column_view const& input,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
Expand All @@ -59,11 +61,13 @@ std::unique_ptr<column> count_characters(
* Any null string will result in a null entry for that row in the output column.
*
* @param input Strings instance for this operation
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New column with the number of bytes for each string
*/
std::unique_ptr<column> count_bytes(
strings_column_view const& input,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
Expand All @@ -79,11 +83,13 @@ std::unique_ptr<column> count_bytes(
* Any null string is ignored. No null entries will appear in the output column.
*
* @param input Strings instance for this operation
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New INT32 column with code point integer values for each character
*/
std::unique_ptr<column> code_points(
strings_column_view const& input,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/** @} */ // end of strings_apis group
Expand Down
22 changes: 9 additions & 13 deletions cpp/src/groupby/groupby.cu
Original file line number Diff line number Diff line change
Expand Up @@ -191,13 +191,6 @@ void verify_valid_requests(host_span<RequestType const> requests)

} // namespace

// Compute aggregation requests
std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> groupby::aggregate(
host_span<aggregation_request const> requests, rmm::device_async_resource_ref mr)
{
return aggregate(requests, cudf::get_default_stream(), mr);
}

// Compute aggregation requests
std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> groupby::aggregate(
host_span<aggregation_request const> requests,
Expand All @@ -220,7 +213,9 @@ std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> groupby::aggr

// Compute scan requests
std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> groupby::scan(
host_span<scan_request const> requests, rmm::device_async_resource_ref mr)
host_span<scan_request const> requests,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
CUDF_FUNC_RANGE();
CUDF_EXPECTS(
Expand All @@ -233,13 +228,14 @@ std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> groupby::scan

if (_keys.num_rows() == 0) { return std::pair(empty_like(_keys), empty_results(requests)); }

return sort_scan(requests, cudf::get_default_stream(), mr);
return sort_scan(requests, stream, mr);
}

groupby::groups groupby::get_groups(table_view values, rmm::device_async_resource_ref mr)
groupby::groups groupby::get_groups(table_view values,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
CUDF_FUNC_RANGE();
auto const stream = cudf::get_default_stream();
auto grouped_keys = helper().sorted_keys(stream, mr);

auto const& group_offsets = helper().group_offsets(stream);
Expand All @@ -262,6 +258,7 @@ groupby::groups groupby::get_groups(table_view values, rmm::device_async_resourc
std::pair<std::unique_ptr<table>, std::unique_ptr<table>> groupby::replace_nulls(
table_view const& values,
host_span<cudf::replace_policy const> replace_policies,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
CUDF_FUNC_RANGE();
Expand All @@ -271,7 +268,6 @@ std::pair<std::unique_ptr<table>, std::unique_ptr<table>> groupby::replace_nulls
"Size mismatch between num_columns and replace_policies.");

if (values.is_empty()) { return std::pair(empty_like(_keys), empty_like(values)); }
auto const stream = cudf::get_default_stream();

auto const& group_labels = helper().group_labels(stream);
std::vector<std::unique_ptr<column>> results;
Expand Down Expand Up @@ -306,6 +302,7 @@ std::pair<std::unique_ptr<table>, std::unique_ptr<table>> groupby::shift(
table_view const& values,
host_span<size_type const> offsets,
std::vector<std::reference_wrapper<scalar const>> const& fill_values,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
CUDF_FUNC_RANGE();
Expand All @@ -320,7 +317,6 @@ std::pair<std::unique_ptr<table>, std::unique_ptr<table>> groupby::shift(
}),
"values and fill_value should have the same type.",
cudf::data_type_error);
auto stream = cudf::get_default_stream();
std::vector<std::unique_ptr<column>> results;
auto const& group_offsets = helper().group_offsets(stream);
std::transform(
Expand Down
Loading

0 comments on commit b36faec

Please sign in to comment.