diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index bb4d20f837c..a6fe04167c7 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -642,6 +642,7 @@ add_library( src/rolling/detail/rolling_fixed_window.cu src/rolling/detail/rolling_variable_window.cu src/rolling/grouped_rolling.cu + src/rolling/range_rolling.cu src/rolling/range_window_bounds.cpp src/rolling/rolling.cu src/round/round.cu diff --git a/cpp/include/cudf/detail/rolling.hpp b/cpp/include/cudf/detail/rolling.hpp index d8d5506969b..8e8126ab64f 100644 --- a/cpp/include/cudf/detail/rolling.hpp +++ b/cpp/include/cudf/detail/rolling.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,6 +29,39 @@ namespace CUDF_EXPORT cudf { namespace detail { +namespace rolling { +/** + * @brief Direction tag for a range-based rolling window. + */ +enum class direction : bool { + PRECEDING, ///< A preceding window. + FOLLOWING, ///< A following window. +}; +/** + * @brief Wrapper for preprocessed group information from sorted group keys. + */ +struct preprocessed_group_info { + rmm::device_uvector const& labels; ///< Mapping from row index to group label + rmm::device_uvector const& offsets; ///< Mapping from group label to row offsets + rmm::device_uvector const& + nulls_per_group; ///< Mapping from group label to null count in the group +}; + +} // namespace rolling + +/** + * @brief Compute the number of nulls in each group. + * + * @param orderby Column with null mask. + * @param offsets Offset array defining the (sorted) groups. + * @param stream CUDA stream used for kernel launches + * @return device_uvector containing the null count per group. + */ +[[nodiscard]] rmm::device_uvector nulls_per_group( + column_view const& orderby, + rmm::device_uvector const& offsets, + rmm::cuda_stream_view stream); + /** * @copydoc std::unique_ptr rolling_window( * column_view const& input, @@ -48,5 +81,33 @@ std::unique_ptr rolling_window(column_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); +/** + * @brief Make a column representing the window offsets for a range-based window + * + * @param orderby Column use to define window ranges. If @p grouping is empty, + * must be sorted. If + * @p grouping is non-empty, must be sorted within each group. As well as + * being sorted, must be sorted consistently with the @p order and @p null_order + * parameters. + * @param grouping Optional preprocessed grouping information. + * @param order The sort order of the @p orderby column. + * @param null_order The sort order of nulls in the @p orderby column. + * @param window Descriptor specifying the window type. + * @param stream CUDA stream used for device memory operations and kernel + * launches. + * @param mr Device memory resource used for allocations. + * @return Column representing the window offsets as requested, suitable for passing to + * `rolling_window`. + */ +[[nodiscard]] std::unique_ptr make_range_window( + column_view const& orderby, + std::optional const& grouping, + rolling::direction direction, + order order, + null_order null_order, + range_window_type window, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + } // namespace detail } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/rolling.hpp b/cpp/include/cudf/rolling.hpp index 6087c025b94..441a698ac06 100644 --- a/cpp/include/cudf/rolling.hpp +++ b/cpp/include/cudf/rolling.hpp @@ -19,10 +19,15 @@ #include #include #include +#include #include #include +#include + #include +#include +#include namespace CUDF_EXPORT cudf { /** @@ -31,6 +36,92 @@ namespace CUDF_EXPORT cudf { * @file */ +/** + * @brief Strongly typed wrapper for bounded closed rolling windows. + * + * The endpoints of this window are included. + */ +struct bounded_closed { + cudf::scalar const& delta_; ///< Delta from the current row in the window. Must be valid, + ///< behaviour is undefined if not. + /** + * @brief Return pointer to the row delta scalar. + * @return pointer to scalar, not null. + */ + cudf::scalar const* delta() const noexcept { return &delta_; } +}; + +/** + * @brief Strongly typed wrapper for bounded open rolling windows. + * + * The endpoints of this window are excluded. + */ +struct bounded_open { + cudf::scalar const& delta_; ///< Delta from the current row in the window. Must be valid, + ///< behaviour is undefined if not. + /** + * @brief Return pointer to the row delta scalar. + * @return pointer to scalar, not null. + */ + cudf::scalar const* delta() const noexcept { return &delta_; } +}; + +/** + * @brief Strongly typed wrapper for unbounded rolling windows. + * + * This window runs to the begin/end of the current row's group. + */ +struct unbounded { + /** + * @brief Return a null row delta + * @return nullptr + */ + constexpr cudf::scalar const* delta() const noexcept { return nullptr; } +}; +/** + * @brief Strongly typed wrapper for current_row rolling windows. + * + * This window contains all rows that are equal to the current row. + */ +struct current_row { + /** + * @brief Return a null row delta + * @return nullptr + */ + constexpr cudf::scalar const* delta() const noexcept { return nullptr; } +}; + +/** + * @brief The type of the range-based rolling window endpoint. + */ +using range_window_type = std::variant; + +/** + * @brief Constructs preceding and following columns given window range specifications. + * + * @param group_keys Possibly empty table of sorted keys defining groups. + * @param orderby Column defining window ranges. Must be sorted. If `group_keys` is non-empty, must + * be sorted groupwise. + * @param order Sort order of the `orderby` column. + * @param null_order Null sort order in the sorted `orderby` column. Apples groupwise if + * `group_keys` is non-empty. + * @param preceding Type of the preceding window. + * @param following Type of the following window. + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return pair of preceding and following columns that define the window bounds for each row, + * suitable for passing to `rolling_window`. + */ +std::pair, std::unique_ptr> make_range_windows( + table_view const& group_keys, + column_view const& orderby, + order order, + null_order null_order, + range_window_type preceding, + range_window_type following, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + /** * @brief Applies a fixed-size rolling window function to the values in a column. * @@ -443,6 +534,33 @@ std::unique_ptr grouped_range_rolling_window( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); +/** + * @brief Apply a grouping-aware range-based rolling window function to a sequence of columns. + * + * @param group_keys Possibly empty table of sorted keys defining groups. + * @param orderby Column defining window ranges. Must be sorted. If `group_keys` is non-empty, must + * be sorted groupwise. + * @param order Sort order of the `orderby` column. + * @param null_order Null sort order in the sorted `orderby` column. + * @param preceding Type of the preceding window. + * @param following Type of the following window. + * @param min_periods Minimum number of observations in the window required to have a value. + * @param requests Vector of pairs of columns and aggregation requests. + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return A table of results, one column per input request. + */ +std::unique_ptr grouped_range_rolling_window( + table_view const& group_keys, + column_view const& orderby, + order order, + null_order null_order, + range_window_type preceding, + range_window_type following, + size_type min_periods, + std::vector> requests, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Applies a variable-size rolling window function to the values in a column. * diff --git a/cpp/src/rolling/detail/range_comparator_utils.cuh b/cpp/src/rolling/detail/range_comparator_utils.cuh deleted file mode 100644 index 009070d4ac2..00000000000 --- a/cpp/src/rolling/detail/range_comparator_utils.cuh +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include - -#include - -#include -#include - -namespace cudf::detail { - -/// For order-by columns of signed types, bounds calculation might cause accidental -/// overflow/underflows. This needs to be detected and handled appropriately -/// for signed and unsigned types. - -/** - * @brief Add `delta` to value, and cap at numeric_limits::max(), for signed types. - */ -template ::is_signed)> -__host__ __device__ T add_safe(T const& value, T const& delta) -{ - if constexpr (std::is_floating_point_v) { - if (std::isinf(value) or std::isnan(value)) { return value; } - } - // delta >= 0. - return (value < 0 || (cuda::std::numeric_limits::max() - value) >= delta) - ? (value + delta) - : cuda::std::numeric_limits::max(); -} - -/** - * @brief Add `delta` to value, and cap at numeric_limits::max(), for unsigned types. - */ -template ::is_signed)> -__host__ __device__ T add_safe(T const& value, T const& delta) -{ - // delta >= 0. - return ((cuda::std::numeric_limits::max() - value) >= delta) - ? (value + delta) - : cuda::std::numeric_limits::max(); -} - -/** - * @brief Subtract `delta` from value, and cap at numeric_limits::lowest(), for signed types. - * - * Note: We use numeric_limits::lowest() instead of min() because for floats, lowest() returns - * the smallest finite value, as opposed to min() which returns the smallest _positive_ value. - */ -template ::is_signed)> -__host__ __device__ T subtract_safe(T const& value, T const& delta) -{ - if constexpr (std::is_floating_point_v) { - if (std::isinf(value) or std::isnan(value)) { return value; } - } - // delta >= 0; - return (value >= 0 || (value - cuda::std::numeric_limits::lowest()) >= delta) - ? (value - delta) - : cuda::std::numeric_limits::lowest(); -} - -/** - * @brief Subtract `delta` from value, and cap at numeric_limits::lowest(), for unsigned types. - * - * Note: We use numeric_limits::lowest() instead of min() because for floats, lowest() returns - * the smallest finite value, as opposed to min() which returns the smallest _positive_ value. - * - * This distinction isn't truly relevant for this overload (because float is signed). - * lowest() is kept for uniformity. - */ -template ::is_signed)> -__host__ __device__ T subtract_safe(T const& value, T const& delta) -{ - // delta >= 0; - return ((value - cuda::std::numeric_limits::lowest()) >= delta) - ? (value - delta) - : cuda::std::numeric_limits::lowest(); -} - -/** - * @brief Comparator for numeric order-by columns, handling floating point NaN values. - * - * This is required for binary search through sorted vectors that contain NaN values. - * With ascending sort, NaN values are stored at the end of the sequence, even - * greater than infinity. - * But thrust::less would have trouble locating it because: - * 1. thrust::less(NaN, 10) returns false - * 2. thrust::less(10, NaN) also returns false - * - * This comparator honors the position of NaN values vis-à-vis non-NaN values. - * - */ -struct nan_aware_less { - template ())> - __host__ __device__ bool operator()(T const& lhs, T const& rhs) const - { - return thrust::less{}(lhs, rhs); - } - - template ())> - __host__ __device__ bool operator()(T const& lhs, T const& rhs) const - { - if (std::isnan(lhs)) { return false; } - return std::isnan(rhs) or thrust::less{}(lhs, rhs); - } -}; - -/** - * @brief Comparator for numeric order-by columns, handling floating point NaN values. - * - * This is required for binary search through sorted vectors that contain NaN values. - * With descending sort, NaN values are stored at the beginning of the sequence, even - * greater than infinity. - * But thrust::greater would have trouble locating it because: - * 1. thrust::greater(NaN, 10) returns false - * 2. thrust::greater(10, NaN) also returns false - * - * This comparator honors the position of NaN values vis-à-vis non-NaN values. - * - */ -struct nan_aware_greater { - template - __host__ __device__ bool operator()(T const& lhs, T const& rhs) const - { - return nan_aware_less{}(rhs, lhs); - } -}; -} // namespace cudf::detail diff --git a/cpp/src/rolling/detail/range_utils.cuh b/cpp/src/rolling/detail/range_utils.cuh new file mode 100644 index 00000000000..0ac074a86c0 --- /dev/null +++ b/cpp/src/rolling/detail/range_utils.cuh @@ -0,0 +1,709 @@ +/* + * Copyright (c) 2024-2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "rolling_utils.cuh" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace CUDF_EXPORT cudf { +namespace detail { +namespace rolling { + +/* + * Spark requires that orderby columns with floating point type have a + * total order on floats where all NaNs compare equal to one-another, + * and greater than any non-nan value. These structs implement that logic. + */ +template +struct less { + __device__ constexpr bool operator()(T const& x, T const& y) const noexcept + { + if constexpr (cuda::std::is_floating_point_v) { + if (cuda::std::isnan(x)) { return false; } + return cuda::std::isnan(y) || x < y; + } else { + return x < y; + } + } +}; + +template +struct less_equal { + __device__ constexpr bool operator()(T const& x, T const& y) const noexcept + { + if constexpr (cuda::std::is_floating_point_v) { + if (cuda::std::isnan(x)) { return cuda::std::isnan(y); } + return cuda::std::isnan(y) || x <= y; + } else { + return x <= y; + } + } +}; +template +struct greater { + __device__ constexpr bool operator()(T const& x, T const& y) const noexcept + { + if constexpr (cuda::std::is_floating_point_v) { + if (cuda::std::isnan(x)) { return !cuda::std::isnan(y); } + return !cuda::std::isnan(y) && x > y; + } else { + return x > y; + } + } +}; + +template +struct greater_equal { + __device__ constexpr bool operator()(T const& x, T const& y) const noexcept + { + if constexpr (cuda::std::is_floating_point_v) { + if (cuda::std::isnan(x)) { return true; } + return !cuda::std::isnan(y) && x >= y; + } else { + return x >= y; + } + } +}; + +/** + * @brief Select the appropriate ordering comparator for the window type. + * + * @tparam T The type being compared. + * @tparam Tag The type of the window. + */ +template +struct comparator_impl { + using op = void; + using rev_op = void; +}; + +template +struct comparator_impl { + using op = less; + using rev_op = greater; +}; +template +struct comparator_impl { + using op = less_equal; + using rev_op = greater_equal; +}; +template +struct comparator_impl { + using op = less; + using rev_op = greater; +}; + +/** + * @brief Select the appropriate ordering comparator for the window type. + * + * @tparam Tag The type of the window. + * @tparam Order The sort order of the column used to define the windows. + */ +template +struct comparator_t { + order const order; + + __device__ constexpr bool operator()(T const& x, T const& y) const noexcept + { + if (order == order::ASCENDING) { + using op = typename comparator_impl::op; + return op{}(x, y); + } else { + using op = typename comparator_impl::rev_op; + return op{}(x, y); + } + } +}; + +/** + * @brief Saturating addition or subtraction of values. + * + * @tparam Op The operation to perform, `cuda::std::plus` or `cuda::std::minus`. + * + * Performs `x + y` (respectively `x - y`) saturating at the numeric + * limits for the type, returning the value and a flag indicating + * whether overflow occurred. + * + * For arithmetic types, the usual arithmetic conversions are _not_ + * applied, and so it is required that `x` and `y` have the same type. + * + * If `x` is floating, then `y` must be neither `inf` nor `nan` (not + * checked), otherwise behaviour is undefined. If `x` is `inf` or + * `nan`, then the result is the same as `x` and overflow does not + * occur. + * + * If `x` is a timestamp type, then `y` must be the matching + * `duration` type. + * + * If `x` is a decimal type, then `y` must be the matching underlying + * rep type, and _must_ have the same scale as `x` (not checked) + * otherwise behaviour is undefined. + * + * All other types are unsupported. + */ +template +struct saturating { + static_assert(cuda::std::is_same_v> || + cuda::std::is_same_v>, + "Only for addition or subtraction"); + template )> + [[nodiscard]] __host__ __device__ constexpr inline cuda::std::pair operator()( + T x, T y) noexcept + { + // Mimicking spark requirements, inf/nan x propagates + if (cuda::std::isinf(x) || cuda::std::isnan(x)) { return {x, false}; } + // Requirement, not checked, y is not inf or nan. + T result = Op{}(x, y); + // If the result is outside the range of finite values it can at + // this point only be +- infinity (we can't generate a nan by + // adding a non-nan/non-inf y to a non-nan/non-inf x). + if (result < cuda::std::numeric_limits::lowest()) { + return {cuda::std::numeric_limits::lowest(), true}; + } else if (result > cuda::std::numeric_limits::max()) { + return {cuda::std::numeric_limits::max(), true}; + } + return {result, false}; + } + + template && cuda::std::is_signed_v)> + [[nodiscard]] __host__ __device__ constexpr inline cuda::std::pair operator()( + T x, T y) noexcept + { + using U = cuda::std::make_unsigned_t; + U ux = static_cast(x); + U uy = static_cast(y); + U result = Op{}(ux, uy); + ux = (ux >> cuda::std::numeric_limits::digits) + + static_cast(cuda::std::numeric_limits::max()); + // Note: the casts here are implementation defined (until C++20) but all + // the platforms we care about do the twos-complement thing. + if constexpr (cuda::std::is_same_v>) { + auto const did_overflow = static_cast((ux ^ uy) | ~(uy ^ result)) >= 0; + return {did_overflow ? ux : result, did_overflow}; + } else { + auto const did_overflow = static_cast((ux ^ uy) & (ux ^ result)) < 0; + return {did_overflow ? ux : result, did_overflow}; + } + } + + template && cuda::std::is_unsigned_v)> + [[nodiscard]] __host__ __device__ constexpr inline cuda::std::pair operator()( + T x, T y) noexcept + { + T result = Op{}(x, y); + if constexpr (cuda::std::is_same_v>) { + // Only way we can overflow is in the positive direction + // in which case result will be less than both of x and y. + // To saturate, we bit-or with (T)-1 in this case + auto const did_overflow = result < x; + return {result | (-static_cast(did_overflow)), did_overflow}; + } else { + auto const did_overflow = result > x; + return {result & (-static_cast(!did_overflow)), did_overflow}; + } + } + + template ())> + [[nodiscard]] __host__ __device__ constexpr inline cuda::std::pair operator()( + T x, typename T::duration y) noexcept + { + using Duration = typename T::duration; + auto const [value, did_overflow] = saturating{}(x.time_since_epoch().count(), y.count()); + return {T{Duration{value}}, did_overflow}; + } + + template ())> + [[nodiscard]] __host__ __device__ constexpr inline cuda::std::pair operator()( + T x, typename T::rep y) noexcept + { + using Rep = typename T::rep; + auto const [value, did_overflow] = saturating{}(x.value(), y); + return {T{numeric::scaled_integer{value, x.scale()}}, did_overflow}; + } +}; + +/** + * @brief Functor to compute distance from current row for `unbounded` windows. + * + * An `unbounded` window runs to the boundary of the current row's group (inclusive). + * + * @tparam Grouping (inferred) type of object defining groups in the orderby column. + * @param groups object defining groups in the orderby column. + * @param direction direction of the window `PRECEDING` or `FOLLOWING`. + */ +template +struct unbounded_distance_functor { + unbounded_distance_functor(Grouping groups, direction direction) + : groups{groups}, direction{direction} + { + } + Grouping const groups; + direction const direction; + [[nodiscard]] __device__ size_type operator()(size_type i) const noexcept + { + auto const row_info = groups.row_info(i); + if (direction == direction::PRECEDING) { + return i - row_info.group_start + 1; + } else { + return row_info.group_end - i - 1; + } + } +}; + +/** + * @brief Functor to compute distance from current row for `current_row` windows. + * + * A `current_row` window contains all rows whose value is the same as the current row. + * + * @tparam Grouping (inferred) type of object defining groups in the orderby column. + * @tparam OrderbyT (inferred) type of the entries in the orderby column + * @param groups object defining groups in the orderby column. + * @param direction direction of the window `PRECEDING` or `FOLLOWING`. + * @param order sort order of the orderby column. + * @param begin iterator to the begin of the orderby column. + */ +template +struct current_row_distance_functor { + current_row_distance_functor(Grouping groups, + direction direction, + order order, + column_device_view::const_iterator begin) + : groups{groups}, direction{direction}, order{order}, begin{begin} + { + } + Grouping const groups; + direction const direction; + order const order; + column_device_view::const_iterator const begin; + + [[nodiscard]] __device__ size_type operator()(size_type i) const noexcept + { + using Comp = comparator_t; + auto const row_info = groups.row_info(i); + if (Grouping::has_nulls && i >= row_info.null_start && i < row_info.null_end) { + return direction == direction::PRECEDING ? i - row_info.null_start + 1 + : row_info.null_end - i - 1; + } + if (direction == direction::PRECEDING) { + return 1 + + thrust::distance( + thrust::lower_bound( + thrust::seq, begin + row_info.non_null_start, begin + i, begin[i], Comp{order}), + begin + i); + } else { + return thrust::distance( + begin + i, + thrust::upper_bound( + thrust::seq, begin + i, begin + row_info.non_null_end, begin[i], Comp{order})) - + 1; + } + } +}; + +/** + * @brief Functor to compute distance from current row for `bounded_open` and `bounded_closed` + * windows. + * + * A `bounded_open` window contains all rows up to but not including the computed endpoint. + * A `bounded_closed` window contains all rows up to and including the computed endpoint. + * + * @tparam WindowType type of window we're computing the distance for. + * @tparam Grouping type of object defining groups in the orderby column. + * @tparam OrderbyT type of elements in the orderby columns. + * @tparam DeltaT type of the elements in the scalar delta (returned + * by `scalar.data()`). + * @param groups object defining groups in the orderby column. + * @param direction direction of the window `PRECEDING` or `FOLLOWING`. + * @param order sort order of the orderby column. + * @param row_delta pointer to row delta on device. + * @param begin iterator to the begin of orderby column on device. + * + * @note Let `x` be the value of the current row and `delta` the provided + * row delta then for bounded windows the endpoints are computed as follows. + * + * | ASCENDING | DESCENDING + * ----------+-----------+----------- + * PRECEDING | x - delta | x + delta + * FOLLOWING | x + delta | x - delta + * + * See `saturating_op` for details of the implementation of saturating addition/subtraction. + */ +template +struct bounded_distance_functor { + static_assert(cuda::std::is_same_v || + cuda::std::is_same_v, + "Invalid WindowType, expecting bounded_open or bounded_closed."); + bounded_distance_functor(Grouping groups, + direction const direction, + order const order, + column_device_view::const_iterator begin, + DeltaT const* row_delta) + : groups{groups}, direction{direction}, order{order}, begin{begin}, row_delta{row_delta} + { + } + Grouping const groups; + direction const direction; + order const order; + column_device_view::const_iterator const begin; + DeltaT const* row_delta; + + /** + * @brief Compute the offset to the end of the window. + * + * @param i The current row index. + * @return Offset to the current row's window endpoint. + */ + [[nodiscard]] __device__ size_type operator()(size_type i) const + { + using Comp = comparator_t; + auto const row_info = groups.row_info(i); + if (Grouping::has_nulls && i >= row_info.null_start && i < row_info.null_end) { + // TODO: If the window is BOUNDED_OPEN, what does it mean for a row to fall in the null + // group? Not that important because only spark allows nulls in the orderby column, and it + // doesn't have BOUNDED_OPEN windows. + return direction == direction::PRECEDING ? i - row_info.null_start + 1 + : row_info.null_end - i - 1; + } + auto const offset_value_overflow = + [subtract = (order == order::ASCENDING && direction == direction::PRECEDING) || + (order == order::DESCENDING && direction == direction::FOLLOWING), + delta = *row_delta, + row_value = begin[i]]() { + return subtract ? saturating>{}(row_value, delta) + : saturating>{}(row_value, delta); + }(); + OrderbyT const offset_value = cuda::std::get<0>(offset_value_overflow); + bool const did_overflow = cuda::std::get<1>(offset_value_overflow); + auto const distance = [preceding = direction == direction::PRECEDING, + current = begin + i, + start = begin + row_info.non_null_start, + end = begin + row_info.non_null_end, + offset_value = offset_value](auto&& cmp) { + if (preceding) { + // Search for first slot we can place the offset value + return 1 + thrust::distance(thrust::lower_bound(thrust::seq, start, end, offset_value, cmp), + current); + } else { + // Search for last slot we can place the offset value + return thrust::distance(current, + thrust::upper_bound(thrust::seq, start, end, offset_value, cmp)) - + 1; + } + }; + if (did_overflow) { + // If computing the offset row value overflowed then we must + // adapt the search comparator. Suppose that the window + // direction is PRECEDING and orderby column is ASCENDING. The + // offset value is computed as row_value - delta. There are two + // cases: + // 1. delta is positive: we overflowed towards -infinity; + // 2. delta is negative, we overflowed towards +infinity. + // For case 1, the saturating minimum value must be + // included in the preceding window bound (because -infinity < + // min). Conversely, for case 2, the saturating maximum value + // must not be included in the preceding window bound (because + // max < +infinity). This can be obtained by picking a + // bounded_closed (respectively bounded_open) window. + // For FOLLOWING windows (with ASCENDING orderby), positive + // delta overflows towards +infinity and negative delta towards + // -infinity, and the same logic applies: we should include for + // positive overflow, but exclude for negative overflow. + // Since, when the orderby columns is ASCENDING, delta is + // treated with a sign flip, the above also applies in that case. + if (*row_delta > DeltaT{0}) { + return distance(comparator_t{order}); + } else { + return distance(comparator_t{order}); + } + } else { + return distance(Comp{order}); + } + } +}; + +/** + * @brief Functor to dispatch computation of clamped range-based rolling window bounds. + * + * @tparam WindowType The tag indicating the type of window being computed + */ +template +struct range_window_clamper { + static_assert(cuda::std::is_same_v || + cuda::std::is_same_v || + cuda::std::is_same_v || + cuda::std::is_same_v, + "Invalid WindowType descriptor"); + template + void expand_unbounded(Grouping grouping, + direction direction, + size_type size, + mutable_column_view& result, + rmm::cuda_stream_view stream) const + { + thrust::copy_n(rmm::exec_policy_nosync(stream), + cudf::detail::make_counting_transform_iterator( + 0, unbounded_distance_functor{grouping, direction}), + size, + result.begin()); + } + + template + void expand_current_row(Grouping grouping, + column_device_view::const_iterator begin, + direction direction, + order order, + size_type size, + mutable_column_view& result, + rmm::cuda_stream_view stream) const + { + thrust::copy_n(rmm::exec_policy_nosync(stream), + cudf::detail::make_counting_transform_iterator( + 0, current_row_distance_functor{grouping, direction, order, begin}), + size, + result.begin()); + } + + template + void expand_bounded(Grouping grouping, + direction direction, + order order, + column_device_view::const_iterator begin, + DeltaT const* row_delta, + size_type size, + mutable_column_view& result, + rmm::cuda_stream_view stream) const + { + thrust::copy_n(rmm::exec_policy_nosync(stream), + cudf::detail::make_counting_transform_iterator( + 0, + bounded_distance_functor{ + grouping, direction, order, begin, row_delta}), + size, + result.begin()); + } + + /** + * @brief Compute the window bounds (possibly grouped) for an orderby column. + * + * @tparam OrderbyT element type of the orderby column (dispatched on) + * @tparam ScalarT Concrete scalar type of the scalar row delta + * @param orderby Column used to define windows. + * @param grouping optional pre-processed group information. + * @param nulls_at_start If the orderby column contains nulls, are they are the start or the end? + * @param row_delta the delta applied to each row, will be null if the window is of type + * `UNBOUNDED` or `CURRENT_ROW`, otherwise non-null. If non-null, must be a finite value or + * behaviour is undefined. + * @param stream CUDA stream used for kernel launches and memory allocations + * @param mr Memory resource used for memory allocations. + * + * @return A column containing the computed endpoints for the given window description. + */ + template > + [[nodiscard]] std::unique_ptr window_bounds( + column_view const& orderby, + direction direction, + order order, + std::optional const& grouping, + bool nulls_at_start, + scalar const* row_delta, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const + { + auto result = make_numeric_column( + data_type(type_to_id()), orderby.size(), mask_state::UNALLOCATED, stream, mr); + auto d_orderby = column_device_view::create(orderby, stream); + auto d_begin = d_orderby->begin(); + auto expand = [&](auto&& grouping) { + auto result_view = result->mutable_view(); + if constexpr (cuda::std::is_same_v) { + expand_unbounded(grouping, direction, orderby.size(), result_view, stream); + } else if constexpr (cuda::std::is_same_v) { + expand_current_row( + grouping, d_begin, direction, order, orderby.size(), result_view, stream); + } else { + auto const* d_row_delta = dynamic_cast(row_delta)->data(); + expand_bounded( + grouping, direction, order, d_begin, d_row_delta, orderby.size(), result_view, stream); + } + }; + + if (grouping.has_value()) { + if (orderby.has_nulls()) { + expand(grouped_with_nulls{nulls_at_start, + grouping->labels.data(), + grouping->offsets.data(), + grouping->nulls_per_group.data()}); + } else { + expand(grouped{grouping->labels.data(), grouping->offsets.data()}); + } + } else { + if (orderby.has_nulls()) { + expand(ungrouped_with_nulls{nulls_at_start, orderby.size(), orderby.null_count()}); + } else { + expand(ungrouped{orderby.size()}); + } + } + return result; + } + + /** + * @brief Is the given type supported as an orderby column. + * + * @tparam The type of the elements of the orderby column. + */ + template + static constexpr bool is_supported() + { + return (cuda::std::is_same_v && + (cuda::std::is_same_v || + cuda::std::is_same_v)) || + cudf::is_numeric_not_bool() || cudf::is_timestamp() || + cudf::is_fixed_point(); + } + + template ())> + [[nodiscard]] std::unique_ptr operator()( + column_view const& orderby, + direction direction, + order order, + std::optional const& grouping, + bool nulls_at_start, + scalar const* row_delta, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const + { + using ScalarT = cudf::scalar_type_t; + CUDF_EXPECTS(!row_delta || cudf::is_duration(row_delta->type()), + "Row delta must be a duration type.", + cudf::data_type_error); + CUDF_EXPECTS(!row_delta || row_delta->type().id() == type_to_id(), + "Row delta must have same the resolution as orderby.", + cudf::data_type_error); + return window_bounds( + orderby, direction, order, grouping, nulls_at_start, row_delta, stream, mr); + } + + template ())> + [[nodiscard]] std::unique_ptr operator()( + column_view const& orderby, + direction direction, + order order, + std::optional const& grouping, + bool nulls_at_start, + scalar const* row_delta, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const + { + CUDF_EXPECTS(!row_delta || (orderby.type().id() == row_delta->type().id()), + "Orderby column and row_delta must both be fixed point.", + cudf::data_type_error); + // TODO: Push this requirement onto the caller and just check for + // equal scales (avoids a kernel launch to rescale) + CUDF_EXPECTS(!row_delta || row_delta->type().scale() >= orderby.type().scale(), + "row_delta must have at least as much scale as orderby column.", + cudf::data_type_error); + if (row_delta && row_delta->type().scale() != orderby.type().scale()) { + auto const value = + static_cast const*>(row_delta)->fixed_point_value(stream); + auto const new_scalar = cudf::fixed_point_scalar{ + value.rescaled(numeric::scale_type{orderby.type().scale()}), true, stream}; + return window_bounds( + orderby, direction, order, grouping, nulls_at_start, &new_scalar, stream, mr); + } + return window_bounds( + orderby, direction, order, grouping, nulls_at_start, row_delta, stream, mr); + } + + template ())> + [[nodiscard]] std::unique_ptr operator()( + column_view const& orderby, + direction direction, + order order, + std::optional const& grouping, + bool nulls_at_start, + scalar const* row_delta, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const + { + CUDF_EXPECTS(!row_delta || cudf::have_same_types(orderby, *row_delta), + "Orderby column and row_delta must have the same type.", + cudf::data_type_error); + return window_bounds( + orderby, direction, order, grouping, nulls_at_start, row_delta, stream, mr); + } + + template && + (cuda::std::is_same_v || + cuda::std::is_same_v))> + [[nodiscard]] std::unique_ptr operator()( + column_view const& orderby, + direction direction, + order order, + std::optional const& grouping, + bool nulls_at_start, + scalar const* row_delta, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const + { + CUDF_EXPECTS(!row_delta, + "Not expecting window range to have value for string-based window calculation"); + return window_bounds( + orderby, direction, order, grouping, nulls_at_start, row_delta, stream, mr); + } + + template ())> + std::unique_ptr operator()(column_view const&, + direction, + order, + std::optional const&, + bool, + scalar const*, + rmm::cuda_stream_view, + rmm::device_async_resource_ref) const + { + CUDF_FAIL("Unsupported rolling window type.", cudf::data_type_error); + } +}; +} // namespace rolling +} // namespace detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/src/rolling/detail/range_window_bounds.hpp b/cpp/src/rolling/detail/range_window_bounds.hpp index 77cb2a8c7f5..62e28cf47c4 100644 --- a/cpp/src/rolling/detail/range_window_bounds.hpp +++ b/cpp/src/rolling/detail/range_window_bounds.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,16 +29,6 @@ constexpr bool is_supported_range_type() (cudf::is_numeric() && !cudf::is_boolean()); } -/// Checks if the specified type is a supported target type, -/// as an order-by column, for comparisons with a range_window_bounds scalar. -template -constexpr bool is_supported_order_by_column_type() -{ - return cudf::is_timestamp() || cudf::is_fixed_point() || - (cudf::is_numeric() && !cudf::is_boolean()) || - std::is_same_v; -} - /// Range-comparable representation type for an orderby column type. /// This is the datatype used for range comparisons. /// 1. For integral orderby column types `T`, comparisons are done as `T`. diff --git a/cpp/src/rolling/detail/rolling.cuh b/cpp/src/rolling/detail/rolling.cuh index 51740de9c86..d60e58ee1cc 100644 --- a/cpp/src/rolling/detail/rolling.cuh +++ b/cpp/src/rolling/detail/rolling.cuh @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -68,23 +69,6 @@ namespace cudf { namespace detail { -/// Helper function to materialize preceding/following offsets. -template -std::unique_ptr expand_to_column(Calculator const& calc, - size_type const& num_rows, - rmm::cuda_stream_view stream) -{ - auto window_column = cudf::make_numeric_column( - cudf::data_type{type_to_id()}, num_rows, cudf::mask_state::UNALLOCATED, stream); - - auto begin = cudf::detail::make_counting_transform_iterator(0, calc); - - thrust::copy_n( - rmm::exec_policy(stream), begin, num_rows, window_column->mutable_view().data()); - - return window_column; -} - /** * @brief Operator for applying a generic (non-specialized) rolling aggregation on a single window. */ diff --git a/cpp/src/rolling/detail/rolling_utils.cuh b/cpp/src/rolling/detail/rolling_utils.cuh index ec6d83fd68a..40b728114e9 100644 --- a/cpp/src/rolling/detail/rolling_utils.cuh +++ b/cpp/src/rolling/detail/rolling_utils.cuh @@ -17,14 +17,28 @@ #pragma once #include +#include #include #include +#include namespace CUDF_EXPORT cudf { namespace detail::rolling { +/** + * @brief Information about group bounds of the current row's group. + */ +struct range_group_info { + size_type const group_start; + size_type const group_end; + size_type const null_start; + size_type const null_end; + size_type const non_null_start; + size_type const non_null_end; +}; + /** * @brief A group descriptor for an ungrouped rolling window. * @@ -34,7 +48,9 @@ namespace detail::rolling { * construction. */ struct ungrouped { - cudf::size_type num_rows; + cudf::size_type const num_rows; + + static constexpr bool has_nulls{false}; [[nodiscard]] __device__ constexpr cudf::size_type label(cudf::size_type) const noexcept { @@ -48,6 +64,17 @@ struct ungrouped { { return num_rows; } + + /** + * @brief Return information about the current row. + * + * @param i The row + * @returns `range_group_info` with the information about the row. + */ + [[nodiscard]] __device__ constexpr range_group_info row_info(size_type i) const noexcept + { + return {0, num_rows, 0, 0, 0, num_rows}; + } }; /** @@ -64,6 +91,8 @@ struct grouped { cudf::size_type const* labels; cudf::size_type const* offsets; + static constexpr bool has_nulls{false}; + [[nodiscard]] __device__ constexpr cudf::size_type label(cudf::size_type i) const noexcept { return labels[i]; @@ -76,17 +105,104 @@ struct grouped { { return offsets[label + 1]; } + + /** + * @copydoc ungrouped::row_info + */ + [[nodiscard]] __device__ constexpr range_group_info row_info(size_type i) const noexcept + { + auto const label = labels[i]; + auto const group_start = offsets[label]; + auto const group_end = offsets[label + 1]; + return {group_start, group_end, group_start, group_start, group_start, group_end}; + } +}; + +/** + * @brief A group descriptor for an ungrouped rolling window with nulls + * + * @param nulls_at_start Are the nulls at the start or end? + * @param num_rows The number of rows to be rolled over. + * @param null_count The number of nulls. + * + * @note This is used for uniformity of interface between grouped and ungrouped + * iterator construction. + */ +struct ungrouped_with_nulls { + bool const nulls_at_start; + cudf::size_type const num_rows; + cudf::size_type const null_count; + + static constexpr bool has_nulls{true}; + /** + * @copydoc ungrouped::row_info + */ + [[nodiscard]] __device__ constexpr range_group_info row_info(size_type i) const noexcept + { + if (nulls_at_start) { + return {0, num_rows, 0, null_count, null_count, num_rows}; + } else { + return {num_rows, null_count, num_rows - null_count, num_rows, 0, num_rows - null_count}; + } + } }; -enum class direction : bool { - PRECEDING, - FOLLOWING, +/** + * @brief A group descriptor for a grouped rolling window with nulls + * + * @param nulls_at_start Are the nulls at the start of each group? + * @param labels The group labels, mapping from input rows to group. + * @param offsets The group offsets providing the endpoints of each group. + * @param null_counts The null counts per group. + * @param orderby The orderby column, sorted groupwise. + * + * @note This is used for uniformity of interface between grouped and ungrouped + * iterator construction. + */ +struct grouped_with_nulls { + bool const nulls_at_start; + // Taking raw pointers here to avoid stealing three registers for the sizes which are never + // needed. + cudf::size_type const* labels; + cudf::size_type const* offsets; + cudf::size_type const* null_counts; + + static constexpr bool has_nulls{true}; + /** + * @copydoc ungrouped::row_info + */ + [[nodiscard]] __device__ constexpr range_group_info row_info(size_type i) const noexcept + { + auto const label = labels[i]; + auto const null_count = null_counts[label]; + auto const group_start = offsets[label]; + auto const group_end = offsets[label + 1]; + if (nulls_at_start) { + return {group_start, + group_end, + group_start, + group_start + null_count, + group_start + null_count, + group_end}; + } else { + return {group_start, + group_end, + group_end - null_count, + group_end, + group_start, + group_end - null_count}; + } + } }; template struct fixed_window_clamper { Grouping groups; cudf::size_type delta; + static_assert(cuda::std::is_same_v || + cuda::std::is_same_v, + "Invalid grouping descriptor"); + [[nodiscard]] __device__ constexpr cudf::size_type operator()(cudf::size_type i) const { auto const label = groups.label(i); diff --git a/cpp/src/rolling/grouped_rolling.cu b/cpp/src/rolling/grouped_rolling.cu index 8ab2ce65124..107570a9b59 100644 --- a/cpp/src/rolling/grouped_rolling.cu +++ b/cpp/src/rolling/grouped_rolling.cu @@ -15,30 +15,31 @@ */ #include "detail/optimized_unbounded_window.hpp" -#include "detail/range_comparator_utils.cuh" #include "detail/range_window_bounds.hpp" #include "detail/rolling.cuh" #include "detail/rolling_jit.hpp" #include "detail/rolling_utils.cuh" +#include "rolling/detail/rolling.hpp" #include #include #include #include #include +#include #include #include #include #include #include +#include + +#include #include -#include -#include -#include -#include -#include -#include + +#include +#include namespace cudf { @@ -217,735 +218,128 @@ std::unique_ptr grouped_rolling_window(table_view const& group_keys, mr); } -namespace { - -/** - * @brief For a specified idx, find the lowest value of the (sorted) orderby column that - * participates in a range-window query. - */ -template -__device__ ElementT compute_lowest_in_window(ElementIter orderby_iter, - size_type idx, - [[maybe_unused]] ElementT delta) -{ - if constexpr (std::is_same_v) { - return orderby_iter[idx]; - } else { - return cudf::detail::subtract_safe(orderby_iter[idx], delta); - } -} - -/** - * @brief For a specified idx, find the highest value of the (sorted) orderby column that - * participates in a range-window query. - */ -template -__device__ ElementT compute_highest_in_window(ElementIter orderby_iter, - size_type idx, - [[maybe_unused]] ElementT delta) -{ - if constexpr (std::is_same_v) { - return orderby_iter[idx]; - } else { - return cudf::detail::add_safe(orderby_iter[idx], delta); - } -} - -/** - * Accessor for values in an order-by column, on the device. - */ -template -struct device_value_accessor { - column_device_view const col; ///< column view of column in device - - /// Checks that the type used to access device values matches the rep-type - /// of the order-by column. - struct is_correct_range_rep { - template /// Order-by type. - constexpr bool operator()() const - { - return std::is_same_v>; - } - }; - - /** - * @brief constructor - * - * @param[in] col_ column device view of cudf column - */ - explicit __device__ device_value_accessor(column_device_view const& col_) : col{col_} - { - // For non-timestamp types, T must match the order-by column's type. - // For timestamp types, T must match the range rep type for the order-by column. - cudf_assert((type_id_matches_device_storage_type(col.type().id()) or - cudf::type_dispatcher(col.type(), is_correct_range_rep{})) && - "data type mismatch when accessing the order-by column"); - } - - /** - * @brief Returns the value of element at index `i` - * @param[in] i index of element - * @return value of element at index `i` - */ - __device__ T operator()(cudf::size_type i) const { return col.element(i); } -}; - -template -using const_device_iterator = - thrust::transform_iterator, thrust::counting_iterator>; - -/// This is a stand-in for the `cudf::column_device_view::begin()`, which is `__host__` only. -/// For range window functions, one might need to iterate over the order-by column, per row. -template ())> -[[nodiscard]] __device__ const_device_iterator begin(cudf::column_device_view const& col) -{ - return const_device_iterator{thrust::make_counting_iterator(0), - device_value_accessor{col}}; -} +namespace detail { -/// Given a single, ungrouped order-by column, return the indices corresponding -/// to the first null element, and (one past) the last null timestamp. -/// The input column is sorted, with all null values clustered either -/// at the beginning of the column or at the end. -/// If no null values are founds, null_begin and null_end are 0. -std::tuple get_null_bounds_for_orderby_column( - column_view const& orderby_column, rmm::cuda_stream_view stream) +std::unique_ptr
grouped_range_rolling_window( + table_view const& group_keys, + column_view const& orderby, + order order, + null_order null_order, + range_window_type preceding, + range_window_type following, + size_type min_periods, + std::vector> requests, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) { - auto const num_rows = orderby_column.size(); - auto const num_nulls = orderby_column.null_count(); - - if (num_nulls == num_rows || num_nulls == 0) { - // Short-circuit: All nulls, or no nulls. - return std::make_tuple(0, num_nulls); + std::vector> results; + results.reserve(requests.size()); + // Can we avoid making the window bounds? + if (std::all_of(requests.begin(), requests.end(), [](auto const& req) { + return std::get<0>(req).is_empty(); + })) { + std::transform( + requests.begin(), requests.end(), std::back_inserter(results), [](auto const& req) { + auto const& [value, agg] = req; + return cudf::detail::empty_output_for_rolling_aggregation(value, agg); + }); + return std::make_unique
(std::move(results)); } - - auto const first_row_is_null = orderby_column.null_count(0, 1, stream) == 1; - - return first_row_is_null ? std::make_tuple(0, num_nulls) - : std::make_tuple(num_rows - num_nulls, num_rows); -} - -/// Range window computation, with -/// 1. no grouping keys specified -/// 2. rows in ASCENDING order. -/// Treat as one single group. -template -std::unique_ptr range_window_ASC(column_view const& input, - column_view const& orderby_column, - T preceding_window, - bool preceding_window_is_unbounded, - T following_window, - bool following_window_is_unbounded, - size_type min_periods, - rolling_aggregation const& aggr, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - auto [h_nulls_begin_idx, h_nulls_end_idx] = - get_null_bounds_for_orderby_column(orderby_column, stream); - auto const p_orderby_device_view = cudf::column_device_view::create(orderby_column, stream); - - auto const preceding_calculator = cuda::proclaim_return_type( - [nulls_begin_idx = h_nulls_begin_idx, - nulls_end_idx = h_nulls_end_idx, - orderby_device_view = *p_orderby_device_view, - preceding_window, - preceding_window_is_unbounded] __device__(size_type idx) -> size_type { - if (preceding_window_is_unbounded) { - return idx + 1; // Technically `idx - 0 + 1`, - // where 0 == Group start, - // and 1 accounts for the current row - } - if (idx >= nulls_begin_idx && idx < nulls_end_idx) { - // Current row is in the null group. - // Must consider beginning of null-group as window start. - return idx - nulls_begin_idx + 1; - } - - auto const d_orderby = begin(orderby_device_view); - // orderby[idx] not null. Binary search the group, excluding null group. - // If nulls_begin_idx == 0, either - // 1. NULLS FIRST ordering: Binary search starts where nulls_end_idx. - // 2. NO NULLS: Binary search starts at 0 (also nulls_end_idx). - // Otherwise, NULLS LAST ordering. Start at 0. - auto const group_start = nulls_begin_idx == 0 ? nulls_end_idx : 0; - auto const lowest_in_window = compute_lowest_in_window(d_orderby, idx, preceding_window); - - return ((d_orderby + idx) - thrust::lower_bound(thrust::seq, - d_orderby + group_start, - d_orderby + idx, - lowest_in_window, - cudf::detail::nan_aware_less{})) + - 1; // Add 1, for `preceding` to account for current row. - }); - - auto const preceding_column = - cudf::detail::expand_to_column(preceding_calculator, input.size(), stream); - - auto const following_calculator = cuda::proclaim_return_type( - [nulls_begin_idx = h_nulls_begin_idx, - nulls_end_idx = h_nulls_end_idx, - num_rows = input.size(), - orderby_device_view = *p_orderby_device_view, - following_window, - following_window_is_unbounded] __device__(size_type idx) -> size_type { - if (following_window_is_unbounded) { return num_rows - idx - 1; } - if (idx >= nulls_begin_idx && idx < nulls_end_idx) { - // Current row is in the null group. - // Window ends at the end of the null group. - return nulls_end_idx - idx - 1; - } - - auto const d_orderby = begin(orderby_device_view); - // orderby[idx] not null. Binary search the group, excluding null group. - // If nulls_begin_idx == 0, either - // 1. NULLS FIRST ordering: Binary search ends at num_rows. - // 2. NO NULLS: Binary search also ends at num_rows. - // Otherwise, NULLS LAST ordering. End at nulls_begin_idx. - - auto const group_end = nulls_begin_idx == 0 ? num_rows : nulls_begin_idx; - auto const highest_in_window = compute_highest_in_window(d_orderby, idx, following_window); - - return (thrust::upper_bound(thrust::seq, - d_orderby + idx, - d_orderby + group_end, - highest_in_window, - cudf::detail::nan_aware_less{}) - - (d_orderby + idx)) - - 1; - }); - - auto const following_column = - cudf::detail::expand_to_column(following_calculator, input.size(), stream); - - return cudf::detail::rolling_window( - input, preceding_column->view(), following_column->view(), min_periods, aggr, stream, mr); -} - -// Given an orderby column grouped as specified in group_offsets, -// return the following two vectors: -// 1. Vector with one entry per group, indicating the offset in the group -// where the null values begin. -// 2. Vector with one entry per group, indicating the offset in the group -// where the null values end. (i.e. 1 past the last null.) -// Each group in the input orderby column must be sorted, -// with null values clustered at either the start or the end of each group. -// If there are no nulls for any given group, (nulls_begin, nulls_end) == (0,0). -std::tuple, rmm::device_uvector> -get_null_bounds_for_orderby_column(column_view const& orderby_column, - cudf::device_span group_offsets, - rmm::cuda_stream_view stream) -{ - // For each group, the null values are clustered at the beginning or the end of the group. - // These nulls cannot participate, except in their own window. - - auto const num_groups = group_offsets.size() - 1; - - if (orderby_column.has_nulls()) { - auto null_start = rmm::device_uvector(num_groups, stream); - auto null_end = rmm::device_uvector(num_groups, stream); - - auto p_orderby_device_view = column_device_view::create(orderby_column, stream); - - // Null timestamps exist. Find null bounds, per group. - thrust::for_each( - rmm::exec_policy(stream), - thrust::make_counting_iterator(static_cast(0)), - thrust::make_counting_iterator(static_cast(num_groups)), - [d_orderby = *p_orderby_device_view, - d_group_offsets = group_offsets.data(), - d_null_start = null_start.data(), - d_null_end = null_end.data()] __device__(auto group_label) { - auto group_start = d_group_offsets[group_label]; - auto group_end = d_group_offsets[group_label + 1]; - auto first_element_is_null = d_orderby.is_null_nocheck(group_start); - auto last_element_is_null = d_orderby.is_null_nocheck(group_end - 1); - if (!first_element_is_null && !last_element_is_null) { - // Short circuit: No nulls. - d_null_start[group_label] = group_start; - d_null_end[group_label] = group_start; - } else if (first_element_is_null && last_element_is_null) { - // Short circuit: All nulls. - d_null_start[group_label] = group_start; - d_null_end[group_label] = group_end; - } else if (first_element_is_null) { - // NULLS FIRST. - d_null_start[group_label] = group_start; - d_null_end[group_label] = *thrust::partition_point( - thrust::seq, - thrust::make_counting_iterator(group_start), - thrust::make_counting_iterator(group_end), - [&d_orderby] __device__(auto i) { return d_orderby.is_null_nocheck(i); }); - } else { - // NULLS LAST. - d_null_end[group_label] = group_end; - d_null_start[group_label] = *thrust::partition_point( - thrust::seq, - thrust::make_counting_iterator(group_start), - thrust::make_counting_iterator(group_end), - [&d_orderby] __device__(auto i) { return d_orderby.is_valid_nocheck(i); }); - } + CUDF_EXPECTS( + std::all_of(requests.begin(), + requests.end(), + [&orderby](auto const& req) { return std::get<0>(req).size() == orderby.size(); }), + "Size mismatch between request columns and orderby column."); + + // Can we do an optimized fully unbounded aggregation in all cases? + if (std::all_of(requests.begin(), requests.end(), [&](auto const& req) { + return can_optimize_unbounded_window(std::holds_alternative(preceding), + std::holds_alternative(following), + min_periods, + std::get<1>(req)); + })) { + std::transform( + requests.begin(), requests.end(), std::back_inserter(results), [&](auto const& req) { + auto const& [value, agg] = req; + return optimized_unbounded_window(group_keys, value, agg, stream, mr); }); - - return std::make_tuple(std::move(null_start), std::move(null_end)); - } else { - // The returned vectors have num_groups items, but the input offsets have num_groups+1 - // Drop the last element using a span - auto const group_offsets_span = - cudf::device_span(group_offsets.data(), num_groups); - - // When there are no nulls, just copy the input group offsets to the output. - return std::make_tuple(cudf::detail::make_device_uvector_async( - group_offsets_span, stream, cudf::get_current_device_resource_ref()), - cudf::detail::make_device_uvector_async( - group_offsets_span, stream, cudf::get_current_device_resource_ref())); + return std::make_unique
(std::move(results)); } -} - -// Range window computation, for orderby column in ASCENDING order. -template -std::unique_ptr range_window_ASC(column_view const& input, - column_view const& orderby_column, - rmm::device_uvector const& group_offsets, - rmm::device_uvector const& group_labels, - T preceding_window, - bool preceding_window_is_unbounded, - T following_window, - bool following_window_is_unbounded, - size_type min_periods, - rolling_aggregation const& aggr, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - auto [null_start, null_end] = - get_null_bounds_for_orderby_column(orderby_column, group_offsets, stream); - auto const p_orderby_device_view = cudf::column_device_view::create(orderby_column, stream); - - auto const preceding_calculator = cuda::proclaim_return_type( - [d_group_offsets = group_offsets.data(), - d_group_labels = group_labels.data(), - orderby_device_view = *p_orderby_device_view, - d_nulls_begin = null_start.data(), - d_nulls_end = null_end.data(), - preceding_window, - preceding_window_is_unbounded] __device__(size_type idx) -> size_type { - auto const group_label = d_group_labels[idx]; - auto const group_start = d_group_offsets[group_label]; - auto const nulls_begin = d_nulls_begin[group_label]; - auto const nulls_end = d_nulls_end[group_label]; - - if (preceding_window_is_unbounded) { return idx - group_start + 1; } - - // If idx lies in the null-range, the window is the null range. - if (idx >= nulls_begin && idx < nulls_end) { - // Current row is in the null group. - // The window starts at the start of the null group. - return idx - nulls_begin + 1; - } - - auto const d_orderby = begin(orderby_device_view); - - // orderby[idx] not null. Search must exclude the null group. - // If nulls_begin == group_start, either of the following is true: - // 1. NULLS FIRST ordering: Search must begin at nulls_end. - // 2. NO NULLS: Search must begin at group_start (which also equals nulls_end.) - // Otherwise, NULLS LAST ordering. Search must start at nulls group_start. - auto const search_start = nulls_begin == group_start ? nulls_end : group_start; - auto const lowest_in_window = compute_lowest_in_window(d_orderby, idx, preceding_window); - - return ((d_orderby + idx) - thrust::lower_bound(thrust::seq, - d_orderby + search_start, - d_orderby + idx, - lowest_in_window, - cudf::detail::nan_aware_less{})) + - 1; // Add 1, for `preceding` to account for current row. - }); - - auto const preceding_column = - cudf::detail::expand_to_column(preceding_calculator, input.size(), stream); - - auto const following_calculator = cuda::proclaim_return_type( - [d_group_offsets = group_offsets.data(), - d_group_labels = group_labels.data(), - orderby_device_view = *p_orderby_device_view, - d_nulls_begin = null_start.data(), - d_nulls_end = null_end.data(), - following_window, - following_window_is_unbounded] __device__(size_type idx) -> size_type { - auto const group_label = d_group_labels[idx]; - auto const group_start = d_group_offsets[group_label]; - auto const group_end = - d_group_offsets[group_label + 1]; // Cannot fall off the end, since offsets - // is capped with `input.size()`. - auto const nulls_begin = d_nulls_begin[group_label]; - auto const nulls_end = d_nulls_end[group_label]; - - if (following_window_is_unbounded) { return (group_end - idx) - 1; } - - // If idx lies in the null-range, the window is the null range. - if (idx >= nulls_begin && idx < nulls_end) { - // Current row is in the null group. - // The window ends at the end of the null group. - return nulls_end - idx - 1; - } - - auto const d_orderby = begin(orderby_device_view); - - // orderby[idx] not null. Search must exclude the null group. - // If nulls_begin == group_start, either of the following is true: - // 1. NULLS FIRST ordering: Search ends at group_end. - // 2. NO NULLS: Search ends at group_end. - // Otherwise, NULLS LAST ordering. Search ends at nulls_begin. - auto const search_end = nulls_begin == group_start ? group_end : nulls_begin; - auto const highest_in_window = compute_highest_in_window(d_orderby, idx, following_window); - - return (thrust::upper_bound(thrust::seq, - d_orderby + idx, - d_orderby + search_end, - highest_in_window, - cudf::detail::nan_aware_less{}) - - (d_orderby + idx)) - - 1; - }); - - auto const following_column = - cudf::detail::expand_to_column(following_calculator, input.size(), stream); - - return cudf::detail::rolling_window( - input, preceding_column->view(), following_column->view(), min_periods, aggr, stream, mr); -} - -/// Range window computation, with -/// 1. no grouping keys specified -/// 2. rows in DESCENDING order. -/// Treat as one single group. -template -std::unique_ptr range_window_DESC(column_view const& input, - column_view const& orderby_column, - T preceding_window, - bool preceding_window_is_unbounded, - T following_window, - bool following_window_is_unbounded, - size_type min_periods, - rolling_aggregation const& aggr, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - auto [h_nulls_begin_idx, h_nulls_end_idx] = - get_null_bounds_for_orderby_column(orderby_column, stream); - auto const p_orderby_device_view = cudf::column_device_view::create(orderby_column, stream); - - auto const preceding_calculator = cuda::proclaim_return_type( - [nulls_begin_idx = h_nulls_begin_idx, - nulls_end_idx = h_nulls_end_idx, - orderby_device_view = *p_orderby_device_view, - preceding_window, - preceding_window_is_unbounded] __device__(size_type idx) -> size_type { - if (preceding_window_is_unbounded) { - return idx + 1; // Technically `idx - 0 + 1`, - // where 0 == Group start, - // and 1 accounts for the current row - } - if (idx >= nulls_begin_idx && idx < nulls_end_idx) { - // Current row is in the null group. - // Must consider beginning of null-group as window start. - return idx - nulls_begin_idx + 1; - } - - auto const d_orderby = begin(orderby_device_view); - // orderby[idx] not null. Binary search the group, excluding null group. - // If nulls_begin_idx == 0, either - // 1. NULLS FIRST ordering: Binary search starts where nulls_end_idx. - // 2. NO NULLS: Binary search starts at 0 (also nulls_end_idx). - // Otherwise, NULLS LAST ordering. Start at 0. - auto const group_start = nulls_begin_idx == 0 ? nulls_end_idx : 0; - auto const highest_in_window = compute_highest_in_window(d_orderby, idx, preceding_window); - - return ((d_orderby + idx) - thrust::lower_bound(thrust::seq, - d_orderby + group_start, - d_orderby + idx, - highest_in_window, - cudf::detail::nan_aware_greater{})) + - 1; // Add 1, for `preceding` to account for current row. - }); - - auto const preceding_column = - cudf::detail::expand_to_column(preceding_calculator, input.size(), stream); - - auto const following_calculator = cuda::proclaim_return_type( - [nulls_begin_idx = h_nulls_begin_idx, - nulls_end_idx = h_nulls_end_idx, - num_rows = input.size(), - orderby_device_view = *p_orderby_device_view, - following_window, - following_window_is_unbounded] __device__(size_type idx) -> size_type { - if (following_window_is_unbounded) { return (num_rows - idx) - 1; } - if (idx >= nulls_begin_idx && idx < nulls_end_idx) { - // Current row is in the null group. - // Window ends at the end of the null group. - return nulls_end_idx - idx - 1; + // OK, need to do the more complicated thing + auto [preceding_column, following_column] = + make_range_windows(group_keys, + orderby, + order, + null_order, + preceding, + following, + stream, + cudf::get_current_device_resource_ref()); + auto const& preceding_view = preceding_column->view(); + auto const& following_view = following_column->view(); + std::transform( + requests.begin(), requests.end(), std::back_inserter(results), [&](auto const& req) { + auto const& [value, agg] = req; + if (can_optimize_unbounded_window(std::holds_alternative(preceding), + std::holds_alternative(following), + min_periods, + agg)) { + return optimized_unbounded_window(group_keys, value, agg, stream, mr); + } else { + return detail::rolling_window( + value, preceding_view, following_view, min_periods, agg, stream, mr); } - - auto const d_orderby = begin(orderby_device_view); - // orderby[idx] not null. Search must exclude null group. - // If nulls_begin_idx = 0, either - // 1. NULLS FIRST ordering: Search ends at num_rows. - // 2. NO NULLS: Search also ends at num_rows. - // Otherwise, NULLS LAST ordering: End at nulls_begin_idx. - - auto const group_end = nulls_begin_idx == 0 ? num_rows : nulls_begin_idx; - auto const lowest_in_window = compute_lowest_in_window(d_orderby, idx, following_window); - - return (thrust::upper_bound(thrust::seq, - d_orderby + idx, - d_orderby + group_end, - lowest_in_window, - cudf::detail::nan_aware_greater{}) - - (d_orderby + idx)) - - 1; }); - - auto const following_column = - cudf::detail::expand_to_column(following_calculator, input.size(), stream); - - return cudf::detail::rolling_window( - input, preceding_column->view(), following_column->view(), min_periods, aggr, stream, mr); + return std::make_unique
(std::move(results)); } -// Range window computation, for rows in DESCENDING order. -template -std::unique_ptr range_window_DESC(column_view const& input, - column_view const& orderby_column, - rmm::device_uvector const& group_offsets, - rmm::device_uvector const& group_labels, - T preceding_window, - bool preceding_window_is_unbounded, - T following_window, - bool following_window_is_unbounded, - size_type min_periods, - rolling_aggregation const& aggr, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) +[[nodiscard]] static null_order deduce_null_order( + column_view const& orderby, + order order, + rmm::device_uvector const& offsets, + rmm::device_uvector const& per_group_nulls, + rmm::cuda_stream_view stream) { - auto [null_start, null_end] = - get_null_bounds_for_orderby_column(orderby_column, group_offsets, stream); - auto const p_orderby_device_view = cudf::column_device_view::create(orderby_column, stream); - - auto const preceding_calculator = cuda::proclaim_return_type( - [d_group_offsets = group_offsets.data(), - d_group_labels = group_labels.data(), - orderby_device_view = *p_orderby_device_view, - d_nulls_begin = null_start.data(), - d_nulls_end = null_end.data(), - preceding_window, - preceding_window_is_unbounded] __device__(size_type idx) -> size_type { - auto const group_label = d_group_labels[idx]; - auto const group_start = d_group_offsets[group_label]; - auto const nulls_begin = d_nulls_begin[group_label]; - auto const nulls_end = d_nulls_end[group_label]; - - if (preceding_window_is_unbounded) { return (idx - group_start) + 1; } - - // If idx lies in the null-range, the window is the null range. - if (idx >= nulls_begin && idx < nulls_end) { - // Current row is in the null group. - // The window starts at the start of the null group. - return idx - nulls_begin + 1; - } - - auto const d_orderby = begin(orderby_device_view); - // orderby[idx] not null. Search must exclude the null group. - // If nulls_begin == group_start, either of the following is true: - // 1. NULLS FIRST ordering: Search must begin at nulls_end. - // 2. NO NULLS: Search must begin at group_start (which also equals nulls_end.) - // Otherwise, NULLS LAST ordering. Search must start at nulls group_start. - auto const search_start = nulls_begin == group_start ? nulls_end : group_start; - auto const highest_in_window = compute_highest_in_window(d_orderby, idx, preceding_window); - - return ((d_orderby + idx) - thrust::lower_bound(thrust::seq, - d_orderby + search_start, - d_orderby + idx, - highest_in_window, - cudf::detail::nan_aware_greater{})) + - 1; // Add 1, for `preceding` to account for current row. - }); - - auto const preceding_column = - cudf::detail::expand_to_column(preceding_calculator, input.size(), stream); - - auto const following_calculator = cuda::proclaim_return_type( - [d_group_offsets = group_offsets.data(), - d_group_labels = group_labels.data(), - orderby_device_view = *p_orderby_device_view, - d_nulls_begin = null_start.data(), - d_nulls_end = null_end.data(), - following_window, - following_window_is_unbounded] __device__(size_type idx) -> size_type { - auto const group_label = d_group_labels[idx]; - auto const group_start = d_group_offsets[group_label]; - auto const group_end = d_group_offsets[group_label + 1]; - auto const nulls_begin = d_nulls_begin[group_label]; - auto const nulls_end = d_nulls_end[group_label]; - - if (following_window_is_unbounded) { return (group_end - idx) - 1; } - - // If idx lies in the null-range, the window is the null range. - if (idx >= nulls_begin && idx < nulls_end) { - // Current row is in the null group. - // The window ends at the end of the null group. - return nulls_end - idx - 1; - } - - auto const d_orderby = begin(orderby_device_view); - // orderby[idx] not null. Search must exclude the null group. - // If nulls_begin == group_start, either of the following is true: - // 1. NULLS FIRST ordering: Search ends at group_end. - // 2. NO NULLS: Search ends at group_end. - // Otherwise, NULLS LAST ordering. Search ends at nulls_begin. - auto const search_end = nulls_begin == group_start ? group_end : nulls_begin; - auto const lowest_in_window = compute_lowest_in_window(d_orderby, idx, following_window); - - return (thrust::upper_bound(thrust::seq, - d_orderby + idx, - d_orderby + search_end, - lowest_in_window, - cudf::detail::nan_aware_greater{}) - - (d_orderby + idx)) - - 1; - }); - - auto const following_column = - cudf::detail::expand_to_column(following_calculator, input.size(), stream); - - if (aggr.kind == aggregation::CUDA || aggr.kind == aggregation::PTX) { - CUDF_FAIL("Ranged rolling window does NOT (yet) support UDF."); + auto d_orderby = column_device_view::create(orderby, stream); + if (order == order::ASCENDING) { + // Sort order is ASCENDING + // null_order was either BEFORE or AFTER + // If at least one group has a null at the beginning and that + // group has more entries than the null count of the group, must + // be nulls at starts of groups (BEFORE),otherwise must be nulls at end (AFTER) + auto it = cudf::detail::make_counting_transform_iterator( + size_type{0}, + cuda::proclaim_return_type( + [d_orderby = *d_orderby, + d_offsets = offsets.data(), + nulls_per_group = per_group_nulls.data()] __device__(size_type i) -> bool { + return nulls_per_group[i] < (d_offsets[i + 1] - d_offsets[i]) && + d_orderby.is_null_nocheck(d_offsets[i]); + })); + auto is_before = thrust::reduce( + rmm::exec_policy_nosync(stream), it, it + offsets.size() - 1, false, thrust::logical_or<>{}); + return is_before ? null_order::BEFORE : null_order::AFTER; } else { - return cudf::detail::rolling_window( - input, preceding_column->view(), following_column->view(), min_periods, aggr, stream, mr); + // Sort order is DESCENDING + // null_order was either BEFORE or AFTER + // If at least one group has a null at the end and that group has + // more entries than the null count of the group must be nulls at ends of groups (BEFORE). + // Otherwise must be nulls at start (AFTER) + auto it = cudf::detail::make_counting_transform_iterator( + size_type{0}, + cuda::proclaim_return_type( + [d_orderby = *d_orderby, + d_offsets = offsets.data(), + nulls_per_group = per_group_nulls.data()] __device__(size_type i) -> bool { + return nulls_per_group[i] < (d_offsets[i + 1] - d_offsets[i]) && + d_orderby.is_null_nocheck(d_offsets[i + 1] - 1); + })); + auto is_before = thrust::reduce( + rmm::exec_policy_nosync(stream), it, it + offsets.size() - 1, false, thrust::logical_or<>{}); + return is_before ? null_order::BEFORE : null_order::AFTER; } } -template -std::unique_ptr grouped_range_rolling_window_impl( - column_view const& input, - column_view const& orderby_column, - cudf::order const& order_of_orderby_column, - rmm::device_uvector const& group_offsets, - rmm::device_uvector const& group_labels, - range_window_bounds const& preceding_window, - range_window_bounds const& following_window, - size_type min_periods, - rolling_aggregation const& aggr, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - auto [preceding_value, following_value] = [&] { - if constexpr (std::is_same_v) { - CUDF_EXPECTS( - preceding_window.is_unbounded() || preceding_window.is_current_row(), - "For STRING order-by column, preceding range has to be either UNBOUNDED or CURRENT ROW."); - CUDF_EXPECTS( - following_window.is_unbounded() || following_window.is_current_row(), - "For STRING order-by column, following range has to be either UNBOUNDED or CURRENT ROW."); - return std::pair{cudf::string_view{}, cudf::string_view{}}; - } else { - return std::pair{ - detail::range_comparable_value(preceding_window, orderby_column.type(), stream), - detail::range_comparable_value(following_window, orderby_column.type(), stream)}; - } - }(); - - if (order_of_orderby_column == cudf::order::ASCENDING) { - return group_offsets.is_empty() ? range_window_ASC(input, - orderby_column, - preceding_value, - preceding_window.is_unbounded(), - following_value, - following_window.is_unbounded(), - min_periods, - aggr, - stream, - mr) - : range_window_ASC(input, - orderby_column, - group_offsets, - group_labels, - preceding_value, - preceding_window.is_unbounded(), - following_value, - following_window.is_unbounded(), - min_periods, - aggr, - stream, - mr); - } else { - return group_offsets.is_empty() ? range_window_DESC(input, - orderby_column, - preceding_value, - preceding_window.is_unbounded(), - following_value, - following_window.is_unbounded(), - min_periods, - aggr, - stream, - mr) - : range_window_DESC(input, - orderby_column, - group_offsets, - group_labels, - preceding_value, - preceding_window.is_unbounded(), - following_value, - following_window.is_unbounded(), - min_periods, - aggr, - stream, - mr); - } -} - -struct dispatch_grouped_range_rolling_window { - template - std::enable_if_t(), - std::unique_ptr> - operator()(Args&&...) const - { - CUDF_FAIL("Unsupported OrderBy column type."); - } - - template - std::enable_if_t(), - std::unique_ptr> - operator()(column_view const& input, - column_view const& orderby_column, - cudf::order const& order_of_orderby_column, - rmm::device_uvector const& group_offsets, - rmm::device_uvector const& group_labels, - range_window_bounds const& preceding_window, - range_window_bounds const& following_window, - size_type min_periods, - rolling_aggregation const& aggr, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) const - { - return grouped_range_rolling_window_impl(input, - orderby_column, - order_of_orderby_column, - group_offsets, - group_labels, - preceding_window, - following_window, - min_periods, - aggr, - stream, - mr); - } -}; - -} // namespace - -namespace detail { - /** * @copydoc std::unique_ptr grouped_range_rolling_window( * table_view const& group_keys, @@ -986,29 +380,71 @@ std::unique_ptr grouped_range_rolling_window(table_view const& group_key return optimized_unbounded_window(group_keys, input, aggr, stream, mr); } - using sort_groupby_helper = cudf::groupby::detail::sort::sort_groupby_helper; - using index_vector = sort_groupby_helper::index_vector; - - index_vector group_offsets(0, stream), group_labels(0, stream); - if (group_keys.num_columns() > 0) { - sort_groupby_helper helper{group_keys, cudf::null_policy::INCLUDE, cudf::sorted::YES, {}}; - group_offsets = index_vector(helper.group_offsets(stream), stream); - group_labels = index_vector(helper.group_labels(stream), stream); - } + auto get_window_type = [](range_window_bounds const& bound) -> range_window_type { + if (bound.is_unbounded()) { + return unbounded{}; + } else if (bound.is_current_row()) { + return current_row{}; + } else { + return bounded_closed{bound.range_scalar()}; + } + }; + auto [preceding_column, following_column] = [&]() { + if (group_keys.num_columns() > 0 && order_by_column.has_nulls()) { + using sort_helper = cudf::groupby::detail::sort::sort_groupby_helper; + sort_helper helper{group_keys, null_policy::INCLUDE, sorted::YES, {}}; + auto const& labels = helper.group_labels(stream); + auto const& offsets = helper.group_offsets(stream); + auto per_group_nulls = order_by_column.has_nulls() + ? detail::nulls_per_group(order_by_column, offsets, stream) + : rmm::device_uvector{0, stream}; + detail::rolling::preprocessed_group_info grouping{labels, offsets, per_group_nulls}; + auto null_order = deduce_null_order(order_by_column, order, offsets, per_group_nulls, stream); + // Don't use make_range_windows since that reconstructs the grouping info. + // This is polyfill code anyway, so can be removed after this public API is deprecated and + // removed. + return std::pair{ + detail::make_range_window(order_by_column, + grouping, + rolling::direction::PRECEDING, + order, + null_order, + get_window_type(preceding), + stream, + cudf::get_current_device_resource_ref()), + detail::make_range_window(order_by_column, + grouping, + rolling::direction::FOLLOWING, + order, + + null_order, + get_window_type(following), + stream, + cudf::get_current_device_resource_ref()), + }; + } else { + auto null_order = + order_by_column.has_nulls() + ? (((order == order::ASCENDING && order_by_column.null_count(0, 1, stream) == 1) || + (order == order::DESCENDING && + order_by_column.null_count( + order_by_column.size() - 1, order_by_column.size(), stream) == 1)) + ? null_order::BEFORE + : null_order::AFTER) + : null_order::BEFORE; + return make_range_windows(group_keys, + order_by_column, + order, + null_order, + get_window_type(preceding), + get_window_type(following), + stream, + cudf::get_current_device_resource_ref()); + } + }(); - return cudf::type_dispatcher(order_by_column.type(), - dispatch_grouped_range_rolling_window{}, - input, - order_by_column, - order, - group_offsets, - group_labels, - preceding, - following, - min_periods, - aggr, - stream, - mr); + return detail::rolling_window( + input, preceding_column->view(), following_column->view(), min_periods, aggr, stream, mr); } } // namespace detail @@ -1049,4 +485,30 @@ std::unique_ptr grouped_range_rolling_window(table_view const& group_key mr); } +std::unique_ptr
grouped_range_rolling_window( + table_view const& group_keys, + column_view const& orderby, + order order, + null_order null_order, + range_window_type preceding, + range_window_type following, + size_type min_periods, + std::vector> requests, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + CUDF_EXPECTS(min_periods > 0, "min_periods must be positive"); + return detail::grouped_range_rolling_window(group_keys, + orderby, + order, + null_order, + preceding, + following, + min_periods, + requests, + stream, + mr); +} + } // namespace cudf diff --git a/cpp/src/rolling/range_rolling.cu b/cpp/src/rolling/range_rolling.cu new file mode 100644 index 00000000000..c3e0a0e94d6 --- /dev/null +++ b/cpp/src/rolling/range_rolling.cu @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "detail/range_utils.cuh" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +namespace CUDF_EXPORT cudf { +namespace detail { + +rmm::device_uvector nulls_per_group(column_view const& orderby, + rmm::device_uvector const& offsets, + rmm::cuda_stream_view stream) +{ + CUDF_FUNC_RANGE(); + auto d_orderby = column_device_view::create(orderby, stream); + auto const num_groups = offsets.size() - 1; + std::size_t bytes{0}; + auto is_null_it = cudf::detail::make_counting_transform_iterator( + cudf::size_type{0}, + cuda::proclaim_return_type( + [orderby = *d_orderby] __device__(size_type i) -> size_type { + return static_cast(orderby.is_null_nocheck(i)); + })); + rmm::device_uvector null_counts{num_groups, stream}; + cub::DeviceSegmentedReduce::Sum(nullptr, + bytes, + is_null_it, + null_counts.begin(), + num_groups, + offsets.begin(), + offsets.begin() + 1, + stream.value()); + auto tmp = rmm::device_buffer(bytes, stream); + cub::DeviceSegmentedReduce::Sum(tmp.data(), + bytes, + is_null_it, + null_counts.begin(), + num_groups, + offsets.begin(), + offsets.begin() + 1, + stream.value()); + return null_counts; +} + +std::unique_ptr make_range_window( + column_view const& orderby, + std::optional const& grouping, + rolling::direction direction, + order order, + null_order null_order, + range_window_type window, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + bool const nulls_at_start = (order == order::ASCENDING && null_order == null_order::BEFORE) || + (order == order::DESCENDING && null_order == null_order::AFTER); + + auto dispatch = [&](auto&& clamper, scalar const* row_delta) { + return type_dispatcher(orderby.type(), + clamper, + orderby, + direction, + order, + grouping, + nulls_at_start, + row_delta, + stream, + mr); + }; + return std::visit( + [&](auto&& window) -> std::unique_ptr { + using WindowType = cuda::std::decay_t; + return dispatch(rolling::range_window_clamper{}, window.delta()); + }, + window); +} + +std::pair, std::unique_ptr> make_range_windows( + table_view const& group_keys, + column_view const& orderby, + order order, + null_order null_order, + range_window_type preceding, + range_window_type following, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + if (group_keys.num_columns() > 0) { + using sort_helper = cudf::groupby::detail::sort::sort_groupby_helper; + sort_helper helper{group_keys, null_policy::INCLUDE, sorted::YES, {}}; + auto const& labels = helper.group_labels(stream); + auto const& offsets = helper.group_offsets(stream); + auto per_group_nulls = orderby.has_nulls() ? nulls_per_group(orderby, offsets, stream) + : rmm::device_uvector{0, stream}; + auto grouping = detail::rolling::preprocessed_group_info{labels, offsets, per_group_nulls}; + return { + make_range_window( + orderby, grouping, rolling::direction::PRECEDING, order, null_order, preceding, stream, mr), + make_range_window(orderby, + grouping, + rolling::direction::FOLLOWING, + order, + null_order, + following, + stream, + mr)}; + } else { + return {make_range_window(orderby, + std::nullopt, + rolling::direction::PRECEDING, + order, + null_order, + preceding, + stream, + mr), + make_range_window(orderby, + std::nullopt, + rolling::direction::FOLLOWING, + order, + null_order, + following, + stream, + mr)}; + } +} +} // namespace detail + +std::pair, std::unique_ptr> make_range_windows( + table_view const& group_keys, + column_view const& orderby, + order order, + null_order null_order, + range_window_type preceding, + range_window_type following, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + CUDF_EXPECTS( + group_keys.num_columns() == 0 || group_keys.num_rows() == orderby.size(), + "If a grouping table is provided, it must have same number of rows as the orderby column."); + return detail::make_range_windows( + group_keys, orderby, order, null_order, preceding, following, stream, mr); +} + +} // namespace CUDF_EXPORT cudf diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index cfc6a0dc425..6e0d787de5d 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -500,6 +500,7 @@ ConfigureTest( rolling/range_comparator_test.cu rolling/range_rolling_window_test.cpp rolling/range_window_bounds_test.cpp + rolling/range_window_type_test.cpp rolling/rolling_test.cpp GPUS 1 PERCENT 70 diff --git a/cpp/tests/rolling/range_comparator_test.cu b/cpp/tests/rolling/range_comparator_test.cu index 96c3fe77cf6..9bc8176d776 100644 --- a/cpp/tests/rolling/range_comparator_test.cu +++ b/cpp/tests/rolling/range_comparator_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,7 +18,10 @@ #include #include -#include +#include + +#include +#include struct RangeComparatorTest : cudf::test::BaseFixture {}; @@ -32,7 +35,7 @@ TYPED_TEST_SUITE(RangeComparatorTypedTest, TestTypes); TYPED_TEST(RangeComparatorTypedTest, TestLessComparator) { - auto const less = cudf::detail::nan_aware_less{}; + auto const less = cudf::detail::rolling::less{}; auto constexpr nine = TypeParam{9}; auto constexpr ten = TypeParam{10}; @@ -67,7 +70,7 @@ TYPED_TEST(RangeComparatorTypedTest, TestLessComparator) TYPED_TEST(RangeComparatorTypedTest, TestGreaterComparator) { - auto const greater = cudf::detail::nan_aware_greater{}; + auto const greater = cudf::detail::rolling::greater{}; auto constexpr nine = TypeParam{9}; auto constexpr ten = TypeParam{10}; @@ -102,46 +105,82 @@ TYPED_TEST(RangeComparatorTypedTest, TestGreaterComparator) TYPED_TEST(RangeComparatorTypedTest, TestAddSafe) { - using T = TypeParam; - EXPECT_EQ(cudf::detail::add_safe(T{3}, T{4}), T{7}); - - if constexpr (cuda::std::numeric_limits::is_signed) { - EXPECT_EQ(cudf::detail::add_safe(T{-3}, T{4}), T{1}); + using T = TypeParam; + using result_t = decltype(cudf::detail::rolling::saturating>{}(T{1}, T{2})); + EXPECT_EQ(cudf::detail::rolling::saturating>{}(T{3}, T{4}), + (result_t{T{7}, false})); + + if constexpr (std::numeric_limits::is_signed) { + EXPECT_EQ(cudf::detail::rolling::saturating>{}(T{-3}, T{4}), + (result_t{T{1}, false})); } - auto constexpr max = cuda::std::numeric_limits::max(); - EXPECT_EQ(cudf::detail::add_safe(T{max - 5}, T{4}), max - 1); - EXPECT_EQ(cudf::detail::add_safe(T{max - 4}, T{4}), max); - EXPECT_EQ(cudf::detail::add_safe(T{max - 3}, T{4}), max); - EXPECT_EQ(cudf::detail::add_safe(max, T{4}), max); - + auto constexpr max = std::numeric_limits::max(); + EXPECT_EQ(cudf::detail::rolling::saturating>{}(T{max - 5}, T{4}), + (result_t{max - 1, false})); + EXPECT_EQ(cudf::detail::rolling::saturating>{}(T{max - 4}, T{4}), + (result_t{max, false})); + EXPECT_EQ(cudf::detail::rolling::saturating>{}(T{max - 3}, T{4}), + (result_t{max, !std::is_floating_point_v})); + EXPECT_EQ(cudf::detail::rolling::saturating>{}(max, T{4}), + (result_t{max, !std::is_floating_point_v})); + EXPECT_EQ(cudf::detail::rolling::saturating>{}(max, T{max - 10}), + (result_t{max, true})); + if constexpr (std::is_signed_v) { + auto constexpr min = std::numeric_limits::lowest(); + EXPECT_EQ(cudf::detail::rolling::saturating>{}(T{-10}, min), + (result_t{min, !std::is_floating_point_v})); + EXPECT_EQ(cudf::detail::rolling::saturating>{}(T{min + 10}, min), + (result_t{min, true})); + } if constexpr (std::is_floating_point_v) { - auto const NaN = std::numeric_limits::quiet_NaN(); - auto const Inf = std::numeric_limits::infinity(); - EXPECT_TRUE(std::isnan(cudf::detail::add_safe(NaN, T{4}))); - EXPECT_EQ(cudf::detail::add_safe(Inf, T{4}), Inf); + auto const NaN = std::numeric_limits::quiet_NaN(); + auto const Inf = std::numeric_limits::infinity(); + auto [val, did_overflow] = cudf::detail::rolling::saturating>{}(NaN, T{4}); + EXPECT_TRUE(std::isnan(val)); + EXPECT_FALSE(did_overflow); + EXPECT_EQ(cudf::detail::rolling::saturating>{}(Inf, T{4}), + (result_t{Inf, false})); } } TYPED_TEST(RangeComparatorTypedTest, TestSubtractSafe) { - using T = TypeParam; - EXPECT_EQ(cudf::detail::subtract_safe(T{4}, T{3}), T{1}); - - if constexpr (cuda::std::numeric_limits::is_signed) { - EXPECT_EQ(cudf::detail::subtract_safe(T{3}, T{4}), T{-1}); + using T = TypeParam; + using result_t = decltype(cudf::detail::rolling::saturating>{}(T{1}, T{2})); + EXPECT_EQ(cudf::detail::rolling::saturating>{}(T{4}, T{3}), + (result_t{T{1}, false})); + + if constexpr (std::numeric_limits::is_signed) { + EXPECT_EQ(cudf::detail::rolling::saturating>{}(T{3}, T{4}), + (result_t{T{-1}, false})); } - auto constexpr min = cuda::std::numeric_limits::lowest(); - EXPECT_EQ(cudf::detail::subtract_safe(T{min + 5}, T{4}), min + 1); - EXPECT_EQ(cudf::detail::subtract_safe(T{min + 4}, T{4}), min); - EXPECT_EQ(cudf::detail::subtract_safe(T{min + 3}, T{4}), min); - EXPECT_EQ(cudf::detail::subtract_safe(min, T{4}), min); + auto constexpr min = std::numeric_limits::lowest(); + auto constexpr max = std::numeric_limits::max(); + EXPECT_EQ(cudf::detail::rolling::saturating>{}(T{min + 5}, T{4}), + (result_t{min + 1, false})); + EXPECT_EQ(cudf::detail::rolling::saturating>{}(T{min + 4}, T{4}), + (result_t{min, false})); + EXPECT_EQ(cudf::detail::rolling::saturating>{}(T{min + 3}, T{4}), + (result_t{min, !std::is_floating_point_v})); + EXPECT_EQ(cudf::detail::rolling::saturating>{}(min, T{4}), + (result_t{min, !std::is_floating_point_v})); + EXPECT_EQ(cudf::detail::rolling::saturating>{}(min, max), + (result_t{min, true})); + + if constexpr (std::is_signed_v) { + EXPECT_EQ(cudf::detail::rolling::saturating>{}(T{max - 1}, min), + (result_t{max, true})); + } if constexpr (std::is_floating_point_v) { - auto const NaN = std::numeric_limits::quiet_NaN(); - auto const Inf = std::numeric_limits::infinity(); - EXPECT_TRUE(std::isnan(cudf::detail::subtract_safe(NaN, T{4}))); - EXPECT_EQ(cudf::detail::subtract_safe(-Inf, T{4}), -Inf); + auto const NaN = std::numeric_limits::quiet_NaN(); + auto const Inf = std::numeric_limits::infinity(); + auto [val, did_overflow] = cudf::detail::rolling::saturating>{}(NaN, T{4}); + EXPECT_TRUE(std::isnan(val)); + EXPECT_FALSE(did_overflow); + EXPECT_EQ(cudf::detail::rolling::saturating>{}(-Inf, T{4}), + (result_t{-Inf, false})); } } diff --git a/cpp/tests/rolling/range_window_type_test.cpp b/cpp/tests/rolling/range_window_type_test.cpp new file mode 100644 index 00000000000..70535e79f1d --- /dev/null +++ b/cpp/tests/rolling/range_window_type_test.cpp @@ -0,0 +1,1559 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +template +struct UngroupedBase : cudf::test::BaseFixture { + static constexpr T min{std::numeric_limits::lowest()}; + static constexpr T max{std::numeric_limits::max()}; + using cw_t = cudf::test::fixed_width_column_wrapper; + cw_t ascending_no_nulls{}; + cw_t ascending_nulls_before{}; + cw_t ascending_nulls_after{}; + cw_t descending_no_nulls{}; + cw_t descending_nulls_before{}; + cw_t descending_nulls_after{}; + UngroupedBase() + : ascending_no_nulls{ + // clang-format off + {min, T{5}, T{5}, T{6}, T{7}, T{9}, T{9}, T{12}, T{13}, T{17}, T{22}, T{22}, max}}, + // clang-format on + ascending_nulls_before{ + {min, T{5}, T{5}, T{6}, T{7}, T{9}, T{9}, T{12}, T{13}, T{17}, T{22}, T{22}, max}, + {false, false, false, false, true, true, true, true, true, true, true, true, true}}, + ascending_nulls_after{ + {min, T{5}, T{5}, T{6}, T{7}, T{9}, T{9}, T{12}, T{13}, T{17}, T{22}, T{22}, max}, + {true, true, true, true, true, true, false, false, false, false, false, false, false}}, + descending_no_nulls{ + {max, T{22}, T{22}, T{17}, T{13}, T{12}, T{9}, T{9}, T{7}, T{6}, T{5}, T{5}, min}}, + descending_nulls_before{ + {max, T{22}, T{22}, T{17}, T{13}, T{12}, T{9}, T{9}, T{7}, T{6}, T{5}, T{5}, min}, + {true, true, true, true, true, true, true, false, false, false, false, false, false}}, + descending_nulls_after{ + {max, T{22}, T{22}, T{17}, T{13}, T{12}, T{9}, T{9}, T{7}, T{6}, T{5}, T{5}, min}, + {false, false, false, false, false, true, true, true, true, true, true, true, true}} + { + } + + void run_test(cudf::column_view const& orderby, + cudf::order order, + cudf::null_order null_order, + cudf::range_window_type preceding, + cudf::range_window_type following, + cudf::column_view const& expect_preceding, + cudf::column_view const& expect_following) + { + auto result = cudf::make_range_windows( + cudf::table_view{}, orderby, order, null_order, preceding, following); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + std::get<0>(result)->view(), expect_preceding, cudf::test::debug_output_level::ALL_ERRORS); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + std::get<1>(result)->view(), expect_following, cudf::test::debug_output_level::ALL_ERRORS); + } +}; +template +struct UngroupedIntegralRangeWindows : UngroupedBase {}; +TYPED_TEST_SUITE(UngroupedIntegralRangeWindows, cudf::test::IntegralTypesNotBool); + +template +struct UngroupedSignedIntegralRangeWindows : UngroupedBase {}; +using SignedIntegralTypes = cudf::test::Types; +TYPED_TEST_SUITE(UngroupedSignedIntegralRangeWindows, SignedIntegralTypes); + +TYPED_TEST(UngroupedIntegralRangeWindows, AscendingNoNullsZeroPrecedingFollowing) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(0); + auto foll = cudf::make_fixed_width_scalar(0); + + this->run_test(this->ascending_no_nulls, + cudf::order::ASCENDING, + cudf::null_order::BEFORE, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{ + {1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1}}, + cudf::test::fixed_width_column_wrapper{ + {0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0}}); + this->run_test(this->ascending_no_nulls, + cudf::order::ASCENDING, + cudf::null_order::BEFORE, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{ + {0, -1, 0, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0}}, + cudf::test::fixed_width_column_wrapper{ + {-1, -1, -2, -1, -1, -1, -2, -1, -1, -1, -1, -2, -1}}); +} + +TYPED_TEST(UngroupedIntegralRangeWindows, AscendingNoNullsPositivePrecedingFollowing) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(2); + auto foll = cudf::make_fixed_width_scalar(1); + + this->run_test(this->ascending_no_nulls, + cudf::order::ASCENDING, + cudf::null_order::BEFORE, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{ + {1, 1, 2, 3, 4, 2, 3, 1, 2, 1, 1, 2, 1}}, + cudf::test::fixed_width_column_wrapper{ + {0, 2, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0}}); + this->run_test(this->ascending_no_nulls, + cudf::order::ASCENDING, + cudf::null_order::BEFORE, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{ + {1, 1, 2, 3, 2, 1, 2, 1, 2, 1, 1, 2, 1}}, + cudf::test::fixed_width_column_wrapper{ + {0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0}}); +} + +TYPED_TEST(UngroupedSignedIntegralRangeWindows, AscendingNoNullsNegativePreceding) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(-1); + auto foll = cudf::make_fixed_width_scalar(2); + + this->run_test(this->ascending_no_nulls, + cudf::order::ASCENDING, + cudf::null_order::BEFORE, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{ + {0, -1, 0, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0}}, + cudf::test::fixed_width_column_wrapper{ + {0, 3, 2, 1, 2, 1, 0, 1, 0, 0, 1, 0, 0}}); + this->run_test(this->ascending_no_nulls, + cudf::order::ASCENDING, + cudf::null_order::BEFORE, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{ + {0, -2, -1, -1, 0, -1, 0, -1, 0, 0, -1, 0, 0}}, + cudf::test::fixed_width_column_wrapper{ + {0, 2, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0}}); +} + +TYPED_TEST(UngroupedSignedIntegralRangeWindows, AscendingNoNullsNegativeFollowing) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(4); + auto foll = cudf::make_fixed_width_scalar(-2); + + this->run_test(this->ascending_no_nulls, + cudf::order::ASCENDING, + cudf::null_order::BEFORE, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{ + {1, 1, 2, 3, 4, 5, 6, 3, 4, 2, 1, 2, 1}}, + cudf::test::fixed_width_column_wrapper{ + {-1, -1, -2, -3, -2, -1, -2, -1, -2, -1, -1, -2, -1}}); + this->run_test(this->ascending_no_nulls, + cudf::order::ASCENDING, + cudf::null_order::BEFORE, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{ + {1, 1, 2, 3, 4, 3, 4, 3, 2, 1, 1, 2, 1}}, + cudf::test::fixed_width_column_wrapper{ + {-1, -1, -2, -3, -4, -2, -3, -1, -2, -1, -1, -2, -1}}); +} + +TYPED_TEST(UngroupedIntegralRangeWindows, DescendingNoNullsZeroPrecedingFollowing) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(0); + auto foll = cudf::make_fixed_width_scalar(0); + + this->run_test(this->descending_no_nulls, + cudf::order::DESCENDING, + cudf::null_order::BEFORE, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{ + {1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1}}, + cudf::test::fixed_width_column_wrapper{ + {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0}}); + this->run_test(this->descending_no_nulls, + cudf::order::DESCENDING, + cudf::null_order::BEFORE, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{ + {0, -1, 0, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0}}, + cudf::test::fixed_width_column_wrapper{ + {-1, -1, -2, -1, -1, -1, -1, -2, -1, -1, -1, -2, -1}}); +} + +TYPED_TEST(UngroupedIntegralRangeWindows, DescendingNoNullsPositivePrecedingFollowing) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(2); + auto foll = cudf::make_fixed_width_scalar(1); + + this->run_test(this->descending_no_nulls, + cudf::order::DESCENDING, + cudf::null_order::BEFORE, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{ + {1, 1, 2, 1, 1, 2, 1, 2, 3, 2, 3, 4, 1}}, + cudf::test::fixed_width_column_wrapper{ + {0, 1, 0, 0, 1, 0, 1, 0, 1, 2, 1, 0, 0}}); + this->run_test(this->descending_no_nulls, + cudf::order::DESCENDING, + cudf::null_order::BEFORE, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{ + {1, 1, 2, 1, 1, 2, 1, 2, 1, 2, 2, 3, 1}}, + cudf::test::fixed_width_column_wrapper{ + {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0}}); +} + +TYPED_TEST(UngroupedSignedIntegralRangeWindows, DescendingNoNullsNegativePreceding) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(-1); + auto foll = cudf::make_fixed_width_scalar(2); + + this->run_test(this->descending_no_nulls, + cudf::order::DESCENDING, + cudf::null_order::BEFORE, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{ + {0, -1, 0, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0}}, + cudf::test::fixed_width_column_wrapper{ + {0, 1, 0, 0, 1, 0, 2, 1, 3, 2, 1, 0, 0}}); + this->run_test(this->descending_no_nulls, + cudf::order::DESCENDING, + cudf::null_order::BEFORE, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{ + {0, -1, 0, 0, -1, 0, -1, 0, -1, -2, -1, 0, 0}}, + cudf::test::fixed_width_column_wrapper{ + {0, 1, 0, 0, 1, 0, 1, 0, 1, 2, 1, 0, 0}}); +} + +TYPED_TEST(UngroupedSignedIntegralRangeWindows, DescendingNoNullsNegativeFollowing) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(4); + auto foll = cudf::make_fixed_width_scalar(-2); + + this->run_test(this->descending_no_nulls, + cudf::order::DESCENDING, + cudf::null_order::BEFORE, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{ + {1, 1, 2, 1, 2, 2, 3, 4, 3, 4, 5, 6, 1}}, + cudf::test::fixed_width_column_wrapper{ + {-1, -1, -2, -1, -1, -2, -1, -2, -1, -2, -2, -3, -1}}); + this->run_test(this->descending_no_nulls, + cudf::order::DESCENDING, + cudf::null_order::BEFORE, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{ + {1, 1, 2, 1, 1, 2, 2, 3, 3, 4, 3, 4, 1}}, + cudf::test::fixed_width_column_wrapper{ + {-1, -1, -2, -1, -1, -2, -1, -2, -3, -2, -3, -4, -1}}); +} + +TYPED_TEST(UngroupedIntegralRangeWindows, AscendingNullsZeroPrecedingFollowing) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(0); + auto foll = cudf::make_fixed_width_scalar(0); + + this->run_test(this->ascending_nulls_before, + cudf::order::ASCENDING, + cudf::null_order::BEFORE, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{ + {1, 2, 3, 4, 1, 1, 2, 1, 1, 1, 1, 2, 1}}, + cudf::test::fixed_width_column_wrapper{ + {3, 2, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0}}); + this->run_test(this->ascending_nulls_before, + cudf::order::ASCENDING, + cudf::null_order::BEFORE, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{ + {1, 2, 3, 4, 0, -1, 0, 0, 0, 0, -1, 0, 0}}, + cudf::test::fixed_width_column_wrapper{ + {3, 2, 1, 0, -1, -1, -2, -1, -1, -1, -1, -2, -1}}); +} + +TYPED_TEST(UngroupedIntegralRangeWindows, AscendingNullsPositivePrecedingFollowing) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(2); + auto foll = cudf::make_fixed_width_scalar(1); + + this->run_test(this->ascending_nulls_before, + cudf::order::ASCENDING, + cudf::null_order::BEFORE, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{ + {1, 2, 3, 4, 1, 2, 3, 1, 2, 1, 1, 2, 1}}, + cudf::test::fixed_width_column_wrapper{ + {3, 2, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0}}); + this->run_test(this->ascending_nulls_before, + cudf::order::ASCENDING, + cudf::null_order::BEFORE, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{ + {1, 2, 3, 4, 1, 1, 2, 1, 2, 1, 1, 2, 1}}, + cudf::test::fixed_width_column_wrapper{ + {3, 2, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0}}); +} + +TYPED_TEST(UngroupedSignedIntegralRangeWindows, AscendingNullsNegativePreceding) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(-1); + auto foll = cudf::make_fixed_width_scalar(2); + + this->run_test(this->ascending_nulls_after, + cudf::order::ASCENDING, + cudf::null_order::AFTER, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{ + {0, -1, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7}}, + cudf::test::fixed_width_column_wrapper{ + {0, 3, 2, 1, 1, 0, 6, 5, 4, 3, 2, 1, 0}}); + this->run_test(this->ascending_nulls_after, + cudf::order::ASCENDING, + cudf::null_order::AFTER, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{ + {0, -2, -1, -1, 0, 0, 1, 2, 3, 4, 5, 6, 7}}, + cudf::test::fixed_width_column_wrapper{ + {0, 2, 1, 1, 0, 0, 6, 5, 4, 3, 2, 1, 0}}); +} + +TYPED_TEST(UngroupedSignedIntegralRangeWindows, AscendingNullsNegativeFollowing) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(4); + auto foll = cudf::make_fixed_width_scalar(-2); + + this->run_test(this->ascending_nulls_after, + cudf::order::ASCENDING, + cudf::null_order::AFTER, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{ + {1, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 6, 7}}, + cudf::test::fixed_width_column_wrapper{ + {-1, -1, -2, -3, -2, -1, 6, 5, 4, 3, 2, 1, 0}}); + this->run_test(this->ascending_nulls_after, + cudf::order::ASCENDING, + cudf::null_order::AFTER, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{ + {1, 1, 2, 3, 4, 3, 1, 2, 3, 4, 5, 6, 7}}, + cudf::test::fixed_width_column_wrapper{ + {-1, -1, -2, -3, -4, -2, 6, 5, 4, 3, 2, 1, 0}}); +} + +TYPED_TEST(UngroupedIntegralRangeWindows, DescendingNullsZeroPrecedingFollowing) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(0); + auto foll = cudf::make_fixed_width_scalar(0); + + this->run_test(this->descending_nulls_before, + cudf::order::DESCENDING, + cudf::null_order::BEFORE, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{ + {1, 1, 2, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6}}, + cudf::test::fixed_width_column_wrapper{ + {0, 1, 0, 0, 0, 0, 0, 5, 4, 3, 2, 1, 0}}); + this->run_test(this->descending_nulls_before, + cudf::order::DESCENDING, + cudf::null_order::BEFORE, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{ + {0, -1, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6}}, + cudf::test::fixed_width_column_wrapper{ + {-1, -1, -2, -1, -1, -1, -1, 5, 4, 3, 2, 1, 0}}); +} + +TYPED_TEST(UngroupedIntegralRangeWindows, DescendingNullsPositivePrecedingFollowing) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(2); + auto foll = cudf::make_fixed_width_scalar(1); + + this->run_test(this->descending_nulls_before, + cudf::order::DESCENDING, + cudf::null_order::BEFORE, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{ + {1, 1, 2, 1, 1, 2, 1, 1, 2, 3, 4, 5, 6}}, + cudf::test::fixed_width_column_wrapper{ + {0, 1, 0, 0, 1, 0, 0, 5, 4, 3, 2, 1, 0}}); + this->run_test(this->descending_nulls_before, + cudf::order::DESCENDING, + cudf::null_order::BEFORE, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{ + {1, 1, 2, 1, 1, 2, 1, 1, 2, 3, 4, 5, 6}}, + cudf::test::fixed_width_column_wrapper{ + {0, 1, 0, 0, 0, 0, 0, 5, 4, 3, 2, 1, 0}}); +} + +TYPED_TEST(UngroupedSignedIntegralRangeWindows, DescendingNullsNegativePreceding) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(-1); + auto foll = cudf::make_fixed_width_scalar(2); + + this->run_test(this->descending_nulls_after, + cudf::order::DESCENDING, + cudf::null_order::AFTER, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{ + {1, 2, 3, 4, 5, 0, -1, 0, 0, 0, -1, 0, 0}}, + cudf::test::fixed_width_column_wrapper{ + {4, 3, 2, 1, 0, 0, 2, 1, 3, 2, 1, 0, 0}}); + this->run_test(this->descending_nulls_after, + cudf::order::DESCENDING, + cudf::null_order::AFTER, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{ + {1, 2, 3, 4, 5, 0, -1, 0, -1, -2, -1, 0, 0}}, + cudf::test::fixed_width_column_wrapper{ + {4, 3, 2, 1, 0, 0, 1, 0, 1, 2, 1, 0, 0}}); +} + +TYPED_TEST(UngroupedSignedIntegralRangeWindows, DescendingNullsNegativeFollowing) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(4); + auto foll = cudf::make_fixed_width_scalar(-2); + + this->run_test(this->descending_nulls_after, + cudf::order::DESCENDING, + cudf::null_order::AFTER, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{ + {1, 2, 3, 4, 5, 1, 2, 3, 3, 4, 5, 6, 1}}, + cudf::test::fixed_width_column_wrapper{ + {4, 3, 2, 1, 0, -1, -1, -2, -1, -2, -2, -3, -1}}); + this->run_test(this->descending_nulls_after, + cudf::order::DESCENDING, + cudf::null_order::AFTER, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{ + {1, 2, 3, 4, 5, 1, 2, 3, 3, 4, 3, 4, 1}}, + cudf::test::fixed_width_column_wrapper{ + {4, 3, 2, 1, 0, -1, -1, -2, -3, -2, -3, -4, -1}}); +} + +template +struct GroupedBase : cudf::test::BaseFixture { + static constexpr T min{std::numeric_limits::lowest()}; + static constexpr T max{std::numeric_limits::max()}; + using cw_t = cudf::test::fixed_width_column_wrapper; + cw_t group_keys{}; + cw_t ascending_no_nulls{}; + cw_t ascending_nulls_before{}; + cw_t ascending_nulls_after{}; + cw_t descending_no_nulls{}; + cw_t descending_nulls_before{}; + cw_t descending_nulls_after{}; + GroupedBase() + // clang-format off + : group_keys{{ + // Group-1 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Group-2 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + // Group-3 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + }}, + ascending_no_nulls{{ + // Group-1 + min, T{5}, T{5}, T{6}, T{7}, T{9}, T{9}, T{12}, T{13}, T{17}, T{22}, T{22}, max, + // Group-2 + min, T{5}, T{5}, T{6}, T{7}, T{9}, T{9}, T{12}, T{13}, T{17}, T{22}, T{22}, max, + // Group-3 + min, T{5}, T{5}, T{6}, T{7}, T{9}, T{9}, T{12}, T{13}, T{17}, T{22}, T{22}, max, + }}, + ascending_nulls_before{{ + // Group-1 + min, T{5}, T{5}, T{6}, T{7}, T{9}, T{9}, T{12}, T{13}, T{17}, T{22}, T{22}, max, + // Group-2 + min, T{5}, T{5}, T{6}, T{7}, T{9}, T{9}, T{12}, T{13}, T{17}, T{22}, T{22}, max, + // Group-3, + min, T{5}, T{5}, T{6}, T{7}, T{9}, T{9}, T{12}, T{13}, T{17}, T{22}, T{22}, max, + }, { + // Group-1 + false, false, false, false, true, true, true, true, true, true, true, true, true, + // Group-2 + true, true, true, true, true, true, true, true, true, true, true, true, true, + // Group-3 + false, false, true, true, true, true, true, true, true, true, true, true, true, + }}, + ascending_nulls_after{{ + // Group-1 + min, T{5}, T{5}, T{6}, T{7}, T{9}, T{9}, T{12}, T{13}, T{17}, T{22}, T{22}, max, + // Group-2 + min, T{5}, T{5}, T{6}, T{7}, T{9}, T{9}, T{12}, T{13}, T{17}, T{22}, T{22}, max, + // Group-3 + min, T{5}, T{5}, T{6}, T{7}, T{9}, T{9}, T{12}, T{13}, T{17}, T{22}, T{22}, max, + }, { + // Group-1 + true, true, true, true, true, true, false, false, false, false, false, false, false, + // Group-2 + true, true, true, true, true, true, true, true, true, true, true, true, true, + // Group-3 + true, true, true, true, true, true, true, true, true, true, false, false, false, + }}, + descending_no_nulls{{ + // Group-1 + max, T{22}, T{22}, T{17}, T{13}, T{12}, T{9}, T{9}, T{7}, T{6}, T{5}, T{5}, min, + // Group-2 + max, T{22}, T{22}, T{17}, T{13}, T{12}, T{9}, T{9}, T{7}, T{6}, T{5}, T{5}, min, + // Group-3 + max, T{22}, T{22}, T{17}, T{13}, T{12}, T{9}, T{9}, T{7}, T{6}, T{5}, T{5}, min, + }}, + descending_nulls_before{{ + // Group-1 + max, T{22}, T{22}, T{17}, T{13}, T{12}, T{9}, T{9}, T{7}, T{6}, T{5}, T{5}, min, + // Group-2 + max, T{22}, T{22}, T{17}, T{13}, T{12}, T{9}, T{9}, T{7}, T{6}, T{5}, T{5}, min, + // Group-3 + max, T{22}, T{22}, T{17}, T{13}, T{12}, T{9}, T{9}, T{7}, T{6}, T{5}, T{5}, min, + }, { + // Group-1 + true, true, true, true, true, true, true, false, false, false, false, false, false, + // Group-2 + true, true, true, true, true, true, true, true, true, true, true, true, true, + // Group-3 + true, true, true, true, true, true, true, true, true, true, false, false, false, + }}, + descending_nulls_after{{ + // Group-1 + max, T{22}, T{22}, T{17}, T{13}, T{12}, T{9}, T{9}, T{7}, T{6}, T{5}, T{5}, min, + // Group-2 + max, T{22}, T{22}, T{17}, T{13}, T{12}, T{9}, T{9}, T{7}, T{6}, T{5}, T{5}, min, + // Group-3 + max, T{22}, T{22}, T{17}, T{13}, T{12}, T{9}, T{9}, T{7}, T{6}, T{5}, T{5}, min, + }, { + // Group-1 + false, false, false, false, false, true, true, true, true, true, true, true, true, + // Group-2 + true, true, true, true, true, true, true, true, true, true, true, true, true, + // Group-3 + false, false, false, false, true, true, true, true, true, true, true, true, true, + }} + // clang-format on + { + } + + void run_test(cudf::column_view const& orderby, + cudf::order order, + cudf::null_order null_order, + cudf::range_window_type preceding, + cudf::range_window_type following, + cudf::column_view const& expect_preceding, + cudf::column_view const& expect_following) + { + auto result = cudf::make_range_windows( + cudf::table_view{{this->group_keys}}, orderby, order, null_order, preceding, following); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + std::get<0>(result)->view(), expect_preceding, cudf::test::debug_output_level::ALL_ERRORS); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + std::get<1>(result)->view(), expect_following, cudf::test::debug_output_level::ALL_ERRORS); + } +}; + +template +struct GroupedIntegralRangeWindows : GroupedBase {}; +TYPED_TEST_SUITE(GroupedIntegralRangeWindows, cudf::test::IntegralTypesNotBool); + +template +struct GroupedSignedIntegralRangeWindows : GroupedBase {}; +TYPED_TEST_SUITE(GroupedSignedIntegralRangeWindows, SignedIntegralTypes); + +TYPED_TEST(GroupedIntegralRangeWindows, AscendingNoNullsZeroPrecedingFollowing) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(0); + auto foll = cudf::make_fixed_width_scalar(0); + + this->run_test(this->ascending_no_nulls, + cudf::order::ASCENDING, + cudf::null_order::BEFORE, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, + // Group-2 + 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, + // Group-3 + 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, + // Group-2 + 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, + // Group-3 + 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, + // clang-format on + }}); + this->run_test(this->ascending_no_nulls, + cudf::order::ASCENDING, + cudf::null_order::BEFORE, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 0, -1, 0, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0, + // Group-2 + 0, -1, 0, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0, + // Group-3 + 0, -1, 0, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + -1, -1, -2, -1, -1, -1, -2, -1, -1, -1, -1, -2, -1, + // Group-2 + -1, -1, -2, -1, -1, -1, -2, -1, -1, -1, -1, -2, -1, + // Group-3 + -1, -1, -2, -1, -1, -1, -2, -1, -1, -1, -1, -2, -1, + // clang-format on + }}); +} + +TYPED_TEST(GroupedIntegralRangeWindows, AscendingNoNullsPositivePrecedingFollowing) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(2); + auto foll = cudf::make_fixed_width_scalar(1); + + this->run_test(this->ascending_no_nulls, + cudf::order::ASCENDING, + cudf::null_order::BEFORE, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 1, 1, 2, 3, 4, 2, 3, 1, 2, 1, 1, 2, 1, + // Group-2 + 1, 1, 2, 3, 4, 2, 3, 1, 2, 1, 1, 2, 1, + // Group-3 + 1, 1, 2, 3, 4, 2, 3, 1, 2, 1, 1, 2, 1, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 0, 2, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, + // Group-2 + 0, 2, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, + // Group-3 + 0, 2, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, + // clang-format on + }}); + this->run_test(this->ascending_no_nulls, + cudf::order::ASCENDING, + cudf::null_order::BEFORE, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 1, 1, 2, 3, 2, 1, 2, 1, 2, 1, 1, 2, 1, + // Group-2 + 1, 1, 2, 3, 2, 1, 2, 1, 2, 1, 1, 2, 1, + // Group-3 + 1, 1, 2, 3, 2, 1, 2, 1, 2, 1, 1, 2, 1, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, + // Group-2 + 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, + // Group-3 + 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, + // clang-format on + }}); +} + +TYPED_TEST(GroupedSignedIntegralRangeWindows, AscendingNoNullsNegativePreceding) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(-1); + auto foll = cudf::make_fixed_width_scalar(2); + + this->run_test(this->ascending_no_nulls, + cudf::order::ASCENDING, + cudf::null_order::BEFORE, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 0, -1, 0, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0, + // Group-2 + 0, -1, 0, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0, + // Group-3 + 0, -1, 0, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 0, 3, 2, 1, 2, 1, 0, 1, 0, 0, 1, 0, 0, + // Group-2 + 0, 3, 2, 1, 2, 1, 0, 1, 0, 0, 1, 0, 0, + // Group-3 + 0, 3, 2, 1, 2, 1, 0, 1, 0, 0, 1, 0, 0, + // clang-format on + }}); + this->run_test(this->ascending_no_nulls, + cudf::order::ASCENDING, + cudf::null_order::BEFORE, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 0, -2, -1, -1, 0, -1, 0, -1, 0, 0, -1, 0, 0, + // Group-2 + 0, -2, -1, -1, 0, -1, 0, -1, 0, 0, -1, 0, 0, + // Group-3 + 0, -2, -1, -1, 0, -1, 0, -1, 0, 0, -1, 0, 0, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 0, 2, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, + // Group-2 + 0, 2, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, + // Group-3 + 0, 2, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, + // clang-format on + }}); +} + +TYPED_TEST(GroupedSignedIntegralRangeWindows, AscendingNoNullsNegativeFollowing) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(4); + auto foll = cudf::make_fixed_width_scalar(-2); + + this->run_test(this->ascending_no_nulls, + cudf::order::ASCENDING, + cudf::null_order::BEFORE, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 1, 1, 2, 3, 4, 5, 6, 3, 4, 2, 1, 2, 1, + // Group-2 + 1, 1, 2, 3, 4, 5, 6, 3, 4, 2, 1, 2, 1, + // Group-3 + 1, 1, 2, 3, 4, 5, 6, 3, 4, 2, 1, 2, 1, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + -1, -1, -2, -3, -2, -1, -2, -1, -2, -1, -1, -2, -1, + // Group-2 + -1, -1, -2, -3, -2, -1, -2, -1, -2, -1, -1, -2, -1, + // Group-3 + -1, -1, -2, -3, -2, -1, -2, -1, -2, -1, -1, -2, -1, + // clang-format on + }}); + this->run_test(this->ascending_no_nulls, + cudf::order::ASCENDING, + cudf::null_order::BEFORE, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 1, 1, 2, 3, 4, 3, 4, 3, 2, 1, 1, 2, 1, + // Group-2 + 1, 1, 2, 3, 4, 3, 4, 3, 2, 1, 1, 2, 1, + // Group-3 + 1, 1, 2, 3, 4, 3, 4, 3, 2, 1, 1, 2, 1, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + -1, -1, -2, -3, -4, -2, -3, -1, -2, -1, -1, -2, -1, + // Group-2 + -1, -1, -2, -3, -4, -2, -3, -1, -2, -1, -1, -2, -1, + // Group-3 + -1, -1, -2, -3, -4, -2, -3, -1, -2, -1, -1, -2, -1, + // clang-format on + }}); +} + +TYPED_TEST(GroupedIntegralRangeWindows, DescendingNoNullsZeroPrecedingFollowing) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(0); + auto foll = cudf::make_fixed_width_scalar(0); + + this->run_test(this->descending_no_nulls, + cudf::order::DESCENDING, + cudf::null_order::BEFORE, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, + // Group-2 + 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, + // Group-3 + 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, + // Group-2 + 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, + // Group-3 + 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, + // clang-format on + }}); + this->run_test(this->descending_no_nulls, + cudf::order::DESCENDING, + cudf::null_order::BEFORE, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 0, -1, 0, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, + // Group-2 + 0, -1, 0, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, + // Group-3 + 0, -1, 0, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + -1, -1, -2, -1, -1, -1, -1, -2, -1, -1, -1, -2, -1, + // Group-2 + -1, -1, -2, -1, -1, -1, -1, -2, -1, -1, -1, -2, -1, + // Group-3 + -1, -1, -2, -1, -1, -1, -1, -2, -1, -1, -1, -2, -1, + // clang-format on + }}); +} + +TYPED_TEST(GroupedIntegralRangeWindows, DescendingNoNullsPositivePrecedingFollowing) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(2); + auto foll = cudf::make_fixed_width_scalar(1); + + this->run_test(this->descending_no_nulls, + cudf::order::DESCENDING, + cudf::null_order::BEFORE, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 1, 1, 2, 1, 1, 2, 1, 2, 3, 2, 3, 4, 1, + // Group-2 + 1, 1, 2, 1, 1, 2, 1, 2, 3, 2, 3, 4, 1, + // Group-3 + 1, 1, 2, 1, 1, 2, 1, 2, 3, 2, 3, 4, 1, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 0, 1, 0, 0, 1, 0, 1, 0, 1, 2, 1, 0, 0, + // Group-2 + 0, 1, 0, 0, 1, 0, 1, 0, 1, 2, 1, 0, 0, + // Group-3 + 0, 1, 0, 0, 1, 0, 1, 0, 1, 2, 1, 0, 0, + // clang-format on + }}); + this->run_test(this->descending_no_nulls, + cudf::order::DESCENDING, + cudf::null_order::BEFORE, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 1, 1, 2, 1, 1, 2, 1, 2, 1, 2, 2, 3, 1, + // Group-2 + 1, 1, 2, 1, 1, 2, 1, 2, 1, 2, 2, 3, 1, + // Group-3 + 1, 1, 2, 1, 1, 2, 1, 2, 1, 2, 2, 3, 1, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, + // Group-2 + 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, + // Group-3 + 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, + // clang-format on + }}); +} + +TYPED_TEST(GroupedSignedIntegralRangeWindows, DescendingNoNullsNegativePreceding) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(-1); + auto foll = cudf::make_fixed_width_scalar(2); + + this->run_test(this->descending_no_nulls, + cudf::order::DESCENDING, + cudf::null_order::BEFORE, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 0, -1, 0, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, + // Group-2 + 0, -1, 0, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, + // Group-3 + 0, -1, 0, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 0, 1, 0, 0, 1, 0, 2, 1, 3, 2, 1, 0, 0, + // Group-2 + 0, 1, 0, 0, 1, 0, 2, 1, 3, 2, 1, 0, 0, + // Group-3 + 0, 1, 0, 0, 1, 0, 2, 1, 3, 2, 1, 0, 0, + // clang-format on + }}); + this->run_test(this->descending_no_nulls, + cudf::order::DESCENDING, + cudf::null_order::BEFORE, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 0, -1, 0, 0, -1, 0, -1, 0, -1, -2, -1, 0, 0, + // Group-2 + 0, -1, 0, 0, -1, 0, -1, 0, -1, -2, -1, 0, 0, + // Group-3 + 0, -1, 0, 0, -1, 0, -1, 0, -1, -2, -1, 0, 0, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 0, 1, 0, 0, 1, 0, 1, 0, 1, 2, 1, 0, 0, + // Group-2 + 0, 1, 0, 0, 1, 0, 1, 0, 1, 2, 1, 0, 0, + // Group-3 + 0, 1, 0, 0, 1, 0, 1, 0, 1, 2, 1, 0, 0, + // clang-format on + }}); +} + +TYPED_TEST(GroupedSignedIntegralRangeWindows, DescendingNoNullsNegativeFollowing) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(4); + auto foll = cudf::make_fixed_width_scalar(-2); + + this->run_test(this->descending_no_nulls, + cudf::order::DESCENDING, + cudf::null_order::BEFORE, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 1, 1, 2, 1, 2, 2, 3, 4, 3, 4, 5, 6, 1, + // Group-2 + 1, 1, 2, 1, 2, 2, 3, 4, 3, 4, 5, 6, 1, + // Group-3 + 1, 1, 2, 1, 2, 2, 3, 4, 3, 4, 5, 6, 1, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + -1, -1, -2, -1, -1, -2, -1, -2, -1, -2, -2, -3, -1, + // Group-2 + -1, -1, -2, -1, -1, -2, -1, -2, -1, -2, -2, -3, -1, + // Group-3 + -1, -1, -2, -1, -1, -2, -1, -2, -1, -2, -2, -3, -1, + // clang-format on + }}); + this->run_test(this->descending_no_nulls, + cudf::order::DESCENDING, + cudf::null_order::BEFORE, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 1, 1, 2, 1, 1, 2, 2, 3, 3, 4, 3, 4, 1, + // Group-2 + 1, 1, 2, 1, 1, 2, 2, 3, 3, 4, 3, 4, 1, + // Group-3 + 1, 1, 2, 1, 1, 2, 2, 3, 3, 4, 3, 4, 1, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + -1, -1, -2, -1, -1, -2, -1, -2, -3, -2, -3, -4, -1, + // Group-2 + -1, -1, -2, -1, -1, -2, -1, -2, -3, -2, -3, -4, -1, + // Group-3 + -1, -1, -2, -1, -1, -2, -1, -2, -3, -2, -3, -4, -1, + // clang-format on + }}); +} + +TYPED_TEST(GroupedIntegralRangeWindows, AscendingNullsZeroPrecedingFollowing) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(0); + auto foll = cudf::make_fixed_width_scalar(0); + + this->run_test(this->ascending_nulls_before, + cudf::order::ASCENDING, + cudf::null_order::BEFORE, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 1, 2, 3, 4, 1, 1, 2, 1, 1, 1, 1, 2, 1, + // Group-2 + 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, + // Group-3 + 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 3, 2, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, + // Group-2 + 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, + // Group-3 + 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, + // clang-format on + }}); + this->run_test(this->ascending_nulls_before, + cudf::order::ASCENDING, + cudf::null_order::BEFORE, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 1, 2, 3, 4, 0, -1, 0, 0, 0, 0, -1, 0, 0, + // Group-2 + 0, -1, 0, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0, + // Group-3 + 1, 2, 0, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 3, 2, 1, 0, -1, -1, -2, -1, -1, -1, -1, -2, -1, + // Group-2 + -1, -1, -2, -1, -1, -1, -2, -1, -1, -1, -1, -2, -1, + // Group-3 + 1, 0, -1, -1, -1, -1, -2, -1, -1, -1, -1, -2, -1, + // clang-format on + }}); +} + +TYPED_TEST(GroupedIntegralRangeWindows, AscendingNullsPositivePrecedingFollowing) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(2); + auto foll = cudf::make_fixed_width_scalar(1); + + this->run_test(this->ascending_nulls_before, + cudf::order::ASCENDING, + cudf::null_order::BEFORE, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 1, 2, 3, 4, 1, 2, 3, 1, 2, 1, 1, 2, 1, + // Group-2 + 1, 1, 2, 3, 4, 2, 3, 1, 2, 1, 1, 2, 1, + // Group-3 + 1, 2, 1, 2, 3, 2, 3, 1, 2, 1, 1, 2, 1, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 3, 2, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, + // Group-2 + 0, 2, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, + // Group-3 + 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, + // clang-format on + }}); + this->run_test(this->ascending_nulls_before, + cudf::order::ASCENDING, + cudf::null_order::BEFORE, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 1, 2, 3, 4, 1, 1, 2, 1, 2, 1, 1, 2, 1, + // Group-2 + 1, 1, 2, 3, 2, 1, 2, 1, 2, 1, 1, 2, 1, + // Group-3 + 1, 2, 1, 2, 2, 1, 2, 1, 2, 1, 1, 2, 1 + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 3, 2, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, + // Group-2 + 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, + // Group-3 + 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, + // clang-format on + }}); +} + +TYPED_TEST(GroupedSignedIntegralRangeWindows, AscendingNullsNegativePreceding) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(-1); + auto foll = cudf::make_fixed_width_scalar(2); + + this->run_test(this->ascending_nulls_after, + cudf::order::ASCENDING, + cudf::null_order::AFTER, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 0, -1, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, + // Group-2 + 0, -1, 0, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0, + // Group-3 + 0, -1, 0, 0, 0, -1, 0, 0, 0, 0, 1, 2, 3, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 0, 3, 2, 1, 1, 0, 6, 5, 4, 3, 2, 1, 0, + // Group-2 + 0, 3, 2, 1, 2, 1, 0, 1, 0, 0, 1, 0, 0, + // Group-3 + 0, 3, 2, 1, 2, 1, 0, 1, 0, 0, 2, 1, 0, + // clang-format on + }}); + this->run_test(this->ascending_nulls_after, + cudf::order::ASCENDING, + cudf::null_order::AFTER, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 0, -2, -1, -1, 0, 0, 1, 2, 3, 4, 5, 6, 7, + // Group-2 + 0, -2, -1, -1, 0, -1, 0, -1, 0, 0, -1, 0, 0, + // Group-3 + 0, -2, -1, -1, 0, -1, 0, -1, 0, 0, 1, 2, 3, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 0, 2, 1, 1, 0, 0, 6, 5, 4, 3, 2, 1, 0, + // Group-2 + 0, 2, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, + // Group-3 + 0, 2, 1, 1, 0, 1, 0, 1, 0, 0, 2, 1, 0, + // clang-format on + }}); +} + +TYPED_TEST(GroupedSignedIntegralRangeWindows, AscendingNullsNegativeFollowing) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(4); + auto foll = cudf::make_fixed_width_scalar(-2); + + this->run_test(this->ascending_nulls_after, + cudf::order::ASCENDING, + cudf::null_order::AFTER, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 1, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 6, 7, + // Group-2 + 1, 1, 2, 3, 4, 5, 6, 3, 4, 2, 1, 2, 1, + // Group-3 + 1, 1, 2, 3, 4, 5, 6, 3, 4, 2, 1, 2, 3, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + -1, -1, -2, -3, -2, -1, 6, 5, 4, 3, 2, 1, 0, + // Group-2 + -1, -1, -2, -3, -2, -1, -2, -1, -2, -1, -1, -2, -1, + // Group-3 + -1, -1, -2, -3, -2, -1, -2, -1, -2, -1, 2, 1, 0, + // clang-format on + }}); + this->run_test(this->ascending_nulls_after, + cudf::order::ASCENDING, + cudf::null_order::AFTER, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 1, 1, 2, 3, 4, 3, 1, 2, 3, 4, 5, 6, 7, + // Group-2 + 1, 1, 2, 3, 4, 3, 4, 3, 2, 1, 1, 2, 1, + // Group-3 + 1, 1, 2, 3, 4, 3, 4, 3, 2, 1, 1, 2, 3, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + -1, -1, -2, -3, -4, -2, 6, 5, 4, 3, 2, 1, 0, + // Group-2 + -1, -1, -2, -3, -4, -2, -3, -1, -2, -1, -1, -2, -1, + // Group-3 + -1, -1, -2, -3, -4, -2, -3, -1, -2, -1, 2, 1, 0, + // clang-format on + }}); +} + +TYPED_TEST(GroupedIntegralRangeWindows, DescendingNullsZeroPrecedingFollowing) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(0); + auto foll = cudf::make_fixed_width_scalar(0); + + this->run_test(this->descending_nulls_before, + cudf::order::DESCENDING, + cudf::null_order::BEFORE, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 1, 1, 2, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, + // Group-2 + 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, + // Group-3 + 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 3, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 0, 1, 0, 0, 0, 0, 0, 5, 4, 3, 2, 1, 0, + // Group-2 + 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, + // Group-3 + 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 0, + // clang-format on + }}); + this->run_test(this->descending_nulls_before, + cudf::order::DESCENDING, + cudf::null_order::BEFORE, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 0, -1, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, + // Group-2 + 0, -1, 0, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, + // Group-3 + 0, -1, 0, 0, 0, 0, -1, 0, 0, 0, 1, 2, 3, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + -1, -1, -2, -1, -1, -1, -1, 5, 4, 3, 2, 1, 0, + // Group-2 + -1, -1, -2, -1, -1, -1, -1, -2, -1, -1, -1, -2, -1, + // Group-3 + -1, -1, -2, -1, -1, -1, -1, -2, -1, -1, 2, 1, 0, + // clang-format on + }}); +} + +TYPED_TEST(GroupedIntegralRangeWindows, DescendingNullsPositivePrecedingFollowing) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(2); + auto foll = cudf::make_fixed_width_scalar(1); + + this->run_test(this->descending_nulls_before, + cudf::order::DESCENDING, + cudf::null_order::BEFORE, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 1, 1, 2, 1, 1, 2, 1, 1, 2, 3, 4, 5, 6, + // Group-2 + 1, 1, 2, 1, 1, 2, 1, 2, 3, 2, 3, 4, 1, + // Group-3 + 1, 1, 2, 1, 1, 2, 1, 2, 3, 2, 1, 2, 3, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 0, 1, 0, 0, 1, 0, 0, 5, 4, 3, 2, 1, 0, + // Group-2 + 0, 1, 0, 0, 1, 0, 1, 0, 1, 2, 1, 0, 0, + // Group-3 + 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 2, 1, 0, + // clang-format on + }}); + this->run_test(this->descending_nulls_before, + cudf::order::DESCENDING, + cudf::null_order::BEFORE, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 1, 1, 2, 1, 1, 2, 1, 1, 2, 3, 4, 5, 6, + // Group-2 + 1, 1, 2, 1, 1, 2, 1, 2, 1, 2, 2, 3, 1, + // Group-3 + 1, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 3, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 0, 1, 0, 0, 0, 0, 0, 5, 4, 3, 2, 1, 0, + // Group-2 + 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, + // Group-3 + 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 2, 1, 0, + // clang-format on + }}); +} + +TYPED_TEST(GroupedSignedIntegralRangeWindows, DescendingNullsNegativePreceding) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(-1); + auto foll = cudf::make_fixed_width_scalar(2); + + this->run_test(this->descending_nulls_after, + cudf::order::DESCENDING, + cudf::null_order::AFTER, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 1, 2, 3, 4, 5, 0, -1, 0, 0, 0, -1, 0, 0, + // Group-2 + 0, -1, 0, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, + // Group-3 + 1, 2, 3, 4, 0, 0, -1, 0, 0, 0, -1, 0, 0, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 4, 3, 2, 1, 0, 0, 2, 1, 3, 2, 1, 0, 0, + // Group-2 + 0, 1, 0, 0, 1, 0, 2, 1, 3, 2, 1, 0, 0, + // Group-3 + 3, 2, 1, 0, 1, 0, 2, 1, 3, 2, 1, 0, 0, + // clang-format on + }}); + this->run_test(this->descending_nulls_after, + cudf::order::DESCENDING, + cudf::null_order::AFTER, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 1, 2, 3, 4, 5, 0, -1, 0, -1, -2, -1, 0, 0, + // Group-2 + 0, -1, 0, 0, -1, 0, -1, 0, -1, -2, -1, 0, 0, + // Group-3 + 1, 2, 3, 4, -1, 0, -1, 0, -1, -2, -1, 0, 0, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 4, 3, 2, 1, 0, 0, 1, 0, 1, 2, 1, 0, 0, + // Group-2 + 0, 1, 0, 0, 1, 0, 1, 0, 1, 2, 1, 0, 0, + // Group-3 + 3, 2, 1, 0, 1, 0, 1, 0, 1, 2, 1, 0, 0, + // clang-format on + }}); +} + +TYPED_TEST(GroupedSignedIntegralRangeWindows, DescendingNullsNegativeFollowing) +{ + using T = TypeParam; + auto prec = cudf::make_fixed_width_scalar(4); + auto foll = cudf::make_fixed_width_scalar(-2); + + this->run_test(this->descending_nulls_after, + cudf::order::DESCENDING, + cudf::null_order::AFTER, + cudf::bounded_closed{*prec}, + cudf::bounded_closed{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 1, 2, 3, 4, 5, 1, 2, 3, 3, 4, 5, 6, 1, + // Group-2 + 1, 1, 2, 1, 2, 2, 3, 4, 3, 4, 5, 6, 1, + // Group-3 + 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 5, 6, 1, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 4, 3, 2, 1, 0, -1, -1, -2, -1, -2, -2, -3, -1, + // Group-2 + -1, -1, -2, -1, -1, -2, -1, -2, -1, -2, -2, -3, -1, + // Group-3 + 3, 2, 1, 0, -1, -2, -1, -2, -1, -2, -2, -3, -1, + // clang-format on + }}); + this->run_test(this->descending_nulls_after, + cudf::order::DESCENDING, + cudf::null_order::AFTER, + cudf::bounded_open{*prec}, + cudf::bounded_open{*foll}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 1, 2, 3, 4, 5, 1, 2, 3, 3, 4, 3, 4, 1, + // Group-2 + 1, 1, 2, 1, 1, 2, 2, 3, 3, 4, 3, 4, 1, + // Group-3 + 1, 2, 3, 4, 1, 2, 2, 3, 3, 4, 3, 4, 1, + // clang-format on + }}, + cudf::test::fixed_width_column_wrapper{{ + // clang-format off + // Group-1 + 4, 3, 2, 1, 0, -1, -1, -2, -3, -2, -3, -4, -1, + // Group-2 + -1, -1, -2, -1, -1, -2, -1, -2, -3, -2, -3, -4, -1, + // Group-3 + 3, 2, 1, 0, -1, -2, -1, -2, -3, -2, -3, -4, -1, + // clang-format on + }}); +}