Skip to content

Commit

Permalink
Merge branch 'branch-25.04' into as_proxy_object
Browse files (browse the repository at this point in the history)
  • Loading branch information
galipremsagar authored Feb 27, 2025
2 parents 6ebdc85 + 79d0b75 commit bba133c
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 60 deletions.
28 changes: 16 additions & 12 deletions cpp/tests/rolling/offset_row_window_test.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -43,18 +43,21 @@ auto constexpr null = int32_t{0}; // NULL representation for int32_t;
auto no_nulls_list() { return nulls_at({}); }

struct OffsetRowWindowTest : public cudf::test::BaseFixture {
static ints_column const _keys; // {0, 0, 0, 0, 0, 0, 1, 1, 1, 1};
static ints_column const _values; // {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};

struct rolling_runner {
cudf::window_bounds _preceding, _following;
cudf::size_type _min_periods;
bool _grouped = true;
ints_column const _keys; // {0, 0, 0, 0, 0, 0, 1, 1, 1, 1};
ints_column const _values; // {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};

rolling_runner(cudf::window_bounds const& preceding,
cudf::window_bounds const& following,
cudf::size_type min_periods_ = 1)
: _preceding{preceding}, _following{following}, _min_periods{min_periods_}
: _preceding{preceding},
_following{following},
_min_periods{min_periods_},
_keys{0, 0, 0, 0, 0, 0, 1, 1, 1, 1},
_values{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
{
}

Expand All @@ -80,9 +83,6 @@ struct OffsetRowWindowTest : public cudf::test::BaseFixture {
};
};

ints_column const OffsetRowWindowTest::_keys{0, 0, 0, 0, 0, 0, 1, 1, 1, 1};
ints_column const OffsetRowWindowTest::_values{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};

auto const AGG_COUNT_NON_NULL =
cudf::make_count_aggregation<cudf::rolling_aggregation>(cudf::null_policy::EXCLUDE);
auto const AGG_COUNT_ALL =
Expand All @@ -96,7 +96,8 @@ TEST_F(OffsetRowWindowTest, OffsetRowWindow_Grouped_3_to_Minus_1)
{
auto const preceding = cudf::window_bounds::get(3);
auto const following = cudf::window_bounds::get(-1);
auto run_rolling = rolling_runner{preceding, following}.min_periods(1).grouped(true);
auto run_rolling = rolling_runner{preceding, following};
run_rolling.min_periods(1).grouped(true);

CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_NON_NULL),
ints_column{{0, 1, 2, 2, 2, 2, 0, 1, 2, 2}, nulls_at({0, 6})});
Expand Down Expand Up @@ -136,7 +137,8 @@ TEST_F(OffsetRowWindowTest, OffsetRowWindow_Ungrouped_3_to_Minus_1)
{
auto const preceding = cudf::window_bounds::get(3);
auto const following = cudf::window_bounds::get(-1);
auto run_rolling = rolling_runner{preceding, following}.min_periods(1).grouped(false);
auto run_rolling = rolling_runner{preceding, following};
run_rolling.min_periods(1).grouped(false);

CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_NON_NULL),
ints_column{{0, 1, 2, 2, 2, 2, 2, 2, 2, 2}, nulls_at({0})});
Expand Down Expand Up @@ -176,7 +178,8 @@ TEST_F(OffsetRowWindowTest, OffsetRowWindow_Grouped_0_to_2)
{
auto const preceding = cudf::window_bounds::get(0);
auto const following = cudf::window_bounds::get(2);
auto run_rolling = rolling_runner{preceding, following}.min_periods(1).grouped(true);
auto run_rolling = rolling_runner{preceding, following};
run_rolling.min_periods(1).grouped(true);

CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*run_rolling(*AGG_COUNT_NON_NULL),
Expand Down Expand Up @@ -219,7 +222,8 @@ TEST_F(OffsetRowWindowTest, OffsetRowWindow_Ungrouped_0_to_2)
{
auto const preceding = cudf::window_bounds::get(0);
auto const following = cudf::window_bounds::get(2);
auto run_rolling = rolling_runner{preceding, following}.min_periods(1).grouped(false);
auto run_rolling = rolling_runner{preceding, following};
run_rolling.min_periods(1).grouped(false);

CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_NON_NULL),
ints_column{{2, 2, 2, 2, 2, 2, 2, 2, 1, null}, nulls_at({9})});
Expand Down
29 changes: 10 additions & 19 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from __future__ import annotations

import warnings
from collections import abc
from collections.abc import MutableSequence, Sequence
from functools import cached_property
Expand Down Expand Up @@ -1946,8 +1945,7 @@ def _reduce(
skipna=skipna, min_count=min_count
)
if isinstance(preprocessed, ColumnBase):
dtype = kwargs.pop("dtype", None)
return preprocessed.reduce(op, dtype, **kwargs)
return preprocessed.reduce(op, **kwargs)
return preprocessed

def _can_return_nan(self, skipna: bool | None = None) -> bool:
Expand Down Expand Up @@ -2110,16 +2108,8 @@ def scan(self, scan_op: str, inclusive: bool, **kwargs) -> Self:
)
)

def reduce(self, reduction_op: str, dtype=None, **kwargs) -> ScalarLike:
if dtype is not None:
warnings.warn(
"dtype is deprecated and will be remove in a future release. "
"Cast the result (e.g. .astype) after the operation instead.",
FutureWarning,
)
col_dtype = dtype
else:
col_dtype = self._reduction_result_dtype(reduction_op)
def reduce(self, reduction_op: str, **kwargs) -> ScalarLike:
col_dtype = self._reduction_result_dtype(reduction_op)

# check empty case
if len(self) <= self.null_count:
Expand Down Expand Up @@ -2148,7 +2138,7 @@ def reduce(self, reduction_op: str, dtype=None, **kwargs) -> ScalarLike:
}:
scale = -plc_scalar.type().scale()
# https://docs.microsoft.com/en-us/sql/t-sql/data-types/precision-scale-and-length-transact-sql
p = col_dtype.precision
p = col_dtype.precision # type: ignore[union-attr]
nrows = len(self)
if reduction_op in {"min", "max"}:
new_p = p
Expand All @@ -2162,7 +2152,7 @@ def reduce(self, reduction_op: str, dtype=None, **kwargs) -> ScalarLike:
raise NotImplementedError(
f"{reduction_op} not implemented for decimal types."
)
precision = max(min(new_p, col_dtype.MAX_PRECISION), 0)
precision = max(min(new_p, col_dtype.MAX_PRECISION), 0) # type: ignore[union-attr]
new_dtype = type(col_dtype)(precision, scale)
result_col = result_col.astype(new_dtype)
elif isinstance(col_dtype, IntervalDtype):
Expand Down Expand Up @@ -2322,13 +2312,14 @@ def build_column(
offset=offset,
null_count=null_count,
)
elif dtype.type in (np.object_, np.str_):
elif dtype == CUDF_STRING_DTYPE:
return cudf.core.column.StringColumn(
data=data,
mask=mask,
data=data, # type: ignore[arg-type]
size=size,
dtype=dtype,
mask=mask,
offset=offset,
children=children,
children=children, # type: ignore[arg-type]
null_count=null_count,
)
elif isinstance(dtype, ListDtype):
Expand Down
24 changes: 14 additions & 10 deletions python/cudf/cudf/core/column/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import cudf.core.column.datetime as datetime
from cudf.api.types import is_integer, is_scalar, is_string_dtype
from cudf.core._internals import binaryop
from cudf.core.buffer import acquire_spill_lock
from cudf.core.buffer import Buffer, acquire_spill_lock
from cudf.core.column.column import ColumnBase
from cudf.core.column.methods import ColumnMethods
from cudf.core.scalar import pa_scalar_to_plc_scalar
Expand All @@ -46,7 +46,6 @@
ScalarLike,
SeriesOrIndex,
)
from cudf.core.buffer import Buffer
from cudf.core.column.lists import ListColumn
from cudf.core.column.numerical import NumericalColumn

Expand Down Expand Up @@ -5595,13 +5594,14 @@ class StringColumn(column.ColumnBase):
Parameters
----------
data : Buffer
Buffer of the string data
mask : Buffer
The validity mask
offset : int
Data offset
children : Tuple[Column]
Two non-null columns containing the string data and offsets
respectively
Columns containing the offsets
"""

_start_offset: int | None
Expand Down Expand Up @@ -5629,14 +5629,20 @@ class StringColumn(column.ColumnBase):

def __init__(
self,
data: Buffer | None = None,
data: Buffer,
size: int | None,
dtype: np.dtype,
mask: Buffer | None = None,
size: int | None = None, # TODO: make non-optional
offset: int = 0,
null_count: int | None = None,
children: tuple["column.ColumnBase", ...] = (),
children: tuple[column.ColumnBase] = (), # type: ignore[assignment]
):
dtype = cudf.api.types.dtype("object")
if not isinstance(data, Buffer):
raise ValueError("data must be a Buffer")
if dtype != CUDF_STRING_DTYPE:
raise ValueError(f"dtypy must be {CUDF_STRING_DTYPE}")
if len(children) > 1:
raise ValueError("StringColumn must have at most 1 offset column.")

if size is None:
for child in children:
Expand Down Expand Up @@ -5731,8 +5737,6 @@ def base_size(self) -> int:
# override for string column
@property
def data(self):
if self.base_data is None:
return None
if self._data is None:
if (
self.offset == 0
Expand Down
3 changes: 1 addition & 2 deletions python/cudf/cudf/core/column/timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,14 +452,13 @@ def sum(
self,
skipna: bool | None = None,
min_count: int = 0,
dtype: Dtype | None = None,
) -> pd.Timedelta:
return pd.Timedelta(
# Since sum isn't overridden in Numerical[Base]Column, mypy only
# sees the signature from Reducible (which doesn't have the extra
# parameters from ColumnBase._reduce) so we have to ignore this.
self.astype(np.dtype(np.int64)).sum( # type: ignore
skipna=skipna, min_count=min_count, dtype=dtype
skipna=skipna, min_count=min_count
),
unit=self.time_unit,
).as_unit(self.time_unit)
Expand Down
8 changes: 0 additions & 8 deletions python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1328,7 +1328,6 @@ def sum(
self,
axis=no_default,
skipna=True,
dtype=None,
numeric_only=False,
min_count=0,
**kwargs,
Expand All @@ -1342,8 +1341,6 @@ def sum(
Axis for the function to be applied on.
skipna: bool, default True
Exclude NA/null values when computing the result.
dtype: data type
Data type to cast the result to.
numeric_only : bool, default False
If True, includes only float, int, boolean columns.
If False, will raise error in-case there are
Expand Down Expand Up @@ -1373,7 +1370,6 @@ def sum(
"sum",
axis=axis,
skipna=skipna,
dtype=dtype,
numeric_only=numeric_only,
min_count=min_count,
**kwargs,
Expand All @@ -1384,7 +1380,6 @@ def product(
self,
axis=no_default,
skipna=True,
dtype=None,
numeric_only=False,
min_count=0,
**kwargs,
Expand All @@ -1398,8 +1393,6 @@ def product(
Axis for the function to be applied on.
skipna: bool, default True
Exclude NA/null values when computing the result.
dtype: data type
Data type to cast the result to.
numeric_only : bool, default False
If True, includes only float, int, boolean columns.
If False, will raise error in-case there are
Expand Down Expand Up @@ -1432,7 +1425,6 @@ def product(
"prod" if axis in {1, "columns"} else "product",
axis=axis,
skipna=skipna,
dtype=dtype,
numeric_only=numeric_only,
min_count=min_count,
**kwargs,
Expand Down
8 changes: 0 additions & 8 deletions python/cudf/cudf/tests/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,14 +512,6 @@ def test_reduction_column_multiindex():
assert_eq(result, expected)


@pytest.mark.parametrize("op", ["sum", "product"])
def test_dtype_deprecated(op):
ser = cudf.Series(range(5))
with pytest.warns(FutureWarning):
result = getattr(ser, op)(dtype=np.dtype(np.int8))
assert isinstance(result, np.int8)


@pytest.mark.parametrize(
"columns", [pd.RangeIndex(2), pd.Index([0, 1], dtype="int8")]
)
Expand Down
10 changes: 9 additions & 1 deletion python/cudf/cudf/tests/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,11 @@
import pyarrow as pa
import pytest

import rmm

import cudf
from cudf import concat
from cudf.core.buffer import as_buffer
from cudf.core.column.string import StringColumn
from cudf.core.index import Index
from cudf.testing import assert_eq
Expand Down Expand Up @@ -1202,7 +1205,12 @@ def test_string_misc_name(ps_gs, name):


def test_string_no_children_properties():
empty_col = StringColumn(children=())
empty_col = StringColumn(
as_buffer(rmm.DeviceBuffer(size=0)),
size=0,
dtype=np.dtype("object"),
children=(),
)
assert empty_col.base_children == ()
assert empty_col.base_size == 0

Expand Down

0 comments on commit bba133c

Please sign in to comment.