Skip to content

Commit 4f13697

Browse files
WillAyd and jorisvandenbossche authored Nov 15, 2024
Backport PR #60312 on branch 2.3.x (TST (string dtype): resolve xfails in pandas/tests/apply + raise TypeError for ArrowArray accumulate) (#60328)
* Backport PR #60312 on branch 2.3.x (TST (string dtype): resolve xfails in pandas/tests/apply + raise TypeError for ArrowArray accumulate)

  (cherry picked from commit fba5f08)

* 2.3 test function compat

---------

Co-authored-by: Joris Van den Bossche <[email protected]>
1 parent e37ffb3 commit 4f13697

File tree

4 files changed

+24
-27
lines changed

4 files changed

+24
-27
lines changed
 

‎pandas/core/arrays/arrow/array.py

+5 -1
Original file line number | Diff line number | Diff line change
@@ -1633,7 +1633,11 @@ def _accumulate(
16331633
else:
16341634
data_to_accum = data_to_accum.cast(pa.int64())
16351635

1636-
result = pyarrow_meth(data_to_accum, skip_nulls=skipna, **kwargs)
1636+
try:
1637+
result = pyarrow_meth(data_to_accum, skip_nulls=skipna, **kwargs)
1638+
except pa.ArrowNotImplementedError as err:
1639+
msg = f"operation '{name}' not supported for dtype '{self.dtype}'"
1640+
raise TypeError(msg) from err
16371641

16381642
if convert_to_int:
16391643
result = result.cast(pa_dtype)

‎pandas/tests/apply/test_invalid_arg.py

+10 -20
Original file line number | Diff line number | Diff line change
@@ -218,18 +218,12 @@ def transform(row):
218218
def test_agg_cython_table_raises_frame(df, func, expected, axis, using_infer_string):
219219
# GH 21224
220220
if using_infer_string:
221-
if df.dtypes.iloc[0].storage == "pyarrow":
222-
import pyarrow as pa
223-
224-
# TODO(infer_string)
225-
# should raise a proper TypeError instead of propagating the pyarrow error
226-
227-
expected = (expected, pa.lib.ArrowNotImplementedError)
228-
else:
229-
expected = (expected, NotImplementedError)
221+
expected = (expected, NotImplementedError)
230222

231223
msg = (
232-
"can't multiply sequence by non-int of type 'str'|has no kernel|cannot perform"
224+
"can't multiply sequence by non-int of type 'str'"
225+
"|cannot perform cumprod with type str" # NotImplementedError python backend
226+
"|operation 'cumprod' not supported for dtype 'str'" # TypeError pyarrow
233227
)
234228
warn = None if isinstance(func, str) else FutureWarning
235229
with pytest.raises(expected, match=msg):
@@ -259,16 +253,12 @@ def test_agg_cython_table_raises_series(series, func, expected, using_infer_stri
259253
if func == "median" or func is np.nanmedian or func is np.median:
260254
msg = r"Cannot convert \['a' 'b' 'c'\] to numeric"
261255

262-
if using_infer_string:
263-
if series.dtype.storage == "pyarrow":
264-
import pyarrow as pa
265-
266-
# TODO(infer_string)
267-
# should raise a proper TypeError instead of propagating the pyarrow error
268-
expected = (expected, pa.lib.ArrowNotImplementedError)
269-
else:
270-
expected = (expected, NotImplementedError)
271-
msg = msg + "|does not support|has no kernel|Cannot perform|cannot perform"
256+
if using_infer_string and func in ("cumprod", np.cumprod, np.nancumprod):
257+
expected = (expected, NotImplementedError)
258+
259+
msg = (
260+
msg + "|does not support|has no kernel|Cannot perform|cannot perform|operation"
261+
)
272262
warn = None if isinstance(func, str) else FutureWarning
273263

274264
with pytest.raises(expected, match=msg):

‎pandas/tests/apply/test_str.py

+8 -5
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,6 @@
44
import numpy as np
55
import pytest
66

7-
from pandas._config import using_string_dtype
8-
97
from pandas.core.dtypes.common import is_number
108

119
from pandas import (
@@ -88,7 +86,6 @@ def test_apply_np_transformer(float_frame, op, how):
8886
tm.assert_frame_equal(result, expected)
8987

9088

91-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
9289
@pytest.mark.parametrize(
9390
"series, func, expected",
9491
chain(
@@ -147,7 +144,6 @@ def test_agg_cython_table_series(series, func, expected):
147144
assert result == expected
148145

149146

150-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
151147
@pytest.mark.parametrize(
152148
"series, func, expected",
153149
chain(
@@ -170,10 +166,17 @@ def test_agg_cython_table_series(series, func, expected):
170166
),
171167
),
172168
)
173-
def test_agg_cython_table_transform_series(series, func, expected):
169+
def test_agg_cython_table_transform_series(request, series, func, expected):
174170
# GH21224
175171
# test transforming functions in
176172
# pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
173+
if series.dtype == "string" and func in ("cumsum", np.cumsum, np.nancumsum):
174+
request.applymarker(
175+
pytest.mark.xfail(
176+
raises=(TypeError, NotImplementedError),
177+
reason="TODO(infer_string) cumsum not yet implemented for string",
178+
)
179+
)
177180
warn = None if isinstance(func, str) else FutureWarning
178181
with tm.assert_produces_warning(warn, match="is currently using Series.*"):
179182
result = series.agg(func)

‎pandas/tests/extension/test_arrow.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -436,7 +436,7 @@ def test_accumulate_series(self, data, all_numeric_accumulations, skipna, reques
436436
request.applymarker(
437437
pytest.mark.xfail(
438438
reason=f"{all_numeric_accumulations} not implemented for {pa_type}",
439-
raises=NotImplementedError,
439+
raises=TypeError,
440440
)
441441
)
442442

0 commit comments

Comments
 (0)