Skip to content

Commit fba5f08

Browse files
TST (string dtype): resolve xfails in pandas/tests/apply + raise TypeError for ArrowArray accumulate (#60312)
1 parent b26b1d2 commit fba5f08

File tree

4 files changed

+24 -27 lines changed

pandas/core/arrays/arrow/array.py

+5-1
Original file line number | Diff line number | Diff line change
@@ -1644,7 +1644,11 @@ def _accumulate(
16441644
else:
16451645
data_to_accum = data_to_accum.cast(pa.int64())
16461646

1647-
result = pyarrow_meth(data_to_accum, skip_nulls=skipna, **kwargs)
1647+
try:
1648+
result = pyarrow_meth(data_to_accum, skip_nulls=skipna, **kwargs)
1649+
except pa.ArrowNotImplementedError as err:
1650+
msg = f"operation '{name}' not supported for dtype '{self.dtype}'"
1651+
raise TypeError(msg) from err
16481652

16491653
if convert_to_int:
16501654
result = result.cast(pa_dtype)

pandas/tests/apply/test_invalid_arg.py

+10-20
Original file line number | Diff line number | Diff line change
@@ -218,18 +218,12 @@ def transform(row):
218218
def test_agg_cython_table_raises_frame(df, func, expected, axis, using_infer_string):
219219
# GH 21224
220220
if using_infer_string:
221-
if df.dtypes.iloc[0].storage == "pyarrow":
222-
import pyarrow as pa
223-
224-
# TODO(infer_string)
225-
# should raise a proper TypeError instead of propagating the pyarrow error
226-
227-
expected = (expected, pa.lib.ArrowNotImplementedError)
228-
else:
229-
expected = (expected, NotImplementedError)
221+
expected = (expected, NotImplementedError)
230222

231223
msg = (
232-
"can't multiply sequence by non-int of type 'str'|has no kernel|cannot perform"
224+
"can't multiply sequence by non-int of type 'str'"
225+
"|cannot perform cumprod with type str" # NotImplementedError python backend
226+
"|operation 'cumprod' not supported for dtype 'str'" # TypeError pyarrow
233227
)
234228
warn = None if isinstance(func, str) else FutureWarning
235229
with pytest.raises(expected, match=msg):
@@ -259,16 +253,12 @@ def test_agg_cython_table_raises_series(series, func, expected, using_infer_stri
259253
if func == "median" or func is np.nanmedian or func is np.median:
260254
msg = r"Cannot convert \['a' 'b' 'c'\] to numeric"
261255

262-
if using_infer_string:
263-
if series.dtype.storage == "pyarrow":
264-
import pyarrow as pa
265-
266-
# TODO(infer_string)
267-
# should raise a proper TypeError instead of propagating the pyarrow error
268-
expected = (expected, pa.lib.ArrowNotImplementedError)
269-
else:
270-
expected = (expected, NotImplementedError)
271-
msg = msg + "|does not support|has no kernel|Cannot perform|cannot perform"
256+
if using_infer_string and func == "cumprod":
257+
expected = (expected, NotImplementedError)
258+
259+
msg = (
260+
msg + "|does not support|has no kernel|Cannot perform|cannot perform|operation"
261+
)
272262
warn = None if isinstance(func, str) else FutureWarning
273263

274264
with pytest.raises(expected, match=msg):

pandas/tests/apply/test_str.py

+8-5
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,6 @@
44
import numpy as np
55
import pytest
66

7-
from pandas._config import using_string_dtype
8-
97
from pandas.compat import WASM
108

119
from pandas.core.dtypes.common import is_number
@@ -81,7 +79,6 @@ def test_apply_np_transformer(float_frame, op, how):
8179
tm.assert_frame_equal(result, expected)
8280

8381

84-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
8582
@pytest.mark.parametrize(
8683
"series, func, expected",
8784
chain(
@@ -140,7 +137,6 @@ def test_agg_cython_table_series(series, func, expected):
140137
assert result == expected
141138

142139

143-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
144140
@pytest.mark.parametrize(
145141
"series, func, expected",
146142
chain(
@@ -163,10 +159,17 @@ def test_agg_cython_table_series(series, func, expected):
163159
),
164160
),
165161
)
166-
def test_agg_cython_table_transform_series(series, func, expected):
162+
def test_agg_cython_table_transform_series(request, series, func, expected):
167163
# GH21224
168164
# test transforming functions in
169165
# pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
166+
if series.dtype == "string" and func == "cumsum":
167+
request.applymarker(
168+
pytest.mark.xfail(
169+
raises=(TypeError, NotImplementedError),
170+
reason="TODO(infer_string) cumsum not yet implemented for string",
171+
)
172+
)
170173
warn = None if isinstance(func, str) else FutureWarning
171174
with tm.assert_produces_warning(warn, match="is currently using Series.*"):
172175
result = series.agg(func)

pandas/tests/extension/test_arrow.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -441,7 +441,7 @@ def test_accumulate_series(self, data, all_numeric_accumulations, skipna, reques
441441
request.applymarker(
442442
pytest.mark.xfail(
443443
reason=f"{all_numeric_accumulations} not implemented for {pa_type}",
444-
raises=NotImplementedError,
444+
raises=TypeError,
445445
)
446446
)
447447

0 commit comments

Comments
 (0)