Skip to content

Commit 3f7bc81

Browse files
TST (string dtype): resolve xfails in pandas/tests/series (#60233)
* TST (string dtype): resolve xfails in pandas/tests/series
* a few more
* link TODO to issue
* fix for non-future mode
1 parent f9d2e50 commit 3f7bc81

File tree

7 files changed

+68
-56
lines changed

7 files changed

+68
-56
lines changed

pandas/tests/series/accessors/test_dt_accessor.py

-4
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,6 @@
1010
import numpy as np
1111
import pytest
1212

13-
from pandas._config import using_string_dtype
14-
1513
from pandas._libs.tslibs.timezones import maybe_get_tz
1614

1715
from pandas.core.dtypes.common import (
@@ -556,7 +554,6 @@ def test_strftime(self):
556554
)
557555
tm.assert_series_equal(result, expected)
558556

559-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
560557
def test_strftime_dt64_days(self):
561558
ser = Series(date_range("20130101", periods=5))
562559
ser.iloc[0] = pd.NaT
@@ -571,7 +568,6 @@ def test_strftime_dt64_days(self):
571568

572569
expected = Index(
573570
["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"],
574-
dtype=np.object_,
575571
)
576572
# dtype may be S10 or U10 depending on python version
577573
tm.assert_index_equal(result, expected)

pandas/tests/series/indexing/test_indexing.py

+15-6
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,6 @@
66
import numpy as np
77
import pytest
88

9-
from pandas._config import using_string_dtype
10-
119
from pandas.errors import IndexingError
1210

1311
from pandas import (
@@ -251,18 +249,29 @@ def test_slice(string_series, object_series):
251249
tm.assert_series_equal(string_series, original)
252250

253251

254-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
255252
def test_timedelta_assignment():
256253
# GH 8209
257254
s = Series([], dtype=object)
258255
s.loc["B"] = timedelta(1)
259-
tm.assert_series_equal(s, Series(Timedelta("1 days"), index=["B"]))
256+
expected = Series(
257+
Timedelta("1 days"), dtype="timedelta64[ns]", index=Index(["B"], dtype=object)
258+
)
259+
tm.assert_series_equal(s, expected)
260260

261261
s = s.reindex(s.index.insert(0, "A"))
262-
tm.assert_series_equal(s, Series([np.nan, Timedelta("1 days")], index=["A", "B"]))
262+
expected = Series(
263+
[np.nan, Timedelta("1 days")],
264+
dtype="timedelta64[ns]",
265+
index=Index(["A", "B"], dtype=object),
266+
)
267+
tm.assert_series_equal(s, expected)
263268

264269
s.loc["A"] = timedelta(1)
265-
expected = Series(Timedelta("1 days"), index=["A", "B"])
270+
expected = Series(
271+
Timedelta("1 days"),
272+
dtype="timedelta64[ns]",
273+
index=Index(["A", "B"], dtype=object),
274+
)
266275
tm.assert_series_equal(s, expected)
267276

268277

pandas/tests/series/indexing/test_setitem.py

+28-19
Original file line number | Diff line number | Diff line change
@@ -9,12 +9,7 @@
99
import numpy as np
1010
import pytest
1111

12-
from pandas._config import using_string_dtype
13-
14-
from pandas.compat import (
15-
HAS_PYARROW,
16-
WASM,
17-
)
12+
from pandas.compat import WASM
1813
from pandas.compat.numpy import np_version_gte1p24
1914
from pandas.errors import IndexingError
2015

@@ -32,6 +27,7 @@
3227
NaT,
3328
Period,
3429
Series,
30+
StringDtype,
3531
Timedelta,
3632
Timestamp,
3733
array,
@@ -535,14 +531,16 @@ def test_append_timedelta_does_not_cast(self, td, using_infer_string, request):
535531
tm.assert_series_equal(ser, expected)
536532
assert isinstance(ser["td"], Timedelta)
537533

538-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
539534
def test_setitem_with_expansion_type_promotion(self):
540535
# GH#12599
541536
ser = Series(dtype=object)
542537
ser["a"] = Timestamp("2016-01-01")
543538
ser["b"] = 3.0
544539
ser["c"] = "foo"
545-
expected = Series([Timestamp("2016-01-01"), 3.0, "foo"], index=["a", "b", "c"])
540+
expected = Series(
541+
[Timestamp("2016-01-01"), 3.0, "foo"],
542+
index=Index(["a", "b", "c"], dtype=object),
543+
)
546544
tm.assert_series_equal(ser, expected)
547545

548546
def test_setitem_not_contained(self, string_series):
@@ -826,11 +824,6 @@ def test_mask_key(self, obj, key, expected, raises, val, indexer_sli):
826824
else:
827825
indexer_sli(obj)[mask] = val
828826

829-
@pytest.mark.xfail(
830-
using_string_dtype() and not HAS_PYARROW,
831-
reason="TODO(infer_string)",
832-
strict=False,
833-
)
834827
def test_series_where(self, obj, key, expected, raises, val, is_inplace):
835828
mask = np.zeros(obj.shape, dtype=bool)
836829
mask[key] = True
@@ -846,6 +839,11 @@ def test_series_where(self, obj, key, expected, raises, val, is_inplace):
846839
obj = obj.copy()
847840
arr = obj._values
848841

842+
if raises and obj.dtype == "string":
843+
with pytest.raises(TypeError, match="Invalid value"):
844+
obj.where(~mask, val)
845+
return
846+
849847
res = obj.where(~mask, val)
850848

851849
if val is NA and res.dtype == object:
@@ -858,25 +856,23 @@ def test_series_where(self, obj, key, expected, raises, val, is_inplace):
858856

859857
self._check_inplace(is_inplace, orig, arr, obj)
860858

861-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
862-
def test_index_where(self, obj, key, expected, raises, val, using_infer_string):
859+
def test_index_where(self, obj, key, expected, raises, val):
863860
mask = np.zeros(obj.shape, dtype=bool)
864861
mask[key] = True
865862

866-
if using_infer_string and obj.dtype == object:
863+
if raises and obj.dtype == "string":
867864
with pytest.raises(TypeError, match="Invalid value"):
868865
Index(obj).where(~mask, val)
869866
else:
870867
res = Index(obj).where(~mask, val)
871868
expected_idx = Index(expected, dtype=expected.dtype)
872869
tm.assert_index_equal(res, expected_idx)
873870

874-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
875-
def test_index_putmask(self, obj, key, expected, raises, val, using_infer_string):
871+
def test_index_putmask(self, obj, key, expected, raises, val):
876872
mask = np.zeros(obj.shape, dtype=bool)
877873
mask[key] = True
878874

879-
if using_infer_string and obj.dtype == object:
875+
if raises and obj.dtype == "string":
880876
with pytest.raises(TypeError, match="Invalid value"):
881877
Index(obj).putmask(mask, val)
882878
else:
@@ -1372,6 +1368,19 @@ def raises(self):
13721368
return False
13731369

13741370

1371+
@pytest.mark.parametrize(
1372+
"val,exp_dtype,raises",
1373+
[
1374+
(1, object, True),
1375+
("e", StringDtype(na_value=np.nan), False),
1376+
],
1377+
)
1378+
class TestCoercionString(CoercionTest):
1379+
@pytest.fixture
1380+
def obj(self):
1381+
return Series(["a", "b", "c", "d"], dtype=StringDtype(na_value=np.nan))
1382+
1383+
13751384
@pytest.mark.parametrize(
13761385
"val,exp_dtype,raises",
13771386
[

pandas/tests/series/indexing/test_where.py

+7-10
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
from pandas.core.dtypes.common import is_integer
75

86
import pandas as pd
@@ -231,7 +229,6 @@ def test_where_ndframe_align():
231229
tm.assert_series_equal(out, expected)
232230

233231

234-
@pytest.mark.xfail(using_string_dtype(), reason="can't set ints into string")
235232
def test_where_setitem_invalid():
236233
# GH 2702
237234
# make sure correct exceptions are raised on invalid list assignment
@@ -241,7 +238,7 @@ def test_where_setitem_invalid():
241238
"different length than the value"
242239
)
243240
# slice
244-
s = Series(list("abc"))
241+
s = Series(list("abc"), dtype=object)
245242

246243
with pytest.raises(ValueError, match=msg("slice")):
247244
s[0:3] = list(range(27))
@@ -251,18 +248,18 @@ def test_where_setitem_invalid():
251248
tm.assert_series_equal(s.astype(np.int64), expected)
252249

253250
# slice with step
254-
s = Series(list("abcdef"))
251+
s = Series(list("abcdef"), dtype=object)
255252

256253
with pytest.raises(ValueError, match=msg("slice")):
257254
s[0:4:2] = list(range(27))
258255

259-
s = Series(list("abcdef"))
256+
s = Series(list("abcdef"), dtype=object)
260257
s[0:4:2] = list(range(2))
261258
expected = Series([0, "b", 1, "d", "e", "f"])
262259
tm.assert_series_equal(s, expected)
263260

264261
# neg slices
265-
s = Series(list("abcdef"))
262+
s = Series(list("abcdef"), dtype=object)
266263

267264
with pytest.raises(ValueError, match=msg("slice")):
268265
s[:-1] = list(range(27))
@@ -272,18 +269,18 @@ def test_where_setitem_invalid():
272269
tm.assert_series_equal(s, expected)
273270

274271
# list
275-
s = Series(list("abc"))
272+
s = Series(list("abc"), dtype=object)
276273

277274
with pytest.raises(ValueError, match=msg("list-like")):
278275
s[[0, 1, 2]] = list(range(27))
279276

280-
s = Series(list("abc"))
277+
s = Series(list("abc"), dtype=object)
281278

282279
with pytest.raises(ValueError, match=msg("list-like")):
283280
s[[0, 1, 2]] = list(range(2))
284281

285282
# scalar
286-
s = Series(list("abc"))
283+
s = Series(list("abc"), dtype=object)
287284
s[0] = list(range(10))
288285
expected = Series([list(range(10)), "b", "c"])
289286
tm.assert_series_equal(s, expected)

pandas/tests/series/methods/test_replace.py

+16-13
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,6 @@
33
import numpy as np
44
import pytest
55

6-
from pandas._config import using_string_dtype
7-
86
import pandas as pd
97
import pandas._testing as tm
108
from pandas.core.arrays import IntervalArray
@@ -628,15 +626,23 @@ def test_replace_nullable_numeric(self):
628626
with pytest.raises(TypeError, match="Invalid value"):
629627
ints.replace(1, 9.5)
630628

631-
@pytest.mark.xfail(using_string_dtype(), reason="can't fill 1 in string")
632629
@pytest.mark.parametrize("regex", [False, True])
633630
def test_replace_regex_dtype_series(self, regex):
634631
# GH-48644
635-
series = pd.Series(["0"])
632+
series = pd.Series(["0"], dtype=object)
636633
expected = pd.Series([1], dtype=object)
637634
result = series.replace(to_replace="0", value=1, regex=regex)
638635
tm.assert_series_equal(result, expected)
639636

637+
@pytest.mark.parametrize("regex", [False, True])
638+
def test_replace_regex_dtype_series_string(self, regex, using_infer_string):
639+
if not using_infer_string:
640+
# then this is object dtype which is already tested above
641+
return
642+
series = pd.Series(["0"], dtype="str")
643+
with pytest.raises(TypeError, match="Invalid value"):
644+
series.replace(to_replace="0", value=1, regex=regex)
645+
640646
def test_replace_different_int_types(self, any_int_numpy_dtype):
641647
# GH#45311
642648
labs = pd.Series([1, 1, 1, 0, 0, 2, 2, 2], dtype=any_int_numpy_dtype)
@@ -656,21 +662,18 @@ def test_replace_value_none_dtype_numeric(self, val):
656662
expected = pd.Series([1, None], dtype=object)
657663
tm.assert_series_equal(result, expected)
658664

659-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
660-
def test_replace_change_dtype_series(self, using_infer_string):
665+
def test_replace_change_dtype_series(self):
661666
# GH#25797
662-
df = pd.DataFrame.from_dict({"Test": ["0.5", True, "0.6"]})
663-
warn = FutureWarning if using_infer_string else None
664-
with tm.assert_produces_warning(warn, match="Downcasting"):
665-
df["Test"] = df["Test"].replace([True], [np.nan])
666-
expected = pd.DataFrame.from_dict({"Test": ["0.5", np.nan, "0.6"]})
667+
df = pd.DataFrame({"Test": ["0.5", True, "0.6"]}, dtype=object)
668+
df["Test"] = df["Test"].replace([True], [np.nan])
669+
expected = pd.DataFrame({"Test": ["0.5", np.nan, "0.6"]}, dtype=object)
667670
tm.assert_frame_equal(df, expected)
668671

669-
df = pd.DataFrame.from_dict({"Test": ["0.5", None, "0.6"]})
672+
df = pd.DataFrame({"Test": ["0.5", None, "0.6"]}, dtype=object)
670673
df["Test"] = df["Test"].replace([None], [np.nan])
671674
tm.assert_frame_equal(df, expected)
672675

673-
df = pd.DataFrame.from_dict({"Test": ["0.5", None, "0.6"]})
676+
df = pd.DataFrame({"Test": ["0.5", None, "0.6"]}, dtype=object)
674677
df["Test"] = df["Test"].fillna(np.nan)
675678
tm.assert_frame_equal(df, expected)
676679

pandas/tests/series/methods/test_unstack.py

+1-4
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
import pandas as pd
75
from pandas import (
86
DataFrame,
@@ -136,11 +134,10 @@ def test_unstack_mixed_type_name_in_multiindex(
136134
tm.assert_frame_equal(result, expected)
137135

138136

139-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
140137
def test_unstack_multi_index_categorical_values():
141138
df = DataFrame(
142139
np.random.default_rng(2).standard_normal((10, 4)),
143-
columns=Index(list("ABCD"), dtype=object),
140+
columns=Index(list("ABCD")),
144141
index=date_range("2000-01-01", periods=10, freq="B"),
145142
)
146143
mi = df.stack().index.rename(["major", "minor"])

pandas/tests/series/test_logical_ops.py

+1
Original file line number | Diff line number | Diff line change
@@ -413,6 +413,7 @@ def test_logical_ops_label_based(self, using_infer_string):
413413
for e in [Series(["z"])]:
414414
if using_infer_string:
415415
# TODO(infer_string) should this behave differently?
416+
# -> https://github.com/pandas-dev/pandas/issues/60234
416417
with pytest.raises(
417418
TypeError, match="not supported for dtype|unsupported operand type"
418419
):

0 commit comments

Comments
 (0)