Skip to content

Commit bec2dbc

Browse files
TST (string dtype): update all tests in tests/frame/indexing (#60193)
1 parent cf52dec commit bec2dbc

File tree

6 files changed

+38
-43
lines changed

6 files changed

+38
-43
lines changed

pandas/tests/frame/indexing/test_coercion.py

+6-4
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,6 @@
88
import numpy as np
99
import pytest
1010

11-
from pandas._config import using_string_dtype
12-
1311
import pandas as pd
1412
from pandas import (
1513
DataFrame,
@@ -84,14 +82,18 @@ def test_6942(indexer_al):
8482
assert df.iloc[0, 0] == t2
8583

8684

87-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
8885
def test_26395(indexer_al):
8986
# .at case fixed by GH#45121 (best guess)
9087
df = DataFrame(index=["A", "B", "C"])
9188
df["D"] = 0
9289

9390
indexer_al(df)["C", "D"] = 2
94-
expected = DataFrame({"D": [0, 0, 2]}, index=["A", "B", "C"], dtype=np.int64)
91+
expected = DataFrame(
92+
{"D": [0, 0, 2]},
93+
index=["A", "B", "C"],
94+
columns=pd.Index(["D"], dtype=object),
95+
dtype=np.int64,
96+
)
9597
tm.assert_frame_equal(df, expected)
9698

9799
with pytest.raises(TypeError, match="Invalid value"):

pandas/tests/frame/indexing/test_indexing.py

+5-11
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,6 @@
1212
from pandas._config import using_string_dtype
1313

1414
from pandas._libs import iNaT
15-
from pandas.compat import HAS_PYARROW
1615
from pandas.errors import InvalidIndexError
1716

1817
from pandas.core.dtypes.common import is_integer
@@ -505,17 +504,16 @@ def test_setitem_ambig(self, using_infer_string):
505504
assert dm[2].dtype == np.object_
506505

507506
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
508-
def test_setitem_None(self, float_frame, using_infer_string):
507+
def test_setitem_None(self, float_frame):
509508
# GH #766
510509
float_frame[None] = float_frame["A"]
511-
key = None if not using_infer_string else np.nan
512510
tm.assert_series_equal(
513511
float_frame.iloc[:, -1], float_frame["A"], check_names=False
514512
)
515513
tm.assert_series_equal(
516-
float_frame.loc[:, key], float_frame["A"], check_names=False
514+
float_frame.loc[:, None], float_frame["A"], check_names=False
517515
)
518-
tm.assert_series_equal(float_frame[key], float_frame["A"], check_names=False)
516+
tm.assert_series_equal(float_frame[None], float_frame["A"], check_names=False)
519517

520518
def test_loc_setitem_boolean_mask_allfalse(self):
521519
# GH 9596
@@ -1125,7 +1123,6 @@ def test_setitem_with_unaligned_tz_aware_datetime_column(self):
11251123
df.loc[[0, 1, 2], "dates"] = column[[1, 0, 2]]
11261124
tm.assert_series_equal(df["dates"], column)
11271125

1128-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
11291126
def test_loc_setitem_datetimelike_with_inference(self):
11301127
# GH 7592
11311128
# assignment of timedeltas with NaT
@@ -1144,13 +1141,10 @@ def test_loc_setitem_datetimelike_with_inference(self):
11441141
result = df.dtypes
11451142
expected = Series(
11461143
[np.dtype("timedelta64[ns]")] * 6 + [np.dtype("datetime64[ns]")] * 2,
1147-
index=list("ABCDEFGH"),
1144+
index=Index(list("ABCDEFGH"), dtype=object),
11481145
)
11491146
tm.assert_series_equal(result, expected)
11501147

1151-
@pytest.mark.xfail(
1152-
using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)"
1153-
)
11541148
def test_getitem_boolean_indexing_mixed(self):
11551149
df = DataFrame(
11561150
{
@@ -1192,7 +1186,7 @@ def test_getitem_boolean_indexing_mixed(self):
11921186
tm.assert_frame_equal(df2, expected)
11931187

11941188
df["foo"] = "test"
1195-
msg = "not supported between instances|unorderable types"
1189+
msg = "not supported between instances|unorderable types|Invalid comparison"
11961190

11971191
with pytest.raises(TypeError, match=msg):
11981192
df[df > 0.3] = 1

pandas/tests/frame/indexing/test_insert.py

+2-4
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,6 @@
77
import numpy as np
88
import pytest
99

10-
from pandas._config import using_string_dtype
11-
1210
from pandas.errors import PerformanceWarning
1311

1412
from pandas import (
@@ -63,15 +61,15 @@ def test_insert_column_bug_4032(self):
6361
expected = DataFrame([[1.3, 1, 1.1], [2.3, 2, 2.2]], columns=["c", "a", "b"])
6462
tm.assert_frame_equal(result, expected)
6563

66-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
6764
def test_insert_with_columns_dups(self):
6865
# GH#14291
6966
df = DataFrame()
7067
df.insert(0, "A", ["g", "h", "i"], allow_duplicates=True)
7168
df.insert(0, "A", ["d", "e", "f"], allow_duplicates=True)
7269
df.insert(0, "A", ["a", "b", "c"], allow_duplicates=True)
7370
exp = DataFrame(
74-
[["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]], columns=["A", "A", "A"]
71+
[["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]],
72+
columns=Index(["A", "A", "A"], dtype=object),
7573
)
7674
tm.assert_frame_equal(df, exp)
7775

pandas/tests/frame/indexing/test_setitem.py

+12-14
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,6 @@
33
import numpy as np
44
import pytest
55

6-
from pandas._config import using_string_dtype
7-
86
from pandas.core.dtypes.base import _registry as ea_registry
97
from pandas.core.dtypes.common import is_object_dtype
108
from pandas.core.dtypes.dtypes import (
@@ -146,13 +144,16 @@ def test_setitem_different_dtype(self):
146144
)
147145
tm.assert_series_equal(result, expected)
148146

149-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
150147
def test_setitem_empty_columns(self):
151148
# GH 13522
152149
df = DataFrame(index=["A", "B", "C"])
153150
df["X"] = df.index
154151
df["X"] = ["x", "y", "z"]
155-
exp = DataFrame(data={"X": ["x", "y", "z"]}, index=["A", "B", "C"])
152+
exp = DataFrame(
153+
data={"X": ["x", "y", "z"]},
154+
index=["A", "B", "C"],
155+
columns=Index(["X"], dtype=object),
156+
)
156157
tm.assert_frame_equal(df, exp)
157158

158159
def test_setitem_dt64_index_empty_columns(self):
@@ -162,14 +163,15 @@ def test_setitem_dt64_index_empty_columns(self):
162163
df["A"] = rng
163164
assert df["A"].dtype == np.dtype("M8[ns]")
164165

165-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
166166
def test_setitem_timestamp_empty_columns(self):
167167
# GH#19843
168168
df = DataFrame(index=range(3))
169169
df["now"] = Timestamp("20130101", tz="UTC")
170170

171171
expected = DataFrame(
172-
[[Timestamp("20130101", tz="UTC")]] * 3, index=range(3), columns=["now"]
172+
[[Timestamp("20130101", tz="UTC")]] * 3,
173+
index=range(3),
174+
columns=Index(["now"], dtype=object),
173175
)
174176
tm.assert_frame_equal(df, expected)
175177

@@ -202,14 +204,13 @@ def test_setitem_with_unaligned_sparse_value(self):
202204
expected = Series(SparseArray([1, 0, 0]), name="new_column")
203205
tm.assert_series_equal(df["new_column"], expected)
204206

205-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
206207
def test_setitem_period_preserves_dtype(self):
207208
# GH: 26861
208209
data = [Period("2003-12", "D")]
209210
result = DataFrame([])
210211
result["a"] = data
211212

212-
expected = DataFrame({"a": data})
213+
expected = DataFrame({"a": data}, columns=Index(["a"], dtype=object))
213214

214215
tm.assert_frame_equal(result, expected)
215216

@@ -672,11 +673,10 @@ def test_setitem_iloc_two_dimensional_generator(self):
672673
expected = DataFrame({"a": [1, 2, 3], "b": [4, 1, 1]})
673674
tm.assert_frame_equal(df, expected)
674675

675-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
676676
def test_setitem_dtypes_bytes_type_to_object(self):
677677
# GH 20734
678678
index = Series(name="id", dtype="S24")
679-
df = DataFrame(index=index)
679+
df = DataFrame(index=index, columns=Index([], dtype="str"))
680680
df["a"] = Series(name="a", index=index, dtype=np.uint32)
681681
df["b"] = Series(name="b", index=index, dtype="S64")
682682
df["c"] = Series(name="c", index=index, dtype="S64")
@@ -705,7 +705,6 @@ def test_setitem_ea_dtype_rhs_series(self):
705705
expected = DataFrame({"a": [1, 2]}, dtype="Int64")
706706
tm.assert_frame_equal(df, expected)
707707

708-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
709708
def test_setitem_npmatrix_2d(self):
710709
# GH#42376
711710
# for use-case df["x"] = sparse.random((10, 10)).mean(axis=1)
@@ -714,7 +713,7 @@ def test_setitem_npmatrix_2d(self):
714713
)
715714

716715
a = np.ones((10, 1))
717-
df = DataFrame(index=np.arange(10))
716+
df = DataFrame(index=np.arange(10), columns=Index([], dtype="str"))
718717
df["np-array"] = a
719718

720719
# Instantiation of `np.matrix` gives PendingDeprecationWarning
@@ -927,12 +926,11 @@ def test_setitem_with_expansion_categorical_dtype(self):
927926
ser.name = "E"
928927
tm.assert_series_equal(result2.sort_index(), ser.sort_index())
929928

930-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
931929
def test_setitem_scalars_no_index(self):
932930
# GH#16823 / GH#17894
933931
df = DataFrame()
934932
df["foo"] = 1
935-
expected = DataFrame(columns=["foo"]).astype(np.int64)
933+
expected = DataFrame(columns=Index(["foo"], dtype=object)).astype(np.int64)
936934
tm.assert_frame_equal(df, expected)
937935

938936
def test_setitem_newcol_tuple_key(self, float_frame):

pandas/tests/frame/indexing/test_where.py

+12-6
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,6 @@ def is_ok(s):
4848

4949

5050
class TestDataFrameIndexingWhere:
51-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
5251
def test_where_get(self, where_frame, float_string_frame):
5352
def _check_get(df, cond, check_dtypes=True):
5453
other1 = _safe_add(df)
@@ -66,7 +65,10 @@ def _check_get(df, cond, check_dtypes=True):
6665
# check getting
6766
df = where_frame
6867
if df is float_string_frame:
69-
msg = "'>' not supported between instances of 'str' and 'int'"
68+
msg = (
69+
"'>' not supported between instances of 'str' and 'int'"
70+
"|Invalid comparison"
71+
)
7072
with pytest.raises(TypeError, match=msg):
7173
df > 0
7274
return
@@ -99,7 +101,6 @@ def test_where_upcasting(self):
99101

100102
tm.assert_series_equal(result, expected)
101103

102-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
103104
def test_where_alignment(self, where_frame, float_string_frame):
104105
# aligning
105106
def _check_align(df, cond, other, check_dtypes=True):
@@ -131,7 +132,10 @@ def _check_align(df, cond, other, check_dtypes=True):
131132

132133
df = where_frame
133134
if df is float_string_frame:
134-
msg = "'>' not supported between instances of 'str' and 'int'"
135+
msg = (
136+
"'>' not supported between instances of 'str' and 'int'"
137+
"|Invalid comparison"
138+
)
135139
with pytest.raises(TypeError, match=msg):
136140
df > 0
137141
return
@@ -174,7 +178,6 @@ def test_where_invalid(self):
174178
with pytest.raises(ValueError, match=msg):
175179
df.mask(0)
176180

177-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
178181
def test_where_set(self, where_frame, float_string_frame, mixed_int_frame):
179182
# where inplace
180183

@@ -196,7 +199,10 @@ def _check_set(df, cond, check_dtypes=True):
196199

197200
df = where_frame
198201
if df is float_string_frame:
199-
msg = "'>' not supported between instances of 'str' and 'int'"
202+
msg = (
203+
"'>' not supported between instances of 'str' and 'int'"
204+
"|Invalid comparison"
205+
)
200206
with pytest.raises(TypeError, match=msg):
201207
df > 0
202208
return

pandas/tests/frame/indexing/test_xs.py

+1-4
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,6 @@
33
import numpy as np
44
import pytest
55

6-
from pandas._config import using_string_dtype
7-
86
from pandas import (
97
DataFrame,
108
Index,
@@ -74,10 +72,9 @@ def test_xs_other(self, float_frame):
7472
tm.assert_series_equal(float_frame["A"], float_frame_orig["A"])
7573
assert not (expected == 5).all()
7674

77-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
7875
def test_xs_corner(self):
7976
# pathological mixed-type reordering case
80-
df = DataFrame(index=[0])
77+
df = DataFrame(index=[0], columns=Index([], dtype="str"))
8178
df["A"] = 1.0
8279
df["B"] = "foo"
8380
df["C"] = 2.0

0 commit comments

Comments
 (0)