Skip to content

Commit a83184f

Browse files
[backport 2.3.x] TST (string dtype): update all tests in tests/frame/indexing (#60193) (#60212)
* TST (string dtype): update all tests in tests/frame/indexing (#60193)
  (cherry picked from commit bec2dbc)
* update for 2.3.x
1 parent a82cf8e commit a83184f

File tree

6 files changed

+51
-46
lines changed

6 files changed

+51
-46
lines changed

pandas/tests/frame/indexing/test_coercion.py

+18 -6
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,6 @@
77
import numpy as np
88
import pytest
99

10-
from pandas._config import using_string_dtype
11-
1210
import pandas as pd
1311
from pandas import (
1412
DataFrame,
@@ -99,28 +97,42 @@ def test_6942(indexer_al):
9997
assert df.iloc[0, 0] == t2
10098

10199

102-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
103100
def test_26395(indexer_al):
104101
# .at case fixed by GH#45121 (best guess)
105102
df = DataFrame(index=["A", "B", "C"])
106103
df["D"] = 0
107104

108105
indexer_al(df)["C", "D"] = 2
109-
expected = DataFrame({"D": [0, 0, 2]}, index=["A", "B", "C"], dtype=np.int64)
106+
expected = DataFrame(
107+
{"D": [0, 0, 2]},
108+
index=["A", "B", "C"],
109+
columns=pd.Index(["D"], dtype=object),
110+
dtype=np.int64,
111+
)
110112
tm.assert_frame_equal(df, expected)
111113

112114
with tm.assert_produces_warning(
113115
FutureWarning, match="Setting an item of incompatible dtype"
114116
):
115117
indexer_al(df)["C", "D"] = 44.5
116-
expected = DataFrame({"D": [0, 0, 44.5]}, index=["A", "B", "C"], dtype=np.float64)
118+
expected = DataFrame(
119+
{"D": [0, 0, 44.5]},
120+
index=["A", "B", "C"],
121+
columns=pd.Index(["D"], dtype=object),
122+
dtype=np.float64,
123+
)
117124
tm.assert_frame_equal(df, expected)
118125

119126
with tm.assert_produces_warning(
120127
FutureWarning, match="Setting an item of incompatible dtype"
121128
):
122129
indexer_al(df)["C", "D"] = "hello"
123-
expected = DataFrame({"D": [0, 0, "hello"]}, index=["A", "B", "C"], dtype=object)
130+
expected = DataFrame(
131+
{"D": [0, 0, "hello"]},
132+
index=["A", "B", "C"],
133+
columns=pd.Index(["D"], dtype=object),
134+
dtype=object,
135+
)
124136
tm.assert_frame_equal(df, expected)
125137

126138

pandas/tests/frame/indexing/test_indexing.py

+6 -12
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,6 @@
1212
from pandas._config import using_string_dtype
1313

1414
from pandas._libs import iNaT
15-
from pandas.compat import HAS_PYARROW
1615
from pandas.errors import (
1716
InvalidIndexError,
1817
PerformanceWarning,
@@ -518,18 +517,17 @@ def test_setitem_ambig(self, using_infer_string):
518517
else:
519518
assert dm[2].dtype == np.object_
520519

521-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
522-
def test_setitem_None(self, float_frame, using_infer_string):
520+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
521+
def test_setitem_None(self, float_frame):
523522
# GH #766
524523
float_frame[None] = float_frame["A"]
525-
key = None if not using_infer_string else np.nan
526524
tm.assert_series_equal(
527525
float_frame.iloc[:, -1], float_frame["A"], check_names=False
528526
)
529527
tm.assert_series_equal(
530-
float_frame.loc[:, key], float_frame["A"], check_names=False
528+
float_frame.loc[:, None], float_frame["A"], check_names=False
531529
)
532-
tm.assert_series_equal(float_frame[key], float_frame["A"], check_names=False)
530+
tm.assert_series_equal(float_frame[None], float_frame["A"], check_names=False)
533531

534532
def test_loc_setitem_boolean_mask_allfalse(self):
535533
# GH 9596
@@ -1191,7 +1189,6 @@ def test_setitem_with_unaligned_tz_aware_datetime_column(self):
11911189
df.loc[[0, 1, 2], "dates"] = column[[1, 0, 2]]
11921190
tm.assert_series_equal(df["dates"], column)
11931191

1194-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
11951192
def test_loc_setitem_datetimelike_with_inference(self):
11961193
# GH 7592
11971194
# assignment of timedeltas with NaT
@@ -1210,13 +1207,10 @@ def test_loc_setitem_datetimelike_with_inference(self):
12101207
result = df.dtypes
12111208
expected = Series(
12121209
[np.dtype("timedelta64[ns]")] * 6 + [np.dtype("datetime64[ns]")] * 2,
1213-
index=list("ABCDEFGH"),
1210+
index=Index(list("ABCDEFGH"), dtype=object),
12141211
)
12151212
tm.assert_series_equal(result, expected)
12161213

1217-
@pytest.mark.xfail(
1218-
using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)"
1219-
)
12201214
def test_getitem_boolean_indexing_mixed(self):
12211215
df = DataFrame(
12221216
{
@@ -1258,7 +1252,7 @@ def test_getitem_boolean_indexing_mixed(self):
12581252
tm.assert_frame_equal(df2, expected)
12591253

12601254
df["foo"] = "test"
1261-
msg = "not supported between instances|unorderable types"
1255+
msg = "not supported between instances|unorderable types|Invalid comparison"
12621256

12631257
with pytest.raises(TypeError, match=msg):
12641258
df[df > 0.3] = 1

pandas/tests/frame/indexing/test_insert.py

+2 -4
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,6 @@
66
import numpy as np
77
import pytest
88

9-
from pandas._config import using_string_dtype
10-
119
from pandas.errors import PerformanceWarning
1210

1311
from pandas import (
@@ -62,15 +60,15 @@ def test_insert_column_bug_4032(self):
6260
expected = DataFrame([[1.3, 1, 1.1], [2.3, 2, 2.2]], columns=["c", "a", "b"])
6361
tm.assert_frame_equal(result, expected)
6462

65-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
6663
def test_insert_with_columns_dups(self):
6764
# GH#14291
6865
df = DataFrame()
6966
df.insert(0, "A", ["g", "h", "i"], allow_duplicates=True)
7067
df.insert(0, "A", ["d", "e", "f"], allow_duplicates=True)
7168
df.insert(0, "A", ["a", "b", "c"], allow_duplicates=True)
7269
exp = DataFrame(
73-
[["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]], columns=["A", "A", "A"]
70+
[["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]],
71+
columns=Index(["A", "A", "A"], dtype=object),
7472
)
7573
tm.assert_frame_equal(df, exp)
7674

pandas/tests/frame/indexing/test_setitem.py

+12 -14
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,6 @@
33
import numpy as np
44
import pytest
55

6-
from pandas._config import using_string_dtype
7-
86
import pandas.util._test_decorators as td
97

108
from pandas.core.dtypes.base import _registry as ea_registry
@@ -148,13 +146,16 @@ def test_setitem_different_dtype(self):
148146
)
149147
tm.assert_series_equal(result, expected)
150148

151-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
152149
def test_setitem_empty_columns(self):
153150
# GH 13522
154151
df = DataFrame(index=["A", "B", "C"])
155152
df["X"] = df.index
156153
df["X"] = ["x", "y", "z"]
157-
exp = DataFrame(data={"X": ["x", "y", "z"]}, index=["A", "B", "C"])
154+
exp = DataFrame(
155+
data={"X": ["x", "y", "z"]},
156+
index=["A", "B", "C"],
157+
columns=Index(["X"], dtype=object),
158+
)
158159
tm.assert_frame_equal(df, exp)
159160

160161
def test_setitem_dt64_index_empty_columns(self):
@@ -164,14 +165,15 @@ def test_setitem_dt64_index_empty_columns(self):
164165
df["A"] = rng
165166
assert df["A"].dtype == np.dtype("M8[ns]")
166167

167-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
168168
def test_setitem_timestamp_empty_columns(self):
169169
# GH#19843
170170
df = DataFrame(index=range(3))
171171
df["now"] = Timestamp("20130101", tz="UTC").as_unit("ns")
172172

173173
expected = DataFrame(
174-
[[Timestamp("20130101", tz="UTC")]] * 3, index=[0, 1, 2], columns=["now"]
174+
[[Timestamp("20130101", tz="UTC")]] * 3,
175+
index=range(3),
176+
columns=Index(["now"], dtype=object),
175177
)
176178
tm.assert_frame_equal(df, expected)
177179

@@ -204,14 +206,13 @@ def test_setitem_with_unaligned_sparse_value(self):
204206
expected = Series(SparseArray([1, 0, 0]), name="new_column")
205207
tm.assert_series_equal(df["new_column"], expected)
206208

207-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
208209
def test_setitem_period_preserves_dtype(self):
209210
# GH: 26861
210211
data = [Period("2003-12", "D")]
211212
result = DataFrame([])
212213
result["a"] = data
213214

214-
expected = DataFrame({"a": data})
215+
expected = DataFrame({"a": data}, columns=Index(["a"], dtype=object))
215216

216217
tm.assert_frame_equal(result, expected)
217218

@@ -677,11 +678,10 @@ def test_setitem_iloc_two_dimensional_generator(self):
677678
expected = DataFrame({"a": [1, 2, 3], "b": [4, 1, 1]})
678679
tm.assert_frame_equal(df, expected)
679680

680-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
681681
def test_setitem_dtypes_bytes_type_to_object(self):
682682
# GH 20734
683683
index = Series(name="id", dtype="S24")
684-
df = DataFrame(index=index)
684+
df = DataFrame(index=index, columns=Index([], dtype="str"))
685685
df["a"] = Series(name="a", index=index, dtype=np.uint32)
686686
df["b"] = Series(name="b", index=index, dtype="S64")
687687
df["c"] = Series(name="c", index=index, dtype="S64")
@@ -712,7 +712,6 @@ def test_setitem_ea_dtype_rhs_series(self):
712712

713713
# TODO(ArrayManager) set column with 2d column array, see #44788
714714
@td.skip_array_manager_not_yet_implemented
715-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
716715
def test_setitem_npmatrix_2d(self):
717716
# GH#42376
718717
# for use-case df["x"] = sparse.random((10, 10)).mean(axis=1)
@@ -721,7 +720,7 @@ def test_setitem_npmatrix_2d(self):
721720
)
722721

723722
a = np.ones((10, 1))
724-
df = DataFrame(index=np.arange(10))
723+
df = DataFrame(index=np.arange(10), columns=Index([], dtype="str"))
725724
df["np-array"] = a
726725

727726
# Instantiation of `np.matrix` gives PendingDeprecationWarning
@@ -936,12 +935,11 @@ def test_setitem_with_expansion_categorical_dtype(self):
936935
ser.name = "E"
937936
tm.assert_series_equal(result2.sort_index(), ser.sort_index())
938937

939-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
940938
def test_setitem_scalars_no_index(self):
941939
# GH#16823 / GH#17894
942940
df = DataFrame()
943941
df["foo"] = 1
944-
expected = DataFrame(columns=["foo"]).astype(np.int64)
942+
expected = DataFrame(columns=Index(["foo"], dtype=object)).astype(np.int64)
945943
tm.assert_frame_equal(df, expected)
946944

947945
def test_setitem_newcol_tuple_key(self, float_frame):

pandas/tests/frame/indexing/test_where.py

+12 -6
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,6 @@ def is_ok(s):
4646

4747

4848
class TestDataFrameIndexingWhere:
49-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
5049
def test_where_get(self, where_frame, float_string_frame):
5150
def _check_get(df, cond, check_dtypes=True):
5251
other1 = _safe_add(df)
@@ -64,7 +63,10 @@ def _check_get(df, cond, check_dtypes=True):
6463
# check getting
6564
df = where_frame
6665
if df is float_string_frame:
67-
msg = "'>' not supported between instances of 'str' and 'int'"
66+
msg = (
67+
"'>' not supported between instances of 'str' and 'int'"
68+
"|Invalid comparison"
69+
)
6870
with pytest.raises(TypeError, match=msg):
6971
df > 0
7072
return
@@ -98,7 +100,6 @@ def test_where_upcasting(self):
98100
tm.assert_series_equal(result, expected)
99101

100102
@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
101-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
102103
def test_where_alignment(self, where_frame, float_string_frame):
103104
# aligning
104105
def _check_align(df, cond, other, check_dtypes=True):
@@ -130,7 +131,10 @@ def _check_align(df, cond, other, check_dtypes=True):
130131

131132
df = where_frame
132133
if df is float_string_frame:
133-
msg = "'>' not supported between instances of 'str' and 'int'"
134+
msg = (
135+
"'>' not supported between instances of 'str' and 'int'"
136+
"|Invalid comparison"
137+
)
134138
with pytest.raises(TypeError, match=msg):
135139
df > 0
136140
return
@@ -174,7 +178,6 @@ def test_where_invalid(self):
174178
df.mask(0)
175179

176180
@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
177-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
178181
def test_where_set(self, where_frame, float_string_frame, mixed_int_frame):
179182
# where inplace
180183

@@ -196,7 +199,10 @@ def _check_set(df, cond, check_dtypes=True):
196199

197200
df = where_frame
198201
if df is float_string_frame:
199-
msg = "'>' not supported between instances of 'str' and 'int'"
202+
msg = (
203+
"'>' not supported between instances of 'str' and 'int'"
204+
"|Invalid comparison"
205+
)
200206
with pytest.raises(TypeError, match=msg):
201207
df > 0
202208
return

pandas/tests/frame/indexing/test_xs.py

+1 -4
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,6 @@
33
import numpy as np
44
import pytest
55

6-
from pandas._config import using_string_dtype
7-
86
from pandas.errors import SettingWithCopyError
97

108
from pandas import (
@@ -79,10 +77,9 @@ def test_xs(
7977
else:
8078
assert (expected == 5).all()
8179

82-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
8380
def test_xs_corner(self):
8481
# pathological mixed-type reordering case
85-
df = DataFrame(index=[0])
82+
df = DataFrame(index=[0], columns=Index([], dtype="str"))
8683
df["A"] = 1.0
8784
df["B"] = "foo"
8885
df["C"] = 2.0

0 commit comments

Comments
 (0)