Skip to content

Commit 9c8c685

Browse files
TST (string dtype): xfail all currently failing tests with future.infer_string (#59329)
* TST (string dtype): xfail all currently failing tests with future.infer_string
* more xfails
* more xfails
* add missing strict=False
* also run slow and single cpu tests
* fix single_cpu tests
* xfail some slow tests
* stop suppressing non-zero exit code from pytest on string CI build
* remove accidentally added xlsx file
---------
Co-authored-by: Matthew Roeschke <[email protected]>
1 parent aa4dc71 commit 9c8c685

File tree

132 files changed

+543
-22
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

132 files changed

+543
-22
lines changed

.github/workflows/unit-tests.yml

-1
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,6 @@ jobs:
5959
extra_loc: "zh_CN"
6060
- name: "Future infer strings"
6161
env_file: actions-311.yaml
62-
pattern: "not slow and not network and not single_cpu"
6362
pandas_future_infer_string: "1"
6463
- name: "Pypy"
6564
env_file: actions-pypy-39.yaml

ci/run_tests.sh

-6
Original file line number | Diff line number | Diff line change
@@ -16,11 +16,5 @@ if [[ "$PATTERN" ]]; then
1616
PYTEST_CMD="$PYTEST_CMD -m \"$PATTERN\""
1717
fi
1818

19-
# temporarily let pytest always succeed (many tests are not yet passing in the
20-
# build enabling the future string dtype)
21-
if [[ "$PANDAS_FUTURE_INFER_STRING" == "1" ]]; then
22-
PYTEST_CMD="$PYTEST_CMD || true"
23-
fi
24-
2519
echo $PYTEST_CMD
2620
sh -c "$PYTEST_CMD"

pandas/tests/apply/test_frame_apply.py

+6
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,8 @@
44
import numpy as np
55
import pytest
66

7+
from pandas._config import using_string_dtype
8+
79
from pandas.core.dtypes.dtypes import CategoricalDtype
810

911
import pandas as pd
@@ -61,6 +63,7 @@ def test_apply(float_frame, engine, request):
6163
assert result.index is float_frame.index
6264

6365

66+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
6467
@pytest.mark.parametrize("axis", [0, 1])
6568
@pytest.mark.parametrize("raw", [True, False])
6669
@pytest.mark.parametrize("nopython", [True, False])
@@ -1213,6 +1216,7 @@ def test_agg_with_name_as_column_name():
12131216
tm.assert_series_equal(result, expected)
12141217

12151218

1219+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
12161220
def test_agg_multiple_mixed():
12171221
# GH 20909
12181222
mdf = DataFrame(
@@ -1338,6 +1342,7 @@ def test_named_agg_reduce_axis1_raises(float_frame):
13381342
float_frame.agg(row1=(name1, "sum"), row2=(name2, "max"), axis=axis)
13391343

13401344

1345+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
13411346
def test_nuiscance_columns():
13421347
# GH 15015
13431348
df = DataFrame(
@@ -1514,6 +1519,7 @@ def test_apply_datetime_tz_issue(engine, request):
15141519
tm.assert_series_equal(result, expected)
15151520

15161521

1522+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
15171523
@pytest.mark.parametrize("df", [DataFrame({"A": ["a", None], "B": ["c", "d"]})])
15181524
@pytest.mark.parametrize("method", ["min", "max", "sum"])
15191525
def test_mixed_column_raises(df, method, using_infer_string):

pandas/tests/apply/test_numba.py

+4
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
from pandas._config import using_string_dtype
5+
46
import pandas.util._test_decorators as td
57

68
from pandas import (
@@ -17,6 +19,7 @@ def apply_axis(request):
1719
return request.param
1820

1921

22+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
2023
def test_numba_vs_python_noop(float_frame, apply_axis):
2124
func = lambda x: x
2225
result = float_frame.apply(func, engine="numba", axis=apply_axis)
@@ -40,6 +43,7 @@ def test_numba_vs_python_string_index():
4043
)
4144

4245

46+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
4347
def test_numba_vs_python_indexing():
4448
frame = DataFrame(
4549
{"a": [1, 2, 3], "b": [4, 5, 6], "c": [7.0, 8.0, 9.0]},

pandas/tests/apply/test_str.py

+4
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,8 @@
44
import numpy as np
55
import pytest
66

7+
from pandas._config import using_string_dtype
8+
79
from pandas.compat import WASM
810

911
from pandas.core.dtypes.common import is_number
@@ -79,6 +81,7 @@ def test_apply_np_transformer(float_frame, op, how):
7981
tm.assert_frame_equal(result, expected)
8082

8183

84+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
8285
@pytest.mark.parametrize(
8386
"series, func, expected",
8487
chain(
@@ -137,6 +140,7 @@ def test_agg_cython_table_series(series, func, expected):
137140
assert result == expected
138141

139142

143+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
140144
@pytest.mark.parametrize(
141145
"series, func, expected",
142146
chain(

pandas/tests/arrays/categorical/test_analytics.py

+3
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,8 @@
44
import numpy as np
55
import pytest
66

7+
from pandas._config import using_string_dtype
8+
79
from pandas.compat import PYPY
810

911
from pandas import (
@@ -294,6 +296,7 @@ def test_nbytes(self):
294296
exp = 3 + 3 * 8 # 3 int8s for values + 3 int64s for categories
295297
assert cat.nbytes == exp
296298

299+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
297300
def test_memory_usage(self):
298301
cat = Categorical([1, 2, 3])
299302

pandas/tests/arrays/categorical/test_api.py

+3
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,8 @@
33
import numpy as np
44
import pytest
55

6+
from pandas._config import using_string_dtype
7+
68
from pandas.compat import PY311
79

810
from pandas import (
@@ -149,6 +151,7 @@ def test_reorder_categories_raises(self, new_categories):
149151
with pytest.raises(ValueError, match=msg):
150152
cat.reorder_categories(new_categories)
151153

154+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
152155
def test_add_categories(self):
153156
cat = Categorical(["a", "b", "c", "a"], ordered=True)
154157
old = cat.copy()

pandas/tests/arrays/categorical/test_constructors.py

+1
Original file line number | Diff line number | Diff line change
@@ -735,6 +735,7 @@ def test_interval(self):
735735
tm.assert_numpy_array_equal(cat.codes, expected_codes)
736736
tm.assert_index_equal(cat.categories, idx)
737737

738+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
738739
def test_categorical_extension_array_nullable(self, nulls_fixture):
739740
# GH:
740741
arr = pd.arrays.StringArray._from_sequence(

pandas/tests/arrays/floating/test_arithmetic.py

+3
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,8 @@
33
import numpy as np
44
import pytest
55

6+
from pandas._config import using_string_dtype
7+
68
import pandas as pd
79
import pandas._testing as tm
810
from pandas.core.arrays import FloatingArray
@@ -122,6 +124,7 @@ def test_arith_zero_dim_ndarray(other):
122124
# -----------------------------------------------------------------------------
123125

124126

127+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
125128
def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string):
126129
op = all_arithmetic_operators
127130
s = pd.Series(data)

pandas/tests/arrays/integer/test_arithmetic.py

+3
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,8 @@
33
import numpy as np
44
import pytest
55

6+
from pandas._config import using_string_dtype
7+
68
import pandas as pd
79
import pandas._testing as tm
810
from pandas.core import ops
@@ -172,6 +174,7 @@ def test_numpy_zero_dim_ndarray(other):
172174
# -----------------------------------------------------------------------------
173175

174176

177+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
175178
def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string):
176179
op = all_arithmetic_operators
177180
s = pd.Series(data)

pandas/tests/arrays/masked/test_function.py

+3
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
from pandas._config import using_string_dtype
5+
46
from pandas.core.dtypes.common import is_integer_dtype
57

68
import pandas as pd
@@ -58,6 +60,7 @@ def test_tolist(data):
5860
tm.assert_equal(result, expected)
5961

6062

63+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
6164
def test_to_numpy():
6265
# GH#56991
6366

pandas/tests/copy_view/test_array.py

+3
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
from pandas._config import using_string_dtype
5+
46
from pandas import (
57
DataFrame,
68
Series,
@@ -117,6 +119,7 @@ def test_dataframe_array_ea_dtypes():
117119
assert arr.flags.writeable is False
118120

119121

122+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
120123
def test_dataframe_array_string_dtype():
121124
df = DataFrame({"a": ["a", "b"]}, dtype="string")
122125
arr = np.asarray(df)

pandas/tests/copy_view/test_astype.py

+6
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,8 @@
33
import numpy as np
44
import pytest
55

6+
from pandas._config import using_string_dtype
7+
68
from pandas.compat.pyarrow import pa_version_under12p0
79
import pandas.util._test_decorators as td
810

@@ -82,6 +84,7 @@ def test_astype_numpy_to_ea():
8284
assert np.shares_memory(get_array(ser), get_array(result))
8385

8486

87+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
8588
@pytest.mark.parametrize(
8689
"dtype, new_dtype", [("object", "string"), ("string", "object")]
8790
)
@@ -95,6 +98,7 @@ def test_astype_string_and_object(dtype, new_dtype):
9598
tm.assert_frame_equal(df, df_orig)
9699

97100

101+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
98102
@pytest.mark.parametrize(
99103
"dtype, new_dtype", [("object", "string"), ("string", "object")]
100104
)
@@ -195,6 +199,7 @@ def test_astype_arrow_timestamp():
195199
assert np.shares_memory(get_array(df, "a"), get_array(result, "a")._pa_array)
196200

197201

202+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
198203
def test_convert_dtypes_infer_objects():
199204
ser = Series(["a", "b", "c"])
200205
ser_orig = ser.copy()
@@ -210,6 +215,7 @@ def test_convert_dtypes_infer_objects():
210215
tm.assert_series_equal(ser, ser_orig)
211216

212217

218+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
213219
def test_convert_dtypes():
214220
df = DataFrame({"a": ["a", "b"], "b": [1, 2], "c": [1.5, 2.5], "d": [True, False]})
215221
df_orig = df.copy()

pandas/tests/copy_view/test_constructors.py

+3
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
from pandas._config import using_string_dtype
5+
46
import pandas as pd
57
from pandas import (
68
DataFrame,
@@ -207,6 +209,7 @@ def test_dataframe_from_dict_of_series_with_reindex(dtype):
207209
assert np.shares_memory(arr_before, arr_after)
208210

209211

212+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
210213
@pytest.mark.parametrize(
211214
"data, dtype", [([1, 2], None), ([1, 2], "int64"), (["a", "b"], None)]
212215
)

pandas/tests/copy_view/test_functions.py

+8
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
from pandas._config import using_string_dtype
5+
46
from pandas import (
57
DataFrame,
68
Index,
@@ -12,6 +14,7 @@
1214
from pandas.tests.copy_view.util import get_array
1315

1416

17+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
1518
def test_concat_frames():
1619
df = DataFrame({"b": ["a"] * 3})
1720
df2 = DataFrame({"a": ["a"] * 3})
@@ -30,6 +33,7 @@ def test_concat_frames():
3033
tm.assert_frame_equal(df, df_orig)
3134

3235

36+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
3337
def test_concat_frames_updating_input():
3438
df = DataFrame({"b": ["a"] * 3})
3539
df2 = DataFrame({"a": ["a"] * 3})
@@ -149,6 +153,7 @@ def test_concat_copy_keyword():
149153
assert np.shares_memory(get_array(df2, "b"), get_array(result, "b"))
150154

151155

156+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
152157
@pytest.mark.parametrize(
153158
"func",
154159
[
@@ -200,6 +205,7 @@ def test_merge_on_index():
200205
tm.assert_frame_equal(df2, df2_orig)
201206

202207

208+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
203209
@pytest.mark.parametrize(
204210
"func, how",
205211
[
@@ -243,6 +249,7 @@ def test_merge_copy_keyword():
243249
assert np.shares_memory(get_array(df2, "b"), get_array(result, "b"))
244250

245251

252+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
246253
def test_join_on_key():
247254
df_index = Index(["a", "b", "c"], name="key")
248255

@@ -270,6 +277,7 @@ def test_join_on_key():
270277
tm.assert_frame_equal(df2, df2_orig)
271278

272279

280+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
273281
def test_join_multiple_dataframes_on_key():
274282
df_index = Index(["a", "b", "c"], name="key")
275283

pandas/tests/copy_view/test_internals.py

+3
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
from pandas._config import using_string_dtype
5+
46
from pandas import DataFrame
57
import pandas._testing as tm
68
from pandas.tests.copy_view.util import get_array
@@ -40,6 +42,7 @@ def test_consolidate():
4042
assert df.loc[0, "b"] == 0.1
4143

4244

45+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
4346
@pytest.mark.parametrize("dtype", [np.intp, np.int8])
4447
@pytest.mark.parametrize(
4548
"locs, arr",

pandas/tests/copy_view/test_interp_fillna.py

+4
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
from pandas._config import using_string_dtype
5+
46
from pandas import (
57
NA,
68
DataFrame,
@@ -110,6 +112,7 @@ def test_interp_fill_functions_inplace(func, dtype):
110112
assert view._mgr._has_no_reference(0)
111113

112114

115+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
113116
def test_interpolate_cannot_with_object_dtype():
114117
df = DataFrame({"a": ["a", np.nan, "c"], "b": 1})
115118

@@ -118,6 +121,7 @@ def test_interpolate_cannot_with_object_dtype():
118121
df.interpolate()
119122

120123

124+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
121125
def test_interpolate_object_convert_no_op():
122126
df = DataFrame({"a": ["a", "b", "c"], "b": 1})
123127
arr_a = get_array(df, "a")

0 commit comments

Comments
 (0)