Skip to content

Commit

Permalink
TST (string dtype): xfail all currently failing tests with future.infer_string (#59329)
Browse files Browse the repository at this point in the history

* TST (string dtype): xfail all currently failing tests with future.infer_string

* more xfails

* more xfails

* add missing strict=False

* also run slow and single cpu tests

* fix single_cpu tests

* xfail some slow tests

* stop suppressing non-zero exit code from pytest on string CI build

* remove accidentally added xlsx file

---------

Co-authored-by: Matthew Roeschke <[email protected]>
  • Loading branch information
jorisvandenbossche and mroeschke authored Jul 29, 2024
1 parent aa4dc71 commit 9c8c685
Show file tree
Hide file tree
Showing 132 changed files with 543 additions and 22 deletions.
1 change: 0 additions & 1 deletion .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ jobs:
extra_loc: "zh_CN"
- name: "Future infer strings"
env_file: actions-311.yaml
pattern: "not slow and not network and not single_cpu"
pandas_future_infer_string: "1"
- name: "Pypy"
env_file: actions-pypy-39.yaml
Expand Down
6 changes: 0 additions & 6 deletions ci/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,5 @@ if [[ "$PATTERN" ]]; then
PYTEST_CMD="$PYTEST_CMD -m \"$PATTERN\""
fi

# temporarily let pytest always succeed (many tests are not yet passing in the
# build enabling the future string dtype)
if [[ "$PANDAS_FUTURE_INFER_STRING" == "1" ]]; then
PYTEST_CMD="$PYTEST_CMD || true"
fi

echo $PYTEST_CMD
sh -c "$PYTEST_CMD"
6 changes: 6 additions & 0 deletions pandas/tests/apply/test_frame_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.core.dtypes.dtypes import CategoricalDtype

import pandas as pd
Expand Down Expand Up @@ -61,6 +63,7 @@ def test_apply(float_frame, engine, request):
assert result.index is float_frame.index


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("raw", [True, False])
@pytest.mark.parametrize("nopython", [True, False])
Expand Down Expand Up @@ -1213,6 +1216,7 @@ def test_agg_with_name_as_column_name():
tm.assert_series_equal(result, expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_agg_multiple_mixed():
# GH 20909
mdf = DataFrame(
Expand Down Expand Up @@ -1338,6 +1342,7 @@ def test_named_agg_reduce_axis1_raises(float_frame):
float_frame.agg(row1=(name1, "sum"), row2=(name2, "max"), axis=axis)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_nuiscance_columns():
# GH 15015
df = DataFrame(
Expand Down Expand Up @@ -1514,6 +1519,7 @@ def test_apply_datetime_tz_issue(engine, request):
tm.assert_series_equal(result, expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize("df", [DataFrame({"A": ["a", None], "B": ["c", "d"]})])
@pytest.mark.parametrize("method", ["min", "max", "sum"])
def test_mixed_column_raises(df, method, using_infer_string):
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/apply/test_numba.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas.util._test_decorators as td

from pandas import (
Expand All @@ -17,6 +19,7 @@ def apply_axis(request):
return request.param


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_numba_vs_python_noop(float_frame, apply_axis):
func = lambda x: x
result = float_frame.apply(func, engine="numba", axis=apply_axis)
Expand All @@ -40,6 +43,7 @@ def test_numba_vs_python_string_index():
)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_numba_vs_python_indexing():
frame = DataFrame(
{"a": [1, 2, 3], "b": [4, 5, 6], "c": [7.0, 8.0, 9.0]},
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/apply/test_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.compat import WASM

from pandas.core.dtypes.common import is_number
Expand Down Expand Up @@ -79,6 +81,7 @@ def test_apply_np_transformer(float_frame, op, how):
tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize(
"series, func, expected",
chain(
Expand Down Expand Up @@ -137,6 +140,7 @@ def test_agg_cython_table_series(series, func, expected):
assert result == expected


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize(
"series, func, expected",
chain(
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/arrays/categorical/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.compat import PYPY

from pandas import (
Expand Down Expand Up @@ -294,6 +296,7 @@ def test_nbytes(self):
exp = 3 + 3 * 8 # 3 int8s for values + 3 int64s for categories
assert cat.nbytes == exp

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_memory_usage(self):
cat = Categorical([1, 2, 3])

Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/arrays/categorical/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.compat import PY311

from pandas import (
Expand Down Expand Up @@ -149,6 +151,7 @@ def test_reorder_categories_raises(self, new_categories):
with pytest.raises(ValueError, match=msg):
cat.reorder_categories(new_categories)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_add_categories(self):
cat = Categorical(["a", "b", "c", "a"], ordered=True)
old = cat.copy()
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/arrays/categorical/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,7 @@ def test_interval(self):
tm.assert_numpy_array_equal(cat.codes, expected_codes)
tm.assert_index_equal(cat.categories, idx)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_categorical_extension_array_nullable(self, nulls_fixture):
# GH:
arr = pd.arrays.StringArray._from_sequence(
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/arrays/floating/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import FloatingArray
Expand Down Expand Up @@ -122,6 +124,7 @@ def test_arith_zero_dim_ndarray(other):
# -----------------------------------------------------------------------------


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string):
op = all_arithmetic_operators
s = pd.Series(data)
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/arrays/integer/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas as pd
import pandas._testing as tm
from pandas.core import ops
Expand Down Expand Up @@ -172,6 +174,7 @@ def test_numpy_zero_dim_ndarray(other):
# -----------------------------------------------------------------------------


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string):
op = all_arithmetic_operators
s = pd.Series(data)
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/arrays/masked/test_function.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.core.dtypes.common import is_integer_dtype

import pandas as pd
Expand Down Expand Up @@ -58,6 +60,7 @@ def test_tolist(data):
tm.assert_equal(result, expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_to_numpy():
# GH#56991

Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/copy_view/test_array.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas import (
DataFrame,
Series,
Expand Down Expand Up @@ -117,6 +119,7 @@ def test_dataframe_array_ea_dtypes():
assert arr.flags.writeable is False


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_dataframe_array_string_dtype():
df = DataFrame({"a": ["a", "b"]}, dtype="string")
arr = np.asarray(df)
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/copy_view/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.compat.pyarrow import pa_version_under12p0
import pandas.util._test_decorators as td

Expand Down Expand Up @@ -82,6 +84,7 @@ def test_astype_numpy_to_ea():
assert np.shares_memory(get_array(ser), get_array(result))


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.parametrize(
"dtype, new_dtype", [("object", "string"), ("string", "object")]
)
Expand All @@ -95,6 +98,7 @@ def test_astype_string_and_object(dtype, new_dtype):
tm.assert_frame_equal(df, df_orig)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.parametrize(
"dtype, new_dtype", [("object", "string"), ("string", "object")]
)
Expand Down Expand Up @@ -195,6 +199,7 @@ def test_astype_arrow_timestamp():
assert np.shares_memory(get_array(df, "a"), get_array(result, "a")._pa_array)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_convert_dtypes_infer_objects():
ser = Series(["a", "b", "c"])
ser_orig = ser.copy()
Expand All @@ -210,6 +215,7 @@ def test_convert_dtypes_infer_objects():
tm.assert_series_equal(ser, ser_orig)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_convert_dtypes():
df = DataFrame({"a": ["a", "b"], "b": [1, 2], "c": [1.5, 2.5], "d": [True, False]})
df_orig = df.copy()
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/copy_view/test_constructors.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -207,6 +209,7 @@ def test_dataframe_from_dict_of_series_with_reindex(dtype):
assert np.shares_memory(arr_before, arr_after)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize(
"data, dtype", [([1, 2], None), ([1, 2], "int64"), (["a", "b"], None)]
)
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/copy_view/test_functions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas import (
DataFrame,
Index,
Expand All @@ -12,6 +14,7 @@
from pandas.tests.copy_view.util import get_array


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_concat_frames():
df = DataFrame({"b": ["a"] * 3})
df2 = DataFrame({"a": ["a"] * 3})
Expand All @@ -30,6 +33,7 @@ def test_concat_frames():
tm.assert_frame_equal(df, df_orig)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_concat_frames_updating_input():
df = DataFrame({"b": ["a"] * 3})
df2 = DataFrame({"a": ["a"] * 3})
Expand Down Expand Up @@ -149,6 +153,7 @@ def test_concat_copy_keyword():
assert np.shares_memory(get_array(df2, "b"), get_array(result, "b"))


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.parametrize(
"func",
[
Expand Down Expand Up @@ -200,6 +205,7 @@ def test_merge_on_index():
tm.assert_frame_equal(df2, df2_orig)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize(
"func, how",
[
Expand Down Expand Up @@ -243,6 +249,7 @@ def test_merge_copy_keyword():
assert np.shares_memory(get_array(df2, "b"), get_array(result, "b"))


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_join_on_key():
df_index = Index(["a", "b", "c"], name="key")

Expand Down Expand Up @@ -270,6 +277,7 @@ def test_join_on_key():
tm.assert_frame_equal(df2, df2_orig)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_join_multiple_dataframes_on_key():
df_index = Index(["a", "b", "c"], name="key")

Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/copy_view/test_internals.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas import DataFrame
import pandas._testing as tm
from pandas.tests.copy_view.util import get_array
Expand Down Expand Up @@ -40,6 +42,7 @@ def test_consolidate():
assert df.loc[0, "b"] == 0.1


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize("dtype", [np.intp, np.int8])
@pytest.mark.parametrize(
"locs, arr",
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/copy_view/test_interp_fillna.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas import (
NA,
DataFrame,
Expand Down Expand Up @@ -110,6 +112,7 @@ def test_interp_fill_functions_inplace(func, dtype):
assert view._mgr._has_no_reference(0)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_interpolate_cannot_with_object_dtype():
df = DataFrame({"a": ["a", np.nan, "c"], "b": 1})

Expand All @@ -118,6 +121,7 @@ def test_interpolate_cannot_with_object_dtype():
df.interpolate()


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_interpolate_object_convert_no_op():
df = DataFrame({"a": ["a", "b", "c"], "b": 1})
arr_a = get_array(df, "a")
Expand Down
Loading

0 comments on commit 9c8c685

Please sign in to comment.