Skip to content

Commit

Permalink
TST (string dtype): resolve all infer_string TODO/xfails in pandas/tests/arrays (#59686)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche authored Oct 10, 2024
1 parent 88554d0 commit 4f328f0
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 18 deletions.
6 changes: 5 additions & 1 deletion pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,9 +427,13 @@ def _reduce(
arr = pc.or_kleene(nas, pc.not_equal(self._pa_array, ""))
else:
arr = pc.not_equal(self._pa_array, "")
return ArrowExtensionArray(arr)._reduce(
result = ArrowExtensionArray(arr)._reduce(
name, skipna=skipna, keepdims=keepdims, **kwargs
)
if keepdims:
# ArrowExtensionArray will return a length-1 bool[pyarrow] array
return result.astype(np.bool_)
return result

result = self._reduce_calc(name, skipna=skipna, keepdims=keepdims, **kwargs)
if name in ("argmin", "argmax") and isinstance(result, pa.Array):
Expand Down
20 changes: 9 additions & 11 deletions pandas/tests/arrays/categorical/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,7 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.compat import (
HAS_PYARROW,
PYPY,
)
from pandas.compat import PYPY

from pandas import (
Categorical,
Expand Down Expand Up @@ -299,18 +294,21 @@ def test_nbytes(self):
exp = 3 + 3 * 8 # 3 int8s for values + 3 int64s for categories
assert cat.nbytes == exp

@pytest.mark.xfail(
using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)"
)
def test_memory_usage(self):
def test_memory_usage(self, using_infer_string):
cat = Categorical([1, 2, 3])

# .categories is an index, so we include the hashtable
assert 0 < cat.nbytes <= cat.memory_usage()
assert 0 < cat.nbytes <= cat.memory_usage(deep=True)

cat = Categorical(["foo", "foo", "bar"])
assert cat.memory_usage(deep=True) > cat.nbytes
if using_infer_string:
if cat.categories.dtype.storage == "python":
assert cat.memory_usage(deep=True) > cat.nbytes
else:
assert cat.memory_usage(deep=True) >= cat.nbytes
else:
assert cat.memory_usage(deep=True) > cat.nbytes

if not PYPY:
# sys.getsizeof will call the .memory_usage with
Expand Down
7 changes: 1 addition & 6 deletions pandas/tests/arrays/integer/test_reduction.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import numpy as np
import pytest

from pandas.compat import HAS_PYARROW

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -104,10 +102,7 @@ def test_groupby_reductions(op, expected):
["all", Series([True, True, True], index=["A", "B", "C"], dtype="boolean")],
],
)
def test_mixed_reductions(request, op, expected, using_infer_string):
if op in ["any", "all"] and using_infer_string and HAS_PYARROW:
# TODO(infer_string) inconsistent result type
request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
def test_mixed_reductions(op, expected):
df = DataFrame(
{
"A": ["a", "b", "b"],
Expand Down

0 comments on commit 4f328f0

Please sign in to comment.