Skip to content

Commit

Permalink
BUG: preserve (object) dtype in factorize (#60118)
Browse files Browse the repository at this point in the history
* BUG: preserve (object) dtype in factorize

* add fallback for float16
  • Loading branch information
jorisvandenbossche authored Oct 31, 2024
1 parent 1908f2e commit 13926e5
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 4 deletions.
12 changes: 9 additions & 3 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCIndex,
ABCMultiIndex,
ABCSeries,
)
from pandas.core.dtypes.missing import (
Expand Down Expand Up @@ -1287,13 +1288,18 @@ def factorize(
if uniques.dtype == np.float16:
uniques = uniques.astype(np.float32)

if isinstance(self, ABCIndex):
# preserve e.g. MultiIndex
if isinstance(self, ABCMultiIndex):
# preserve MultiIndex
uniques = self._constructor(uniques)
else:
from pandas import Index

uniques = Index(uniques)
try:
uniques = Index(uniques, dtype=self.dtype)
except NotImplementedError:
# Index does not support every dtype that Series allows
# (e.g. float16 or bytes), so fall back to building the Index without an explicit dtype
uniques = Index(uniques)
return codes, uniques

_shared_docs["searchsorted"] = """
Expand Down
1 change: 0 additions & 1 deletion pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ def test_factorize_complex(self):
expected_uniques = np.array([(1 + 0j), (2 + 0j), (2 + 1j)], dtype=complex)
tm.assert_numpy_array_equal(uniques, expected_uniques)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
def test_factorize(self, index_or_series_obj, sort):
obj = index_or_series_obj
result_codes, result_uniques = obj.factorize(sort=sort)
Expand Down

0 comments on commit 13926e5

Please sign in to comment.