Skip to content

Commit 552b794

Browse files
committed
Add describe_categorical support and a buffer __repr__
1 parent 9c2717b commit 552b794

File tree

1 file changed

+27
-3
lines changed

1 file changed

+27
-3
lines changed

protocol/pandas_implementation.py

+27-3
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,11 @@ class Device(enum.IntEnum):
188188

189189
return (Device.CPU, None)
190190

191+
def __repr__(self) -> str:
    """
    Return a short, human-readable summary of this buffer.

    The text has the form ``PandasBuffer({...})``, where the braces hold the
    ``str()`` of a dict with the keys ``bufsize``, ``ptr`` and ``device``.
    ``device`` is the ``name`` of the first item returned by
    ``__dlpack_device__``.
    """
    # Attributes are read in the same order as the dict keys are emitted.
    summary = {}
    summary['bufsize'] = self.bufsize
    summary['ptr'] = self.ptr
    summary['device'] = self.__dlpack_device__()[0].name
    return ''.join(('PandasBuffer(', str(summary), ')'))
191196

192197
class _PandasColumn:
193198
"""
@@ -313,10 +318,19 @@ def describe_categorical(self) -> Dict[str, Any]:
313318
categorical values to other objects exists
314319
- "mapping" : dict, Python-level only (e.g. ``{int: str}``).
315320
None if not a dictionary-style categorical.
316-
317-
TBD: are there any other in-memory representations that are needed?
318321
"""
319-
raise NotImplementedError("TODO")
322+
if not self.dtype[0] == _DtypeKind.CATEGORICAL:
323+
raise TypeError("`describe_categorical only works on a column with "
324+
"categorical dtype!")
325+
326+
ordered = self._col.dtype.ordered
327+
is_dictionary = False
328+
# NOTE: this shows the children approach is better, transforming this
329+
# to a "mapping" dict would be inefficient
330+
codes = self._col.values.codes # ndarray, length `self.size`
331+
# categories.values is ndarray of length n_categories
332+
categories = self._col.values.categories
333+
return ordered, is_dictionary, None
320334

321335
@property
322336
def describe_null(self) -> Tuple[int, Any]:
@@ -490,7 +504,17 @@ def test_categorical_dtype():
490504
df = pd.DataFrame({"A": [1, 2, 3, 1]})
491505
df["B"] = df["A"].astype("category")
492506
df.at[1, 'B'] = np.nan # Set one item to null
507+
508+
# Some detailed testing for correctness of dtype and null handling:
509+
col = df.__dataframe__().get_column_by_name('B')
510+
assert col.dtype[0] == _DtypeKind.CATEGORICAL
511+
assert col.null_count == 1
512+
assert col.describe_null == (2, -1) # sentinel value -1
513+
assert col.num_chunks() == 1
514+
assert col.describe_categorical == (False, False, None)
515+
493516
df2 = from_dataframe(df)
517+
tm.assert_frame_equal(df, df2)
494518

495519

496520
if __name__ == '__main__':

0 commit comments

Comments
 (0)