@@ -188,6 +188,11 @@ class Device(enum.IntEnum):
188
188
189
189
return (Device .CPU , None )
190
190
191
def __repr__(self) -> str:
    """
    Return a short debugging representation of this buffer.

    The result is the literal text ``PandasBuffer(`` followed by the ``str``
    of a dict with the keys ``bufsize``, ``ptr`` and ``device``, and a
    closing ``)``. The ``device`` entry is the ``name`` of the first element
    returned by ``__dlpack_device__()``.
    """
    # Build the dict in the same key order the repr is expected to show.
    summary = {'bufsize': self.bufsize, 'ptr': self.ptr}
    summary['device'] = self.__dlpack_device__()[0].name
    return f"PandasBuffer({summary})"
191
196
192
197
class _PandasColumn :
193
198
"""
@@ -313,10 +318,19 @@ def describe_categorical(self) -> Dict[str, Any]:
313
318
categorical values to other objects exists
314
319
- "mapping" : dict, Python-level only (e.g. ``{int: str}``).
315
320
None if not a dictionary-style categorical.
316
-
317
- TBD: are there any other in-memory representations that are needed?
318
321
"""
319
- raise NotImplementedError ("TODO" )
322
+ if not self .dtype [0 ] == _DtypeKind .CATEGORICAL :
323
+ raise TypeError ("`describe_categorical only works on a column with "
324
+ "categorical dtype!" )
325
+
326
+ ordered = self ._col .dtype .ordered
327
+ is_dictionary = False
328
+ # NOTE: this shows the children approach is better, transforming this
329
+ # to a "mapping" dict would be inefficient
330
+ codes = self ._col .values .codes # ndarray, length `self.size`
331
+ # categories.values is ndarray of length n_categories
332
+ categories = self ._col .values .categories
333
+ return ordered , is_dictionary , None
320
334
321
335
@property
322
336
def describe_null (self ) -> Tuple [int , Any ]:
@@ -490,7 +504,17 @@ def test_categorical_dtype():
490
504
df = pd .DataFrame ({"A" : [1 , 2 , 3 , 1 ]})
491
505
df ["B" ] = df ["A" ].astype ("category" )
492
506
df .at [1 , 'B' ] = np .nan # Set one item to null
507
+
508
+ # Some detailed testing for correctness of dtype and null handling:
509
+ col = df .__dataframe__ ().get_column_by_name ('B' )
510
+ assert col .dtype [0 ] == _DtypeKind .CATEGORICAL
511
+ assert col .null_count == 1
512
+ assert col .describe_null == (2 , - 1 ) # sentinel value -1
513
+ assert col .num_chunks () == 1
514
+ assert col .describe_categorical == (False , False , None )
515
+
493
516
df2 = from_dataframe (df )
517
+ tm .assert_frame_equal (df , df2 )
494
518
495
519
496
520
if __name__ == '__main__' :
0 commit comments