From 0448823f2f737439bebc7f7664aad21ae4335964 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <emailformattr@gmail.com>
Date: Thu, 28 Jul 2022 13:22:46 -0700
Subject: [PATCH 1/4] REF: PandasColumn.describe_categorical return categories
 instead of mapping

---
 pandas/core/exchange/column.py             | 13 ++++++++-----
 pandas/core/exchange/dataframe_protocol.py | 11 ++++++-----
 pandas/core/exchange/from_dataframe.py     |  4 +---
 pandas/tests/exchange/test_impl.py         | 13 ++++++++-----
 4 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/pandas/core/exchange/column.py b/pandas/core/exchange/column.py
index c2a1cfe766b22..c7553fad5cb3f 100644
--- a/pandas/core/exchange/column.py
+++ b/pandas/core/exchange/column.py
@@ -145,15 +145,18 @@ def describe_categorical(self):
         """
         If the dtype is categorical, there are two options:
         - There are only values in the data buffer.
-        - There is a separate dictionary-style encoding for categorical values.
-        Raises RuntimeError if the dtype is not categorical
+        - There is a separate non-categorical Column encoding for categorical values.
+
+        Raises TypeError if the dtype is not categorical
+
         Content of returned dict:
             - "is_ordered" : bool, whether the ordering of dictionary indices is
                              semantically meaningful.
             - "is_dictionary" : bool, whether a dictionary-style mapping of
                                 categorical values to other objects exists
-            - "mapping" : dict, Python-level only (e.g. ``{int: str}``).
-                          None if not a dictionary-style categorical.
+            - "categories" : Column representing the (implicit) mapping of indices to
+                             category values (e.g. an array of cat1, cat2, ...).
+                             None if not a dictionary-style categorical.
         """
         if not self.dtype[0] == DtypeKind.CATEGORICAL:
             raise TypeError(
@@ -163,7 +166,7 @@ def describe_categorical(self):
         return {
             "is_ordered": self._col.cat.ordered,
             "is_dictionary": True,
-            "mapping": dict(enumerate(self._col.cat.categories)),
+            "categories": PandasColumn(pd.Series(self._col.cat.categories)),
         }
 
     @property
diff --git a/pandas/core/exchange/dataframe_protocol.py b/pandas/core/exchange/dataframe_protocol.py
index 367b906332741..51e02ab313214 100644
--- a/pandas/core/exchange/dataframe_protocol.py
+++ b/pandas/core/exchange/dataframe_protocol.py
@@ -110,7 +110,7 @@ class CategoricalDescription(TypedDict):
     is_dictionary: bool
     # Python-level only (e.g. ``{int: str}``).
     # None if not a dictionary-style categorical.
-    mapping: dict | None
+    categories: Column | None
 
 
 class Buffer(ABC):
@@ -274,17 +274,18 @@ def describe_categorical(self) -> CategoricalDescription:
         """
         If the dtype is categorical, there are two options:
         - There are only values in the data buffer.
-        - There is a separate dictionary-style encoding for categorical values.
+        - There is a separate non-categorical Column encoding for categorical values.
 
         Raises TypeError if the dtype is not categorical
 
         Returns the dictionary with description on how to interpret the data buffer:
             - "is_ordered" : bool, whether the ordering of dictionary indices is
                              semantically meaningful.
-            - "is_dictionary" : bool, whether a dictionary-style mapping of
+            - "is_dictionary" : bool, whether a mapping of
                                 categorical values to other objects exists
-            - "mapping" : dict, Python-level only (e.g. ``{int: str}``).
-                          None if not a dictionary-style categorical.
+            - "categories" : Column representing the (implicit) mapping of indices to
+                             category values (e.g. an array of cat1, cat2, ...).
+                             None if not a dictionary-style categorical.
 
         TBD: are there any other in-memory representations that are needed?
         """
diff --git a/pandas/core/exchange/from_dataframe.py b/pandas/core/exchange/from_dataframe.py
index a33e47ba3b68e..07ee538011ae9 100644
--- a/pandas/core/exchange/from_dataframe.py
+++ b/pandas/core/exchange/from_dataframe.py
@@ -179,9 +179,7 @@ def categorical_column_to_series(col: Column) -> tuple[pd.Series, Any]:
     if not categorical["is_dictionary"]:
         raise NotImplementedError("Non-dictionary categoricals not supported yet")
 
-    mapping = categorical["mapping"]
-    assert isinstance(mapping, dict), "Categorical mapping must be a dict"
-    categories = np.array(tuple(mapping[k] for k in sorted(mapping)))
+    categories = np.array(categorical["categories"]._col)
     buffers = col.get_buffers()
 
     codes_buff, codes_dtype = buffers["data"]
diff --git a/pandas/tests/exchange/test_impl.py b/pandas/tests/exchange/test_impl.py
index e0e9fdce645d0..de5ea83a58a47 100644
--- a/pandas/tests/exchange/test_impl.py
+++ b/pandas/tests/exchange/test_impl.py
@@ -6,6 +6,7 @@
 
 import pandas as pd
 import pandas._testing as tm
+from pandas.core.exchange.column import PandasColumn
 from pandas.core.exchange.dataframe_protocol import (
     ColumnNullType,
     DtypeKind,
@@ -59,11 +60,13 @@ def test_categorical_dtype(data):
     assert col.null_count == 0
     assert col.describe_null == (ColumnNullType.USE_SENTINEL, -1)
     assert col.num_chunks() == 1
-    assert col.describe_categorical == {
-        "is_ordered": data[1],
-        "is_dictionary": True,
-        "mapping": {0: "a", 1: "d", 2: "e", 3: "s", 4: "t"},
-    }
+    desc_cat = col.describe_categorical
+    assert desc_cat["is_ordered"] == data[1]
+    assert desc_cat["is_dictionary"] is True
+    assert isinstance(desc_cat["categories"], PandasColumn)
+    tm.assert_series_equal(
+        desc_cat["categories"]._col, pd.Series(["a", "d", "e", "s", "t"])
+    )
 
     tm.assert_frame_equal(df, from_dataframe(df.__dataframe__()))
 

From 23a9ce3e180748c422cdd1f68a0604563d44d3b0 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <emailformattr@gmail.com>
Date: Thu, 28 Jul 2022 15:57:58 -0700
Subject: [PATCH 2/4] ignore typing

---
 pandas/core/exchange/from_dataframe.py | 2 +-
 pyright_reportGeneralTypeIssues.json   | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/exchange/from_dataframe.py b/pandas/core/exchange/from_dataframe.py
index 07ee538011ae9..80a629393abd2 100644
--- a/pandas/core/exchange/from_dataframe.py
+++ b/pandas/core/exchange/from_dataframe.py
@@ -179,7 +179,7 @@ def categorical_column_to_series(col: Column) -> tuple[pd.Series, Any]:
     if not categorical["is_dictionary"]:
         raise NotImplementedError("Non-dictionary categoricals not supported yet")
 
-    categories = np.array(categorical["categories"]._col)
+    categories = np.array(categorical["categories"]._col)  # type:ignore[union-attr]
     buffers = col.get_buffers()
 
     codes_buff, codes_dtype = buffers["data"]
diff --git a/pyright_reportGeneralTypeIssues.json b/pyright_reportGeneralTypeIssues.json
index c482aa32600fb..4318971cdbe96 100644
--- a/pyright_reportGeneralTypeIssues.json
+++ b/pyright_reportGeneralTypeIssues.json
@@ -36,6 +36,7 @@
         "pandas/core/arrays/timedeltas.py",
         "pandas/core/computation/align.py",
         "pandas/core/construction.py",
+        "pandas/core/exchange/from_dataframe.py",
         "pandas/core/dtypes/cast.py",
         "pandas/core/dtypes/common.py",
         "pandas/core/dtypes/concat.py",

From 5a2b5afa0fa17fc5356625243315621e87e7f3af Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <emailformattr@gmail.com>
Date: Mon, 1 Aug 2022 11:00:20 -0700
Subject: [PATCH 3/4] Change naming

---
 pyright_reportGeneralTypeIssues.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyright_reportGeneralTypeIssues.json b/pyright_reportGeneralTypeIssues.json
index 4318971cdbe96..cc24280958a8a 100644
--- a/pyright_reportGeneralTypeIssues.json
+++ b/pyright_reportGeneralTypeIssues.json
@@ -36,7 +36,6 @@
         "pandas/core/arrays/timedeltas.py",
         "pandas/core/computation/align.py",
         "pandas/core/construction.py",
-        "pandas/core/exchange/from_dataframe.py",
         "pandas/core/dtypes/cast.py",
         "pandas/core/dtypes/common.py",
         "pandas/core/dtypes/concat.py",
@@ -59,6 +58,7 @@
         "pandas/core/indexes/numeric.py",
         "pandas/core/indexes/period.py",
         "pandas/core/indexing.py",
+        "pandas/core/interchange/from_dataframe.py",
         "pandas/core/internals/api.py",
         "pandas/core/internals/array_manager.py",
         "pandas/core/internals/blocks.py",

From a731b11e88c7988698d5141ab91b0da34b28d969 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <emailformattr@gmail.com>
Date: Tue, 9 Aug 2022 10:54:33 -0700
Subject: [PATCH 4/4] Add back assert for typing

---
 pandas/core/interchange/from_dataframe.py | 6 +++++-
 pyright_reportGeneralTypeIssues.json      | 1 -
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py
index 737f4737f2825..6e1b2de10e8e6 100644
--- a/pandas/core/interchange/from_dataframe.py
+++ b/pandas/core/interchange/from_dataframe.py
@@ -7,6 +7,7 @@
 import numpy as np
 
 import pandas as pd
+from pandas.core.interchange.column import PandasColumn
 from pandas.core.interchange.dataframe_protocol import (
     Buffer,
     Column,
@@ -179,7 +180,10 @@ def categorical_column_to_series(col: Column) -> tuple[pd.Series, Any]:
     if not categorical["is_dictionary"]:
         raise NotImplementedError("Non-dictionary categoricals not supported yet")
 
-    categories = np.array(categorical["categories"]._col)  # type:ignore[union-attr]
+    cat_column = categorical["categories"]
+    # for mypy/pyright
+    assert isinstance(cat_column, PandasColumn), "categories must be a PandasColumn"
+    categories = np.array(cat_column._col)
     buffers = col.get_buffers()
 
     codes_buff, codes_dtype = buffers["data"]
diff --git a/pyright_reportGeneralTypeIssues.json b/pyright_reportGeneralTypeIssues.json
index cc24280958a8a..c482aa32600fb 100644
--- a/pyright_reportGeneralTypeIssues.json
+++ b/pyright_reportGeneralTypeIssues.json
@@ -58,7 +58,6 @@
         "pandas/core/indexes/numeric.py",
         "pandas/core/indexes/period.py",
         "pandas/core/indexing.py",
-        "pandas/core/interchange/from_dataframe.py",
         "pandas/core/internals/api.py",
         "pandas/core/internals/array_manager.py",
         "pandas/core/internals/blocks.py",