[python] Drop extra index array schema name (#3033)

Drop an extra "index" field that gets added to the Arrow schema when converting a `pandas.DataFrame` with an index that is not a `pandas.RangeIndex`.
single-cell-data · Sep 23, 2024 · fd1537f · fd1537f
1 parent 7a67820
commit fd1537f
Show file tree

Hide file tree

Showing 2 changed files with 34 additions and 1 deletion.
diff --git a/apis/python/src/tiledbsoma/_arrow_types.py b/apis/python/src/tiledbsoma/_arrow_types.py
@@ -256,7 +256,9 @@ def df_to_arrow(df: pd.DataFrame) -> pa.Table:
     # the bare minimum necessary.
     new_map = {}
     for field in arrow_table.schema:
-        if pa.types.is_dictionary(field.type):
+        if field.name == "__index_level_0__":
+            continue
+        elif pa.types.is_dictionary(field.type):
             old_index_type = field.type.index_type
             new_index_type = (
                 pa.int32()

diff --git a/apis/python/tests/test_arrow_type.py b/apis/python/tests/test_arrow_type.py
@@ -0,0 +1,31 @@
+import numpy as np
+import pandas as pd
+import pyarrow as pa
+import pytest
+
+import tiledbsoma
+
+
+@pytest.mark.parametrize(
+    ("input_df", "expected"),
+    [
+        [
+            pd.DataFrame(
+                data={
+                    "id": np.array([1, 3, 5, 6], dtype=np.int64),
+                    "alpha": np.arange(4, dtype=np.float32),
+                },
+                index=[1, 3, 5, 6],
+            ),
+            pa.Table.from_pydict(
+                {
+                    "id": pa.array([1, 3, 5, 6], type=pa.int64()),
+                    "alpha": pa.array([0, 1, 2, 3], type=pa.float32()),
+                }
+            ),
+        ],
+    ],
+)
+def test_df_to_arrow(input_df: pd.DataFrame, expected: pa.Table):
+    actual = tiledbsoma._arrow_types.df_to_arrow(input_df)
+    assert actual == expected