Skip to content

Commit

Permalink
[python] Drop extra index array schema name (#3033)
Browse files Browse the repository at this point in the history
Drop an extra "index" field that gets added to the arrow schema when converting a `pandas.DataFrame` with an index that is not a `pandas.RangeIndex`.
  • Loading branch information
jp-dark authored Sep 23, 2024
1 parent 7a67820 commit fd1537f
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 1 deletion.
4 changes: 3 additions & 1 deletion apis/python/src/tiledbsoma/_arrow_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,9 @@ def df_to_arrow(df: pd.DataFrame) -> pa.Table:
# the bare minimum necessary.
new_map = {}
for field in arrow_table.schema:
if pa.types.is_dictionary(field.type):
if field.name == "__index_level_0__":
continue
elif pa.types.is_dictionary(field.type):
old_index_type = field.type.index_type
new_index_type = (
pa.int32()
Expand Down
31 changes: 31 additions & 0 deletions apis/python/tests/test_arrow_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import numpy as np
import pandas as pd
import pyarrow as pa
import pytest

import tiledbsoma


@pytest.mark.parametrize(
    ("input_df", "expected"),
    [
        (
            # A frame with an explicit integer index (not a RangeIndex).
            pd.DataFrame(
                {
                    "id": np.array([1, 3, 5, 6], dtype=np.int64),
                    "alpha": np.array([0, 1, 2, 3], dtype=np.float32),
                },
                index=[1, 3, 5, 6],
            ),
            # The expected table holds only the two data columns.
            pa.Table.from_pydict(
                {
                    "id": pa.array([1, 3, 5, 6], type=pa.int64()),
                    "alpha": pa.array([0, 1, 2, 3], type=pa.float32()),
                }
            ),
        ),
    ],
)
def test_df_to_arrow(input_df: pd.DataFrame, expected: pa.Table):
    """Check that ``df_to_arrow`` converts ``input_df`` to exactly ``expected``.

    The parametrized case passes a DataFrame with an explicit integer index
    and expects an Arrow table containing only the ``id`` and ``alpha``
    columns.
    """
    converted = tiledbsoma._arrow_types.df_to_arrow(input_df)
    assert converted == expected

0 comments on commit fd1537f

Please sign in to comment.