From d1938262e4ad81fc5afeee2334b559523c28127c Mon Sep 17 00:00:00 2001 From: Konstantin Malanchev Date: Wed, 29 May 2024 17:31:45 -0400 Subject: [PATCH] NestedExtensionArray.chunked_array --- src/nested_pandas/series/ext_array.py | 5 +++++ tests/nested_pandas/series/test_ext_array.py | 19 +++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/nested_pandas/series/ext_array.py b/src/nested_pandas/series/ext_array.py index 04def7c..388e786 100644 --- a/src/nested_pandas/series/ext_array.py +++ b/src/nested_pandas/series/ext_array.py @@ -558,6 +558,11 @@ def _pyarrow_dtype(self) -> pa.DataType: """PyArrow data type of the extension array""" return self._dtype.pyarrow_dtype + @property + def chunked_array(self) -> pa.ChunkedArray: + """The underlying PyArrow ChunkedArray""" + return self._chunked_array + @staticmethod def _validate(array: pa.ChunkedArray) -> None: """Raises ValueError if the input array is not a struct array with all fields being diff --git a/tests/nested_pandas/series/test_ext_array.py b/tests/nested_pandas/series/test_ext_array.py index 136f478..c44732c 100644 --- a/tests/nested_pandas/series/test_ext_array.py +++ b/tests/nested_pandas/series/test_ext_array.py @@ -204,7 +204,7 @@ def test_from_sequence_with_arrow_array_and_dtype(): type=pa_type, ) - actual = NestedExtensionArray.from_sequence(pa_array, dtype=new_pa_type)._chunked_array + actual = NestedExtensionArray.from_sequence(pa_array, dtype=new_pa_type).chunked_array desired = pa.chunked_array([pa_array.cast(new_pa_type)]) # pyarrow doesn't convert pandas boxed missing values to nulls in nested arrays assert actual == desired @@ -525,7 +525,7 @@ def test___setitem___series_of_dfs(): ) desired = NestedExtensionArray(desired_struct_array) - assert ext_array._chunked_array == desired._chunked_array + assert ext_array.chunked_array == desired.chunked_array assert ext_array.equals(desired) @@ -588,6 +588,21 @@ def test_series_built_raises(data): _array = NestedExtensionArray(pa_array) +def test_chunked_array(): + """Test that the .chunked_array property is correct.""" + struct_array = pa.StructArray.from_arrays( + arrays=[ + pa.array([np.array([1, 2, 3]), np.array([1, 2, 1])]), + pa.array([-np.array([4.0, 5.0, 6.0]), -np.array([3.0, 4.0, 5.0])]), + ], + names=["a", "b"], + ) + ext_array = NestedExtensionArray(struct_array) + + # pyarrow returns a single bool for == + assert ext_array.chunked_array == pa.chunked_array(struct_array) + + def test_list_offsets_single_chunk(): """Test that the .list_offset property is correct for a single chunk.""" struct_array = pa.StructArray.from_arrays(