Skip to content

Commit b5d0615

Browse files
Backport PR #60222 on branch 2.3.x (ENH (string dtype): accept string_view in addition to string/large_string for ArrowStringArray input) (#60223)
Backport PR #60222: ENH (string dtype): accept string_view in addition to string/large_string for ArrowStringArray input Co-authored-by: Joris Van den Bossche <[email protected]>
1 parent 678266c commit b5d0615

File tree

2 files changed

+21
-0
lines changed

2 files changed

+21
-0
lines changed

pandas/core/arrays/string_arrow.py

+7
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from pandas.compat import (
1919
pa_version_under10p1,
2020
pa_version_under13p0,
21+
pa_version_under16p0,
2122
)
2223
from pandas.util._exceptions import find_stack_level
2324

@@ -65,6 +66,10 @@ def _chk_pyarrow_available() -> None:
6566
raise ImportError(msg)
6667

6768

69+
def _is_string_view(typ):
    """
    Return whether ``typ`` is a pyarrow ``string_view`` type.

    The check is gated on ``pa_version_under16p0``: when that flag is set,
    ``pa.types.is_string_view`` is never called and the result is ``False``.
    """
    if pa_version_under16p0:
        # Skip the pyarrow predicate entirely on versions flagged as too old.
        return False
    return pa.types.is_string_view(typ)
71+
72+
6873
# TODO: Inherit directly from BaseStringArrayMethods. Currently we inherit from
6974
# ObjectStringArrayMixin because we want to have the object-dtype based methods as
7075
# fallback for the ones that pyarrow doesn't yet support
@@ -122,11 +127,13 @@ def __init__(self, values) -> None:
122127
_chk_pyarrow_available()
123128
if isinstance(values, (pa.Array, pa.ChunkedArray)) and (
124129
pa.types.is_string(values.type)
130+
or _is_string_view(values.type)
125131
or (
126132
pa.types.is_dictionary(values.type)
127133
and (
128134
pa.types.is_string(values.type.value_type)
129135
or pa.types.is_large_string(values.type.value_type)
136+
or _is_string_view(values.type.value_type)
130137
)
131138
)
132139
):

pandas/tests/arrays/string_/test_string_arrow.py

+14
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,20 @@ def test_constructor_valid_string_type_value_dictionary(string_type, chunked):
9999
assert pa.types.is_large_string(arr._pa_array.type)
100100

101101

102+
@pytest.mark.parametrize("chunked", [True, False])
def test_constructor_valid_string_view(chunked):
    # requires pyarrow>=18 for casting string_view to string
    pa = pytest.importorskip("pyarrow", minversion="18")

    # Build string_view input, optionally wrapped in a ChunkedArray.
    values = pa.array(["1", "2", "3"], pa.string_view())
    data = pa.chunked_array(values) if chunked else values

    result = ArrowStringArray(data)
    # string_view input gets converted to a dense large string array
    assert pa.types.is_large_string(result._pa_array.type)
114+
115+
102116
def test_constructor_from_list():
103117
# GH#27673
104118
pytest.importorskip("pyarrow")

0 commit comments

Comments
 (0)