Skip to content

Commit 1b0ef02

Browse files
authored
Update pyo3 from 0.20 to 0.21 (#5566)
* Update pyo3 from 0.20 to 0.21 * Bump pyo3 in arrow-pyarrow-integration-testing * Update pyarrow API to align with pyo3 0.21 changes * Fix arrow-pyarrow-integration-testing clippy * Minor * Fix typo * Use PyBackedStr when extracting * Bump to pyo3 0.21.1 * Trigger
1 parent bc2a73d commit 1b0ef02

File tree

5 files changed

+67
-61
lines changed

5 files changed

+67
-61
lines changed

arrow-pyarrow-integration-testing/Cargo.toml

+1-1
Original file line number | Diff line number | Diff line change
@@ -34,4 +34,4 @@ crate-type = ["cdylib"]
3434

3535
[dependencies]
3636
arrow = { path = "../arrow", features = ["pyarrow"] }
37-
pyo3 = { version = "0.20", features = ["extension-module"] }
37+
pyo3 = { version = "0.21.1", features = ["extension-module"] }

arrow-pyarrow-integration-testing/src/lib.rs

+9-16
Original file line number | Diff line number | Diff line change
@@ -40,9 +40,9 @@ fn to_py_err(err: ArrowError) -> PyErr {
4040

4141
/// Returns `array + array` of an int64 array.
4242
#[pyfunction]
43-
fn double(array: &PyAny, py: Python) -> PyResult<PyObject> {
43+
fn double(array: &Bound<PyAny>, py: Python) -> PyResult<PyObject> {
4444
// import
45-
let array = make_array(ArrayData::from_pyarrow(array)?);
45+
let array = make_array(ArrayData::from_pyarrow_bound(&array)?);
4646

4747
// perform some operation
4848
let array = array
@@ -60,15 +60,15 @@ fn double(array: &PyAny, py: Python) -> PyResult<PyObject> {
6060
/// calls a lambda function that receives and returns an array
6161
/// whose result must be the array multiplied by two
6262
#[pyfunction]
63-
fn double_py(lambda: &PyAny, py: Python) -> PyResult<bool> {
63+
fn double_py(lambda: &Bound<PyAny>, py: Python) -> PyResult<bool> {
6464
// create
6565
let array = Arc::new(Int64Array::from(vec![Some(1), None, Some(3)]));
6666
let expected = Arc::new(Int64Array::from(vec![Some(2), None, Some(6)])) as ArrayRef;
6767

6868
// to py
6969
let pyarray = array.to_data().to_pyarrow(py)?;
7070
let pyarray = lambda.call1((pyarray,))?;
71-
let array = make_array(ArrayData::from_pyarrow(pyarray)?);
71+
let array = make_array(ArrayData::from_pyarrow_bound(&pyarray)?);
7272

7373
Ok(array == expected)
7474
}
@@ -82,16 +82,12 @@ fn make_empty_array(datatype: PyArrowType<DataType>, py: Python) -> PyResult<PyO
8282

8383
/// Returns the substring
8484
#[pyfunction]
85-
fn substring(
86-
array: PyArrowType<ArrayData>,
87-
start: i64,
88-
) -> PyResult<PyArrowType<ArrayData>> {
85+
fn substring(array: PyArrowType<ArrayData>, start: i64) -> PyResult<PyArrowType<ArrayData>> {
8986
// import
9087
let array = make_array(array.0);
9188

9289
// substring
93-
let array =
94-
kernels::substring::substring(array.as_ref(), start, None).map_err(to_py_err)?;
90+
let array = kernels::substring::substring(array.as_ref(), start, None).map_err(to_py_err)?;
9591

9692
Ok(array.to_data().into())
9793
}
@@ -102,8 +98,7 @@ fn concatenate(array: PyArrowType<ArrayData>, py: Python) -> PyResult<PyObject>
10298
let array = make_array(array.0);
10399

104100
// concat
105-
let array =
106-
kernels::concat::concat(&[array.as_ref(), array.as_ref()]).map_err(to_py_err)?;
101+
let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()]).map_err(to_py_err)?;
107102

108103
array.to_data().to_pyarrow(py)
109104
}
@@ -129,9 +124,7 @@ fn round_trip_array(obj: PyArrowType<ArrayData>) -> PyResult<PyArrowType<ArrayDa
129124
}
130125

131126
#[pyfunction]
132-
fn round_trip_record_batch(
133-
obj: PyArrowType<RecordBatch>,
134-
) -> PyResult<PyArrowType<RecordBatch>> {
127+
fn round_trip_record_batch(obj: PyArrowType<RecordBatch>) -> PyResult<PyArrowType<RecordBatch>> {
135128
Ok(obj)
136129
}
137130

@@ -168,7 +161,7 @@ fn boxed_reader_roundtrip(
168161
}
169162

170163
#[pymodule]
171-
fn arrow_pyarrow_integration_testing(_py: Python, m: &PyModule) -> PyResult<()> {
164+
fn arrow_pyarrow_integration_testing(_py: Python, m: &Bound<PyModule>) -> PyResult<()> {
172165
m.add_wrapped(wrap_pyfunction!(double))?;
173166
m.add_wrapped(wrap_pyfunction!(double_py))?;
174167
m.add_wrapped(wrap_pyfunction!(make_empty_array))?;

arrow/Cargo.toml

+1-1
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,7 @@ arrow-select = { workspace = true }
5454
arrow-string = { workspace = true }
5555

5656
rand = { version = "0.8", default-features = false, features = ["std", "std_rng"], optional = true }
57-
pyo3 = { version = "0.20", default-features = false, optional = true }
57+
pyo3 = { version = "0.21.1", default-features = false, optional = true }
5858

5959
[package.metadata.docs.rs]
6060
features = ["prettyprint", "ipc_compression", "ffi", "pyarrow"]

arrow/src/pyarrow.rs

+54-41
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,7 @@ use pyo3::exceptions::{PyTypeError, PyValueError};
6464
use pyo3::ffi::Py_uintptr_t;
6565
use pyo3::import_exception;
6666
use pyo3::prelude::*;
67+
use pyo3::pybacked::PyBackedStr;
6768
use pyo3::types::{PyCapsule, PyList, PyTuple};
6869

6970
use crate::array::{make_array, ArrayData};
@@ -82,7 +83,12 @@ fn to_py_err(err: ArrowError) -> PyErr {
8283
}
8384

8485
pub trait FromPyArrow: Sized {
85-
fn from_pyarrow(value: &PyAny) -> PyResult<Self>;
86+
#[deprecated(since = "52.0.0", note = "Use from_pyarrow_bound")]
87+
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
88+
Self::from_pyarrow_bound(&value.as_borrowed())
89+
}
90+
91+
fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self>;
8692
}
8793

8894
/// Create a new PyArrow object from an arrow-rs type.
@@ -101,15 +107,17 @@ impl<T: ToPyArrow> IntoPyArrow for T {
101107
}
102108
}
103109

104-
fn validate_class(expected: &str, value: &PyAny) -> PyResult<()> {
105-
let pyarrow = PyModule::import(value.py(), "pyarrow")?;
110+
fn validate_class(expected: &str, value: &Bound<PyAny>) -> PyResult<()> {
111+
let pyarrow = PyModule::import_bound(value.py(), "pyarrow")?;
106112
let class = pyarrow.getattr(expected)?;
107-
if !value.is_instance(class)? {
108-
let expected_module = class.getattr("__module__")?.extract::<&str>()?;
109-
let expected_name = class.getattr("__name__")?.extract::<&str>()?;
113+
if !value.is_instance(&class)? {
114+
let expected_module = class.getattr("__module__")?.extract::<PyBackedStr>()?;
115+
let expected_name = class.getattr("__name__")?.extract::<PyBackedStr>()?;
110116
let found_class = value.get_type();
111-
let found_module = found_class.getattr("__module__")?.extract::<&str>()?;
112-
let found_name = found_class.getattr("__name__")?.extract::<&str>()?;
117+
let found_module = found_class
118+
.getattr("__module__")?
119+
.extract::<PyBackedStr>()?;
120+
let found_name = found_class.getattr("__name__")?.extract::<PyBackedStr>()?;
113121
return Err(PyTypeError::new_err(format!(
114122
"Expected instance of {}.{}, got {}.{}",
115123
expected_module, expected_name, found_module, found_name
@@ -118,7 +126,7 @@ fn validate_class(expected: &str, value: &PyAny) -> PyResult<()> {
118126
Ok(())
119127
}
120128

121-
fn validate_pycapsule(capsule: &PyCapsule, name: &str) -> PyResult<()> {
129+
fn validate_pycapsule(capsule: &Bound<PyCapsule>, name: &str) -> PyResult<()> {
122130
let capsule_name = capsule.name()?;
123131
if capsule_name.is_none() {
124132
return Err(PyValueError::new_err(
@@ -138,13 +146,13 @@ fn validate_pycapsule(capsule: &PyCapsule, name: &str) -> PyResult<()> {
138146
}
139147

140148
impl FromPyArrow for DataType {
141-
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
149+
fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
142150
// Newer versions of PyArrow as well as other libraries with Arrow data implement this
143151
// method, so prefer it over _export_to_c.
144152
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
145153
if value.hasattr("__arrow_c_schema__")? {
146-
let capsule: &PyCapsule =
147-
PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?;
154+
let capsule = value.getattr("__arrow_c_schema__")?.call0()?;
155+
let capsule = capsule.downcast::<PyCapsule>()?;
148156
validate_pycapsule(capsule, "arrow_schema")?;
149157

150158
let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
@@ -166,21 +174,21 @@ impl ToPyArrow for DataType {
166174
fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
167175
let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?;
168176
let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
169-
let module = py.import("pyarrow")?;
177+
let module = py.import_bound("pyarrow")?;
170178
let class = module.getattr("DataType")?;
171179
let dtype = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?;
172180
Ok(dtype.into())
173181
}
174182
}
175183

176184
impl FromPyArrow for Field {
177-
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
185+
fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
178186
// Newer versions of PyArrow as well as other libraries with Arrow data implement this
179187
// method, so prefer it over _export_to_c.
180188
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
181189
if value.hasattr("__arrow_c_schema__")? {
182-
let capsule: &PyCapsule =
183-
PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?;
190+
let capsule = value.getattr("__arrow_c_schema__")?.call0()?;
191+
let capsule = capsule.downcast::<PyCapsule>()?;
184192
validate_pycapsule(capsule, "arrow_schema")?;
185193

186194
let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
@@ -202,21 +210,21 @@ impl ToPyArrow for Field {
202210
fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
203211
let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?;
204212
let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
205-
let module = py.import("pyarrow")?;
213+
let module = py.import_bound("pyarrow")?;
206214
let class = module.getattr("Field")?;
207215
let dtype = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?;
208216
Ok(dtype.into())
209217
}
210218
}
211219

212220
impl FromPyArrow for Schema {
213-
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
221+
fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
214222
// Newer versions of PyArrow as well as other libraries with Arrow data implement this
215223
// method, so prefer it over _export_to_c.
216224
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
217225
if value.hasattr("__arrow_c_schema__")? {
218-
let capsule: &PyCapsule =
219-
PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?;
226+
let capsule = value.getattr("__arrow_c_schema__")?.call0()?;
227+
let capsule = capsule.downcast::<PyCapsule>()?;
220228
validate_pycapsule(capsule, "arrow_schema")?;
221229

222230
let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
@@ -238,15 +246,15 @@ impl ToPyArrow for Schema {
238246
fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
239247
let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?;
240248
let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
241-
let module = py.import("pyarrow")?;
249+
let module = py.import_bound("pyarrow")?;
242250
let class = module.getattr("Schema")?;
243251
let schema = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?;
244252
Ok(schema.into())
245253
}
246254
}
247255

248256
impl FromPyArrow for ArrayData {
249-
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
257+
fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
250258
// Newer versions of PyArrow as well as other libraries with Arrow data implement this
251259
// method, so prefer it over _export_to_c.
252260
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
@@ -259,8 +267,10 @@ impl FromPyArrow for ArrayData {
259267
));
260268
}
261269

262-
let schema_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(0)?)?;
263-
let array_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(1)?)?;
270+
let schema_capsule = tuple.get_item(0)?;
271+
let schema_capsule = schema_capsule.downcast::<PyCapsule>()?;
272+
let array_capsule = tuple.get_item(1)?;
273+
let array_capsule = array_capsule.downcast::<PyCapsule>()?;
264274

265275
validate_pycapsule(schema_capsule, "arrow_schema")?;
266276
validate_pycapsule(array_capsule, "arrow_array")?;
@@ -296,7 +306,7 @@ impl ToPyArrow for ArrayData {
296306
let array = FFI_ArrowArray::new(self);
297307
let schema = FFI_ArrowSchema::try_from(self.data_type()).map_err(to_py_err)?;
298308

299-
let module = py.import("pyarrow")?;
309+
let module = py.import_bound("pyarrow")?;
300310
let class = module.getattr("Array")?;
301311
let array = class.call_method1(
302312
"_import_from_c",
@@ -310,9 +320,9 @@ impl ToPyArrow for ArrayData {
310320
}
311321

312322
impl<T: FromPyArrow> FromPyArrow for Vec<T> {
313-
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
323+
fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
314324
let list = value.downcast::<PyList>()?;
315-
list.iter().map(|x| T::from_pyarrow(x)).collect()
325+
list.iter().map(|x| T::from_pyarrow_bound(&x)).collect()
316326
}
317327
}
318328

@@ -327,7 +337,7 @@ impl<T: ToPyArrow> ToPyArrow for Vec<T> {
327337
}
328338

329339
impl FromPyArrow for RecordBatch {
330-
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
340+
fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
331341
// Newer versions of PyArrow as well as other libraries with Arrow data implement this
332342
// method, so prefer it over _export_to_c.
333343
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
@@ -340,8 +350,10 @@ impl FromPyArrow for RecordBatch {
340350
));
341351
}
342352

343-
let schema_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(0)?)?;
344-
let array_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(1)?)?;
353+
let schema_capsule = tuple.get_item(0)?;
354+
let schema_capsule = schema_capsule.downcast::<PyCapsule>()?;
355+
let array_capsule = tuple.get_item(1)?;
356+
let array_capsule = array_capsule.downcast::<PyCapsule>()?;
345357

346358
validate_pycapsule(schema_capsule, "arrow_schema")?;
347359
validate_pycapsule(array_capsule, "arrow_array")?;
@@ -370,12 +382,13 @@ impl FromPyArrow for RecordBatch {
370382
validate_class("RecordBatch", value)?;
371383
// TODO(kszucs): implement the FFI conversions in arrow-rs for RecordBatches
372384
let schema = value.getattr("schema")?;
373-
let schema = Arc::new(Schema::from_pyarrow(schema)?);
385+
let schema = Arc::new(Schema::from_pyarrow_bound(&schema)?);
374386

375-
let arrays = value.getattr("columns")?.downcast::<PyList>()?;
387+
let arrays = value.getattr("columns")?;
376388
let arrays = arrays
389+
.downcast::<PyList>()?
377390
.iter()
378-
.map(|a| Ok(make_array(ArrayData::from_pyarrow(a)?)))
391+
.map(|a| Ok(make_array(ArrayData::from_pyarrow_bound(&a)?)))
379392
.collect::<PyResult<_>>()?;
380393

381394
let batch = RecordBatch::try_new(schema, arrays).map_err(to_py_err)?;
@@ -395,13 +408,13 @@ impl ToPyArrow for RecordBatch {
395408

396409
/// Supports conversion from `pyarrow.RecordBatchReader` to [ArrowArrayStreamReader].
397410
impl FromPyArrow for ArrowArrayStreamReader {
398-
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
411+
fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
399412
// Newer versions of PyArrow as well as other libraries with Arrow data implement this
400413
// method, so prefer it over _export_to_c.
401414
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
402415
if value.hasattr("__arrow_c_stream__")? {
403-
let capsule: &PyCapsule =
404-
PyTryInto::try_into(value.getattr("__arrow_c_stream__")?.call0()?)?;
416+
let capsule = value.getattr("__arrow_c_stream__")?.call0()?;
417+
let capsule = capsule.downcast::<PyCapsule>()?;
405418
validate_pycapsule(capsule, "arrow_array_stream")?;
406419

407420
let stream = unsafe { FFI_ArrowArrayStream::from_raw(capsule.pointer() as _) };
@@ -421,7 +434,7 @@ impl FromPyArrow for ArrowArrayStreamReader {
421434
// make the conversion through PyArrow's private API
422435
// this changes the pointer's memory and is thus unsafe.
423436
// In particular, `_export_to_c` can go out of bounds
424-
let args = PyTuple::new(value.py(), [stream_ptr as Py_uintptr_t]);
437+
let args = PyTuple::new_bound(value.py(), [stream_ptr as Py_uintptr_t]);
425438
value.call_method1("_export_to_c", args)?;
426439

427440
let stream_reader = ArrowArrayStreamReader::try_new(stream)
@@ -439,9 +452,9 @@ impl IntoPyArrow for Box<dyn RecordBatchReader + Send> {
439452
let mut stream = FFI_ArrowArrayStream::new(self);
440453

441454
let stream_ptr = (&mut stream) as *mut FFI_ArrowArrayStream;
442-
let module = py.import("pyarrow")?;
455+
let module = py.import_bound("pyarrow")?;
443456
let class = module.getattr("RecordBatchReader")?;
444-
let args = PyTuple::new(py, [stream_ptr as Py_uintptr_t]);
457+
let args = PyTuple::new_bound(py, [stream_ptr as Py_uintptr_t]);
445458
let reader = class.call_method1("_import_from_c", args)?;
446459

447460
Ok(PyObject::from(reader))
@@ -463,8 +476,8 @@ impl IntoPyArrow for ArrowArrayStreamReader {
463476
pub struct PyArrowType<T>(pub T);
464477

465478
impl<'source, T: FromPyArrow> FromPyObject<'source> for PyArrowType<T> {
466-
fn extract(value: &'source PyAny) -> PyResult<Self> {
467-
Ok(Self(T::from_pyarrow(value)?))
479+
fn extract_bound(value: &Bound<'source, PyAny>) -> PyResult<Self> {
480+
Ok(Self(T::from_pyarrow_bound(value)?))
468481
}
469482
}
470483

arrow/tests/pyarrow.rs

+2-2
Original file line number | Diff line number | Diff line change
@@ -32,9 +32,9 @@ fn test_to_pyarrow() {
3232

3333
let res = Python::with_gil(|py| {
3434
let py_input = input.to_pyarrow(py)?;
35-
let records = RecordBatch::from_pyarrow(py_input.as_ref(py))?;
35+
let records = RecordBatch::from_pyarrow_bound(py_input.bind(py))?;
3636
let py_records = records.to_pyarrow(py)?;
37-
RecordBatch::from_pyarrow(py_records.as_ref(py))
37+
RecordBatch::from_pyarrow_bound(py_records.bind(py))
3838
})
3939
.unwrap();
4040

0 commit comments

Comments
 (0)