From 4bff65908e55789b400f5b114bb1593438b5e94a Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 1 Jul 2024 13:33:02 -0400 Subject: [PATCH 1/6] Use pyo3-arrow for Python bindings --- python/core/Cargo.lock | 17 ++ python/core/Cargo.toml | 2 +- python/core/src/algorithm/native/eq.rs | 3 - python/core/src/algorithm/native/len.rs | 3 - python/core/src/error.rs | 8 + python/core/src/ffi/from_python/array.rs | 2 - python/core/src/ffi/from_python/mod.rs | 3 - .../core/src/ffi/from_python/record_batch.rs | 38 --- .../ffi/from_python/record_batch_reader.rs | 15 -- python/core/src/ffi/from_python/schema.rs | 11 - python/core/src/ffi/from_python/utils.rs | 21 +- python/core/src/ffi/to_python/mod.rs | 2 - .../src/ffi/to_python/record_batch_reader.rs | 35 --- python/core/src/ffi/to_python/table.rs | 37 --- .../src/interop/geopandas/from_geopandas.rs | 247 ++++++++---------- .../src/interop/geopandas/to_geopandas.rs | 144 +++++----- .../core/src/interop/pyogrio/from_pyogrio.rs | 14 +- python/core/src/interop/util.rs | 12 + python/core/src/io/csv.rs | 8 +- python/core/src/io/flatgeobuf.rs | 12 +- python/core/src/io/geojson.rs | 8 +- python/core/src/io/geojson_lines.rs | 8 +- python/core/src/io/ipc.rs | 12 +- python/core/src/io/parquet/reader.rs | 29 +- python/core/src/io/parquet/writer.rs | 22 +- python/core/src/io/postgis.rs | 9 +- python/core/src/lib.rs | 6 - python/core/src/record_batch.rs | 16 -- python/core/src/schema/mod.rs | 16 -- python/core/src/stream/mod.rs | 22 -- 30 files changed, 275 insertions(+), 507 deletions(-) delete mode 100644 python/core/src/ffi/from_python/record_batch.rs delete mode 100644 python/core/src/ffi/from_python/record_batch_reader.rs delete mode 100644 python/core/src/ffi/from_python/schema.rs delete mode 100644 python/core/src/ffi/to_python/record_batch_reader.rs delete mode 100644 python/core/src/ffi/to_python/table.rs delete mode 100644 python/core/src/record_batch.rs delete mode 100644 python/core/src/schema/mod.rs delete mode 100644 python/core/src/stream/mod.rs diff --git a/python/core/Cargo.lock b/python/core/Cargo.lock index b92019d3..91c06e1b 100644 --- a/python/core/Cargo.lock +++ b/python/core/Cargo.lock @@ -1084,6 +1084,7 @@ dependencies = [ "openssl", "parquet", "pyo3", + "pyo3-arrow", "pyo3-asyncio-0-21", "reqwest 0.12.5", "sqlx", @@ -1960,6 +1961,7 @@ version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec170733ca37175f5d75a5bea5911d6ff45d2cd52849ce98b685394e4f2f37f4" dependencies = [ + "half", "libc", "ndarray", "num-complex", @@ -2341,6 +2343,21 @@ dependencies = [ "unindent", ] +[[package]] +name = "pyo3-arrow" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd7cca8a058838202f3e29f45d4408389ff689ff2c491d1f74ba43b6e93f9dad" +dependencies = [ + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-schema", + "numpy", + "pyo3", + "thiserror", +] + [[package]] name = "pyo3-asyncio-0-21" version = "0.21.0" diff --git a/python/core/Cargo.toml b/python/core/Cargo.toml index d7f130c1..cc15075e 100644 --- a/python/core/Cargo.toml +++ b/python/core/Cargo.toml @@ -37,8 +37,8 @@ pyo3 = { version = "0.21.0", features = [ "serde", "anyhow", ] } +pyo3-arrow = "0.1" pyo3-asyncio-0-21 = { version = "0.21", features = ["tokio-runtime"] } -# pyo3-asyncio = { version = "0.20", features = ["tokio-runtime"] } geo = "0.28" geoarrow = { path = "../../", features = [ "csv", diff --git a/python/core/src/algorithm/native/eq.rs b/python/core/src/algorithm/native/eq.rs index 443bb6de..867e6982 100644 --- a/python/core/src/algorithm/native/eq.rs +++ b/python/core/src/algorithm/native/eq.rs @@ -3,7 +3,6 @@ use crate::array::*; use crate::chunked_array::*; use crate::scalar::*; -use crate::table::GeoTable; use pyo3::prelude::*; macro_rules! impl_eq { @@ -78,5 +77,3 @@ impl_eq!(ChunkedInt32Array); impl_eq!(ChunkedInt64Array); impl_eq!(ChunkedStringArray); impl_eq!(ChunkedLargeStringArray); - -impl_eq!(GeoTable); diff --git a/python/core/src/algorithm/native/len.rs b/python/core/src/algorithm/native/len.rs index aa71281c..3e85a7a8 100644 --- a/python/core/src/algorithm/native/len.rs +++ b/python/core/src/algorithm/native/len.rs @@ -1,6 +1,5 @@ use crate::array::*; use crate::chunked_array::*; -use crate::table::GeoTable; use arrow_array::Array; use geoarrow::GeometryArrayTrait; use pyo3::prelude::*; @@ -66,5 +65,3 @@ impl_len!(ChunkedInt32Array); impl_len!(ChunkedInt64Array); impl_len!(ChunkedStringArray); impl_len!(ChunkedLargeStringArray); - -impl_len!(GeoTable); diff --git a/python/core/src/error.rs b/python/core/src/error.rs index 47aa4489..d003f8d5 100644 --- a/python/core/src/error.rs +++ b/python/core/src/error.rs @@ -4,6 +4,7 @@ use pyo3::prelude::*; pub enum PyGeoArrowError { GeoArrowError(geoarrow::error::GeoArrowError), PyErr(PyErr), + PyArrowError(pyo3_arrow::error::PyArrowError), ObjectStoreError(object_store::Error), ObjectStorePathError(object_store::path::Error), UrlParseError(url::ParseError), @@ -14,6 +15,7 @@ impl From for PyErr { match error { PyGeoArrowError::GeoArrowError(err) => PyException::new_err(err.to_string()), PyGeoArrowError::PyErr(err) => err, + PyGeoArrowError::PyArrowError(err) => err.into(), PyGeoArrowError::ObjectStoreError(err) => PyException::new_err(err.to_string()), PyGeoArrowError::ObjectStorePathError(err) => PyException::new_err(err.to_string()), PyGeoArrowError::UrlParseError(err) => PyException::new_err(err.to_string()), @@ -27,6 +29,12 @@ impl From for PyGeoArrowError { } } +impl From for PyGeoArrowError { + fn from(other: pyo3_arrow::error::PyArrowError) -> Self { + Self::PyArrowError(other) + } +} + impl From for PyGeoArrowError { fn from(other: object_store::Error) -> Self { Self::ObjectStoreError(other) diff --git a/python/core/src/ffi/from_python/array.rs b/python/core/src/ffi/from_python/array.rs index 7cd1f662..a07665cf 100644 --- a/python/core/src/ffi/from_python/array.rs +++ b/python/core/src/ffi/from_python/array.rs @@ -1,6 +1,5 @@ use crate::array::*; use crate::ffi::from_python::utils::import_arrow_c_array; -use crate::table::GeoTable; use pyo3::exceptions::PyTypeError; use pyo3::prelude::*; use pyo3::types::PyType; @@ -65,4 +64,3 @@ impl_from_arrow!(MultiPolygonArray); impl_from_arrow!(MixedGeometryArray); // impl_from_arrow!(RectArray); impl_from_arrow!(GeometryCollectionArray); -impl_from_arrow!(GeoTable); diff --git a/python/core/src/ffi/from_python/mod.rs b/python/core/src/ffi/from_python/mod.rs index df056bfe..dab75446 100644 --- a/python/core/src/ffi/from_python/mod.rs +++ b/python/core/src/ffi/from_python/mod.rs @@ -2,10 +2,7 @@ pub mod array; pub mod chunked; pub mod ffi_stream; pub mod input; -pub mod record_batch; -pub mod record_batch_reader; pub mod scalar; -pub mod schema; pub mod table; pub mod utils; diff --git a/python/core/src/ffi/from_python/record_batch.rs b/python/core/src/ffi/from_python/record_batch.rs deleted file mode 100644 index a9652777..00000000 --- a/python/core/src/ffi/from_python/record_batch.rs +++ /dev/null @@ -1,38 +0,0 @@ -use std::sync::Arc; - -use crate::ffi::from_python::utils::import_arrow_c_array; -use crate::record_batch::RecordBatch; -use arrow::array::AsArray; -use arrow::datatypes::{DataType, SchemaBuilder}; -use arrow_array::Array; -use pyo3::exceptions::PyValueError; -use pyo3::prelude::*; -use pyo3::{PyAny, PyResult}; - -impl<'a> FromPyObject<'a> for RecordBatch { - fn extract_bound(ob: &Bound<'a, PyAny>) -> PyResult { - let (array, field) = import_arrow_c_array(ob)?; - match field.data_type() { - DataType::Struct(fields) => { - let struct_array = array.as_struct(); - let schema = SchemaBuilder::from(fields) - .finish() - .with_metadata(field.metadata().clone()); - assert_eq!( - struct_array.null_count(), - 0, - "Cannot convert nullable StructArray to RecordBatch" - ); - - let columns = struct_array.columns().to_vec(); - let batch = arrow_array::RecordBatch::try_new(Arc::new(schema), columns) - .map_err(|err| PyValueError::new_err(err.to_string()))?; - Ok(Self(batch)) - } - dt => Err(PyValueError::new_err(format!( - "Unexpected data type {}", - dt - ))), - } - } -} diff --git a/python/core/src/ffi/from_python/record_batch_reader.rs b/python/core/src/ffi/from_python/record_batch_reader.rs deleted file mode 100644 index 8657e6de..00000000 --- a/python/core/src/ffi/from_python/record_batch_reader.rs +++ /dev/null @@ -1,15 +0,0 @@ -use crate::ffi::from_python::utils::import_arrow_c_stream; -use crate::stream::PyRecordBatchReader; -use pyo3::exceptions::PyValueError; -use pyo3::prelude::*; -use pyo3::{PyAny, PyResult}; - -impl<'a> FromPyObject<'a> for PyRecordBatchReader { - fn extract_bound(ob: &Bound<'a, PyAny>) -> PyResult { - let stream = import_arrow_c_stream(ob)?; - let stream_reader = arrow::ffi_stream::ArrowArrayStreamReader::try_new(stream) - .map_err(|err| PyValueError::new_err(err.to_string()))?; - - Ok(Self(Some(Box::new(stream_reader)))) - } -} diff --git a/python/core/src/ffi/from_python/schema.rs b/python/core/src/ffi/from_python/schema.rs deleted file mode 100644 index fdd22b4b..00000000 --- a/python/core/src/ffi/from_python/schema.rs +++ /dev/null @@ -1,11 +0,0 @@ -use crate::ffi::from_python::utils::import_arrow_c_schema; -use crate::schema::Schema; -use pyo3::prelude::*; -use pyo3::{PyAny, PyResult}; - -impl<'a> FromPyObject<'a> for Schema { - fn extract_bound(ob: &Bound<'a, PyAny>) -> PyResult { - let schema = import_arrow_c_schema(ob)?; - Ok(Self(schema)) - } -} diff --git a/python/core/src/ffi/from_python/utils.rs b/python/core/src/ffi/from_python/utils.rs index 1ab5035b..c068c4f2 100644 --- a/python/core/src/ffi/from_python/utils.rs +++ b/python/core/src/ffi/from_python/utils.rs @@ -1,6 +1,4 @@ -use std::sync::Arc; - -use arrow::datatypes::{Field, Schema, SchemaRef}; +use arrow::datatypes::Field; use arrow::ffi::{FFI_ArrowArray, FFI_ArrowSchema}; use arrow::ffi_stream::FFI_ArrowArrayStream; use arrow_array::{make_array, ArrayRef}; @@ -29,23 +27,6 @@ pub fn validate_pycapsule_name(capsule: &Bound, expected_name: &str) Ok(()) } -/// Import `__arrow_c_schema__` across Python boundary -pub(crate) fn import_arrow_c_schema(ob: &Bound) -> PyResult { - if !ob.hasattr("__arrow_c_schema__")? { - return Err(PyValueError::new_err( - "Expected an object with dunder __arrow_c_schema__", - )); - } - - let capsule: Bound = ob.getattr("__arrow_c_schema__")?.call0()?.downcast_into()?; - validate_pycapsule_name(&capsule, "arrow_schema")?; - - let schema_ptr = unsafe { capsule.reference::() }; - let schema = - Schema::try_from(schema_ptr).map_err(|err| PyTypeError::new_err(err.to_string()))?; - Ok(Arc::new(schema)) -} - /// Import `__arrow_c_array__` across Python boundary pub(crate) fn import_arrow_c_array(ob: &Bound) -> PyResult<(ArrayRef, Field)> { if !ob.hasattr("__arrow_c_array__")? { diff --git a/python/core/src/ffi/to_python/mod.rs b/python/core/src/ffi/to_python/mod.rs index 1843a3ec..0aaa258e 100644 --- a/python/core/src/ffi/to_python/mod.rs +++ b/python/core/src/ffi/to_python/mod.rs @@ -1,8 +1,6 @@ pub mod array; pub mod chunked; pub mod ffi_stream; -pub mod record_batch_reader; pub mod scalar; -pub mod table; pub use array::{chunked_geometry_array_to_pyobject, geometry_array_to_pyobject}; diff --git a/python/core/src/ffi/to_python/record_batch_reader.rs b/python/core/src/ffi/to_python/record_batch_reader.rs deleted file mode 100644 index ca61211c..00000000 --- a/python/core/src/ffi/to_python/record_batch_reader.rs +++ /dev/null @@ -1,35 +0,0 @@ -use crate::error::PyGeoArrowResult; -use crate::stream::PyRecordBatchReader; -use arrow::ffi_stream::FFI_ArrowArrayStream; - -use pyo3::exceptions::PyValueError; -use pyo3::prelude::*; -use pyo3::types::PyCapsule; -use std::ffi::CString; - -#[pymethods] -impl PyRecordBatchReader { - /// An implementation of the [Arrow PyCapsule - /// Interface](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html). - /// This dunder method should not be called directly, but enables zero-copy - /// data transfer to other Python libraries that understand Arrow memory. - /// - /// For example, you can call [`pyarrow.table()`][pyarrow.table] to convert this array - /// into a pyarrow table, without copying memory. - fn __arrow_c_stream__( - &mut self, - _requested_schema: Option, - ) -> PyGeoArrowResult { - let reader = self.0.take().ok_or(PyValueError::new_err( - "Cannot read from closed RecordBatchReader", - ))?; - - let ffi_stream = FFI_ArrowArrayStream::new(reader); - let stream_capsule_name = CString::new("arrow_array_stream").unwrap(); - - Python::with_gil(|py| { - let stream_capsule = PyCapsule::new_bound(py, ffi_stream, Some(stream_capsule_name))?; - Ok(stream_capsule.to_object(py)) - }) - } -} diff --git a/python/core/src/ffi/to_python/table.rs b/python/core/src/ffi/to_python/table.rs deleted file mode 100644 index 4787c33b..00000000 --- a/python/core/src/ffi/to_python/table.rs +++ /dev/null @@ -1,37 +0,0 @@ -use crate::error::PyGeoArrowResult; -use crate::table::GeoTable; -use arrow::ffi_stream::FFI_ArrowArrayStream; -use arrow_array::RecordBatchIterator; - -use pyo3::prelude::*; -use pyo3::types::PyCapsule; -use std::ffi::CString; - -#[pymethods] -impl GeoTable { - /// An implementation of the [Arrow PyCapsule - /// Interface](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html). - /// This dunder method should not be called directly, but enables zero-copy - /// data transfer to other Python libraries that understand Arrow memory. - /// - /// For example, you can call [`pyarrow.table()`][pyarrow.table] to convert this array - /// into a pyarrow table, without copying memory. - fn __arrow_c_stream__( - &self, - _requested_schema: Option, - ) -> PyGeoArrowResult { - let (schema, batches) = self.0.clone().into_inner(); - let record_batch_reader = Box::new(RecordBatchIterator::new( - batches.into_iter().map(Ok), - schema, - )); - let ffi_stream = FFI_ArrowArrayStream::new(record_batch_reader); - - let stream_capsule_name = CString::new("arrow_array_stream").unwrap(); - - Python::with_gil(|py| { - let stream_capsule = PyCapsule::new_bound(py, ffi_stream, Some(stream_capsule_name))?; - Ok(stream_capsule.to_object(py)) - }) - } -} diff --git a/python/core/src/interop/geopandas/from_geopandas.rs b/python/core/src/interop/geopandas/from_geopandas.rs index a16c59d0..7b5ec62f 100644 --- a/python/core/src/interop/geopandas/from_geopandas.rs +++ b/python/core/src/interop/geopandas/from_geopandas.rs @@ -5,15 +5,15 @@ use crate::error::PyGeoArrowResult; use crate::ffi::from_python::utils::import_arrow_c_stream; use crate::interop::shapely::from_shapely::from_shapely; use crate::interop::util::import_pyarrow; -use crate::table::GeoTable; use arrow::ffi_stream::ArrowArrayStreamReader; use arrow_array::RecordBatchReader; use geoarrow::chunked_array::ChunkedGeometryArrayTrait; use pyo3::exceptions::{PyTypeError, PyValueError}; use pyo3::intern; use pyo3::prelude::*; -use pyo3::types::{PyDict, PyType}; +use pyo3::types::PyDict; use pyo3::PyAny; +use pyo3_arrow::PyTable; /// Create a GeoArrow Table from a [GeoPandas GeoDataFrame][geopandas.GeoDataFrame]. /// @@ -29,144 +29,119 @@ use pyo3::PyAny; /// Returns: /// A GeoArrow Table #[pyfunction] -pub fn from_geopandas(py: Python, input: &Bound) -> PyGeoArrowResult { - GeoTable::from_geopandas(&py.get_type_bound::(), py, input) -} - -#[pymethods] -impl GeoTable { - /// Create a GeoArrow Table from a [GeoPandas GeoDataFrame][geopandas.GeoDataFrame]. - /// - /// ### Notes: - /// - /// - Currently this will always generate a non-chunked GeoArrow array. This is partly because - /// [pyarrow.Table.from_pandas][pyarrow.Table.from_pandas] always creates a single batch. - /// - This requires `pyarrow` version 14 or later. - /// - /// Args: - /// input: A [GeoPandas GeoDataFrame][geopandas.GeoDataFrame]. - /// - /// Returns: - /// A GeoArrow Table - #[classmethod] - fn from_geopandas( - _cls: &Bound, - py: Python, - input: &Bound, - ) -> PyGeoArrowResult { - // Imports and validation - let pyarrow_mod = import_pyarrow(py)?; - let geopandas_mod = py.import_bound(intern!(py, "geopandas"))?; - let geodataframe_class = geopandas_mod.getattr(intern!(py, "GeoDataFrame"))?; - if !input.is_instance(&geodataframe_class)? { - return Err(PyValueError::new_err("Expected GeoDataFrame input.").into()); - } +pub fn from_geopandas(py: Python, input: &Bound) -> PyGeoArrowResult { + // Imports and validation + let pyarrow_mod = import_pyarrow(py)?; + let geopandas_mod = py.import_bound(intern!(py, "geopandas"))?; + let geodataframe_class = geopandas_mod.getattr(intern!(py, "GeoDataFrame"))?; + if !input.is_instance(&geodataframe_class)? { + return Err(PyValueError::new_err("Expected GeoDataFrame input.").into()); + } - // Convert main table to pyarrow table - let geometry_column_name = input - .getattr(intern!(py, "_geometry_column_name"))? - .extract::()?; - let dataframe_column_names = input - .getattr(intern!(py, "columns"))? - .call_method0(intern!(py, "tolist"))? - .extract::>()?; - let pyarrow_column_names = dataframe_column_names - .into_iter() - .filter(|name| name.as_str() != geometry_column_name.as_str()) - .collect::>(); + // Convert main table to pyarrow table + let geometry_column_name = input + .getattr(intern!(py, "_geometry_column_name"))? + .extract::()?; + let dataframe_column_names = input + .getattr(intern!(py, "columns"))? + .call_method0(intern!(py, "tolist"))? + .extract::>()?; + let pyarrow_column_names = dataframe_column_names + .into_iter() + .filter(|name| name.as_str() != geometry_column_name.as_str()) + .collect::>(); - let args = (input,); - let kwargs = PyDict::new_bound(py); - kwargs.set_item("columns", pyarrow_column_names)?; - let pyarrow_table = pyarrow_mod.getattr(intern!(py, "Table"))?.call_method( - intern!(py, "from_pandas"), - args, - Some(&kwargs), - )?; + let args = (input,); + let kwargs = PyDict::new_bound(py); + kwargs.set_item("columns", pyarrow_column_names)?; + let pyarrow_table = pyarrow_mod.getattr(intern!(py, "Table"))?.call_method( + intern!(py, "from_pandas"), + args, + Some(&kwargs), + )?; - // Move the pyarrow table into Rust - let stream = import_arrow_c_stream(&pyarrow_table)?; - let stream_reader = ArrowArrayStreamReader::try_new(stream) - .map_err(|err| PyValueError::new_err(err.to_string()))?; - let schema = stream_reader.schema(); + // Move the pyarrow table into Rust + let stream = import_arrow_c_stream(&pyarrow_table)?; + let stream_reader = ArrowArrayStreamReader::try_new(stream) + .map_err(|err| PyValueError::new_err(err.to_string()))?; + let schema = stream_reader.schema(); - let mut batches = vec![]; - for batch in stream_reader { - let batch = batch.map_err(|err| PyTypeError::new_err(err.to_string()))?; - batches.push(batch); - } - if batches.len() > 1 { - return Err(PyValueError::new_err("Expected 1 batch from pyarrow table.").into()); - } + let mut batches = vec![]; + for batch in stream_reader { + let batch = batch.map_err(|err| PyTypeError::new_err(err.to_string()))?; + batches.push(batch); + } + if batches.len() > 1 { + return Err(PyValueError::new_err("Expected 1 batch from pyarrow table.").into()); + } - // Convert GeoPandas geometry - // Note: this is kinda a hack because from_ragged_array returns a _Python_ geoarrow class, - // but I need to convert that back into a Rust object to make a ChunkedGeometryArray to - // create the GeoTable - let python_geometry_array = from_shapely(py, &input.getattr(intern!(py, "geometry"))?)?; - let chunked_geometry: Arc = if python_geometry_array - .bind(py) - .is_instance_of::() - { - let ga_arr = python_geometry_array.extract::(py)?; - Arc::new(geoarrow::chunked_array::ChunkedGeometryArray::new(vec![ - ga_arr.0, - ])) - } else if python_geometry_array - .bind(py) - .is_instance_of::() - { - let ga_arr = python_geometry_array.extract::(py)?; - Arc::new(geoarrow::chunked_array::ChunkedGeometryArray::new(vec![ - ga_arr.0, - ])) - } else if python_geometry_array - .bind(py) - .is_instance_of::() - { - let ga_arr = python_geometry_array.extract::(py)?; - Arc::new(geoarrow::chunked_array::ChunkedGeometryArray::new(vec![ - ga_arr.0, - ])) - } else if python_geometry_array - .bind(py) - .is_instance_of::() - { - let ga_arr = python_geometry_array.extract::(py)?; - Arc::new(geoarrow::chunked_array::ChunkedGeometryArray::new(vec![ - ga_arr.0, - ])) - } else if python_geometry_array - .bind(py) - .is_instance_of::() - { - let ga_arr = python_geometry_array.extract::(py)?; - Arc::new(geoarrow::chunked_array::ChunkedGeometryArray::new(vec![ - ga_arr.0, - ])) - } else if python_geometry_array - .bind(py) - .is_instance_of::() - { - let ga_arr = python_geometry_array.extract::(py)?; - Arc::new(geoarrow::chunked_array::ChunkedGeometryArray::new(vec![ - ga_arr.0, - ])) - } else if python_geometry_array - .bind(py) - .is_instance_of::() - { - let ga_arr = python_geometry_array.extract::(py)?; - Arc::new(geoarrow::chunked_array::ChunkedGeometryArray::new(vec![ - ga_arr.0, - ])) - } else { - unreachable!() - }; + // Convert GeoPandas geometry + // Note: this is kinda a hack because from_ragged_array returns a _Python_ geoarrow class, + // but I need to convert that back into a Rust object to make a ChunkedGeometryArray to + // create the GeoTable + let python_geometry_array = from_shapely(py, &input.getattr(intern!(py, "geometry"))?)?; + let chunked_geometry: Arc = if python_geometry_array + .bind(py) + .is_instance_of::() + { + let ga_arr = python_geometry_array.extract::(py)?; + Arc::new(geoarrow::chunked_array::ChunkedGeometryArray::new(vec![ + ga_arr.0, + ])) + } else if python_geometry_array + .bind(py) + .is_instance_of::() + { + let ga_arr = python_geometry_array.extract::(py)?; + Arc::new(geoarrow::chunked_array::ChunkedGeometryArray::new(vec![ + ga_arr.0, + ])) + } else if python_geometry_array + .bind(py) + .is_instance_of::() + { + let ga_arr = python_geometry_array.extract::(py)?; + Arc::new(geoarrow::chunked_array::ChunkedGeometryArray::new(vec![ + ga_arr.0, + ])) + } else if python_geometry_array + .bind(py) + .is_instance_of::() + { + let ga_arr = python_geometry_array.extract::(py)?; + Arc::new(geoarrow::chunked_array::ChunkedGeometryArray::new(vec![ + ga_arr.0, + ])) + } else if python_geometry_array + .bind(py) + .is_instance_of::() + { + let ga_arr = python_geometry_array.extract::(py)?; + Arc::new(geoarrow::chunked_array::ChunkedGeometryArray::new(vec![ + ga_arr.0, + ])) + } else if python_geometry_array + .bind(py) + .is_instance_of::() + { + let ga_arr = python_geometry_array.extract::(py)?; + Arc::new(geoarrow::chunked_array::ChunkedGeometryArray::new(vec![ + ga_arr.0, + ])) + } else if python_geometry_array + .bind(py) + .is_instance_of::() + { + let ga_arr = python_geometry_array.extract::(py)?; + Arc::new(geoarrow::chunked_array::ChunkedGeometryArray::new(vec![ + ga_arr.0, + ])) + } else { + unreachable!() + }; - Ok( - geoarrow::table::Table::from_arrow_and_geometry(batches, schema, chunked_geometry)? - .into(), - ) - } + let rust_table = + geoarrow::table::Table::from_arrow_and_geometry(batches, schema, chunked_geometry)?; + let (schema, batches) = rust_table.into_inner(); + Ok(PyTable::new(schema, batches)) } diff --git a/python/core/src/interop/geopandas/to_geopandas.rs b/python/core/src/interop/geopandas/to_geopandas.rs index 24df2fc2..76953497 100644 --- a/python/core/src/interop/geopandas/to_geopandas.rs +++ b/python/core/src/interop/geopandas/to_geopandas.rs @@ -1,13 +1,12 @@ use crate::chunked_array::*; use crate::error::PyGeoArrowResult; -use crate::interop::util::import_pyarrow; -use crate::table::GeoTable; use geoarrow::array::AsChunkedGeometryArray; use geoarrow::datatypes::GeoDataType; use pyo3::exceptions::PyValueError; use pyo3::intern; use pyo3::prelude::*; use pyo3::types::PyDict; +use pyo3_arrow::PyTable; /// Convert a GeoArrow Table to a [GeoPandas GeoDataFrame][geopandas.GeoDataFrame]. /// @@ -21,92 +20,75 @@ use pyo3::types::PyDict; /// Returns: /// the converted GeoDataFrame #[pyfunction] -pub fn to_geopandas(py: Python, input: &Bound) -> PyGeoArrowResult { - let table = GeoTable::extract_bound(input)?; - table.to_geopandas(py) -} +pub fn to_geopandas(py: Python, input: PyTable) -> PyGeoArrowResult { + // Imports and validation + let geopandas_mod = py.import_bound(intern!(py, "geopandas"))?; + let pandas_mod = py.import_bound(intern!(py, "pandas"))?; + let geodataframe_class = geopandas_mod.getattr(intern!(py, "GeoDataFrame"))?; -#[pymethods] -impl GeoTable { - /// Convert this GeoArrow Table to a [GeoPandas GeoDataFrame][geopandas.GeoDataFrame]. - /// - /// ### Notes: - /// - /// - This requires [`pyarrow`][pyarrow] version 14 or later. - /// - /// Returns: - /// the converted GeoDataFrame - fn to_geopandas(&self, py: Python) -> PyGeoArrowResult { - // Imports and validation - let pyarrow_mod = import_pyarrow(py)?; - let geopandas_mod = py.import_bound(intern!(py, "geopandas"))?; - let pandas_mod = py.import_bound(intern!(py, "pandas"))?; - let geodataframe_class = geopandas_mod.getattr(intern!(py, "GeoDataFrame"))?; + let (batches, schema) = input.into_inner(); + let rust_table = geoarrow::table::Table::try_new(schema.clone(), batches.clone())?; - // Hack: create a new table because I can't figure out how to pass `self` - let cloned_table = GeoTable(self.0.clone()); - let pyarrow_table = pyarrow_mod.call_method1(intern!(py, "table"), (cloned_table,))?; + let pyarrow_table = PyTable::new(schema, batches).to_pyarrow(py)?; - let geometry_column_index = self.0.default_geometry_column_idx()?; - let pyarrow_table = - pyarrow_table.call_method1(intern!(py, "remove_column"), (geometry_column_index,))?; + let geometry_column_index = rust_table.default_geometry_column_idx()?; + let pyarrow_table = + pyarrow_table.call_method1(py, intern!(py, "remove_column"), (geometry_column_index,))?; - let kwargs = PyDict::new_bound(py); - kwargs.set_item( - "types_mapper", - pandas_mod.getattr(intern!(py, "ArrowDtype"))?, - )?; - let pandas_df = pyarrow_table.call_method(intern!(py, "to_pandas"), (), Some(&kwargs))?; + let kwargs = PyDict::new_bound(py); + kwargs.set_item( + "types_mapper", + pandas_mod.getattr(intern!(py, "ArrowDtype"))?, + )?; + let pandas_df = + pyarrow_table.call_method_bound(py, intern!(py, "to_pandas"), (), Some(&kwargs))?; - let geometry = self.0.geometry_column(Some(geometry_column_index))?; - let shapely_geometry = match geometry.data_type() { - GeoDataType::Point(_) => ChunkedPointArray(geometry.as_ref().as_point().clone()) + let geometry = rust_table.geometry_column(Some(geometry_column_index))?; + let shapely_geometry = match geometry.data_type() { + GeoDataType::Point(_) => ChunkedPointArray(geometry.as_ref().as_point().clone()) + .to_shapely(py)? + .to_object(py), + GeoDataType::LineString(_) => { + ChunkedLineStringArray(geometry.as_ref().as_line_string().clone()) + .to_shapely(py)? + .to_object(py) + } + GeoDataType::Polygon(_) => ChunkedPolygonArray(geometry.as_ref().as_polygon().clone()) + .to_shapely(py)? + .to_object(py), + GeoDataType::MultiPoint(_) => { + ChunkedMultiPointArray(geometry.as_ref().as_multi_point().clone()) + .to_shapely(py)? + .to_object(py) + } + GeoDataType::MultiLineString(_) => { + ChunkedMultiLineStringArray(geometry.as_ref().as_multi_line_string().clone()) .to_shapely(py)? - .to_object(py), - GeoDataType::LineString(_) => { - ChunkedLineStringArray(geometry.as_ref().as_line_string().clone()) - .to_shapely(py)? - .to_object(py) - } - GeoDataType::Polygon(_) => ChunkedPolygonArray(geometry.as_ref().as_polygon().clone()) + .to_object(py) + } + GeoDataType::MultiPolygon(_) => { + ChunkedMultiPolygonArray(geometry.as_ref().as_multi_polygon().clone()) .to_shapely(py)? - .to_object(py), - GeoDataType::MultiPoint(_) => { - ChunkedMultiPointArray(geometry.as_ref().as_multi_point().clone()) - .to_shapely(py)? - .to_object(py) - } - GeoDataType::MultiLineString(_) => { - ChunkedMultiLineStringArray(geometry.as_ref().as_multi_line_string().clone()) - .to_shapely(py)? - .to_object(py) - } - GeoDataType::MultiPolygon(_) => { - ChunkedMultiPolygonArray(geometry.as_ref().as_multi_polygon().clone()) - .to_shapely(py)? - .to_object(py) - } - GeoDataType::Mixed(_) => { - ChunkedMixedGeometryArray(geometry.as_ref().as_mixed().clone()) - .to_shapely(py)? - .to_object(py) - } - GeoDataType::GeometryCollection(_) => { - ChunkedGeometryCollectionArray(geometry.as_ref().as_geometry_collection().clone()) - .to_shapely(py)? - .to_object(py) - } - GeoDataType::WKB => ChunkedWKBArray(geometry.as_ref().as_wkb().clone()) + .to_object(py) + } + GeoDataType::Mixed(_) => ChunkedMixedGeometryArray(geometry.as_ref().as_mixed().clone()) + .to_shapely(py)? + .to_object(py), + GeoDataType::GeometryCollection(_) => { + ChunkedGeometryCollectionArray(geometry.as_ref().as_geometry_collection().clone()) .to_shapely(py)? - .to_object(py), - t => { - return Err(PyValueError::new_err(format!("unexpected type {:?}", t)).into()); - } - }; + .to_object(py) + } + GeoDataType::WKB => ChunkedWKBArray(geometry.as_ref().as_wkb().clone()) + .to_shapely(py)? + .to_object(py), + t => { + return Err(PyValueError::new_err(format!("unexpected type {:?}", t)).into()); + } + }; - let args = (pandas_df,); - let kwargs = PyDict::new_bound(py); - kwargs.set_item("geometry", shapely_geometry)?; - Ok(geodataframe_class.call(args, Some(&kwargs))?.to_object(py)) - } + let args = (pandas_df,); + let kwargs = PyDict::new_bound(py); + kwargs.set_item("geometry", shapely_geometry)?; + Ok(geodataframe_class.call(args, Some(&kwargs))?.to_object(py)) } diff --git a/python/core/src/interop/pyogrio/from_pyogrio.rs b/python/core/src/interop/pyogrio/from_pyogrio.rs index bdca2e77..b2f66db9 100644 --- a/python/core/src/interop/pyogrio/from_pyogrio.rs +++ b/python/core/src/interop/pyogrio/from_pyogrio.rs @@ -1,10 +1,10 @@ use crate::error::PyGeoArrowResult; use crate::interop::util::import_pyarrow; -use crate::table::GeoTable; use pyo3::intern; use pyo3::prelude::*; use pyo3::types::PyDict; use pyo3::PyAny; +use pyo3_arrow::PyRecordBatchReader; /// Read from an OGR data source to a GeoTable /// @@ -114,7 +114,7 @@ pub fn read_pyogrio( return_fids: bool, batch_size: usize, kwargs: Option<&Bound>, -) -> PyGeoArrowResult { +) -> PyGeoArrowResult { // Imports and validation // Import pyarrow to validate it's >=14 and will have PyCapsule interface let _pyarrow_mod = import_pyarrow(py)?; @@ -152,19 +152,19 @@ pub fn read_pyogrio( .call_method0(intern!(py, "__enter__"))? .extract::<(PyObject, PyObject)>()?; - let maybe_table = GeoTable::from_arrow( - &py.get_type_bound::(), + let rust_reader = PyRecordBatchReader::from_arrow( + &py.get_type_bound::(), record_batch_reader.bind(py), ); // If the eval threw an exception we'll pass it through to the context manager. // Otherwise, __exit__ is called with empty arguments (Python "None"). // https://pyo3.rs/v0.20.2/python_from_rust.html#need-to-use-a-context-manager-from-rust - match maybe_table { - Ok(table) => { + match rust_reader { + Ok(reader) => { let none = py.None(); context_manager.call_method1("__exit__", (&none, &none, &none))?; - Ok(table) + Ok(reader) } Err(e) => { context_manager.call_method1( diff --git a/python/core/src/interop/util.rs b/python/core/src/interop/util.rs index 53c0e0b6..53c2d70c 100644 --- a/python/core/src/interop/util.rs +++ b/python/core/src/interop/util.rs @@ -1,7 +1,9 @@ use crate::error::PyGeoArrowResult; +use geoarrow::error::GeoArrowError; use pyo3::exceptions::PyValueError; use pyo3::intern; use pyo3::prelude::*; +use pyo3_arrow::PyTable; /// Import pyarrow and assert version 14 or higher. pub(crate) fn import_pyarrow(py: Python) -> PyGeoArrowResult> { @@ -21,3 +23,13 @@ pub(crate) fn import_pyarrow(py: Python) -> PyGeoArrowResult> { Ok(pyarrow_mod) } } + +pub(crate) fn table_to_pytable(table: geoarrow::table::Table) -> PyTable { + let (schema, batches) = table.into_inner(); + PyTable::new(schema, batches) +} + +pub(crate) fn pytable_to_table(table: PyTable) -> Result { + let (batches, schema) = table.into_inner(); + geoarrow::table::Table::try_new(schema, batches) +} diff --git a/python/core/src/io/csv.rs b/python/core/src/io/csv.rs index 48b7f40e..2917c05e 100644 --- a/python/core/src/io/csv.rs +++ b/python/core/src/io/csv.rs @@ -1,11 +1,11 @@ use crate::error::PyGeoArrowResult; +use crate::interop::util::table_to_pytable; use crate::io::input::sync::{BinaryFileReader, BinaryFileWriter}; -use crate::stream::PyRecordBatchReader; -use crate::table::GeoTable; use geoarrow::io::csv::read_csv as _read_csv; use geoarrow::io::csv::write_csv as _write_csv; use geoarrow::io::csv::CSVReaderOptions; use pyo3::prelude::*; +use pyo3_arrow::{PyRecordBatchReader, PyTable}; /// Read a CSV file from a path on disk into a GeoTable. /// @@ -23,11 +23,11 @@ pub fn read_csv( file: PyObject, geometry_column_name: &str, batch_size: usize, -) -> PyGeoArrowResult { +) -> PyGeoArrowResult { let mut reader = file.extract::(py)?; let options = CSVReaderOptions::new(Default::default(), batch_size); let table = _read_csv(&mut reader, geometry_column_name, options)?; - Ok(GeoTable(table)) + Ok(table_to_pytable(table)) } /// Write a GeoTable to a CSV file on disk. diff --git a/python/core/src/io/flatgeobuf.rs b/python/core/src/io/flatgeobuf.rs index 6da2e232..8d9af58e 100644 --- a/python/core/src/io/flatgeobuf.rs +++ b/python/core/src/io/flatgeobuf.rs @@ -1,15 +1,15 @@ use crate::error::{PyGeoArrowError, PyGeoArrowResult}; +use crate::interop::util::table_to_pytable; use crate::io::input::sync::BinaryFileWriter; use crate::io::input::{construct_reader, FileReader}; use crate::io::object_store::PyObjectStore; -use crate::stream::PyRecordBatchReader; -use crate::table::GeoTable; use flatgeobuf::FgbWriterOptions; use geoarrow::io::flatgeobuf::read_flatgeobuf_async as _read_flatgeobuf_async; use geoarrow::io::flatgeobuf::write_flatgeobuf_with_options as _write_flatgeobuf; use geoarrow::io::flatgeobuf::{read_flatgeobuf as _read_flatgeobuf, FlatGeobufReaderOptions}; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; +use pyo3_arrow::{PyRecordBatchReader, PyTable}; /// Read a FlatGeobuf file from a path on disk or a remote location into a GeoTable. /// @@ -74,7 +74,7 @@ pub fn read_flatgeobuf( fs: Option, batch_size: usize, bbox: Option<(f64, f64, f64, f64)>, -) -> PyGeoArrowResult { +) -> PyGeoArrowResult { let reader = construct_reader(py, file, fs)?; match reader { FileReader::Async(async_reader) => async_reader.runtime.block_on(async move { @@ -87,7 +87,7 @@ pub fn read_flatgeobuf( .await .map_err(PyGeoArrowError::GeoArrowError)?; - Ok(GeoTable(table)) + Ok(table_to_pytable(table)) }), FileReader::Sync(mut sync_reader) => { let options = FlatGeobufReaderOptions { @@ -96,7 +96,7 @@ pub fn read_flatgeobuf( ..Default::default() }; let table = _read_flatgeobuf(&mut sync_reader, options)?; - Ok(GeoTable(table)) + Ok(table_to_pytable(table)) } } } @@ -163,7 +163,7 @@ pub fn read_flatgeobuf_async( .await .map_err(PyGeoArrowError::GeoArrowError)?; - Ok(GeoTable(table)) + Ok(table_to_pytable(table)) })?; Ok(fut.into()) } diff --git a/python/core/src/io/geojson.rs b/python/core/src/io/geojson.rs index 6fc3c4ad..225f7a6a 100644 --- a/python/core/src/io/geojson.rs +++ b/python/core/src/io/geojson.rs @@ -1,10 +1,10 @@ use crate::error::PyGeoArrowResult; +use crate::interop::util::table_to_pytable; use crate::io::input::sync::{BinaryFileReader, BinaryFileWriter}; -use crate::stream::PyRecordBatchReader; -use crate::table::GeoTable; use geoarrow::io::geojson::read_geojson as _read_geojson; use geoarrow::io::geojson::write_geojson as _write_geojson; use pyo3::prelude::*; +use pyo3_arrow::{PyRecordBatchReader, PyTable}; /// Read a GeoJSON file from a path on disk into a GeoTable. /// @@ -16,10 +16,10 @@ use pyo3::prelude::*; /// Table from GeoJSON file. #[pyfunction] #[pyo3(signature = (file, *, batch_size=65536))] -pub fn read_geojson(py: Python, file: PyObject, batch_size: usize) -> PyGeoArrowResult { +pub fn read_geojson(py: Python, file: PyObject, batch_size: usize) -> PyGeoArrowResult { let mut reader = file.extract::(py)?; let table = _read_geojson(&mut reader, Some(batch_size))?; - Ok(GeoTable(table)) + Ok(table_to_pytable(table)) } /// Write a GeoTable to a GeoJSON file on disk. diff --git a/python/core/src/io/geojson_lines.rs b/python/core/src/io/geojson_lines.rs index dbd052f2..c554f21d 100644 --- a/python/core/src/io/geojson_lines.rs +++ b/python/core/src/io/geojson_lines.rs @@ -1,10 +1,10 @@ use crate::error::PyGeoArrowResult; +use crate::interop::util::table_to_pytable; use crate::io::input::sync::{BinaryFileReader, BinaryFileWriter}; -use crate::stream::PyRecordBatchReader; -use crate::table::GeoTable; use geoarrow::io::geojson_lines::read_geojson_lines as _read_geojson_lines; use geoarrow::io::geojson_lines::write_geojson_lines as _write_geojson_lines; use pyo3::prelude::*; +use pyo3_arrow::{PyRecordBatchReader, PyTable}; /// Read a newline-delimited GeoJSON file from a path on disk into a GeoTable. /// @@ -22,10 +22,10 @@ pub fn read_geojson_lines( py: Python, file: PyObject, batch_size: usize, -) -> PyGeoArrowResult { +) -> PyGeoArrowResult { let mut reader = file.extract::(py)?; let table = _read_geojson_lines(&mut reader, Some(batch_size))?; - Ok(GeoTable(table)) + Ok(table_to_pytable(table)) } /// Write a GeoTable to a newline-delimited GeoJSON file on disk. diff --git a/python/core/src/io/ipc.rs b/python/core/src/io/ipc.rs index f7ecf5dd..dc929967 100644 --- a/python/core/src/io/ipc.rs +++ b/python/core/src/io/ipc.rs @@ -1,12 +1,12 @@ use crate::error::PyGeoArrowResult; +use crate::interop::util::table_to_pytable; use crate::io::input::sync::{BinaryFileReader, BinaryFileWriter}; -use crate::stream::PyRecordBatchReader; -use crate::table::GeoTable; use geoarrow::io::ipc::read_ipc as _read_ipc; use geoarrow::io::ipc::read_ipc_stream as _read_ipc_stream; use geoarrow::io::ipc::write_ipc as _write_ipc; use geoarrow::io::ipc::write_ipc_stream as _write_ipc_stream; use pyo3::prelude::*; +use pyo3_arrow::{PyRecordBatchReader, PyTable}; /// Read into a Table from Arrow IPC (Feather v2) file. /// @@ -17,10 +17,10 @@ use pyo3::prelude::*; /// Table from Arrow IPC file. #[pyfunction] #[pyo3(signature = (file))] -pub fn read_ipc(py: Python, file: PyObject) -> PyGeoArrowResult { +pub fn read_ipc(py: Python, file: PyObject) -> PyGeoArrowResult { let mut reader = file.extract::(py)?; let table = _read_ipc(&mut reader)?; - Ok(GeoTable(table)) + Ok(table_to_pytable(table)) } /// Read into a Table from Arrow IPC record batch stream. @@ -32,10 +32,10 @@ pub fn read_ipc(py: Python, file: PyObject) -> PyGeoArrowResult { /// Table from Arrow IPC file. #[pyfunction] #[pyo3(signature = (file))] -pub fn read_ipc_stream(py: Python, file: PyObject) -> PyGeoArrowResult { +pub fn read_ipc_stream(py: Python, file: PyObject) -> PyGeoArrowResult { let mut reader = file.extract::(py)?; let table = _read_ipc_stream(&mut reader)?; - Ok(GeoTable(table)) + Ok(table_to_pytable(table)) } /// Write a GeoTable to an Arrow IPC (Feather v2) file on disk. diff --git a/python/core/src/io/parquet/reader.rs b/python/core/src/io/parquet/reader.rs index 207ad0fa..e5ab3648 100644 --- a/python/core/src/io/parquet/reader.rs +++ b/python/core/src/io/parquet/reader.rs @@ -3,11 +3,11 @@ use std::sync::Arc; use crate::array::PolygonArray; use crate::error::{PyGeoArrowError, PyGeoArrowResult}; +use crate::interop::util::table_to_pytable; use crate::io::input::sync::BinaryFileReader; use crate::io::input::{construct_reader, FileReader}; use crate::io::object_store::PyObjectStore; use crate::io::parquet::options::{create_options, GeoParquetBboxPaths}; -use crate::table::GeoTable; use geoarrow::error::GeoArrowError; use geoarrow::geo_traits::{CoordTrait, RectTrait}; @@ -20,6 +20,7 @@ use object_store::ObjectStore; use parquet::arrow::async_reader::ParquetObjectReader; use pyo3::exceptions::{PyFileNotFoundError, PyValueError}; use pyo3::prelude::*; +use pyo3_arrow::PyTable; use tokio::runtime::Runtime; /// Read a GeoParquet file from a path on disk into a GeoTable. @@ -69,7 +70,7 @@ pub fn read_parquet( path: PyObject, fs: Option, batch_size: Option, -) -> PyGeoArrowResult { +) -> PyGeoArrowResult { let reader = construct_reader(py, path, fs)?; match reader { FileReader::Async(async_reader) => { @@ -89,7 +90,7 @@ pub fn read_parquet( .await .map_err(PyGeoArrowError::GeoArrowError)?; - Ok::<_, PyGeoArrowError>(GeoTable(table)) + Ok::<_, PyGeoArrowError>(table_to_pytable(table)) })?; Ok(table) } @@ -103,7 +104,7 @@ pub fn read_parquet( ..Default::default() }; let table = _read_geoparquet(file, options)?; - Ok(GeoTable(table)) + Ok(table_to_pytable(table)) } _ => Err(PyValueError::new_err("File objects not supported in Parquet reader.").into()), }, @@ -170,7 +171,7 @@ pub fn read_parquet_async( .await .map_err(PyGeoArrowError::GeoArrowError)?; - Ok(GeoTable(table)) + Ok(table_to_pytable(table)) })?; Ok(fut.into()) } @@ -311,7 +312,7 @@ impl ParquetFile { .read(options) .await .map_err(PyGeoArrowError::GeoArrowError)?; - Ok(GeoTable(table)) + Ok(table_to_pytable(table)) })?; Ok(fut.into()) } @@ -325,7 +326,7 @@ impl ParquetFile { offset: Option, bbox: Option<[f64; 4]>, bbox_paths: Option, - ) -> PyGeoArrowResult { + ) -> PyGeoArrowResult { let file = self.file.clone(); let options = create_options(batch_size, limit, offset, bbox, bbox_paths); self.rt.block_on(async move { @@ -333,7 +334,7 @@ impl ParquetFile { .read(options) .await .map_err(PyGeoArrowError::GeoArrowError)?; - Ok(GeoTable(table)) + Ok(table_to_pytable(table)) }) } @@ -355,7 +356,7 @@ impl ParquetFile { .read_row_groups(row_groups, Default::default()) .await .map_err(PyGeoArrowError::GeoArrowError)?; - Ok(GeoTable(table)) + Ok(table_to_pytable(table)) })?; Ok(fut.into()) } @@ -367,14 +368,14 @@ impl ParquetFile { /// /// Returns: /// parsed table. - fn read_row_groups(&self, row_groups: Vec) -> PyGeoArrowResult { + fn read_row_groups(&self, row_groups: Vec) -> PyGeoArrowResult { let file = self.file.clone(); self.rt.block_on(async move { let table = file .read_row_groups(row_groups, Default::default()) .await .map_err(PyGeoArrowError::GeoArrowError)?; - Ok(GeoTable(table)) + Ok(table_to_pytable(table)) }) } } @@ -461,7 +462,7 @@ impl ParquetDataset { .read(options) .await .map_err(PyGeoArrowError::GeoArrowError)?; - Ok(GeoTable(table)) + Ok(table_to_pytable(table)) })?; Ok(fut.into()) } @@ -475,7 +476,7 @@ impl ParquetDataset { offset: Option, bbox: Option<[f64; 4]>, bbox_paths: Option, - ) -> PyGeoArrowResult { + ) -> PyGeoArrowResult { let inner = self.inner.clone(); let options = create_options(batch_size, limit, offset, bbox, bbox_paths); self.rt.block_on(async move { @@ -483,7 +484,7 @@ impl ParquetDataset { .read(options) .await .map_err(PyGeoArrowError::GeoArrowError)?; - Ok(GeoTable(table)) + Ok(table_to_pytable(table)) }) } } diff --git a/python/core/src/io/parquet/writer.rs b/python/core/src/io/parquet/writer.rs index 7ff43375..bdbcc989 100644 --- a/python/core/src/io/parquet/writer.rs +++ b/python/core/src/io/parquet/writer.rs @@ -2,10 +2,8 @@ use std::fs::File; use std::io::BufWriter; use crate::error::PyGeoArrowResult; +use crate::interop::util::pytable_to_table; use crate::io::input::sync::BinaryFileWriter; -use crate::record_batch::RecordBatch; -use crate::schema::Schema; -use crate::table::GeoTable; use geoarrow::io::parquet::{ write_geoparquet as _write_geoparquet, GeoParquetWriter as _GeoParquetWriter, @@ -13,6 +11,7 @@ use geoarrow::io::parquet::{ }; use pyo3::exceptions::{PyFileNotFoundError, PyValueError}; use pyo3::prelude::*; +use pyo3_arrow::{PyRecordBatch, PySchema, PyTable}; pub enum GeoParquetEncoding { WKB, @@ -55,7 +54,7 @@ impl From for geoarrow::io::parquet::GeoParquetWriterEncodin text_signature = "(table, file, *, encoding = 'WKB')") ] pub fn write_parquet( - mut table: GeoTable, + table: PyTable, file: String, encoding: GeoParquetEncoding, ) -> PyGeoArrowResult<()> { @@ -66,7 +65,8 @@ pub fn write_parquet( encoding: encoding.into(), ..Default::default() }; - _write_geoparquet(&mut table.0, writer, &options)?; + let mut table = pytable_to_table(table)?; + _write_geoparquet(&mut table, writer, &options)?; Ok(()) } @@ -79,10 +79,10 @@ pub struct ParquetWriter { #[pymethods] impl ParquetWriter { #[new] - pub fn new(py: Python, file: PyObject, schema: Schema) -> PyGeoArrowResult { + pub fn new(py: Python, file: PyObject, schema: PySchema) -> PyGeoArrowResult { let file_writer = file.extract::(py)?; let geoparquet_writer = - _GeoParquetWriter::try_new(file_writer, &schema.0, &Default::default())?; + _GeoParquetWriter::try_new(file_writer, schema.as_ref(), &Default::default())?; Ok(Self { file: Some(geoparquet_writer), }) @@ -92,9 +92,9 @@ impl ParquetWriter { pub fn __enter__(&self) {} /// Write a single record batch to the Parquet file - pub fn write_batch(&mut self, batch: RecordBatch) -> PyGeoArrowResult<()> { + pub fn write_batch(&mut self, batch: PyRecordBatch) -> PyGeoArrowResult<()> { if let Some(file) = self.file.as_mut() { - file.write_batch(&batch.0)?; + file.write_batch(batch.as_ref())?; Ok(()) } else { Err(PyValueError::new_err("File is already closed.").into()) @@ -102,9 +102,9 @@ impl ParquetWriter { } /// Write a table or stream of batches to the Parquet file - pub fn write_table(&mut self, table: GeoTable) -> PyGeoArrowResult<()> { + pub fn write_table(&mut self, table: PyTable) -> PyGeoArrowResult<()> { if let Some(file) = self.file.as_mut() { - for batch in table.0.batches() { + for batch in table.batches() { file.write_batch(batch)?; } Ok(()) diff --git a/python/core/src/io/postgis.rs b/python/core/src/io/postgis.rs index f8b348cd..52f88d8f 100644 --- a/python/core/src/io/postgis.rs +++ b/python/core/src/io/postgis.rs @@ -1,8 +1,9 @@ use crate::error::{PyGeoArrowError, PyGeoArrowResult}; -use crate::table::GeoTable; +use crate::interop::util::table_to_pytable; use geoarrow::error::GeoArrowError; use geoarrow::io::postgis::read_postgis as _read_postgis; use pyo3::prelude::*; +use pyo3_arrow::PyTable; use sqlx::postgres::PgPoolOptions; /// Read a PostGIS query into a GeoTable. @@ -10,7 +11,7 @@ use sqlx::postgres::PgPoolOptions; /// Returns: /// Table from query. #[pyfunction] -pub fn read_postgis(connection_url: String, sql: String) -> PyGeoArrowResult> { +pub fn read_postgis(connection_url: String, sql: String) -> PyGeoArrowResult> { // https://tokio.rs/tokio/topics/bridging#what-tokiomain-expands-to tokio::runtime::Builder::new_multi_thread() .enable_all() @@ -26,7 +27,7 @@ pub fn read_postgis(connection_url: String, sql: String) -> PyGeoArrowResult) -> PyResult<()> { // m.add_class::()?; // m.add_class::()?; - // RecordBatchReader - m.add_class::()?; - // Table m.add_class::()?; diff --git a/python/core/src/record_batch.rs b/python/core/src/record_batch.rs deleted file mode 100644 index fe1103c8..00000000 --- a/python/core/src/record_batch.rs +++ /dev/null @@ -1,16 +0,0 @@ -use pyo3::prelude::*; - -#[pyclass(module = "geoarrow.rust.core._rust")] -pub struct RecordBatch(pub(crate) arrow_array::RecordBatch); - -impl From for RecordBatch { - fn from(value: arrow_array::RecordBatch) -> Self { - Self(value) - } -} - -impl From for arrow_array::RecordBatch { - fn from(value: RecordBatch) -> Self { - value.0 - } -} diff --git a/python/core/src/schema/mod.rs b/python/core/src/schema/mod.rs deleted file mode 100644 index 44dc755a..00000000 --- a/python/core/src/schema/mod.rs +++ /dev/null @@ -1,16 +0,0 @@ -use pyo3::prelude::*; - -#[pyclass(module = "geoarrow.rust.core._rust")] -pub struct Schema(pub(crate) arrow::datatypes::SchemaRef); - -impl From for Schema { - fn from(value: arrow::datatypes::SchemaRef) -> Self { - Self(value) - } -} - -impl From for arrow::datatypes::SchemaRef { - fn from(value: Schema) -> Self { - value.0 - } -} diff --git a/python/core/src/stream/mod.rs b/python/core/src/stream/mod.rs deleted file mode 100644 index dafa08de..00000000 --- a/python/core/src/stream/mod.rs +++ /dev/null @@ -1,22 +0,0 @@ -use arrow_array::RecordBatchReader as _RecordBatchReader; -use geoarrow::error::GeoArrowError; -use pyo3::prelude::*; - -use crate::error::PyGeoArrowResult; - -/// A wrapper around an [arrow_array::RecordBatchReader] -#[pyclass( - module = "geoarrow.rust.core._rust", - name = "RecordBatchReader", - subclass -)] -pub struct PyRecordBatchReader(pub(crate) Option>); - -impl PyRecordBatchReader { - pub fn into_reader(mut self) -> PyGeoArrowResult> { - let stream = self.0.take().ok_or(GeoArrowError::General( - "Cannot write from closed stream.".to_string(), - ))?; - Ok(stream) - } -} From a8ad9e882fe7567fb6ccaac7c8667bce14262f54 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 1 Jul 2024 15:37:28 -0400 Subject: [PATCH 2/6] Update geotable pyi --- python/core/python/geoarrow/rust/core/_rust.pyi | 10 ---------- python/core/src/table/mod.rs | 6 ------ 2 files changed, 16 deletions(-) diff --git a/python/core/python/geoarrow/rust/core/_rust.pyi b/python/core/python/geoarrow/rust/core/_rust.pyi index 72328aa4..5ad01aab 100644 --- a/python/core/python/geoarrow/rust/core/_rust.pyi +++ b/python/core/python/geoarrow/rust/core/_rust.pyi @@ -1128,17 +1128,10 @@ class ChunkedFloat64Array: # def to_numpy(self) -> NDArray[np.uint8]: ... class GeoTable: - def __arrow_c_stream__(self, requested_schema: object | None = None) -> object: ... - def __eq__(self, other: Self) -> bool: ... @property def __geo_interface__(self) -> dict: ... - def __len__(self) -> int: ... def __repr__(self) -> str: ... def explode(self) -> Self: ... - @classmethod - def from_arrow(cls, input: ArrowStreamExportable) -> Self: ... - @classmethod - def from_geopandas(cls, input: gpd.GeoDataFrame) -> Self: ... @property def geometry( self, @@ -1152,9 +1145,6 @@ class GeoTable: | ChunkedMixedGeometryArray | ChunkedGeometryCollectionArray ): ... - @property - def num_columns(self) -> int: ... - def to_geopandas(self) -> gpd.GeoDataFrame: ... # Top-level array/chunked array functions diff --git a/python/core/src/table/mod.rs b/python/core/src/table/mod.rs index 13c167d0..ca5b80ad 100644 --- a/python/core/src/table/mod.rs +++ b/python/core/src/table/mod.rs @@ -24,12 +24,6 @@ impl GeoTable { Python::with_gil(|py| chunked_geometry_array_to_pyobject(py, chunked_geom_arr)) } - /// Number of columns in this table. - #[getter] - fn num_columns(&self) -> usize { - self.0.num_columns() - } - fn __repr__(&self) -> String { self.0.to_string() } From cfb293f333c91c58f5df16db7f91672d53e9b969 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 1 Jul 2024 15:42:07 -0400 Subject: [PATCH 3/6] Update types --- .../core/python/geoarrow/rust/core/_rust.pyi | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/python/core/python/geoarrow/rust/core/_rust.pyi b/python/core/python/geoarrow/rust/core/_rust.pyi index 5ad01aab..722e1d4d 100644 --- a/python/core/python/geoarrow/rust/core/_rust.pyi +++ b/python/core/python/geoarrow/rust/core/_rust.pyi @@ -14,6 +14,8 @@ from typing import ( overload, ) +from arro3.core import RecordBatchReader, Table + try: import numpy as np from numpy.typing import NDArray @@ -25,13 +27,14 @@ try: except ImportError: pass +from .enums import AreaMethod, GeoParquetEncoding, LengthMethod, SimplifyMethod from .types import ( AffineInputT, AffineTransform, + AreaMethodT, ArrowArrayExportable, ArrowSchemaExportable, ArrowStreamExportable, - AreaMethodT, BboxPaths, BroadcastGeometry, GeoInterfaceProtocol, @@ -44,7 +47,6 @@ from .types import ( SimplifyInputT, SimplifyMethodT, ) -from .enums import AreaMethod, GeoParquetEncoding, LengthMethod, SimplifyMethod class Point: def __arrow_c_array__( @@ -1510,37 +1512,37 @@ def read_csv( geometry_column_name: str, *, batch_size: int = 65536, -) -> GeoTable: ... +) -> Table: ... def read_flatgeobuf( file: Union[str, Path, BinaryIO], *, fs: Optional[ObjectStore] = None, batch_size: int = 65536, bbox: Tuple[float, float, float, float] | None = None, -) -> GeoTable: ... +) -> Table: ... async def read_flatgeobuf_async( path: str, *, fs: Optional[ObjectStore] = None, batch_size: int = 65536, bbox: Tuple[float, float, float, float] | None = None, -) -> GeoTable: ... +) -> Table: ... def read_geojson( file: Union[str, Path, BinaryIO], *, batch_size: int = 65536 -) -> GeoTable: ... +) -> Table: ... def read_geojson_lines( file: Union[str, Path, BinaryIO], *, batch_size: int = 65536 -) -> GeoTable: ... -def read_ipc(file: Union[str, Path, BinaryIO]) -> GeoTable: ... -def read_ipc_stream(file: Union[str, Path, BinaryIO]) -> GeoTable: ... +) -> Table: ... +def read_ipc(file: Union[str, Path, BinaryIO]) -> Table: ... +def read_ipc_stream(file: Union[str, Path, BinaryIO]) -> Table: ... def read_parquet( path: str, *, fs: Optional[ObjectStore] = None, batch_size: int = 65536 -) -> GeoTable: ... +) -> Table: ... async def read_parquet_async( path: str, *, fs: Optional[ObjectStore] = None, batch_size: int = 65536 -) -> GeoTable: ... -def read_postgis(connection_url: str, sql: str) -> Optional[GeoTable]: ... -async def read_postgis_async(connection_url: str, sql: str) -> Optional[GeoTable]: ... +) -> Table: ... +def read_postgis(connection_url: str, sql: str) -> Optional[Table]: ... +async def read_postgis_async(connection_url: str, sql: str) -> Optional[Table]: ... def read_pyogrio( path_or_buffer: Path | str | bytes, /, @@ -1559,7 +1561,7 @@ def read_pyogrio( return_fids=False, batch_size=65536, **kwargs, -) -> GeoTable: ... +) -> RecordBatchReader: ... def write_csv(table: ArrowStreamExportable, file: str | Path | BinaryIO) -> None: ... def write_flatgeobuf( table: ArrowStreamExportable, From 5ea4410c10776a8f02615b73a2eeec98b5214d4d Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 1 Jul 2024 16:18:17 -0400 Subject: [PATCH 4/6] Fix tests --- .../core/python/geoarrow/rust/core/_rust.pyi | 42 +++++++++---------- .../core/python/geoarrow/rust/core/types.py | 6 +-- python/core/src/algorithm/native/explode.rs | 23 +++------- .../core/src/interop/pyogrio/from_pyogrio.rs | 16 ++++--- python/core/src/io/csv.rs | 4 +- python/core/src/lib.rs | 4 +- python/core/src/table/mod.rs | 23 +++++----- .../core/tests/algorithm/test_affine_ops.py | 2 +- python/core/tests/algorithm/test_area.py | 2 +- python/core/tests/interop/test_geopandas.py | 4 +- python/core/tests/io/test_flatgeobuf.py | 8 ++-- python/core/tests/io/test_pyogrio.py | 2 +- 12 files changed, 61 insertions(+), 75 deletions(-) diff --git a/python/core/python/geoarrow/rust/core/_rust.pyi b/python/core/python/geoarrow/rust/core/_rust.pyi index 722e1d4d..96d675f1 100644 --- a/python/core/python/geoarrow/rust/core/_rust.pyi +++ b/python/core/python/geoarrow/rust/core/_rust.pyi @@ -1133,20 +1133,6 @@ class GeoTable: @property def __geo_interface__(self) -> dict: ... def __repr__(self) -> str: ... - def explode(self) -> Self: ... - @property - def geometry( - self, - ) -> ( - ChunkedPointArray - | ChunkedLineStringArray - | ChunkedPolygonArray - | ChunkedMultiPointArray - | ChunkedMultiLineStringArray - | ChunkedMultiPolygonArray - | ChunkedMixedGeometryArray - | ChunkedGeometryCollectionArray - ): ... # Top-level array/chunked array functions @@ -1421,7 +1407,19 @@ def total_bounds( # Top-level table functions -def explode(input: ArrowStreamExportable) -> GeoTable: ... +def explode(input: ArrowStreamExportable) -> Table: ... +def geometry_col( + table: ArrowStreamExportable, +) -> ( + ChunkedPointArray + | ChunkedLineStringArray + | ChunkedPolygonArray + | ChunkedMultiPointArray + | ChunkedMultiLineStringArray + | ChunkedMultiPolygonArray + | ChunkedMixedGeometryArray + | ChunkedGeometryCollectionArray +): ... # I/O @@ -1455,7 +1453,7 @@ class ParquetFile: offset: int | None = None, bbox: Sequence[IntFloat] | None = None, bbox_paths: BboxPaths | None = None, - ) -> GeoTable: ... + ) -> Table: ... def read( self, *, @@ -1464,9 +1462,9 @@ class ParquetFile: offset: int | None = None, bbox: Sequence[IntFloat] | None = None, bbox_paths: BboxPaths | None = None, - ) -> GeoTable: ... - async def read_row_groups_async(self, row_groups: Sequence[int]) -> GeoTable: ... - def read_row_groups(self, row_groups: Sequence[int]) -> GeoTable: ... + ) -> Table: ... + async def read_row_groups_async(self, row_groups: Sequence[int]) -> Table: ... + def read_row_groups(self, row_groups: Sequence[int]) -> Table: ... class ParquetDataset: def __init__(self, paths: Sequence[str], fs: ObjectStore) -> None: ... @@ -1482,7 +1480,7 @@ class ParquetDataset: offset: int | None = None, bbox: Sequence[IntFloat] | None = None, bbox_paths: BboxPaths | None = None, - ) -> GeoTable: ... + ) -> Table: ... def read( self, *, @@ -1491,7 +1489,7 @@ class ParquetDataset: offset: int | None = None, bbox: Sequence[IntFloat] | None = None, bbox_paths: BboxPaths | None = None, - ) -> GeoTable: ... + ) -> Table: ... class ParquetWriter: def __init__( @@ -1601,7 +1599,7 @@ def from_ewkb( | MixedGeometryArray | GeometryCollectionArray ): ... -def from_geopandas(input: gpd.GeoDataFrame) -> GeoTable: ... +def from_geopandas(input: gpd.GeoDataFrame) -> Table: ... def from_shapely( input, ) -> ( diff --git a/python/core/python/geoarrow/rust/core/types.py b/python/core/python/geoarrow/rust/core/types.py index 8677bb27..fba7aac7 100644 --- a/python/core/python/geoarrow/rust/core/types.py +++ b/python/core/python/geoarrow/rust/core/types.py @@ -153,15 +153,13 @@ def __arrow_c_schema__(self) -> object: ... class ArrowArrayExportable(Protocol): """An Arrow or GeoArrow array or RecordBatch.""" - def __arrow_c_array__( - self, requested_schema: object | None = None - ) -> Tuple[object, object]: ... + def __arrow_c_array__(self, requested_schema) -> Tuple[object, object]: ... class ArrowStreamExportable(Protocol): """An Arrow or GeoArrow ChunkedArray or Table.""" - def __arrow_c_stream__(self, requested_schema: object | None = None) -> object: ... + def __arrow_c_stream__(self, requested_schema) -> object: ... class GeoInterfaceProtocol(Protocol): diff --git a/python/core/src/algorithm/native/explode.rs b/python/core/src/algorithm/native/explode.rs index 3cce67d2..d06afff6 100644 --- a/python/core/src/algorithm/native/explode.rs +++ b/python/core/src/algorithm/native/explode.rs @@ -1,7 +1,8 @@ use crate::error::PyGeoArrowResult; -use crate::table::GeoTable; +use crate::interop::util::{pytable_to_table, table_to_pytable}; use geoarrow::algorithm::native::ExplodeTable; use pyo3::prelude::*; +use pyo3_arrow::PyTable; /// Explode a table. /// @@ -14,20 +15,8 @@ use pyo3::prelude::*; /// Returns: /// A new table with multi-part geometries exploded to separate rows. #[pyfunction] -pub fn explode(input: GeoTable) -> PyGeoArrowResult { - input.explode() -} - -#[pymethods] -impl GeoTable { - /// Explode a table. - /// - /// This is intended to be equivalent to the [`explode`][geopandas.GeoDataFrame.explode] - /// function in GeoPandas. - /// - /// Returns: - /// A new table with multi-part geometries exploded to separate rows. - pub fn explode(&self) -> PyGeoArrowResult { - Ok(self.0.explode(None)?.into()) - } +pub fn explode(input: PyTable) -> PyGeoArrowResult { + let table = pytable_to_table(input)?; + let exploded_table = table.explode(None)?; + Ok(table_to_pytable(exploded_table)) } diff --git a/python/core/src/interop/pyogrio/from_pyogrio.rs b/python/core/src/interop/pyogrio/from_pyogrio.rs index b2f66db9..49de351b 100644 --- a/python/core/src/interop/pyogrio/from_pyogrio.rs +++ b/python/core/src/interop/pyogrio/from_pyogrio.rs @@ -1,10 +1,9 @@ use crate::error::PyGeoArrowResult; -use crate::interop::util::import_pyarrow; use pyo3::intern; use pyo3::prelude::*; use pyo3::types::PyDict; use pyo3::PyAny; -use pyo3_arrow::PyRecordBatchReader; +use pyo3_arrow::PyTable; /// Read from an OGR data source to a GeoTable /// @@ -114,10 +113,9 @@ pub fn read_pyogrio( return_fids: bool, batch_size: usize, kwargs: Option<&Bound>, -) -> PyGeoArrowResult { +) -> PyGeoArrowResult { // Imports and validation // Import pyarrow to validate it's >=14 and will have PyCapsule interface - let _pyarrow_mod = import_pyarrow(py)?; let pyogrio_mod = py.import_bound(intern!(py, "pyogrio"))?; let args = (path_or_buffer,); @@ -152,19 +150,19 @@ pub fn read_pyogrio( .call_method0(intern!(py, "__enter__"))? .extract::<(PyObject, PyObject)>()?; - let rust_reader = PyRecordBatchReader::from_arrow( - &py.get_type_bound::(), + let maybe_table = PyTable::from_arrow( + &py.get_type_bound::(), record_batch_reader.bind(py), ); // If the eval threw an exception we'll pass it through to the context manager. // Otherwise, __exit__ is called with empty arguments (Python "None"). // https://pyo3.rs/v0.20.2/python_from_rust.html#need-to-use-a-context-manager-from-rust - match rust_reader { - Ok(reader) => { + match maybe_table { + Ok(table) => { let none = py.None(); context_manager.call_method1("__exit__", (&none, &none, &none))?; - Ok(reader) + Ok(table.to_arro3(py)?) } Err(e) => { context_manager.call_method1( diff --git a/python/core/src/io/csv.rs b/python/core/src/io/csv.rs index 2917c05e..0802bc67 100644 --- a/python/core/src/io/csv.rs +++ b/python/core/src/io/csv.rs @@ -7,7 +7,7 @@ use geoarrow::io::csv::CSVReaderOptions; use pyo3::prelude::*; use pyo3_arrow::{PyRecordBatchReader, PyTable}; -/// Read a CSV file from a path on disk into a GeoTable. +/// Read a CSV file from a path on disk into a Table. /// /// Args: /// file: the path to the file or a Python file object in binary read mode. @@ -30,7 +30,7 @@ pub fn read_csv( Ok(table_to_pytable(table)) } -/// Write a GeoTable to a CSV file on disk. +/// Write a Table to a CSV file on disk. /// /// Args: /// table: the table to write. diff --git a/python/core/src/lib.rs b/python/core/src/lib.rs index c67ca24a..bcc7a8eb 100644 --- a/python/core/src/lib.rs +++ b/python/core/src/lib.rs @@ -90,8 +90,8 @@ fn _rust(_py: Python, m: &Bound) -> PyResult<()> { // m.add_class::()?; // m.add_class::()?; - // Table - m.add_class::()?; + // Table functions + m.add_function(wrap_pyfunction!(crate::table::geometry_col, m)?)?; // Top-level array/chunked array functions m.add_function(wrap_pyfunction!( diff --git a/python/core/src/table/mod.rs b/python/core/src/table/mod.rs index ca5b80ad..b6e5a685 100644 --- a/python/core/src/table/mod.rs +++ b/python/core/src/table/mod.rs @@ -2,7 +2,9 @@ mod geo_interface; use crate::error::PyGeoArrowResult; use crate::ffi::to_python::chunked_geometry_array_to_pyobject; +use crate::interop::util::pytable_to_table; use pyo3::prelude::*; +use pyo3_arrow::PyTable; /// A spatially-enabled table. /// @@ -14,21 +16,22 @@ pub struct GeoTable(pub(crate) geoarrow::table::Table); #[pymethods] impl GeoTable { - /// Access the geometry column of this table - /// - /// Returns: - /// A chunked geometry array - #[getter] - pub fn geometry(&self) -> PyGeoArrowResult { - let chunked_geom_arr = self.0.geometry_column(None)?; - Python::with_gil(|py| chunked_geometry_array_to_pyobject(py, chunked_geom_arr)) - } - fn __repr__(&self) -> String { self.0.to_string() } } +/// Access the geometry column of this table +/// +/// Returns: +/// A chunked geometry array +#[pyfunction] +pub fn geometry_col(py: Python, table: PyTable) -> PyGeoArrowResult { + let table = pytable_to_table(table)?; + let chunked_geom_arr = table.geometry_column(None)?; + chunked_geometry_array_to_pyobject(py, chunked_geom_arr) +} + impl From for GeoTable { fn from(value: geoarrow::table::Table) -> Self { Self(value) diff --git a/python/core/tests/algorithm/test_affine_ops.py b/python/core/tests/algorithm/test_affine_ops.py index 6ff70f2e..0ea8b34c 100644 --- a/python/core/tests/algorithm/test_affine_ops.py +++ b/python/core/tests/algorithm/test_affine_ops.py @@ -13,7 +13,7 @@ def test_affine_function(): assert isinstance(gdf, gpd.GeoDataFrame) table = gars.from_geopandas(gdf) - geom = table.geometry + geom = gars.geometry_col(table) xoff = 10 yoff = 20 diff --git a/python/core/tests/algorithm/test_area.py b/python/core/tests/algorithm/test_area.py index b6762580..777d581d 100644 --- a/python/core/tests/algorithm/test_area.py +++ b/python/core/tests/algorithm/test_area.py @@ -13,7 +13,7 @@ def test_area(): assert isinstance(gdf, gpd.GeoDataFrame) table = gars.from_geopandas(gdf) - ga_area = table.geometry.area() + ga_area = gars.geometry_col(table).area() assert ga_area.num_chunks() == 1 pa_area = pa.array(ga_area.chunk(0)) diff --git a/python/core/tests/interop/test_geopandas.py b/python/core/tests/interop/test_geopandas.py index 8b1183c9..f79ace3b 100644 --- a/python/core/tests/interop/test_geopandas.py +++ b/python/core/tests/interop/test_geopandas.py @@ -13,7 +13,7 @@ def test_geopandas_round_trip(): gdf = gpd.read_file(nybb_path) assert isinstance(gdf, gpd.GeoDataFrame) table = gars.from_geopandas(gdf) - _ = table.to_geopandas() + _ = gars.to_geopandas(table) @pytest.mark.xfail(reason="CRS is lost.") @@ -21,5 +21,5 @@ def test_geopandas_round_trip_maintains_crs(): gdf = gpd.read_file(nybb_path) assert isinstance(gdf, gpd.GeoDataFrame) table = gars.from_geopandas(gdf) - gdf_back = table.to_geopandas() + gdf_back = gars.to_geopandas(table) assert_geodataframe_equal(gdf, gdf_back) diff --git a/python/core/tests/io/test_flatgeobuf.py b/python/core/tests/io/test_flatgeobuf.py index 345a7fb9..dbdd7cf7 100644 --- a/python/core/tests/io/test_flatgeobuf.py +++ b/python/core/tests/io/test_flatgeobuf.py @@ -12,7 +12,7 @@ def test_read_flatgeobuf(): path = FIXTURES_DIR / "flatgeobuf" / "countries.fgb" table = gars.read_flatgeobuf(path) assert len(table) == 179 - assert isinstance(table.geometry, gars.ChunkedMultiPolygonArray) + assert isinstance(gars.geometry_col(table), gars.ChunkedMultiPolygonArray) def test_read_flatgeobuf_file_object(): @@ -20,7 +20,7 @@ def test_read_flatgeobuf_file_object(): with open(path, "rb") as f: table = gars.read_flatgeobuf(f) assert len(table) == 179 - assert isinstance(table.geometry, gars.ChunkedMultiPolygonArray) + assert isinstance(gars.geometry_col(table), gars.ChunkedMultiPolygonArray) def test_round_trip_flatgeobuf(): @@ -31,7 +31,7 @@ def test_round_trip_flatgeobuf(): gars.write_flatgeobuf(table, buf) buf.seek(0) table_back = gars.read_flatgeobuf(buf) - assert table == table_back + assert table == table_back # type: ignore @pytest.mark.xfail(reason="fix propagate CRS") @@ -40,5 +40,5 @@ def test_matches_pyogrio(): table = gars.read_flatgeobuf(path) gdf_direct = gpd.read_file(path) - gdf_from_rust = table.to_geopandas() + gdf_from_rust = gars.to_geopandas(table) assert_geodataframe_equal(gdf_direct, gdf_from_rust) diff --git a/python/core/tests/io/test_pyogrio.py b/python/core/tests/io/test_pyogrio.py index 668bbe8d..79370db1 100644 --- a/python/core/tests/io/test_pyogrio.py +++ b/python/core/tests/io/test_pyogrio.py @@ -9,4 +9,4 @@ def test_read_pyogrio(): table = gars.read_pyogrio(nybb_path) gdf = gpd.read_file(nybb_path) - assert np.array_equal(table.geometry.to_shapely(), gdf.geometry) + assert np.array_equal(gars.geometry_col(table).to_shapely(), gdf.geometry) From 592dbacaf621db0efac2a3ecadb2a285a2c53622 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 1 Jul 2024 16:29:46 -0400 Subject: [PATCH 5/6] add arro3-core dependency --- python/core/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/core/pyproject.toml b/python/core/pyproject.toml index b691b90f..92725652 100644 --- a/python/core/pyproject.toml +++ b/python/core/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "maturin" [project] name = "geoarrow-rust-core" requires-python = ">=3.8" -dependencies = [] +dependencies = ["arro3-core"] classifiers = [ "Programming Language :: Rust", "Programming Language :: Python :: Implementation :: CPython", From 469da6178a5ec05e09d7908b0f44ed56834a23c7 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 1 Jul 2024 16:36:36 -0400 Subject: [PATCH 6/6] remove arro3 for now --- python/core/pyproject.toml | 4 +++- python/core/src/interop/pyogrio/from_pyogrio.rs | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/python/core/pyproject.toml b/python/core/pyproject.toml index 92725652..30079c9b 100644 --- a/python/core/pyproject.toml +++ b/python/core/pyproject.toml @@ -5,7 +5,9 @@ build-backend = "maturin" [project] name = "geoarrow-rust-core" requires-python = ">=3.8" -dependencies = ["arro3-core"] +# TODO: restore once arro3 wheels exist +# dependencies = ["arro3-core"] +dependencies = [] classifiers = [ "Programming Language :: Rust", "Programming Language :: Python :: Implementation :: CPython", diff --git a/python/core/src/interop/pyogrio/from_pyogrio.rs b/python/core/src/interop/pyogrio/from_pyogrio.rs index 49de351b..f828276a 100644 --- a/python/core/src/interop/pyogrio/from_pyogrio.rs +++ b/python/core/src/interop/pyogrio/from_pyogrio.rs @@ -113,7 +113,7 @@ pub fn read_pyogrio( return_fids: bool, batch_size: usize, kwargs: Option<&Bound>, -) -> PyGeoArrowResult { +) -> PyGeoArrowResult { // Imports and validation // Import pyarrow to validate it's >=14 and will have PyCapsule interface let pyogrio_mod = py.import_bound(intern!(py, "pyogrio"))?; @@ -162,7 +162,9 @@ pub fn read_pyogrio( Ok(table) => { let none = py.None(); context_manager.call_method1("__exit__", (&none, &none, &none))?; - Ok(table.to_arro3(py)?) + // TODO: restore once arro3 wheels exist + // Ok(table.to_arro3(py)?) + Ok(table) } Err(e) => { context_manager.call_method1(