Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use pyo3-arrow for Python bindings #657

Merged
merged 6 commits into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions python/core/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion python/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ pyo3 = { version = "0.21.0", features = [
"serde",
"anyhow",
] }
pyo3-arrow = "0.1"
pyo3-asyncio-0-21 = { version = "0.21", features = ["tokio-runtime"] }
# pyo3-asyncio = { version = "0.20", features = ["tokio-runtime"] }
geo = "0.28"
geoarrow = { path = "../../", features = [
"csv",
Expand Down
2 changes: 2 additions & 0 deletions python/core/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ build-backend = "maturin"
[project]
name = "geoarrow-rust-core"
requires-python = ">=3.8"
# TODO: restore once arro3 wheels exist
# dependencies = ["arro3-core"]
dependencies = []
classifiers = [
"Programming Language :: Rust",
Expand Down
82 changes: 36 additions & 46 deletions python/core/python/geoarrow/rust/core/_rust.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ from typing import (
overload,
)

from arro3.core import RecordBatchReader, Table

try:
import numpy as np
from numpy.typing import NDArray
Expand All @@ -25,13 +27,14 @@ try:
except ImportError:
pass

from .enums import AreaMethod, GeoParquetEncoding, LengthMethod, SimplifyMethod
from .types import (
AffineInputT,
AffineTransform,
AreaMethodT,
ArrowArrayExportable,
ArrowSchemaExportable,
ArrowStreamExportable,
AreaMethodT,
BboxPaths,
BroadcastGeometry,
GeoInterfaceProtocol,
Expand All @@ -44,7 +47,6 @@ from .types import (
SimplifyInputT,
SimplifyMethodT,
)
from .enums import AreaMethod, GeoParquetEncoding, LengthMethod, SimplifyMethod

class Point:
def __arrow_c_array__(
Expand Down Expand Up @@ -1128,33 +1130,9 @@ class ChunkedFloat64Array:
# def to_numpy(self) -> NDArray[np.uint8]: ...

class GeoTable:
def __arrow_c_stream__(self, requested_schema: object | None = None) -> object: ...
def __eq__(self, other: Self) -> bool: ...
@property
def __geo_interface__(self) -> dict: ...
def __len__(self) -> int: ...
def __repr__(self) -> str: ...
def explode(self) -> Self: ...
@classmethod
def from_arrow(cls, input: ArrowStreamExportable) -> Self: ...
@classmethod
def from_geopandas(cls, input: gpd.GeoDataFrame) -> Self: ...
@property
def geometry(
self,
) -> (
ChunkedPointArray
| ChunkedLineStringArray
| ChunkedPolygonArray
| ChunkedMultiPointArray
| ChunkedMultiLineStringArray
| ChunkedMultiPolygonArray
| ChunkedMixedGeometryArray
| ChunkedGeometryCollectionArray
): ...
@property
def num_columns(self) -> int: ...
def to_geopandas(self) -> gpd.GeoDataFrame: ...

# Top-level array/chunked array functions

Expand Down Expand Up @@ -1429,7 +1407,19 @@ def total_bounds(

# Top-level table functions

def explode(input: ArrowStreamExportable) -> GeoTable: ...
def explode(input: ArrowStreamExportable) -> Table: ...
def geometry_col(
table: ArrowStreamExportable,
) -> (
ChunkedPointArray
| ChunkedLineStringArray
| ChunkedPolygonArray
| ChunkedMultiPointArray
| ChunkedMultiLineStringArray
| ChunkedMultiPolygonArray
| ChunkedMixedGeometryArray
| ChunkedGeometryCollectionArray
): ...

# I/O

Expand Down Expand Up @@ -1463,7 +1453,7 @@ class ParquetFile:
offset: int | None = None,
bbox: Sequence[IntFloat] | None = None,
bbox_paths: BboxPaths | None = None,
) -> GeoTable: ...
) -> Table: ...
def read(
self,
*,
Expand All @@ -1472,9 +1462,9 @@ class ParquetFile:
offset: int | None = None,
bbox: Sequence[IntFloat] | None = None,
bbox_paths: BboxPaths | None = None,
) -> GeoTable: ...
async def read_row_groups_async(self, row_groups: Sequence[int]) -> GeoTable: ...
def read_row_groups(self, row_groups: Sequence[int]) -> GeoTable: ...
) -> Table: ...
async def read_row_groups_async(self, row_groups: Sequence[int]) -> Table: ...
def read_row_groups(self, row_groups: Sequence[int]) -> Table: ...

class ParquetDataset:
def __init__(self, paths: Sequence[str], fs: ObjectStore) -> None: ...
Expand All @@ -1490,7 +1480,7 @@ class ParquetDataset:
offset: int | None = None,
bbox: Sequence[IntFloat] | None = None,
bbox_paths: BboxPaths | None = None,
) -> GeoTable: ...
) -> Table: ...
def read(
self,
*,
Expand All @@ -1499,7 +1489,7 @@ class ParquetDataset:
offset: int | None = None,
bbox: Sequence[IntFloat] | None = None,
bbox_paths: BboxPaths | None = None,
) -> GeoTable: ...
) -> Table: ...

class ParquetWriter:
def __init__(
Expand All @@ -1520,37 +1510,37 @@ def read_csv(
geometry_column_name: str,
*,
batch_size: int = 65536,
) -> GeoTable: ...
) -> Table: ...
def read_flatgeobuf(
file: Union[str, Path, BinaryIO],
*,
fs: Optional[ObjectStore] = None,
batch_size: int = 65536,
bbox: Tuple[float, float, float, float] | None = None,
) -> GeoTable: ...
) -> Table: ...
async def read_flatgeobuf_async(
path: str,
*,
fs: Optional[ObjectStore] = None,
batch_size: int = 65536,
bbox: Tuple[float, float, float, float] | None = None,
) -> GeoTable: ...
) -> Table: ...
def read_geojson(
file: Union[str, Path, BinaryIO], *, batch_size: int = 65536
) -> GeoTable: ...
) -> Table: ...
def read_geojson_lines(
file: Union[str, Path, BinaryIO], *, batch_size: int = 65536
) -> GeoTable: ...
def read_ipc(file: Union[str, Path, BinaryIO]) -> GeoTable: ...
def read_ipc_stream(file: Union[str, Path, BinaryIO]) -> GeoTable: ...
) -> Table: ...
def read_ipc(file: Union[str, Path, BinaryIO]) -> Table: ...
def read_ipc_stream(file: Union[str, Path, BinaryIO]) -> Table: ...
def read_parquet(
path: str, *, fs: Optional[ObjectStore] = None, batch_size: int = 65536
) -> GeoTable: ...
) -> Table: ...
async def read_parquet_async(
path: str, *, fs: Optional[ObjectStore] = None, batch_size: int = 65536
) -> GeoTable: ...
def read_postgis(connection_url: str, sql: str) -> Optional[GeoTable]: ...
async def read_postgis_async(connection_url: str, sql: str) -> Optional[GeoTable]: ...
) -> Table: ...
def read_postgis(connection_url: str, sql: str) -> Optional[Table]: ...
async def read_postgis_async(connection_url: str, sql: str) -> Optional[Table]: ...
def read_pyogrio(
path_or_buffer: Path | str | bytes,
/,
Expand All @@ -1569,7 +1559,7 @@ def read_pyogrio(
return_fids=False,
batch_size=65536,
**kwargs,
) -> GeoTable: ...
) -> RecordBatchReader: ...
def write_csv(table: ArrowStreamExportable, file: str | Path | BinaryIO) -> None: ...
def write_flatgeobuf(
table: ArrowStreamExportable,
Expand Down Expand Up @@ -1609,7 +1599,7 @@ def from_ewkb(
| MixedGeometryArray
| GeometryCollectionArray
): ...
def from_geopandas(input: gpd.GeoDataFrame) -> GeoTable: ...
def from_geopandas(input: gpd.GeoDataFrame) -> Table: ...
def from_shapely(
input,
) -> (
Expand Down
6 changes: 2 additions & 4 deletions python/core/python/geoarrow/rust/core/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,15 +153,13 @@ def __arrow_c_schema__(self) -> object: ...
class ArrowArrayExportable(Protocol):
"""An Arrow or GeoArrow array or RecordBatch."""

def __arrow_c_array__(
self, requested_schema: object | None = None
) -> Tuple[object, object]: ...
def __arrow_c_array__(self, requested_schema) -> Tuple[object, object]: ...


class ArrowStreamExportable(Protocol):
"""An Arrow or GeoArrow ChunkedArray or Table."""

def __arrow_c_stream__(self, requested_schema: object | None = None) -> object: ...
def __arrow_c_stream__(self, requested_schema) -> object: ...


class GeoInterfaceProtocol(Protocol):
Expand Down
3 changes: 0 additions & 3 deletions python/core/src/algorithm/native/eq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
use crate::array::*;
use crate::chunked_array::*;
use crate::scalar::*;
use crate::table::GeoTable;
use pyo3::prelude::*;

macro_rules! impl_eq {
Expand Down Expand Up @@ -78,5 +77,3 @@ impl_eq!(ChunkedInt32Array);
impl_eq!(ChunkedInt64Array);
impl_eq!(ChunkedStringArray);
impl_eq!(ChunkedLargeStringArray);

impl_eq!(GeoTable);
23 changes: 6 additions & 17 deletions python/core/src/algorithm/native/explode.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
use crate::error::PyGeoArrowResult;
use crate::table::GeoTable;
use crate::interop::util::{pytable_to_table, table_to_pytable};
use geoarrow::algorithm::native::ExplodeTable;
use pyo3::prelude::*;
use pyo3_arrow::PyTable;

/// Explode a table.
///
Expand All @@ -14,20 +15,8 @@ use pyo3::prelude::*;
/// Returns:
/// A new table with multi-part geometries exploded to separate rows.
#[pyfunction]
pub fn explode(input: GeoTable) -> PyGeoArrowResult<GeoTable> {
input.explode()
}

#[pymethods]
impl GeoTable {
/// Explode a table.
///
/// This is intended to be equivalent to the [`explode`][geopandas.GeoDataFrame.explode]
/// function in GeoPandas.
///
/// Returns:
/// A new table with multi-part geometries exploded to separate rows.
pub fn explode(&self) -> PyGeoArrowResult<GeoTable> {
Ok(self.0.explode(None)?.into())
}
pub fn explode(input: PyTable) -> PyGeoArrowResult<PyTable> {
let table = pytable_to_table(input)?;
let exploded_table = table.explode(None)?;
Ok(table_to_pytable(exploded_table))
}
3 changes: 0 additions & 3 deletions python/core/src/algorithm/native/len.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use crate::array::*;
use crate::chunked_array::*;
use crate::table::GeoTable;
use arrow_array::Array;
use geoarrow::GeometryArrayTrait;
use pyo3::prelude::*;
Expand Down Expand Up @@ -66,5 +65,3 @@ impl_len!(ChunkedInt32Array);
impl_len!(ChunkedInt64Array);
impl_len!(ChunkedStringArray);
impl_len!(ChunkedLargeStringArray);

impl_len!(GeoTable);
8 changes: 8 additions & 0 deletions python/core/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use pyo3::prelude::*;
pub enum PyGeoArrowError {
GeoArrowError(geoarrow::error::GeoArrowError),
PyErr(PyErr),
PyArrowError(pyo3_arrow::error::PyArrowError),
ObjectStoreError(object_store::Error),
ObjectStorePathError(object_store::path::Error),
UrlParseError(url::ParseError),
Expand All @@ -14,6 +15,7 @@ impl From<PyGeoArrowError> for PyErr {
match error {
PyGeoArrowError::GeoArrowError(err) => PyException::new_err(err.to_string()),
PyGeoArrowError::PyErr(err) => err,
PyGeoArrowError::PyArrowError(err) => err.into(),
PyGeoArrowError::ObjectStoreError(err) => PyException::new_err(err.to_string()),
PyGeoArrowError::ObjectStorePathError(err) => PyException::new_err(err.to_string()),
PyGeoArrowError::UrlParseError(err) => PyException::new_err(err.to_string()),
Expand All @@ -27,6 +29,12 @@ impl From<geoarrow::error::GeoArrowError> for PyGeoArrowError {
}
}

impl From<pyo3_arrow::error::PyArrowError> for PyGeoArrowError {
fn from(other: pyo3_arrow::error::PyArrowError) -> Self {
Self::PyArrowError(other)
}
}

impl From<object_store::Error> for PyGeoArrowError {
fn from(other: object_store::Error) -> Self {
Self::ObjectStoreError(other)
Expand Down
2 changes: 0 additions & 2 deletions python/core/src/ffi/from_python/array.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use crate::array::*;
use crate::ffi::from_python::utils::import_arrow_c_array;
use crate::table::GeoTable;
use pyo3::exceptions::PyTypeError;
use pyo3::prelude::*;
use pyo3::types::PyType;
Expand Down Expand Up @@ -65,4 +64,3 @@ impl_from_arrow!(MultiPolygonArray);
impl_from_arrow!(MixedGeometryArray);
// impl_from_arrow!(RectArray);
impl_from_arrow!(GeometryCollectionArray);
impl_from_arrow!(GeoTable);
3 changes: 0 additions & 3 deletions python/core/src/ffi/from_python/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,7 @@ pub mod array;
pub mod chunked;
pub mod ffi_stream;
pub mod input;
pub mod record_batch;
pub mod record_batch_reader;
pub mod scalar;
pub mod schema;
pub mod table;
pub mod utils;

Expand Down
Loading
Loading