Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean up Python file readers #913

Merged
merged 8 commits into from
Dec 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions python/geoarrow-io/python/geoarrow/rust/io/_csv.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from pathlib import Path
from typing import BinaryIO

from arro3.core import Table
from arro3.core.types import ArrowStreamExportable

def read_csv(
    file: str | Path | BinaryIO,
    geometry_column_name: str,
    *,
    batch_size: int = 65536,
) -> Table:
    """
    Load the contents of a CSV source into a Table.

    Args:
        file: either a path to the CSV file, or a Python file object opened in
            binary read mode.
        geometry_column_name: name of the column in the CSV that holds the
            geometry.
        batch_size: how many rows go into each internal batch of the resulting
            table. Keyword-only.

    Returns:
        The Table read from the CSV source.
    """

def write_csv(
    table: ArrowStreamExportable,
    file: str | Path | BinaryIO,
) -> None:
    """
    Write Arrow tabular data out as CSV.

    Args:
        table: the data to write; an Arrow RecordBatch, Table, or
            RecordBatchReader.
        file: the destination, given either as a path to the output file or as
            a Python file object opened in binary write mode.

    Returns:
        None
    """
6 changes: 5 additions & 1 deletion python/geoarrow-io/python/geoarrow/rust/io/_flatgeobuf.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ from typing import BinaryIO, Optional, Tuple, Union

from arro3.core import Table
from arro3.core.types import ArrowStreamExportable
from geoarrow.rust.core.enums import CoordType
from geoarrow.rust.core.types import CoordTypeT

def read_flatgeobuf(
file: Union[str, Path, BinaryIO],
Expand Down Expand Up @@ -77,6 +79,7 @@ async def read_flatgeobuf_async(
store: Optional[ObjectStore] = None,
batch_size: int = 65536,
bbox: Tuple[float, float, float, float] | None = None,
coord_type: CoordType | CoordTypeT | None = None,
) -> Table:
"""
Read a FlatGeobuf file from a url into an Arrow Table.
Expand Down Expand Up @@ -115,7 +118,8 @@ async def read_flatgeobuf_async(
store: an ObjectStore instance for this url. This is required for non-HTTP urls.
batch_size: the number of rows to include in each internal batch of the table.
bbox: A spatial filter for reading rows, of the format (minx, miny, maxx, maxy). If set to
`None`, no spatial filtering will be performed.
`None`, no spatial filtering will be performed.
coord_type: The GeoArrow coordinate variant to use.

Returns:
Table from FlatGeobuf file.
Expand Down
70 changes: 70 additions & 0 deletions python/geoarrow-io/python/geoarrow/rust/io/_geojson.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from __future__ import annotations

from pathlib import Path
from typing import BinaryIO, Union

from arro3.core import Table
from arro3.core.types import ArrowStreamExportable

def read_geojson(
    file: Union[str, Path, BinaryIO],
    *,
    batch_size: int = 65536,
) -> Table:
    """
    Load a GeoJSON source into an Arrow Table.

    Args:
        file: either a path to the GeoJSON file, or a Python file object opened
            in binary read mode.
        batch_size: how many rows go into each internal batch of the resulting
            table. Keyword-only.

    Returns:
        The Table read from the GeoJSON source.
    """

def read_geojson_lines(
    file: Union[str, Path, BinaryIO],
    *,
    batch_size: int = 65536,
) -> Table:
    """
    Load a newline-delimited GeoJSON source into an Arrow Table.

    The input is expected to be a text file holding one GeoJSON Feature per
    line, with consecutive Features separated by a newline character.

    Args:
        file: either a path to the file, or a Python file object opened in
            binary read mode.
        batch_size: how many rows go into each internal batch of the resulting
            table. Keyword-only.

    Returns:
        The Table read from the newline-delimited GeoJSON source.
    """

def write_geojson(
    table: ArrowStreamExportable,
    file: Union[str, Path, BinaryIO],
) -> None:
    """
    Write Arrow tabular data out as GeoJSON.

    The GeoJSON specification requires coordinates to be expressed in WGS84
    (EPSG:4326). This function does not reproject the data into WGS84 on your
    behalf.

    Args:
        table: the data to write; an Arrow RecordBatch, Table, or
            RecordBatchReader.
        file: the destination, given either as a path to the output file or as
            a Python file object opened in binary write mode.

    Returns:
        None
    """

def write_geojson_lines(
    table: ArrowStreamExportable,
    file: Union[str, Path, BinaryIO],
) -> None:
    """
    Write Arrow tabular data out as newline-delimited GeoJSON.

    The GeoJSON specification requires coordinates to be expressed in WGS84
    (EPSG:4326). This function does not reproject the data into WGS84 on your
    behalf.

    Args:
        table: the data to write; an Arrow RecordBatch, Table, or
            RecordBatchReader.
        file: the destination, given either as a path to the output file or as
            a Python file object opened in binary write mode.

    Returns:
        None
    """
Loading
Loading