Skip to content

Commit

Permalink
Improve docs for geoarrow::io module (#959)
Browse files Browse the repository at this point in the history
### Change list

- Simplify implementation of `RecordBatchReader` wrapper.
- Deny missing documentation in `geoarrow::io`.
- Implement writing a `RectArray` to a `WKBArray`.
  • Loading branch information
kylebarron authored Dec 20, 2024
1 parent 11abe73 commit 681b6a4
Show file tree
Hide file tree
Showing 26 changed files with 228 additions and 120 deletions.
7 changes: 3 additions & 4 deletions python/tests/interop/test_wkb.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,9 @@ def test_geometry_collection():
retour = to_wkb(parsed_geoarrow)
retour_shapely = shapely.from_wkb(retour[0].as_py())

# Need to unpack the geoms because they're returned as multi-geoms
assert retour_shapely.geoms[0].geoms[0] == point
assert retour_shapely.geoms[1].geoms[0] == point2
assert retour_shapely.geoms[2].geoms[0] == line_string
assert retour_shapely.geoms[0] == point
assert retour_shapely.geoms[1] == point2
assert retour_shapely.geoms[2] == line_string


def test_ewkb_srid():
Expand Down
18 changes: 6 additions & 12 deletions rust/geoarrow/src/array/geometry/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -676,18 +676,14 @@ impl NativeGeometryAccessor for GeometryArray {
4 => Geometry::MultiPoint(self.mpoint_xy.value(offset)),
5 => Geometry::MultiLineString(self.mline_string_xy.value(offset)),
6 => Geometry::MultiPolygon(self.mpolygon_xy.value(offset)),
7 => {
panic!("nested geometry collections not supported")
}
7 => Geometry::GeometryCollection(self.gc_xy.value(offset)),
11 => Geometry::Point(self.point_xyz.value(offset)),
12 => Geometry::LineString(self.line_string_xyz.value(offset)),
13 => Geometry::Polygon(self.polygon_xyz.value(offset)),
14 => Geometry::MultiPoint(self.mpoint_xyz.value(offset)),
15 => Geometry::MultiLineString(self.mline_string_xyz.value(offset)),
16 => Geometry::MultiPolygon(self.mpolygon_xyz.value(offset)),
17 => {
panic!("nested geometry collections not supported")
}
17 => Geometry::GeometryCollection(self.gc_xyz.value(offset)),
_ => panic!("unknown type_id {}", type_id),
}
}
Expand Down Expand Up @@ -719,18 +715,14 @@ impl<'a> ArrayAccessor<'a> for GeometryArray {
4 => Geometry::MultiPoint(self.mpoint_xy.value(offset)),
5 => Geometry::MultiLineString(self.mline_string_xy.value(offset)),
6 => Geometry::MultiPolygon(self.mpolygon_xy.value(offset)),
7 => {
panic!("nested geometry collections not supported")
}
7 => Geometry::GeometryCollection(self.gc_xy.value(offset)),
11 => Geometry::Point(self.point_xyz.value(offset)),
12 => Geometry::LineString(self.line_string_xyz.value(offset)),
13 => Geometry::Polygon(self.polygon_xyz.value(offset)),
14 => Geometry::MultiPoint(self.mpoint_xyz.value(offset)),
15 => Geometry::MultiLineString(self.mline_string_xyz.value(offset)),
16 => Geometry::MultiPolygon(self.mpolygon_xyz.value(offset)),
17 => {
panic!("nested geometry collections not supported")
}
17 => Geometry::GeometryCollection(self.gc_xyz.value(offset)),
_ => panic!("unknown type_id {}", type_id),
}
}
Expand All @@ -752,12 +744,14 @@ impl IntoArrow for GeometryArray {
self.mpoint_xy.into_array_ref(),
self.mline_string_xy.into_array_ref(),
self.mpolygon_xy.into_array_ref(),
self.gc_xy.into_array_ref(),
self.point_xyz.into_array_ref(),
self.line_string_xyz.into_array_ref(),
self.polygon_xyz.into_array_ref(),
self.mpoint_xyz.into_array_ref(),
self.mline_string_xyz.into_array_ref(),
self.mpolygon_xyz.into_array_ref(),
self.gc_xyz.into_array_ref(),
];

UnionArray::try_new(
Expand Down
96 changes: 63 additions & 33 deletions rust/geoarrow/src/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -333,9 +333,9 @@ fn rect_data_type(dim: Dimension) -> DataType {
DataType::Struct(rect_fields(dim))
}

fn unknown_data_type(coord_type: CoordType) -> DataType {
fn geometry_data_type(coord_type: CoordType) -> DataType {
let mut fields = vec![];
let type_ids = vec![1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16];
let type_ids = vec![1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17];

// Note: we manually construct the fields because these fields shouldn't have their own
// GeoArrow extension metadata
Expand All @@ -344,42 +344,72 @@ fn unknown_data_type(coord_type: CoordType) -> DataType {
NativeType::Point(coord_type, Dimension::XY).to_data_type(),
true,
));

let linestring = NativeType::LineString(coord_type, Dimension::XY);
fields.push(Field::new("", linestring.to_data_type(), true));

let polygon = NativeType::Polygon(coord_type, Dimension::XY);
fields.push(Field::new("", polygon.to_data_type(), true));

let multi_point = NativeType::MultiPoint(coord_type, Dimension::XY);
fields.push(Field::new("", multi_point.to_data_type(), true));

let multi_line_string = NativeType::MultiLineString(coord_type, Dimension::XY);
fields.push(Field::new("", multi_line_string.to_data_type(), true));

let multi_polygon = NativeType::MultiPolygon(coord_type, Dimension::XY);
fields.push(Field::new("", multi_polygon.to_data_type(), true));
fields.push(Field::new(
"",
NativeType::LineString(coord_type, Dimension::XY).to_data_type(),
true,
));
fields.push(Field::new(
"",
NativeType::Polygon(coord_type, Dimension::XY).to_data_type(),
true,
));
fields.push(Field::new(
"",
NativeType::MultiPoint(coord_type, Dimension::XY).to_data_type(),
true,
));
fields.push(Field::new(
"",
NativeType::MultiLineString(coord_type, Dimension::XY).to_data_type(),
true,
));
fields.push(Field::new(
"",
NativeType::MultiPolygon(coord_type, Dimension::XY).to_data_type(),
true,
));
fields.push(Field::new(
"",
NativeType::GeometryCollection(coord_type, Dimension::XY).to_data_type(),
true,
));

fields.push(Field::new(
"",
NativeType::Point(coord_type, Dimension::XYZ).to_data_type(),
true,
));

let linestring = NativeType::LineString(coord_type, Dimension::XYZ);
fields.push(Field::new("", linestring.to_data_type(), true));

let polygon = NativeType::Polygon(coord_type, Dimension::XYZ);
fields.push(Field::new("", polygon.to_data_type(), true));

let multi_point = NativeType::MultiPoint(coord_type, Dimension::XYZ);
fields.push(Field::new("", multi_point.to_data_type(), true));

let multi_line_string = NativeType::MultiLineString(coord_type, Dimension::XYZ);
fields.push(Field::new("", multi_line_string.to_data_type(), true));

let multi_polygon = NativeType::MultiPolygon(coord_type, Dimension::XYZ);
fields.push(Field::new("", multi_polygon.to_data_type(), true));
fields.push(Field::new(
"",
NativeType::LineString(coord_type, Dimension::XYZ).to_data_type(),
true,
));
fields.push(Field::new(
"",
NativeType::Polygon(coord_type, Dimension::XYZ).to_data_type(),
true,
));
fields.push(Field::new(
"",
NativeType::MultiPoint(coord_type, Dimension::XYZ).to_data_type(),
true,
));
fields.push(Field::new(
"",
NativeType::MultiLineString(coord_type, Dimension::XYZ).to_data_type(),
true,
));
fields.push(Field::new(
"",
NativeType::MultiPolygon(coord_type, Dimension::XYZ).to_data_type(),
true,
));
fields.push(Field::new(
"",
NativeType::GeometryCollection(coord_type, Dimension::XYZ).to_data_type(),
true,
));

let union_fields = UnionFields::new(type_ids, fields);
DataType::Union(union_fields, UnionMode::Dense)
Expand Down Expand Up @@ -445,7 +475,7 @@ impl NativeType {
MultiPolygon(coord_type, dim) => multi_polygon_data_type(*coord_type, *dim),
GeometryCollection(coord_type, dim) => geometry_collection_data_type(*coord_type, *dim),
Rect(dim) => rect_data_type(*dim),
Geometry(coord_type) => unknown_data_type(*coord_type),
Geometry(coord_type) => geometry_data_type(*coord_type),
}
}

Expand Down
8 changes: 8 additions & 0 deletions rust/geoarrow/src/io/csv/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
//! Read from and write to CSV files.
//!
//! The CSV reader implements [`RecordBatchReader`], so you can iterate over the batches of the CSV
//! without materializing the entire file in memory.
//!
//! [`RecordBatchReader`]: arrow_array::RecordBatchReader
//!
//! Additionally, the CSV writer takes in a [`RecordBatchReader`], so you can write an Arrow
//! iterator to CSV without materializing all batches in memory at once.
//!
//! # Examples
//!
//! ```
Expand Down
6 changes: 6 additions & 0 deletions rust/geoarrow/src/io/csv/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ pub struct CSVReaderOptions {
/// When `true`, the first row of the CSV file is treated as a header row
pub has_header: Option<bool>,

/// The maximum number of records to read for schema inference.
///
/// See [`arrow_csv::reader::Format::infer_schema`].
///
/// **By default, all rows are read to infer the CSV schema.**
pub max_records: Option<usize>,

/// Specify a custom delimiter character, defaults to comma `','`
Expand Down Expand Up @@ -119,6 +124,7 @@ pub struct CSVReader<R> {
}

impl<R> CSVReader<R> {
/// Access the schema of this reader.
///
/// Returns a clone of the reader's stored `output_schema`, so the caller receives its own
/// [`SchemaRef`] handle while the reader keeps its copy.
// NOTE(review): presumably `SchemaRef` is a reference-counted pointer, making this clone cheap —
// confirm against the arrow-schema definition.
pub fn schema(&self) -> SchemaRef {
self.output_schema.clone()
}
Expand Down
4 changes: 2 additions & 2 deletions rust/geoarrow/src/io/csv/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ use std::sync::Arc;

/// Write a Table to CSV
pub fn write_csv<W: Write, S: Into<RecordBatchReader>>(stream: S, writer: W) -> Result<()> {
let mut stream: RecordBatchReader = stream.into();
let reader = stream.take().unwrap();
let stream: RecordBatchReader = stream.into();
let reader = stream.into_inner();

let mut csv_writer = arrow_csv::Writer::new(writer);
for batch in reader {
Expand Down
1 change: 1 addition & 0 deletions rust/geoarrow/src/io/flatgeobuf/reader/async.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use crate::io::geozero::array::MixedGeometryStreamBuilder;
use crate::io::geozero::table::{GeoTableBuilder, GeoTableBuilderOptions};
use crate::table::Table;

/// Read a FlatGeobuf file to a Table asynchronously from object storage.
pub async fn read_flatgeobuf_async(
reader: Arc<dyn ObjectStore>,
location: Path,
Expand Down
10 changes: 5 additions & 5 deletions rust/geoarrow/src/io/flatgeobuf/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ pub struct FlatGeobufWriterOptions {
pub detect_type: bool,
/// Convert single to multi geometries, if `geometry_type` is multi type or Unknown
pub promote_to_multi: bool,
// Dataset title
/// Dataset title
pub title: Option<String>,
// Dataset description (intended for free form long text)
/// Dataset description (intended for free form long text)
pub description: Option<String>,
// Dataset metadata (intended to be application specific and
/// Dataset metadata (intended to be application specific and suggested to be structured, e.g. JSON)
pub metadata: Option<String>,
/// A method for transforming CRS to WKT
///
Expand Down Expand Up @@ -119,7 +119,7 @@ pub fn write_flatgeobuf_with_options<W: Write, S: Into<RecordBatchReader>>(
) -> Result<()> {
let mut stream: RecordBatchReader = stream.into();

let schema = stream.schema()?;
let schema = stream.schema();
let fields = &schema.fields;
let geom_col_idxs = schema.as_ref().geometry_columns();
if geom_col_idxs.len() != 1 {
Expand All @@ -133,7 +133,7 @@ pub fn write_flatgeobuf_with_options<W: Write, S: Into<RecordBatchReader>>(
let wkt_crs_str = options.create_wkt_crs(&array_meta)?;
let fgb_options = options.create_fgb_options(geo_data_type, wkt_crs_str.as_deref());

let geometry_type = infer_flatgeobuf_geometry_type(stream.schema()?.as_ref())?;
let geometry_type = infer_flatgeobuf_geometry_type(stream.schema().as_ref())?;

let mut fgb = FgbWriter::create_with_options(name, geometry_type, fgb_options)?;
stream.process(&mut fgb)?;
Expand Down
3 changes: 2 additions & 1 deletion rust/geoarrow/src/io/geos/array/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ use crate::array::WKBArray;
use crate::error::Result;

impl<O: OffsetSizeTrait> WKBArray<O> {
pub fn from_geos(value: Vec<Option<geos::Geometry>>) -> Result<Self> {
#[allow(dead_code)]
pub(crate) fn from_geos(value: Vec<Option<geos::Geometry>>) -> Result<Self> {
let mut builder = GenericBinaryBuilder::new();
for maybe_geom in value {
if let Some(geom) = maybe_geom {
Expand Down
6 changes: 4 additions & 2 deletions rust/geoarrow/src/io/geos/array/linestring.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ use crate::error::Result;
use crate::io::geos::scalar::GEOSLineString;

impl LineStringBuilder {
pub fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
#[allow(dead_code)]
pub(crate) fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
// TODO: don't use new_unchecked
let geos_objects: Vec<Option<GEOSLineString>> = value
.into_iter()
Expand All @@ -15,7 +16,8 @@ impl LineStringBuilder {
}

impl LineStringArray {
pub fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
#[allow(dead_code)]
pub(crate) fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
let mutable_arr = LineStringBuilder::from_geos(value, dim)?;
Ok(mutable_arr.into())
}
Expand Down
6 changes: 4 additions & 2 deletions rust/geoarrow/src/io/geos/array/multilinestring.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ use crate::error::Result;
use crate::io::geos::scalar::GEOSMultiLineString;

impl MultiLineStringBuilder {
pub fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
#[allow(dead_code)]
pub(crate) fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
// TODO: don't use new_unchecked
let geos_objects: Vec<Option<GEOSMultiLineString>> = value
.into_iter()
Expand All @@ -15,7 +16,8 @@ impl MultiLineStringBuilder {
}

impl MultiLineStringArray {
pub fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
#[allow(dead_code)]
pub(crate) fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
let mutable_arr = MultiLineStringBuilder::from_geos(value, dim)?;
Ok(mutable_arr.into())
}
Expand Down
6 changes: 4 additions & 2 deletions rust/geoarrow/src/io/geos/array/multipoint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ use crate::error::Result;
use crate::io::geos::scalar::GEOSMultiPoint;

impl MultiPointBuilder {
pub fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
#[allow(dead_code)]
pub(crate) fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
// TODO: don't use new_unchecked
let geos_objects: Vec<Option<GEOSMultiPoint>> = value
.into_iter()
Expand All @@ -15,7 +16,8 @@ impl MultiPointBuilder {
}

impl MultiPointArray {
pub fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
#[allow(dead_code)]
pub(crate) fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
let mutable_arr = MultiPointBuilder::from_geos(value, dim)?;
Ok(mutable_arr.into())
}
Expand Down
6 changes: 4 additions & 2 deletions rust/geoarrow/src/io/geos/array/multipolygon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ use crate::error::Result;
use crate::io::geos::scalar::GEOSMultiPolygon;

impl MultiPolygonBuilder {
pub fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
#[allow(dead_code)]
pub(crate) fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
// TODO: don't use new_unchecked
let geos_objects: Vec<Option<GEOSMultiPolygon>> = value
.into_iter()
Expand All @@ -15,7 +16,8 @@ impl MultiPolygonBuilder {
}

impl MultiPolygonArray {
pub fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
#[allow(dead_code)]
pub(crate) fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
let mutable_arr = MultiPolygonBuilder::from_geos(value, dim)?;
Ok(mutable_arr.into())
}
Expand Down
6 changes: 4 additions & 2 deletions rust/geoarrow/src/io/geos/array/point.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ use crate::error::Result;
use crate::io::geos::scalar::GEOSPoint;

impl PointBuilder {
pub fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
#[allow(dead_code)]
pub(crate) fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
// TODO: don't use new_unchecked
let geos_linestring_objects: Vec<Option<GEOSPoint>> = value
.into_iter()
Expand All @@ -15,7 +16,8 @@ impl PointBuilder {
}

impl PointArray {
pub fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
#[allow(dead_code)]
pub(crate) fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
let mutable_arr = PointBuilder::from_geos(value, dim)?;
Ok(mutable_arr.into())
}
Expand Down
Loading

0 comments on commit 681b6a4

Please sign in to comment.