Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve docs for geoarrow::io module & GeometryArray FFI fix #959

Merged
merged 4 commits into from
Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions python/tests/interop/test_wkb.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,9 @@ def test_geometry_collection():
retour = to_wkb(parsed_geoarrow)
retour_shapely = shapely.from_wkb(retour[0].as_py())

# Need to unpack the geoms because they're returned as multi-geoms
assert retour_shapely.geoms[0].geoms[0] == point
assert retour_shapely.geoms[1].geoms[0] == point2
assert retour_shapely.geoms[2].geoms[0] == line_string
assert retour_shapely.geoms[0] == point
assert retour_shapely.geoms[1] == point2
assert retour_shapely.geoms[2] == line_string


def test_ewkb_srid():
Expand Down
18 changes: 6 additions & 12 deletions rust/geoarrow/src/array/geometry/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -676,18 +676,14 @@ impl NativeGeometryAccessor for GeometryArray {
4 => Geometry::MultiPoint(self.mpoint_xy.value(offset)),
5 => Geometry::MultiLineString(self.mline_string_xy.value(offset)),
6 => Geometry::MultiPolygon(self.mpolygon_xy.value(offset)),
7 => {
panic!("nested geometry collections not supported")
}
7 => Geometry::GeometryCollection(self.gc_xy.value(offset)),
11 => Geometry::Point(self.point_xyz.value(offset)),
12 => Geometry::LineString(self.line_string_xyz.value(offset)),
13 => Geometry::Polygon(self.polygon_xyz.value(offset)),
14 => Geometry::MultiPoint(self.mpoint_xyz.value(offset)),
15 => Geometry::MultiLineString(self.mline_string_xyz.value(offset)),
16 => Geometry::MultiPolygon(self.mpolygon_xyz.value(offset)),
17 => {
panic!("nested geometry collections not supported")
}
17 => Geometry::GeometryCollection(self.gc_xyz.value(offset)),
_ => panic!("unknown type_id {}", type_id),
}
}
Expand Down Expand Up @@ -719,18 +715,14 @@ impl<'a> ArrayAccessor<'a> for GeometryArray {
4 => Geometry::MultiPoint(self.mpoint_xy.value(offset)),
5 => Geometry::MultiLineString(self.mline_string_xy.value(offset)),
6 => Geometry::MultiPolygon(self.mpolygon_xy.value(offset)),
7 => {
panic!("nested geometry collections not supported")
}
7 => Geometry::GeometryCollection(self.gc_xy.value(offset)),
11 => Geometry::Point(self.point_xyz.value(offset)),
12 => Geometry::LineString(self.line_string_xyz.value(offset)),
13 => Geometry::Polygon(self.polygon_xyz.value(offset)),
14 => Geometry::MultiPoint(self.mpoint_xyz.value(offset)),
15 => Geometry::MultiLineString(self.mline_string_xyz.value(offset)),
16 => Geometry::MultiPolygon(self.mpolygon_xyz.value(offset)),
17 => {
panic!("nested geometry collections not supported")
}
17 => Geometry::GeometryCollection(self.gc_xyz.value(offset)),
_ => panic!("unknown type_id {}", type_id),
}
}
Expand All @@ -752,12 +744,14 @@ impl IntoArrow for GeometryArray {
self.mpoint_xy.into_array_ref(),
self.mline_string_xy.into_array_ref(),
self.mpolygon_xy.into_array_ref(),
self.gc_xy.into_array_ref(),
self.point_xyz.into_array_ref(),
self.line_string_xyz.into_array_ref(),
self.polygon_xyz.into_array_ref(),
self.mpoint_xyz.into_array_ref(),
self.mline_string_xyz.into_array_ref(),
self.mpolygon_xyz.into_array_ref(),
self.gc_xyz.into_array_ref(),
];

UnionArray::try_new(
Expand Down
96 changes: 63 additions & 33 deletions rust/geoarrow/src/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -333,9 +333,9 @@ fn rect_data_type(dim: Dimension) -> DataType {
DataType::Struct(rect_fields(dim))
}

fn unknown_data_type(coord_type: CoordType) -> DataType {
fn geometry_data_type(coord_type: CoordType) -> DataType {
let mut fields = vec![];
let type_ids = vec![1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16];
let type_ids = vec![1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17];

// Note: we manually construct the fields because these fields shouldn't have their own
// GeoArrow extension metadata
Expand All @@ -344,42 +344,72 @@ fn unknown_data_type(coord_type: CoordType) -> DataType {
NativeType::Point(coord_type, Dimension::XY).to_data_type(),
true,
));

let linestring = NativeType::LineString(coord_type, Dimension::XY);
fields.push(Field::new("", linestring.to_data_type(), true));

let polygon = NativeType::Polygon(coord_type, Dimension::XY);
fields.push(Field::new("", polygon.to_data_type(), true));

let multi_point = NativeType::MultiPoint(coord_type, Dimension::XY);
fields.push(Field::new("", multi_point.to_data_type(), true));

let multi_line_string = NativeType::MultiLineString(coord_type, Dimension::XY);
fields.push(Field::new("", multi_line_string.to_data_type(), true));

let multi_polygon = NativeType::MultiPolygon(coord_type, Dimension::XY);
fields.push(Field::new("", multi_polygon.to_data_type(), true));
fields.push(Field::new(
"",
NativeType::LineString(coord_type, Dimension::XY).to_data_type(),
true,
));
fields.push(Field::new(
"",
NativeType::Polygon(coord_type, Dimension::XY).to_data_type(),
true,
));
fields.push(Field::new(
"",
NativeType::MultiPoint(coord_type, Dimension::XY).to_data_type(),
true,
));
fields.push(Field::new(
"",
NativeType::MultiLineString(coord_type, Dimension::XY).to_data_type(),
true,
));
fields.push(Field::new(
"",
NativeType::MultiPolygon(coord_type, Dimension::XY).to_data_type(),
true,
));
fields.push(Field::new(
"",
NativeType::GeometryCollection(coord_type, Dimension::XY).to_data_type(),
true,
));

fields.push(Field::new(
"",
NativeType::Point(coord_type, Dimension::XYZ).to_data_type(),
true,
));

let linestring = NativeType::LineString(coord_type, Dimension::XYZ);
fields.push(Field::new("", linestring.to_data_type(), true));

let polygon = NativeType::Polygon(coord_type, Dimension::XYZ);
fields.push(Field::new("", polygon.to_data_type(), true));

let multi_point = NativeType::MultiPoint(coord_type, Dimension::XYZ);
fields.push(Field::new("", multi_point.to_data_type(), true));

let multi_line_string = NativeType::MultiLineString(coord_type, Dimension::XYZ);
fields.push(Field::new("", multi_line_string.to_data_type(), true));

let multi_polygon = NativeType::MultiPolygon(coord_type, Dimension::XYZ);
fields.push(Field::new("", multi_polygon.to_data_type(), true));
fields.push(Field::new(
"",
NativeType::LineString(coord_type, Dimension::XYZ).to_data_type(),
true,
));
fields.push(Field::new(
"",
NativeType::Polygon(coord_type, Dimension::XYZ).to_data_type(),
true,
));
fields.push(Field::new(
"",
NativeType::MultiPoint(coord_type, Dimension::XYZ).to_data_type(),
true,
));
fields.push(Field::new(
"",
NativeType::MultiLineString(coord_type, Dimension::XYZ).to_data_type(),
true,
));
fields.push(Field::new(
"",
NativeType::MultiPolygon(coord_type, Dimension::XYZ).to_data_type(),
true,
));
fields.push(Field::new(
"",
NativeType::GeometryCollection(coord_type, Dimension::XYZ).to_data_type(),
true,
));

let union_fields = UnionFields::new(type_ids, fields);
DataType::Union(union_fields, UnionMode::Dense)
Expand Down Expand Up @@ -445,7 +475,7 @@ impl NativeType {
MultiPolygon(coord_type, dim) => multi_polygon_data_type(*coord_type, *dim),
GeometryCollection(coord_type, dim) => geometry_collection_data_type(*coord_type, *dim),
Rect(dim) => rect_data_type(*dim),
Geometry(coord_type) => unknown_data_type(*coord_type),
Geometry(coord_type) => geometry_data_type(*coord_type),
}
}

Expand Down
8 changes: 8 additions & 0 deletions rust/geoarrow/src/io/csv/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
//! Read from and write to CSV files.
//!
//! The CSV reader implements [`RecordBatchReader`], so you can iterate over the batches of the CSV
//! without materializing the entire file in memory.
//!
//! [`RecordBatchReader`]: arrow_array::RecordBatchReader
//!
//! Additionally, the CSV writer takes in a [`RecordBatchReader`], so you can write an Arrow
//! iterator to CSV without materializing all batches in memory at once.
//!
//! # Examples
//!
//! ```
Expand Down
6 changes: 6 additions & 0 deletions rust/geoarrow/src/io/csv/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ pub struct CSVReaderOptions {
/// When `true`, the first row of the CSV file is treated as a header row
pub has_header: Option<bool>,

/// The maximum number of records to read for schema inference.
///
/// See [`arrow_csv::reader::Format::infer_schema`].
///
/// **By default, all rows are read to infer the CSV schema.**
pub max_records: Option<usize>,

/// Specify a custom delimiter character, defaults to comma `','`
Expand Down Expand Up @@ -119,6 +124,7 @@ pub struct CSVReader<R> {
}

impl<R> CSVReader<R> {
/// Access the schema of this reader.
///
/// Returns a clone of the `output_schema` stored on this reader, so it can be
/// inspected without consuming the reader.
pub fn schema(&self) -> SchemaRef {
self.output_schema.clone()
}
Expand Down
4 changes: 2 additions & 2 deletions rust/geoarrow/src/io/csv/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ use std::sync::Arc;

/// Write a Table to CSV
pub fn write_csv<W: Write, S: Into<RecordBatchReader>>(stream: S, writer: W) -> Result<()> {
let mut stream: RecordBatchReader = stream.into();
let reader = stream.take().unwrap();
let stream: RecordBatchReader = stream.into();
let reader = stream.into_inner();

let mut csv_writer = arrow_csv::Writer::new(writer);
for batch in reader {
Expand Down
1 change: 1 addition & 0 deletions rust/geoarrow/src/io/flatgeobuf/reader/async.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use crate::io::geozero::array::MixedGeometryStreamBuilder;
use crate::io::geozero::table::{GeoTableBuilder, GeoTableBuilderOptions};
use crate::table::Table;

/// Read a FlatGeobuf file to a Table asynchronously from object storage.
pub async fn read_flatgeobuf_async(
reader: Arc<dyn ObjectStore>,
location: Path,
Expand Down
10 changes: 5 additions & 5 deletions rust/geoarrow/src/io/flatgeobuf/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ pub struct FlatGeobufWriterOptions {
pub detect_type: bool,
/// Convert single to multi geometries, if `geometry_type` is multi type or Unknown
pub promote_to_multi: bool,
// Dataset title
/// Dataset title
pub title: Option<String>,
// Dataset description (intended for free form long text)
/// Dataset description (intended for free form long text)
pub description: Option<String>,
// Dataset metadata (intended to be application specific and
/// Dataset metadata (intended to be application specific and suggested to be structured, e.g. JSON)
pub metadata: Option<String>,
/// A method for transforming CRS to WKT
///
Expand Down Expand Up @@ -119,7 +119,7 @@ pub fn write_flatgeobuf_with_options<W: Write, S: Into<RecordBatchReader>>(
) -> Result<()> {
let mut stream: RecordBatchReader = stream.into();

let schema = stream.schema()?;
let schema = stream.schema();
let fields = &schema.fields;
let geom_col_idxs = schema.as_ref().geometry_columns();
if geom_col_idxs.len() != 1 {
Expand All @@ -133,7 +133,7 @@ pub fn write_flatgeobuf_with_options<W: Write, S: Into<RecordBatchReader>>(
let wkt_crs_str = options.create_wkt_crs(&array_meta)?;
let fgb_options = options.create_fgb_options(geo_data_type, wkt_crs_str.as_deref());

let geometry_type = infer_flatgeobuf_geometry_type(stream.schema()?.as_ref())?;
let geometry_type = infer_flatgeobuf_geometry_type(stream.schema().as_ref())?;

let mut fgb = FgbWriter::create_with_options(name, geometry_type, fgb_options)?;
stream.process(&mut fgb)?;
Expand Down
3 changes: 2 additions & 1 deletion rust/geoarrow/src/io/geos/array/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ use crate::array::WKBArray;
use crate::error::Result;

impl<O: OffsetSizeTrait> WKBArray<O> {
pub fn from_geos(value: Vec<Option<geos::Geometry>>) -> Result<Self> {
#[allow(dead_code)]
pub(crate) fn from_geos(value: Vec<Option<geos::Geometry>>) -> Result<Self> {
let mut builder = GenericBinaryBuilder::new();
for maybe_geom in value {
if let Some(geom) = maybe_geom {
Expand Down
6 changes: 4 additions & 2 deletions rust/geoarrow/src/io/geos/array/linestring.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ use crate::error::Result;
use crate::io::geos::scalar::GEOSLineString;

impl LineStringBuilder {
pub fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
#[allow(dead_code)]
pub(crate) fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
// TODO: don't use new_unchecked
let geos_objects: Vec<Option<GEOSLineString>> = value
.into_iter()
Expand All @@ -15,7 +16,8 @@ impl LineStringBuilder {
}

impl LineStringArray {
pub fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
#[allow(dead_code)]
pub(crate) fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
let mutable_arr = LineStringBuilder::from_geos(value, dim)?;
Ok(mutable_arr.into())
}
Expand Down
6 changes: 4 additions & 2 deletions rust/geoarrow/src/io/geos/array/multilinestring.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ use crate::error::Result;
use crate::io::geos::scalar::GEOSMultiLineString;

impl MultiLineStringBuilder {
pub fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
#[allow(dead_code)]
pub(crate) fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
// TODO: don't use new_unchecked
let geos_objects: Vec<Option<GEOSMultiLineString>> = value
.into_iter()
Expand All @@ -15,7 +16,8 @@ impl MultiLineStringBuilder {
}

impl MultiLineStringArray {
pub fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
#[allow(dead_code)]
pub(crate) fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
let mutable_arr = MultiLineStringBuilder::from_geos(value, dim)?;
Ok(mutable_arr.into())
}
Expand Down
6 changes: 4 additions & 2 deletions rust/geoarrow/src/io/geos/array/multipoint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ use crate::error::Result;
use crate::io::geos::scalar::GEOSMultiPoint;

impl MultiPointBuilder {
pub fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
#[allow(dead_code)]
pub(crate) fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
// TODO: don't use new_unchecked
let geos_objects: Vec<Option<GEOSMultiPoint>> = value
.into_iter()
Expand All @@ -15,7 +16,8 @@ impl MultiPointBuilder {
}

impl MultiPointArray {
pub fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
#[allow(dead_code)]
pub(crate) fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
let mutable_arr = MultiPointBuilder::from_geos(value, dim)?;
Ok(mutable_arr.into())
}
Expand Down
6 changes: 4 additions & 2 deletions rust/geoarrow/src/io/geos/array/multipolygon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ use crate::error::Result;
use crate::io::geos::scalar::GEOSMultiPolygon;

impl MultiPolygonBuilder {
pub fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
#[allow(dead_code)]
pub(crate) fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
// TODO: don't use new_unchecked
let geos_objects: Vec<Option<GEOSMultiPolygon>> = value
.into_iter()
Expand All @@ -15,7 +16,8 @@ impl MultiPolygonBuilder {
}

impl MultiPolygonArray {
pub fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
#[allow(dead_code)]
pub(crate) fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
let mutable_arr = MultiPolygonBuilder::from_geos(value, dim)?;
Ok(mutable_arr.into())
}
Expand Down
6 changes: 4 additions & 2 deletions rust/geoarrow/src/io/geos/array/point.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ use crate::error::Result;
use crate::io::geos::scalar::GEOSPoint;

impl PointBuilder {
pub fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
#[allow(dead_code)]
pub(crate) fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
// TODO: don't use new_unchecked
let geos_linestring_objects: Vec<Option<GEOSPoint>> = value
.into_iter()
Expand All @@ -15,7 +16,8 @@ impl PointBuilder {
}

impl PointArray {
pub fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
#[allow(dead_code)]
pub(crate) fn from_geos(value: Vec<Option<geos::Geometry>>, dim: Dimension) -> Result<Self> {
let mutable_arr = PointBuilder::from_geos(value, dim)?;
Ok(mutable_arr.into())
}
Expand Down
Loading
Loading