From 432b81ba6775e33000aea3685bb54a5cadeda9fd Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Wed, 4 Dec 2024 16:50:21 -0500 Subject: [PATCH] Rename UnknownGeometryArray to GeometryArray (#897) --- .../src/interop/shapely/to_shapely.rs | 2 +- .../geoarrow/src/algorithm/geo_index/rtree.rs | 6 +- rust/geoarrow/src/algorithm/geos/area.rs | 4 +- rust/geoarrow/src/algorithm/geos/is_empty.rs | 5 +- rust/geoarrow/src/algorithm/geos/is_ring.rs | 4 +- rust/geoarrow/src/algorithm/geos/is_simple.rs | 4 +- rust/geoarrow/src/algorithm/geos/is_valid.rs | 4 +- rust/geoarrow/src/algorithm/geos/length.rs | 4 +- rust/geoarrow/src/algorithm/native/cast.rs | 4 +- .../src/algorithm/native/total_bounds.rs | 6 +- rust/geoarrow/src/algorithm/native/unary.rs | 5 +- rust/geoarrow/src/array/cast.rs | 23 +- rust/geoarrow/src/array/dynamic.rs | 2 +- rust/geoarrow/src/array/geometry/array.rs | 1434 +++++++++++++---- .../array/{unknown => geometry}/builder.rs | 44 +- .../array/{unknown => geometry}/capacity.rs | 8 +- rust/geoarrow/src/array/geometry/iterator.rs | 1 - rust/geoarrow/src/array/geometry/mod.rs | 13 +- rust/geoarrow/src/array/mod.rs | 3 +- rust/geoarrow/src/array/unknown/array.rs | 1214 -------------- rust/geoarrow/src/array/unknown/mod.rs | 7 - rust/geoarrow/src/chunked_array/dynamic.rs | 4 +- rust/geoarrow/src/chunked_array/mod.rs | 2 +- rust/geoarrow/src/datatypes.rs | 16 +- rust/geoarrow/src/io/flatgeobuf/writer.rs | 2 +- .../src/io/geozero/scalar/geometry_array.rs | 2 +- .../src/io/geozero/table/data_source.rs | 4 +- .../src/io/parquet/writer/metadata.rs | 6 +- rust/geoarrow/src/io/wkb/api.rs | 12 +- rust/geoarrow/src/io/wkb/writer/geometry.rs | 6 +- rust/geoarrow/src/io/wkt/reader.rs | 10 +- rust/geoarrow/src/io/wkt/writer.rs | 4 +- rust/geoarrow/src/scalar/scalar.rs | 57 +- 33 files changed, 1223 insertions(+), 1699 deletions(-) rename rust/geoarrow/src/array/{unknown => geometry}/builder.rs (96%) rename rust/geoarrow/src/array/{unknown => geometry}/capacity.rs (98%) delete mode 100644 rust/geoarrow/src/array/geometry/iterator.rs delete mode 100644 rust/geoarrow/src/array/unknown/array.rs delete mode 100644 rust/geoarrow/src/array/unknown/mod.rs diff --git a/python/geoarrow-core/src/interop/shapely/to_shapely.rs b/python/geoarrow-core/src/interop/shapely/to_shapely.rs index efd7d97d..9955e0d2 100644 --- a/python/geoarrow-core/src/interop/shapely/to_shapely.rs +++ b/python/geoarrow-core/src/interop/shapely/to_shapely.rs @@ -106,7 +106,7 @@ fn pyarray_to_shapely(py: Python, input: PyArray) -> PyGeoArrowResult rect_arr(py, array.as_ref().as_rect().clone()), Mixed(_, _) => via_wkb(py, array), GeometryCollection(_, _) => via_wkb(py, array), - Unknown(_) => via_wkb(py, array), + Geometry(_) => via_wkb(py, array), } } AnyType::Serialized(typ) => { diff --git a/rust/geoarrow/src/algorithm/geo_index/rtree.rs b/rust/geoarrow/src/algorithm/geo_index/rtree.rs index 826ff208..8286e4d5 100644 --- a/rust/geoarrow/src/algorithm/geo_index/rtree.rs +++ b/rust/geoarrow/src/algorithm/geo_index/rtree.rs @@ -51,7 +51,7 @@ impl_rtree!(MultiPolygonArray, bounding_rect_multipolygon); impl_rtree!(MixedGeometryArray, bounding_rect_geometry); impl_rtree!(GeometryCollectionArray, bounding_rect_geometry_collection); impl_rtree!(RectArray, bounding_rect_rect); -impl_rtree!(UnknownGeometryArray, bounding_rect_geometry); +impl_rtree!(GeometryArray, bounding_rect_geometry); impl RTree for &dyn NativeArray { type Output = OwnedRTree; @@ -75,7 +75,7 @@ impl RTree for &dyn NativeArray { Mixed(_, _) => impl_method!(as_mixed), GeometryCollection(_, _) => impl_method!(as_geometry_collection), Rect(_) => impl_method!(as_rect), - Unknown(_) => impl_method!(as_unknown), + Geometry(_) => impl_method!(as_geometry), } } } @@ -110,7 +110,7 @@ impl RTree for &dyn ChunkedNativeArray { Mixed(_, _) => impl_method!(as_mixed), GeometryCollection(_, _) => impl_method!(as_geometry_collection), Rect(_) => impl_method!(as_rect), - Unknown(_) => todo!("Chunked unknown array"), // impl_method!(as_unknown), + Geometry(_) => todo!("Chunked unknown array"), // impl_method!(as_unknown), }; Ok(result) } diff --git a/rust/geoarrow/src/algorithm/geos/area.rs b/rust/geoarrow/src/algorithm/geos/area.rs index 4bd0f93b..bbb1ef85 100644 --- a/rust/geoarrow/src/algorithm/geos/area.rs +++ b/rust/geoarrow/src/algorithm/geos/area.rs @@ -51,7 +51,7 @@ iter_geos_impl!(MultiPolygonArray); iter_geos_impl!(MixedGeometryArray); iter_geos_impl!(GeometryCollectionArray); iter_geos_impl!(RectArray); -iter_geos_impl!(UnknownGeometryArray); +iter_geos_impl!(GeometryArray); impl Area for &dyn NativeArray { type Output = Result; @@ -69,7 +69,7 @@ impl Area for &dyn NativeArray { Mixed(_, _) => self.as_mixed().area(), GeometryCollection(_, _) => self.as_geometry_collection().area(), Rect(_) => self.as_rect().area(), - Unknown(_) => self.as_unknown().area(), + Geometry(_) => self.as_geometry().area(), } } } diff --git a/rust/geoarrow/src/algorithm/geos/is_empty.rs b/rust/geoarrow/src/algorithm/geos/is_empty.rs index 45952ff0..fcf8c2fe 100644 --- a/rust/geoarrow/src/algorithm/geos/is_empty.rs +++ b/rust/geoarrow/src/algorithm/geos/is_empty.rs @@ -1,5 +1,4 @@ use crate::algorithm::native::Unary; -use crate::array::unknown::UnknownGeometryArray; use crate::array::*; use crate::chunked_array::{ChunkedArray, ChunkedGeometryArray}; use crate::datatypes::NativeType; @@ -37,7 +36,7 @@ iter_geos_impl!(MultiPolygonArray); iter_geos_impl!(MixedGeometryArray); iter_geos_impl!(GeometryCollectionArray); iter_geos_impl!(RectArray); -iter_geos_impl!(UnknownGeometryArray); +iter_geos_impl!(GeometryArray); impl IsEmpty for &dyn NativeArray { type Output = Result; @@ -55,7 +54,7 @@ impl IsEmpty for &dyn NativeArray { Mixed(_, _) => IsEmpty::is_empty(self.as_mixed()), GeometryCollection(_, _) => IsEmpty::is_empty(self.as_geometry_collection()), Rect(_) => IsEmpty::is_empty(self.as_rect()), - Unknown(_) => IsEmpty::is_empty(self.as_unknown()), + Geometry(_) => IsEmpty::is_empty(self.as_geometry()), } } } diff --git a/rust/geoarrow/src/algorithm/geos/is_ring.rs b/rust/geoarrow/src/algorithm/geos/is_ring.rs index 1bcec2e2..e3a8af45 100644 --- a/rust/geoarrow/src/algorithm/geos/is_ring.rs +++ b/rust/geoarrow/src/algorithm/geos/is_ring.rs @@ -36,7 +36,7 @@ iter_geos_impl!(MultiPolygonArray); iter_geos_impl!(MixedGeometryArray); iter_geos_impl!(GeometryCollectionArray); iter_geos_impl!(RectArray); -iter_geos_impl!(UnknownGeometryArray); +iter_geos_impl!(GeometryArray); impl IsRing for &dyn NativeArray { type Output = Result; @@ -54,7 +54,7 @@ impl IsRing for &dyn NativeArray { Mixed(_, _) => self.as_mixed().is_ring(), GeometryCollection(_, _) => self.as_geometry_collection().is_ring(), Rect(_) => self.as_rect().is_ring(), - Unknown(_) => self.as_unknown().is_ring(), + Geometry(_) => self.as_geometry().is_ring(), } } } diff --git a/rust/geoarrow/src/algorithm/geos/is_simple.rs b/rust/geoarrow/src/algorithm/geos/is_simple.rs index 71b5c94c..377736c2 100644 --- a/rust/geoarrow/src/algorithm/geos/is_simple.rs +++ b/rust/geoarrow/src/algorithm/geos/is_simple.rs @@ -36,7 +36,7 @@ iter_geos_impl!(MultiPolygonArray); iter_geos_impl!(MixedGeometryArray); iter_geos_impl!(GeometryCollectionArray); iter_geos_impl!(RectArray); -iter_geos_impl!(UnknownGeometryArray); +iter_geos_impl!(GeometryArray); impl IsSimple for &dyn NativeArray { type Output = Result; @@ -54,7 +54,7 @@ impl IsSimple for &dyn NativeArray { Mixed(_, _) => self.as_mixed().is_simple(), GeometryCollection(_, _) => self.as_geometry_collection().is_simple(), Rect(_) => self.as_rect().is_simple(), - Unknown(_) => self.as_unknown().is_simple(), + Geometry(_) => self.as_geometry().is_simple(), } } } diff --git a/rust/geoarrow/src/algorithm/geos/is_valid.rs b/rust/geoarrow/src/algorithm/geos/is_valid.rs index c9561a2c..988dccb0 100644 --- a/rust/geoarrow/src/algorithm/geos/is_valid.rs +++ b/rust/geoarrow/src/algorithm/geos/is_valid.rs @@ -37,7 +37,7 @@ iter_geos_impl!(MultiPolygonArray); iter_geos_impl!(MixedGeometryArray); iter_geos_impl!(GeometryCollectionArray); iter_geos_impl!(RectArray); -iter_geos_impl!(UnknownGeometryArray); +iter_geos_impl!(GeometryArray); impl IsValid for &dyn NativeArray { type Output = Result; @@ -55,7 +55,7 @@ impl IsValid for &dyn NativeArray { Mixed(_, _) => IsValid::is_valid(self.as_mixed()), GeometryCollection(_, _) => IsValid::is_valid(self.as_geometry_collection()), Rect(_) => IsValid::is_valid(self.as_rect()), - Unknown(_) => IsValid::is_valid(self.as_unknown()), + Geometry(_) => IsValid::is_valid(self.as_geometry()), } } } diff --git a/rust/geoarrow/src/algorithm/geos/length.rs b/rust/geoarrow/src/algorithm/geos/length.rs index 69e79558..6e5f248d 100644 --- a/rust/geoarrow/src/algorithm/geos/length.rs +++ b/rust/geoarrow/src/algorithm/geos/length.rs @@ -44,7 +44,7 @@ iter_geos_impl!(MultiPolygonArray); iter_geos_impl!(MixedGeometryArray); iter_geos_impl!(GeometryCollectionArray); iter_geos_impl!(RectArray); -iter_geos_impl!(UnknownGeometryArray); +iter_geos_impl!(GeometryArray); impl Length for &dyn NativeArray { type Output = Result; @@ -62,7 +62,7 @@ impl Length for &dyn NativeArray { Mixed(_, _) => self.as_mixed().length(), GeometryCollection(_, _) => self.as_geometry_collection().length(), Rect(_) => self.as_rect().length(), - Unknown(_) => self.as_unknown().length(), + Geometry(_) => self.as_geometry().length(), } } } diff --git a/rust/geoarrow/src/algorithm/native/cast.rs b/rust/geoarrow/src/algorithm/native/cast.rs index 3199ce15..22da66c9 100644 --- a/rust/geoarrow/src/algorithm/native/cast.rs +++ b/rust/geoarrow/src/algorithm/native/cast.rs @@ -275,7 +275,7 @@ macro_rules! impl_chunked_cast_non_generic { Mixed(_, _) => impl_cast!(as_mixed), GeometryCollection(_, _) => impl_cast!(as_geometry_collection), Rect(_) => impl_cast!(as_rect), - Unknown(_) => todo!("cast to unknown"), + Geometry(_) => todo!("cast to unknown"), }; Ok(result) } @@ -314,7 +314,7 @@ macro_rules! impl_chunked_cast_generic { Mixed(_, _) => impl_cast!(as_mixed), GeometryCollection(_, _) => impl_cast!(as_geometry_collection), Rect(_) => impl_cast!(as_rect), - Unknown(_) => todo!("cast to unknown"), + Geometry(_) => todo!("cast to unknown"), }; Ok(result) } diff --git a/rust/geoarrow/src/algorithm/native/total_bounds.rs b/rust/geoarrow/src/algorithm/native/total_bounds.rs index b9ecb97e..dc0f5356 100644 --- a/rust/geoarrow/src/algorithm/native/total_bounds.rs +++ b/rust/geoarrow/src/algorithm/native/total_bounds.rs @@ -51,7 +51,7 @@ impl_array!(MultiLineStringArray, add_multi_line_string); impl_array!(MultiPolygonArray, add_multi_polygon); impl_array!(MixedGeometryArray, add_geometry); impl_array!(GeometryCollectionArray, add_geometry_collection); -impl_array!(UnknownGeometryArray, add_geometry); +impl_array!(GeometryArray, add_geometry); // impl TotalBounds for WKBArray { // fn total_bounds(&self) -> BoundingRect { @@ -77,7 +77,7 @@ impl TotalBounds for &dyn NativeArray { Mixed(_, _) => self.as_mixed().total_bounds(), GeometryCollection(_, _) => self.as_geometry_collection().total_bounds(), Rect(_) => self.as_rect().total_bounds(), - Unknown(_) => self.as_unknown().total_bounds(), + Geometry(_) => self.as_geometry().total_bounds(), // WKB => self.as_wkb().total_bounds(), // LargeWKB => self.as_large_wkb().total_bounds(), } @@ -107,7 +107,7 @@ impl TotalBounds for &dyn ChunkedNativeArray { Mixed(_, _) => self.as_mixed().total_bounds(), GeometryCollection(_, _) => self.as_geometry_collection().total_bounds(), Rect(_) => self.as_rect().total_bounds(), - Unknown(_) => self.as_unknown().total_bounds(), + Geometry(_) => self.as_geometry().total_bounds(), } } } diff --git a/rust/geoarrow/src/algorithm/native/unary.rs b/rust/geoarrow/src/algorithm/native/unary.rs index 75aecdca..865df387 100644 --- a/rust/geoarrow/src/algorithm/native/unary.rs +++ b/rust/geoarrow/src/algorithm/native/unary.rs @@ -2,7 +2,6 @@ use arrow_array::types::ArrowPrimitiveType; use arrow_array::{BooleanArray, OffsetSizeTrait, PrimitiveArray}; use arrow_buffer::{BooleanBufferBuilder, BufferBuilder}; -use crate::array::unknown::UnknownGeometryArray; use crate::array::*; use crate::datatypes::Dimension; use crate::trait_::ArrayAccessor; @@ -97,7 +96,7 @@ impl Unary<'_> for MultiPolygonArray {} impl Unary<'_> for MixedGeometryArray {} impl Unary<'_> for GeometryCollectionArray {} impl Unary<'_> for RectArray {} -impl Unary<'_> for UnknownGeometryArray {} +impl Unary<'_> for GeometryArray {} impl Unary<'_> for WKBArray {} #[allow(dead_code)] @@ -157,4 +156,4 @@ impl UnaryPoint<'_> for MultiPolygonArray {} impl UnaryPoint<'_> for MixedGeometryArray {} impl UnaryPoint<'_> for GeometryCollectionArray {} impl UnaryPoint<'_> for RectArray {} -impl UnaryPoint<'_> for UnknownGeometryArray {} +impl UnaryPoint<'_> for GeometryArray {} diff --git a/rust/geoarrow/src/array/cast.rs b/rust/geoarrow/src/array/cast.rs index c0cca472..e90df2d4 100644 --- a/rust/geoarrow/src/array/cast.rs +++ b/rust/geoarrow/src/array/cast.rs @@ -1,4 +1,3 @@ -use crate::array::unknown::UnknownGeometryArray; use crate::array::*; use crate::chunked_array::*; @@ -89,13 +88,13 @@ pub trait AsNativeArray { self.as_rect_opt().unwrap() } - /// Downcast this to a [`UnknownGeometryArray`] returning `None` if not possible - fn as_unknown_opt(&self) -> Option<&UnknownGeometryArray>; + /// Downcast this to a [`GeometryArray`] returning `None` if not possible + fn as_geometry_opt(&self) -> Option<&GeometryArray>; - /// Downcast this to a [`UnknownGeometryArray`] panicking if not possible + /// Downcast this to a [`GeometryArray`] panicking if not possible #[inline] - fn as_unknown(&self) -> &UnknownGeometryArray { - self.as_unknown_opt().unwrap() + fn as_geometry(&self) -> &GeometryArray { + self.as_geometry_opt().unwrap() } } @@ -146,8 +145,8 @@ impl AsNativeArray for &dyn NativeArray { } #[inline] - fn as_unknown_opt(&self) -> Option<&UnknownGeometryArray> { - self.as_any().downcast_ref::() + fn as_geometry_opt(&self) -> Option<&GeometryArray> { + self.as_any().downcast_ref::() } } @@ -271,12 +270,12 @@ pub trait AsChunkedNativeArray { } /// Downcast this to a [`ChunkedUnknownGeometryArray`] returning `None` if not possible - fn as_unknown_opt(&self) -> Option<&ChunkedUnknownGeometryArray>; + fn as_geometry_opt(&self) -> Option<&ChunkedUnknownGeometryArray>; /// Downcast this to a [`ChunkedUnknownGeometryArray`] panicking if not possible #[inline] - fn as_unknown(&self) -> &ChunkedUnknownGeometryArray { - self.as_unknown_opt().unwrap() + fn as_geometry(&self) -> &ChunkedUnknownGeometryArray { + self.as_geometry_opt().unwrap() } } @@ -328,7 +327,7 @@ impl AsChunkedNativeArray for &dyn ChunkedNativeArray { } #[inline] - fn as_unknown_opt(&self) -> Option<&ChunkedUnknownGeometryArray> { + fn as_geometry_opt(&self) -> Option<&ChunkedUnknownGeometryArray> { self.as_any().downcast_ref::() } } diff --git a/rust/geoarrow/src/array/dynamic.rs b/rust/geoarrow/src/array/dynamic.rs index ac42567d..f0a4a892 100644 --- a/rust/geoarrow/src/array/dynamic.rs +++ b/rust/geoarrow/src/array/dynamic.rs @@ -43,7 +43,7 @@ impl NativeArrayDyn { Arc::new(GeometryCollectionArray::try_from((array, field))?) } Rect(_) => Arc::new(RectArray::try_from((array, field))?), - Unknown(_) => Arc::new(UnknownGeometryArray::try_from((array, field))?), + Geometry(_) => Arc::new(GeometryArray::try_from((array, field))?), }; Ok(Self(geo_arr)) diff --git a/rust/geoarrow/src/array/geometry/array.rs b/rust/geoarrow/src/array/geometry/array.rs index 8fb3018b..b13d5382 100644 --- a/rust/geoarrow/src/array/geometry/array.rs +++ b/rust/geoarrow/src/array/geometry/array.rs @@ -1,454 +1,1214 @@ -#![allow(deprecated)] - +use std::collections::HashSet; use std::sync::Arc; -use arrow_array::{Array, OffsetSizeTrait}; -use arrow_buffer::NullBuffer; -use arrow_schema::{DataType, Field}; +use arrow_array::{Array, OffsetSizeTrait, UnionArray}; +use arrow_buffer::{NullBuffer, ScalarBuffer}; +use arrow_schema::{DataType, Field, UnionMode}; -use crate::algorithm::native::type_id::TypeIds; -// use crate::algorithm::native::type_id::TypeIds; +use crate::array::geometry::GeometryBuilder; +use crate::array::geometry::GeometryCapacity; +use crate::array::metadata::ArrayMetadata; use crate::array::{ - CoordType, LineStringArray, MultiLineStringArray, MultiPointArray, MultiPolygonArray, - PointArray, PolygonArray, RectArray, WKBArray, + CoordType, GeometryCollectionArray, LineStringArray, MultiLineStringArray, MultiPointArray, + MultiPolygonArray, PointArray, PolygonArray, WKBArray, }; -use crate::datatypes::NativeType; -use crate::error::GeoArrowError; +use crate::datatypes::{Dimension, NativeType}; +use crate::error::{GeoArrowError, Result}; use crate::scalar::Geometry; -use crate::trait_::{ArrayAccessor, GeometryArraySelfMethods, IntoArrow}; -use crate::NativeArray; +use crate::trait_::{ArrayAccessor, GeometryArraySelfMethods, IntoArrow, NativeGeometryAccessor}; +use crate::{ArrayBase, NativeArray}; +use geo_traits::GeometryTrait; -/// A GeometryArray is an enum over the various underlying _zero copy_ GeoArrow array types. +/// # Invariants +/// +/// - All arrays must have the same dimension +/// - All arrays must have the same coordinate layout (interleaved or separated) /// -/// Notably this does _not_ include [`WKBArray`] as a variant, because that is not zero-copy to -/// parse. -#[derive(Debug, Clone)] -#[deprecated = "Use Arc instead."] -pub enum GeometryArray { - Point(PointArray), - LineString(LineStringArray), - Polygon(PolygonArray), - MultiPoint(MultiPointArray), - MultiLineString(MultiLineStringArray), - MultiPolygon(MultiPolygonArray), - Rect(RectArray), +/// - 1: Point +/// - 2: LineString +/// - 3: Polygon +/// - 4: MultiPoint +/// - 5: MultiLineString +/// - 6: MultiPolygon +/// - 7: GeometryCollection +/// - 11: Point Z +/// - 12: LineString Z +/// - 13: Polygon Z +/// - 14: MultiPoint Z +/// - 15: MultiLineString Z +/// - 16: MultiPolygon Z +/// - 17: GeometryCollection Z +/// - 21: Point M +/// - 22: LineString M +/// - 23: Polygon M +/// - 24: MultiPoint M +/// - 25: MultiLineString M +/// - 26: MultiPolygon M +/// - 27: GeometryCollection M +/// - 31: Point ZM +/// - 32: LineString ZM +/// - 33: Polygon ZM +/// - 34: MultiPoint ZM +/// - 35: MultiLineString ZM +/// - 36: MultiPolygon ZM +/// - 37: GeometryCollection ZM +#[derive(Debug, Clone, PartialEq)] +pub struct GeometryArray { + /// Always NativeType::Unknown + data_type: NativeType, + + pub(crate) metadata: Arc, + + /// Invariant: every item in `type_ids` is `> 0 && < fields.len()` if `type_ids` are not + /// provided. If `type_ids` exist in the NativeType, then every item in `type_ids` is `> 0 && ` + pub(crate) type_ids: ScalarBuffer, + + /// Invariant: `offsets.len() == type_ids.len()` + pub(crate) offsets: ScalarBuffer, + + // In the future we'll additionally have xym, xyzm array variants. + pub(crate) point_xy: PointArray, + pub(crate) line_string_xy: LineStringArray, + pub(crate) polygon_xy: PolygonArray, + pub(crate) mpoint_xy: MultiPointArray, + pub(crate) mline_string_xy: MultiLineStringArray, + pub(crate) mpolygon_xy: MultiPolygonArray, + pub(crate) gc_xy: GeometryCollectionArray, + + pub(crate) point_xyz: PointArray, + pub(crate) line_string_xyz: LineStringArray, + pub(crate) polygon_xyz: PolygonArray, + pub(crate) mpoint_xyz: MultiPointArray, + pub(crate) mline_string_xyz: MultiLineStringArray, + pub(crate) mpolygon_xyz: MultiPolygonArray, + pub(crate) gc_xyz: GeometryCollectionArray, + + /// An offset used for slicing into this array. The offset will be 0 if the array has not been + /// sliced. + /// + /// In order to slice this array efficiently (and zero-cost) we can't slice the underlying + /// fields directly. If this were always a _sparse_ union array, we could! We could then always + /// slice from offset to length of each underlying array. But we're under the assumption that + /// most or all of the time we have a dense union array, where the `offsets` buffer is defined. + /// In that case, to know how to slice each underlying array, we'd have to walk the `type_ids` + /// and `offsets` arrays (in O(N) time) to figure out how to slice the underlying arrays. + /// + /// Instead, we store the slice offset. + /// + /// Note that this offset is only for slicing into the **fields**, i.e. the geometry arrays. + /// The `type_ids` and `offsets` arrays are sliced as usual. + /// + /// TODO: when exporting this array, export to arrow2 and then slice from scratch because we + /// can't set the `offset` in a UnionArray constructor + pub(crate) slice_offset: usize, } -impl NativeArray for GeometryArray { - fn as_any(&self) -> &dyn std::any::Any { - // Note: I don't think this will work because you presumably can't downcast past the - // enum...? - match self { - GeometryArray::Point(arr) => arr.as_any(), - GeometryArray::LineString(arr) => arr.as_any(), - GeometryArray::Polygon(arr) => arr.as_any(), - GeometryArray::MultiPoint(arr) => arr.as_any(), - GeometryArray::MultiLineString(arr) => arr.as_any(), - GeometryArray::MultiPolygon(arr) => arr.as_any(), - GeometryArray::Rect(arr) => arr.as_any(), +impl GeometryArray { + /// Create a new MixedGeometryArray from parts + /// + /// # Implementation + /// + /// This function is `O(1)`. + /// + /// # Panics + /// + /// - if the validity is not `None` and its length is different from the number of geometries + /// - if the largest geometry offset does not match the number of coordinates + #[allow(clippy::too_many_arguments)] + pub fn new( + type_ids: ScalarBuffer, + offsets: ScalarBuffer, + point_xy: PointArray, + line_string_xy: LineStringArray, + polygon_xy: PolygonArray, + mpoint_xy: MultiPointArray, + mline_string_xy: MultiLineStringArray, + mpolygon_xy: MultiPolygonArray, + gc_xy: GeometryCollectionArray, + point_xyz: PointArray, + line_string_xyz: LineStringArray, + polygon_xyz: PolygonArray, + mpoint_xyz: MultiPointArray, + mline_string_xyz: MultiLineStringArray, + mpolygon_xyz: MultiPolygonArray, + gc_xyz: GeometryCollectionArray, + metadata: Arc, + ) -> Self { + let mut coord_types = HashSet::new(); + coord_types.insert(point_xy.coord_type()); + coord_types.insert(line_string_xy.coord_type()); + coord_types.insert(polygon_xy.coord_type()); + coord_types.insert(mpoint_xy.coord_type()); + coord_types.insert(mline_string_xy.coord_type()); + coord_types.insert(mpolygon_xy.coord_type()); + coord_types.insert(gc_xy.coord_type()); + + coord_types.insert(point_xyz.coord_type()); + coord_types.insert(line_string_xyz.coord_type()); + coord_types.insert(polygon_xyz.coord_type()); + coord_types.insert(mpoint_xyz.coord_type()); + coord_types.insert(mline_string_xyz.coord_type()); + coord_types.insert(mpolygon_xyz.coord_type()); + coord_types.insert(gc_xyz.coord_type()); + assert_eq!(coord_types.len(), 1); + + let coord_type = coord_types.into_iter().next().unwrap(); + + let data_type = NativeType::Geometry(coord_type); + + Self { + data_type, + type_ids, + offsets, + point_xy, + line_string_xy, + polygon_xy, + mpoint_xy, + mline_string_xy, + mpolygon_xy, + gc_xy, + point_xyz, + line_string_xyz, + polygon_xyz, + mpoint_xyz, + mline_string_xyz, + mpolygon_xyz, + gc_xyz, + slice_offset: 0, + metadata, } } - fn data_type(&self) -> &NativeType { - match self { - GeometryArray::Point(arr) => arr.data_type(), - GeometryArray::LineString(arr) => arr.data_type(), - GeometryArray::Polygon(arr) => arr.data_type(), - GeometryArray::MultiPoint(arr) => arr.data_type(), - GeometryArray::MultiLineString(arr) => arr.data_type(), - GeometryArray::MultiPolygon(arr) => arr.data_type(), - GeometryArray::Rect(arr) => arr.data_type(), + /// The lengths of each buffer contained in this array. + pub fn buffer_lengths(&self) -> GeometryCapacity { + GeometryCapacity::new( + 0, + self.point_xy.buffer_lengths(), + self.line_string_xy.buffer_lengths(), + self.polygon_xy.buffer_lengths(), + self.mpoint_xy.buffer_lengths(), + self.mline_string_xy.buffer_lengths(), + self.mpolygon_xy.buffer_lengths(), + self.gc_xy.buffer_lengths(), + self.point_xyz.buffer_lengths(), + self.line_string_xyz.buffer_lengths(), + self.polygon_xyz.buffer_lengths(), + self.mpoint_xyz.buffer_lengths(), + self.mline_string_xyz.buffer_lengths(), + self.mpolygon_xyz.buffer_lengths(), + self.gc_xyz.buffer_lengths(), + false, + ) + } + + pub fn has_points(&self, dim: Dimension) -> bool { + match dim { + Dimension::XY => !self.point_xy.is_empty(), + Dimension::XYZ => !self.point_xyz.is_empty(), } } - fn storage_type(&self) -> DataType { - match self { - GeometryArray::Point(arr) => arr.storage_type(), - GeometryArray::LineString(arr) => arr.storage_type(), - GeometryArray::Polygon(arr) => arr.storage_type(), - GeometryArray::MultiPoint(arr) => arr.storage_type(), - GeometryArray::MultiLineString(arr) => arr.storage_type(), - GeometryArray::MultiPolygon(arr) => arr.storage_type(), - GeometryArray::Rect(arr) => arr.storage_type(), + pub fn has_line_strings(&self, dim: Dimension) -> bool { + match dim { + Dimension::XY => !self.line_string_xy.is_empty(), + Dimension::XYZ => !self.line_string_xyz.is_empty(), } } - fn extension_field(&self) -> Arc { - match self { - GeometryArray::Point(arr) => arr.extension_field(), - GeometryArray::LineString(arr) => arr.extension_field(), - GeometryArray::Polygon(arr) => arr.extension_field(), - GeometryArray::MultiPoint(arr) => arr.extension_field(), - GeometryArray::MultiLineString(arr) => arr.extension_field(), - GeometryArray::MultiPolygon(arr) => arr.extension_field(), - GeometryArray::Rect(arr) => arr.extension_field(), + pub fn has_polygons(&self, dim: Dimension) -> bool { + match dim { + Dimension::XY => !self.polygon_xy.is_empty(), + Dimension::XYZ => !self.polygon_xyz.is_empty(), } } - fn metadata(&self) -> Arc { - todo!() + pub fn has_multi_points(&self, dim: Dimension) -> bool { + match dim { + Dimension::XY => !self.mpoint_xy.is_empty(), + Dimension::XYZ => !self.mpoint_xyz.is_empty(), + } } - fn extension_name(&self) -> &str { - match self { - GeometryArray::Point(arr) => arr.extension_name(), - GeometryArray::LineString(arr) => arr.extension_name(), - GeometryArray::Polygon(arr) => arr.extension_name(), - GeometryArray::MultiPoint(arr) => arr.extension_name(), - GeometryArray::MultiLineString(arr) => arr.extension_name(), - GeometryArray::MultiPolygon(arr) => arr.extension_name(), - GeometryArray::Rect(arr) => arr.extension_name(), + pub fn has_multi_line_strings(&self, dim: Dimension) -> bool { + match dim { + Dimension::XY => !self.mline_string_xy.is_empty(), + Dimension::XYZ => !self.mline_string_xyz.is_empty(), } } - fn into_array_ref(self) -> Arc { - match self { - GeometryArray::Point(arr) => arr.into_array_ref(), - GeometryArray::LineString(arr) => arr.into_array_ref(), - GeometryArray::Polygon(arr) => arr.into_array_ref(), - GeometryArray::MultiPoint(arr) => arr.into_array_ref(), - GeometryArray::MultiLineString(arr) => arr.into_array_ref(), - GeometryArray::MultiPolygon(arr) => arr.into_array_ref(), - GeometryArray::Rect(arr) => arr.into_array_ref(), + pub fn has_multi_polygons(&self, dim: Dimension) -> bool { + match dim { + Dimension::XY => !self.mpolygon_xy.is_empty(), + Dimension::XYZ => !self.mpolygon_xyz.is_empty(), } } - fn to_array_ref(&self) -> arrow_array::ArrayRef { - self.clone().into_array_ref() + /// Return `true` if this array holds at least one geometry array of the given dimension + pub fn has_dimension(&self, dim: Dimension) -> bool { + use Dimension::*; + match dim { + XY => { + self.has_points(XY) + || self.has_line_strings(XY) + || self.has_polygons(XY) + || self.has_multi_points(XY) + || self.has_multi_line_strings(XY) + || self.has_multi_polygons(XY) + } + XYZ => { + self.has_points(XYZ) + || self.has_line_strings(XYZ) + || self.has_polygons(XYZ) + || self.has_multi_points(XYZ) + || self.has_multi_line_strings(XYZ) + || self.has_multi_polygons(XYZ) + } + } } - fn coord_type(&self) -> crate::array::CoordType { - match self { - GeometryArray::Point(arr) => arr.coord_type(), - GeometryArray::LineString(arr) => arr.coord_type(), - GeometryArray::Polygon(arr) => arr.coord_type(), - GeometryArray::MultiPoint(arr) => arr.coord_type(), - GeometryArray::MultiLineString(arr) => arr.coord_type(), - GeometryArray::MultiPolygon(arr) => arr.coord_type(), - GeometryArray::Rect(arr) => arr.coord_type(), + /// Return `true` if this array holds at least one geometry array of the given dimension and no + /// arrays of any other dimension. + pub fn has_only_dimension(&self, dim: Dimension) -> bool { + use Dimension::*; + match dim { + XY => self.has_dimension(XY) && !self.has_dimension(XYZ), + XYZ => self.has_dimension(XYZ) && !self.has_dimension(XY), } } - fn to_coord_type(&self, coord_type: CoordType) -> Arc { - Arc::new(self.clone().into_coord_type(coord_type)) + // /// The number of non-empty child arrays + // fn num_non_empty_children(&self) -> usize { + // let mut count = 0; + + // if !self.point_xy.is_empty() { + // count += 1 + // }; + // if !self.line_string_xy.is_empty() { + // count += 1 + // }; + // if !self.polygon_xy.is_empty() { + // count += 1 + // }; + // if !self.mpoint_xy.is_empty() { + // count += 1 + // }; + // if !self.mline_string_xy.is_empty() { + // count += 1 + // }; + // if !self.mpolygon_xy.is_empty() { + // count += 1 + // }; + + // if !self.point_xyz.is_empty() { + // count += 1 + // }; + // if !self.line_string_xyz.is_empty() { + // count += 1 + // }; + // if !self.polygon_xyz.is_empty() { + // count += 1 + // }; + // if !self.mpoint_xyz.is_empty() { + // count += 1 + // }; + // if !self.mline_string_xyz.is_empty() { + // count += 1 + // }; + // if !self.mpolygon_xyz.is_empty() { + // count += 1 + // }; + + // count + // } + + // TODO: restore to enable downcasting + + // pub fn has_only_type(&self, typ: NativeType) -> bool { + // use Dimension::*; + + // if self.num_non_empty_children() == 0 { + // // Empty array + // false + // } + + // if self.num_non_empty_children() > 1 {} + + // match typ { + // NativeType::Point(_, dim) + // } + + // self.has_points(XY) + // && !self.has_line_strings(XY) + // && !self.has_polygons(XY) + // && !self.has_multi_points(XY) + // && !self.has_multi_line_strings(XY) + // && !self.has_multi_polygons(XY) + // && !self.has_points(XYZ) + // && !self.has_line_strings(XYZ) + // && !self.has_polygons(XYZ) + // && !self.has_multi_points(XYZ) + // && !self.has_multi_line_strings(XYZ) + // && !self.has_multi_polygons(XYZ) + // } + + // pub fn has_only_line_strings(&self) -> bool { + // !self.has_points() + // && self.has_line_strings() + // && !self.has_polygons() + // && !self.has_multi_points() + // && !self.has_multi_line_strings() + // && !self.has_multi_polygons() + // } + + // pub fn has_only_polygons(&self) -> bool { + // !self.has_points() + // && !self.has_line_strings() + // && self.has_polygons() + // && !self.has_multi_points() + // && !self.has_multi_line_strings() + // && !self.has_multi_polygons() + // } + + // pub fn has_only_multi_points(&self) -> bool { + // !self.has_points() + // && !self.has_line_strings() + // && !self.has_polygons() + // && self.has_multi_points() + // && !self.has_multi_line_strings() + // && !self.has_multi_polygons() + // } + + // pub fn has_only_multi_line_strings(&self) -> bool { + // !self.has_points() + // && !self.has_line_strings() + // && !self.has_polygons() + // && !self.has_multi_points() + // && self.has_multi_line_strings() + // && !self.has_multi_polygons() + // } + + // pub fn has_only_multi_polygons(&self) -> bool { + // !self.has_points() + // && !self.has_line_strings() + // && !self.has_polygons() + // && !self.has_multi_points() + // && !self.has_multi_line_strings() + // && self.has_multi_polygons() + // } + + /// The number of bytes occupied by this array. + pub fn num_bytes(&self) -> usize { + self.buffer_lengths().num_bytes() } - /// The length of the [`GeometryArray`]. Every array has a length corresponding to the number - /// of geometries it contains. - fn len(&self) -> usize { - match self { - GeometryArray::Point(arr) => arr.len(), - GeometryArray::LineString(arr) => arr.len(), - GeometryArray::Polygon(arr) => arr.len(), - GeometryArray::MultiPoint(arr) => arr.len(), - GeometryArray::MultiLineString(arr) => arr.len(), - GeometryArray::MultiPolygon(arr) => arr.len(), - GeometryArray::Rect(arr) => arr.len(), + /// Slices this [`MixedGeometryArray`] in place. + /// + /// # Implementation + /// + /// This operation is `O(F)` where `F` is the number of fields. + /// + /// # Panic + /// + /// This function panics iff `offset + length > self.len()`. + #[inline] + pub fn slice(&self, offset: usize, length: usize) -> Self { + assert!( + offset + length <= self.len(), + "offset + length may not exceed length of array" + ); + Self { + data_type: self.data_type, + type_ids: self.type_ids.slice(offset, length), + offsets: self.offsets.slice(offset, length), + + point_xy: self.point_xy.clone(), + line_string_xy: self.line_string_xy.clone(), + polygon_xy: self.polygon_xy.clone(), + mpoint_xy: self.mpoint_xy.clone(), + mline_string_xy: self.mline_string_xy.clone(), + mpolygon_xy: self.mpolygon_xy.clone(), + gc_xy: self.gc_xy.clone(), + + point_xyz: self.point_xyz.clone(), + line_string_xyz: self.line_string_xyz.clone(), + polygon_xyz: self.polygon_xyz.clone(), + mpoint_xyz: self.mpoint_xyz.clone(), + mline_string_xyz: self.mline_string_xyz.clone(), + mpolygon_xyz: self.mpolygon_xyz.clone(), + gc_xyz: self.gc_xyz.clone(), + + slice_offset: self.slice_offset + offset, + metadata: self.metadata.clone(), } } - /// The validity of the [`GeometryArray`]: every array has an optional [`Bitmap`] that, when - /// available specifies whether the geometry at a given slot is valid or not (null). When the - /// validity is [`None`], all slots are valid. + pub fn to_coord_type(&self, coord_type: CoordType) -> Self { + self.clone().into_coord_type(coord_type) + } + + pub fn into_coord_type(self, coord_type: CoordType) -> Self { + Self::new( + self.type_ids, + self.offsets, + self.point_xy.into_coord_type(coord_type), + self.line_string_xy.into_coord_type(coord_type), + self.polygon_xy.into_coord_type(coord_type), + self.mpoint_xy.into_coord_type(coord_type), + self.mline_string_xy.into_coord_type(coord_type), + self.mpolygon_xy.into_coord_type(coord_type), + self.gc_xy.into_coord_type(coord_type), + self.point_xyz.into_coord_type(coord_type), + self.line_string_xyz.into_coord_type(coord_type), + self.polygon_xyz.into_coord_type(coord_type), + self.mpoint_xyz.into_coord_type(coord_type), + self.mline_string_xyz.into_coord_type(coord_type), + self.mpolygon_xyz.into_coord_type(coord_type), + self.gc_xyz.into_coord_type(coord_type), + self.metadata, + ) + } +} + +impl ArrayBase for GeometryArray { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn storage_type(&self) -> DataType { + self.data_type.to_data_type() + } + + fn extension_field(&self) -> Arc { + Arc::new( + self.data_type + .to_field_with_metadata("geometry", true, &self.metadata), + ) + } + + fn extension_name(&self) -> &str { + self.data_type.extension_name() + } + + fn into_array_ref(self) -> Arc { + Arc::new(self.into_arrow()) + } + + fn to_array_ref(&self) -> arrow_array::ArrayRef { + self.clone().into_array_ref() + } + + fn metadata(&self) -> Arc { + self.metadata.clone() + } + + /// Returns the number of geometries in this array + #[inline] + fn len(&self) -> usize { + // Note that `type_ids` is sliced as usual, and thus always has the correct length. + self.type_ids.len() + } + + /// Returns the optional validity. + #[inline] fn nulls(&self) -> Option<&NullBuffer> { - match self { - GeometryArray::Point(arr) => arr.nulls(), - GeometryArray::LineString(arr) => arr.nulls(), - GeometryArray::Polygon(arr) => arr.nulls(), - GeometryArray::MultiPoint(arr) => arr.nulls(), - GeometryArray::MultiLineString(arr) => arr.nulls(), - GeometryArray::MultiPolygon(arr) => arr.nulls(), - GeometryArray::Rect(arr) => arr.nulls(), - } + None + } +} + +impl NativeArray for GeometryArray { + fn data_type(&self) -> NativeType { + self.data_type + } + + fn coord_type(&self) -> crate::array::CoordType { + self.data_type.coord_type() + } + + fn to_coord_type(&self, coord_type: CoordType) -> Arc { + Arc::new(self.clone().into_coord_type(coord_type)) + } + + fn with_metadata(&self, metadata: Arc) -> crate::trait_::NativeArrayRef { + let mut arr = self.clone(); + arr.metadata = metadata; + Arc::new(arr) } fn as_ref(&self) -> &dyn NativeArray { self } - // /// Clones this [`GeometryArray`] with a new assigned bitmap. - // /// # Panic - // /// This function panics iff `validity.len() != self.len()`. - // pub fn with_validity(&self, validity: Option) -> Box; + fn slice(&self, offset: usize, length: usize) -> Arc { + Arc::new(self.slice(offset, length)) + } } -impl GeometryArraySelfMethods for GeometryArray { - fn with_coords(self, coords: crate::array::CoordBuffer) -> Self { - match self { - GeometryArray::Point(arr) => GeometryArray::Point(arr.with_coords(coords)), - GeometryArray::LineString(arr) => GeometryArray::LineString(arr.with_coords(coords)), - GeometryArray::Polygon(arr) => GeometryArray::Polygon(arr.with_coords(coords)), - GeometryArray::MultiPoint(arr) => GeometryArray::MultiPoint(arr.with_coords(coords)), - GeometryArray::MultiLineString(arr) => { - GeometryArray::MultiLineString(arr.with_coords(coords)) - } - GeometryArray::MultiPolygon(arr) => { - GeometryArray::MultiPolygon(arr.with_coords(coords)) - } - GeometryArray::Rect(arr) => GeometryArray::Rect(arr.with_coords(coords)), - } +impl GeometryArraySelfMethods for GeometryArray { + fn with_coords(self, _coords: crate::array::CoordBuffer) -> Self { + todo!(); } - fn into_coord_type(self, coord_type: crate::array::CoordType) -> Self { - match self { - GeometryArray::Point(arr) => GeometryArray::Point(arr.into_coord_type(coord_type)), - GeometryArray::LineString(arr) => { - GeometryArray::LineString(arr.into_coord_type(coord_type)) - } - GeometryArray::Polygon(arr) => GeometryArray::Polygon(arr.into_coord_type(coord_type)), - GeometryArray::MultiPoint(arr) => { - GeometryArray::MultiPoint(arr.into_coord_type(coord_type)) - } - GeometryArray::MultiLineString(arr) => { - GeometryArray::MultiLineString(arr.into_coord_type(coord_type)) - } - GeometryArray::MultiPolygon(arr) => { - GeometryArray::MultiPolygon(arr.into_coord_type(coord_type)) - } - GeometryArray::Rect(arr) => GeometryArray::Rect(arr.into_coord_type(coord_type)), - } + fn into_coord_type(self, _coord_type: crate::array::CoordType) -> Self { + todo!(); } +} - /// Slices the [`GeometryArray`] in place - /// # Implementation - /// This operation is `O(1)` over `len`, as it amounts to increase two ref counts - /// and moving the struct to the heap. - /// # Panic - /// This function panics iff `offset + length > self.len()`. - fn slice(&self, offset: usize, length: usize) -> Self { - match self { - GeometryArray::Point(arr) => GeometryArray::Point(arr.slice(offset, length)), - GeometryArray::LineString(arr) => GeometryArray::LineString(arr.slice(offset, length)), - GeometryArray::Polygon(arr) => GeometryArray::Polygon(arr.slice(offset, length)), - GeometryArray::MultiPoint(arr) => GeometryArray::MultiPoint(arr.slice(offset, length)), - GeometryArray::MultiLineString(arr) => { - GeometryArray::MultiLineString(arr.slice(offset, length)) +impl NativeGeometryAccessor for GeometryArray { + unsafe fn value_as_geometry_unchecked(&self, index: usize) -> crate::scalar::Geometry { + let type_id = self.type_ids[index]; + let offset = self.offsets[index] as usize; + + match type_id { + 1 => Geometry::Point(self.point_xy.value(offset)), + 2 => Geometry::LineString(self.line_string_xy.value(offset)), + 3 => Geometry::Polygon(self.polygon_xy.value(offset)), + 4 => Geometry::MultiPoint(self.mpoint_xy.value(offset)), + 5 => Geometry::MultiLineString(self.mline_string_xy.value(offset)), + 6 => Geometry::MultiPolygon(self.mpolygon_xy.value(offset)), + 7 => { + panic!("nested geometry collections not supported") } - GeometryArray::MultiPolygon(arr) => { - GeometryArray::MultiPolygon(arr.slice(offset, length)) + 11 => Geometry::Point(self.point_xyz.value(offset)), + 12 => Geometry::LineString(self.line_string_xyz.value(offset)), + 13 => Geometry::Polygon(self.polygon_xyz.value(offset)), + 14 => Geometry::MultiPoint(self.mpoint_xyz.value(offset)), + 15 => Geometry::MultiLineString(self.mline_string_xyz.value(offset)), + 16 => Geometry::MultiPolygon(self.mpolygon_xyz.value(offset)), + 17 => { + panic!("nested geometry collections not supported") } - GeometryArray::Rect(arr) => GeometryArray::Rect(arr.slice(offset, length)), + _ => panic!("unknown type_id {}", type_id), } } } -impl<'a, O: OffsetSizeTrait> ArrayAccessor<'a> for GeometryArray { - type Item = Geometry<'a, O>; +#[cfg(feature = "geos")] +impl<'a> crate::trait_::NativeGEOSGeometryAccessor<'a> for GeometryArray { + unsafe fn value_as_geometry_unchecked( + &'a self, + index: usize, + ) -> std::result::Result { + let geom = NativeGeometryAccessor::value_as_geometry_unchecked(self, index); + (&geom).try_into() + } +} + +impl<'a> ArrayAccessor<'a> for GeometryArray { + type Item = Geometry<'a>; type ItemGeo = geo::Geometry; unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item { - match self { - GeometryArray::Point(arr) => Geometry::Point(arr.value_unchecked(index)), - GeometryArray::LineString(arr) => Geometry::LineString(arr.value_unchecked(index)), - GeometryArray::Polygon(arr) => Geometry::Polygon(arr.value_unchecked(index)), - GeometryArray::MultiPoint(arr) => Geometry::MultiPoint(arr.value_unchecked(index)), - GeometryArray::MultiLineString(arr) => { - Geometry::MultiLineString(arr.value_unchecked(index)) + let type_id = self.type_ids[index]; + let offset = self.offsets[index] as usize; + + match type_id { + 1 => Geometry::Point(self.point_xy.value(offset)), + 2 => Geometry::LineString(self.line_string_xy.value(offset)), + 3 => Geometry::Polygon(self.polygon_xy.value(offset)), + 4 => Geometry::MultiPoint(self.mpoint_xy.value(offset)), + 5 => Geometry::MultiLineString(self.mline_string_xy.value(offset)), + 6 => Geometry::MultiPolygon(self.mpolygon_xy.value(offset)), + 7 => { + panic!("nested geometry collections not supported") + } + 11 => Geometry::Point(self.point_xyz.value(offset)), + 12 => Geometry::LineString(self.line_string_xyz.value(offset)), + 13 => Geometry::Polygon(self.polygon_xyz.value(offset)), + 14 => Geometry::MultiPoint(self.mpoint_xyz.value(offset)), + 15 => Geometry::MultiLineString(self.mline_string_xyz.value(offset)), + 16 => Geometry::MultiPolygon(self.mpolygon_xyz.value(offset)), + 17 => { + panic!("nested geometry collections not supported") } - GeometryArray::MultiPolygon(arr) => Geometry::MultiPolygon(arr.value_unchecked(index)), - GeometryArray::Rect(arr) => Geometry::Rect(arr.value_unchecked(index)), + _ => panic!("unknown type_id {}", type_id), } } } -impl IntoArrow for GeometryArray { - type ArrowArray = Arc; +impl IntoArrow for GeometryArray { + type ArrowArray = UnionArray; fn into_arrow(self) -> Self::ArrowArray { - match self { - GeometryArray::Point(arr) => arr.into_arrow(), - GeometryArray::LineString(arr) => Arc::new(arr.into_arrow()), - GeometryArray::Polygon(arr) => Arc::new(arr.into_arrow()), - GeometryArray::MultiPoint(arr) => Arc::new(arr.into_arrow()), - GeometryArray::MultiLineString(arr) => Arc::new(arr.into_arrow()), - GeometryArray::MultiPolygon(arr) => Arc::new(arr.into_arrow()), - GeometryArray::Rect(arr) => Arc::new(arr.into_arrow()), - } + let union_fields = match self.data_type.to_data_type() { + DataType::Union(union_fields, _) => union_fields, + _ => unreachable!(), + }; + + let child_arrays = vec![ + self.point_xy.into_array_ref(), + self.line_string_xy.into_array_ref(), + self.polygon_xy.into_array_ref(), + self.mpoint_xy.into_array_ref(), + self.mline_string_xy.into_array_ref(), + self.mpolygon_xy.into_array_ref(), + self.point_xyz.into_array_ref(), + self.line_string_xyz.into_array_ref(), + self.polygon_xyz.into_array_ref(), + self.mpoint_xyz.into_array_ref(), + self.mline_string_xyz.into_array_ref(), + self.mpolygon_xyz.into_array_ref(), + ]; + + UnionArray::try_new( + union_fields, + self.type_ids, + Some(self.offsets), + child_arrays, + ) + .unwrap() } } -impl TryFrom<(&Field, &dyn Array)> for GeometryArray { +impl TryFrom<&UnionArray> for GeometryArray { type Error = GeoArrowError; - fn try_from((field, array): (&Field, &dyn Array)) -> Result { - if let Some(extension_name) = field.metadata().get("ARROW:extension:name") { - let geom_arr = match extension_name.as_str() { - "geoarrow.point" => Ok(GeometryArray::Point(array.try_into()?)), - "geoarrow.linestring" => Ok(GeometryArray::LineString(array.try_into()?)), - "geoarrow.polygon" => Ok(GeometryArray::Polygon(array.try_into()?)), - "geoarrow.multipoint" => Ok(GeometryArray::MultiPoint(array.try_into()?)), - "geoarrow.multilinestring" => Ok(GeometryArray::MultiLineString(array.try_into()?)), - "geoarrow.multipolygon" => Ok(GeometryArray::MultiPolygon(array.try_into()?)), - // TODO: create a top-level API that parses any named geoarrow array? - // "geoarrow.wkb" => Ok(GeometryArray::WKB(array.try_into()?)), - _ => Err(GeoArrowError::General(format!( - "Unknown geoarrow type {}", - extension_name - ))), - }; - geom_arr - } else { - // TODO: better error here, and document that arrays without geoarrow extension - // metadata should use TryFrom for a specific geometry type directly, instead of using - // GeometryArray - Err(GeoArrowError::General( - "Can only construct an array with an extension type name.".to_string(), - )) - } + fn try_from(value: &UnionArray) -> std::result::Result { + let mut point_xy: Option = None; + let mut line_string_xy: Option = None; + let mut polygon_xy: Option = None; + let mut mpoint_xy: Option = None; + let mut mline_string_xy: Option = None; + let mut mpolygon_xy: Option = None; + let mut gc_xy: Option = None; + + let mut point_xyz: Option = None; + let mut line_string_xyz: Option = None; + let mut polygon_xyz: Option = None; + let mut mpoint_xyz: Option = None; + let mut mline_string_xyz: Option = None; + let mut mpolygon_xyz: Option = None; + let mut gc_xyz: Option = None; + + match value.data_type() { + DataType::Union(fields, mode) => { + if !matches!(mode, UnionMode::Dense) { + return Err(GeoArrowError::General("Expected dense union".to_string())); + } + + for (type_id, _field) in fields.iter() { + let dimension = if type_id < 10 { + Dimension::XY + } else if type_id < 20 { + Dimension::XYZ + } else { + return Err(GeoArrowError::General(format!( + "Unsupported type_id: {}", + type_id + ))); + }; + + match type_id { + 1 => { + point_xy = Some( + (value.child(type_id).as_ref(), dimension) + .try_into() + .unwrap(), + ); + } + 2 => { + line_string_xy = Some( + (value.child(type_id).as_ref(), dimension) + .try_into() + .unwrap(), + ); + } + 3 => { + polygon_xy = Some( + (value.child(type_id).as_ref(), dimension) + .try_into() + .unwrap(), + ); + } + 4 => { + mpoint_xy = Some( + (value.child(type_id).as_ref(), dimension) + .try_into() + .unwrap(), + ); + } + 5 => { + mline_string_xy = Some( + (value.child(type_id).as_ref(), dimension) + .try_into() + .unwrap(), + ); + } + 6 => { + mpolygon_xy = Some( + (value.child(type_id).as_ref(), dimension) + .try_into() + .unwrap(), + ); + } + 7 => { + gc_xy = Some( + (value.child(type_id).as_ref(), dimension) + .try_into() + .unwrap(), + ); + } + 11 => { + point_xyz = Some( + (value.child(type_id).as_ref(), dimension) + .try_into() + .unwrap(), + ); + } + 12 => { + line_string_xyz = Some( + (value.child(type_id).as_ref(), dimension) + .try_into() + .unwrap(), + ); + } + 13 => { + polygon_xyz = Some( + (value.child(type_id).as_ref(), dimension) + .try_into() + .unwrap(), + ); + } + 14 => { + mpoint_xyz = Some( + (value.child(type_id).as_ref(), dimension) + .try_into() + .unwrap(), + ); + } + 15 => { + mline_string_xyz = Some( + (value.child(type_id).as_ref(), dimension) + .try_into() + .unwrap(), + ); + } + 16 => { + mpolygon_xyz = Some( + (value.child(type_id).as_ref(), dimension) + .try_into() + .unwrap(), + ); + } + 17 => { + gc_xyz = Some( + (value.child(type_id).as_ref(), dimension) + .try_into() + .unwrap(), + ); + } + _ => { + return Err(GeoArrowError::General(format!( + "Unexpected type_id {}", + type_id + ))) + } + } + } + } + _ => panic!("expected union type"), + }; + + let type_ids = value.type_ids().clone(); + // This is after checking for dense union + let offsets = value.offsets().unwrap().clone(); + + Ok(Self::new( + type_ids, + offsets, + point_xy.unwrap_or_default(), + line_string_xy.unwrap_or_default(), + polygon_xy.unwrap_or_default(), + mpoint_xy.unwrap_or_default(), + mline_string_xy.unwrap_or_default(), + mpolygon_xy.unwrap_or_default(), + gc_xy.unwrap_or_default(), + point_xyz.unwrap_or_default(), + line_string_xyz.unwrap_or_default(), + polygon_xyz.unwrap_or_default(), + mpoint_xyz.unwrap_or_default(), + mline_string_xyz.unwrap_or_default(), + mpolygon_xyz.unwrap_or_default(), + gc_xyz.unwrap_or_default(), + Default::default(), + )) } } -impl TryFrom<(&Field, &dyn Array)> for GeometryArray { +impl TryFrom<&dyn Array> for GeometryArray { type Error = GeoArrowError; - fn try_from((field, array): (&Field, &dyn Array)) -> Result { - if let Some(extension_name) = field.metadata().get("ARROW:extension:name") { - let geom_arr = match extension_name.as_str() { - "geoarrow.point" => Ok(GeometryArray::Point(array.try_into()?)), - "geoarrow.linestring" => Ok(GeometryArray::LineString(array.try_into()?)), - "geoarrow.polygon" => Ok(GeometryArray::Polygon(array.try_into()?)), - "geoarrow.multipoint" => Ok(GeometryArray::MultiPoint(array.try_into()?)), - "geoarrow.multilinestring" => Ok(GeometryArray::MultiLineString(array.try_into()?)), - "geoarrow.multipolygon" => Ok(GeometryArray::MultiPolygon(array.try_into()?)), - // TODO: create a top-level API that parses any named geoarrow array? - // "geoarrow.wkb" => Ok(GeometryArray::WKB(array.try_into()?)), - _ => Err(GeoArrowError::General(format!( - "Unknown geoarrow type {}", - extension_name - ))), - }; - geom_arr - } else { - // TODO: better error here, and document that arrays without geoarrow extension - // metadata should use TryFrom for a specific geometry type directly, instead of using - // GeometryArray - Err(GeoArrowError::General( - "Can only construct an array with an extension type name.".to_string(), - )) + fn try_from(value: &dyn Array) -> Result { + match value.data_type() { + DataType::Union(_, _) => { + let downcasted = value.as_any().downcast_ref::().unwrap(); + downcasted.try_into() + } + _ => Err(GeoArrowError::General(format!( + "Unexpected type: {:?}", + value.data_type() + ))), } } } -// TODO: write a macro to dedupe these `From`s -impl From for GeometryArray { - fn from(value: PointArray) -> Self { - GeometryArray::Point(value) - } -} +impl TryFrom<(&dyn Array, &Field)> for GeometryArray { + type Error = GeoArrowError; -impl From> for GeometryArray { - fn from(value: LineStringArray) -> Self { - GeometryArray::LineString(value) + fn try_from((arr, field): (&dyn Array, &Field)) -> Result { + let mut arr: Self = arr.try_into()?; + arr.metadata = Arc::new(ArrayMetadata::try_from(field)?); + Ok(arr) } } -impl From> for GeometryArray { - fn from(value: PolygonArray) -> Self { - GeometryArray::Polygon(value) +impl> TryFrom<&[G]> for GeometryArray { + type Error = GeoArrowError; + + fn try_from(geoms: &[G]) -> Result { + let mut_arr: GeometryBuilder = geoms.try_into()?; + Ok(mut_arr.into()) } } -impl From> for GeometryArray { - fn from(value: MultiPointArray) -> Self { - GeometryArray::MultiPoint(value) +impl> TryFrom>> for GeometryArray { + type Error = GeoArrowError; + + fn try_from(geoms: Vec>) -> Result { + let mut_arr: GeometryBuilder = geoms.try_into()?; + Ok(mut_arr.into()) } } -impl From> for GeometryArray { - fn from(value: MultiLineStringArray) -> Self { - GeometryArray::MultiLineString(value) +impl TryFrom> for GeometryArray { + type Error = GeoArrowError; + + fn try_from(value: WKBArray) -> Result { + let mut_arr: GeometryBuilder = value.try_into()?; + Ok(mut_arr.into()) } } -impl From> for GeometryArray { - fn from(value: MultiPolygonArray) -> Self { - GeometryArray::MultiPolygon(value) +// impl From for GeometryArray { +// fn from(value: PointArray) -> Self { +// let type_ids = match value.dimension() { +// Dimension::XY => vec![1; value.len()], +// Dimension::XYZ => vec![11; value.len()], +// }; +// let metadata = value.metadata.clone(); +// Self::new( +// ScalarBuffer::from(type_ids), +// ScalarBuffer::from_iter(0..value.len() as i32), +// value, +// Default::default(), +// Default::default(), +// Default::default(), +// Default::default(), +// Default::default(), +// Default::default(), +// Default::default(), +// Default::default(), +// Default::default(), +// Default::default(), +// Default::default(), +// metadata, +// ) +// } +// } + +// impl From for GeometryArray { +// fn from(value: LineStringArray) -> Self { +// let type_ids = match value.dimension() { +// Dimension::XY => vec![2; value.len()], +// Dimension::XYZ => vec![12; value.len()], +// }; +// let metadata = value.metadata.clone(); +// Self::new( +// ScalarBuffer::from(type_ids), +// ScalarBuffer::from_iter(0..value.len() as i32), +// Default::default(), +// value, +// Default::default(), +// Default::default(), +// Default::default(), +// Default::default(), +// metadata, +// ) +// } +// } + +// impl From for GeometryArray { +// fn from(value: PolygonArray) -> Self { +// let type_ids = match value.dimension() { +// Dimension::XY => vec![3; value.len()], +// Dimension::XYZ => vec![13; value.len()], +// }; +// let metadata = value.metadata.clone(); +// Self::new( +// ScalarBuffer::from(type_ids), +// ScalarBuffer::from_iter(0..value.len() as i32), +// Default::default(), +// Default::default(), +// value, +// Default::default(), +// Default::default(), +// Default::default(), +// metadata, +// ) +// } +// } + +// impl From for GeometryArray { +// fn from(value: MultiPointArray) -> Self { +// let type_ids = match value.dimension() { +// Dimension::XY => vec![4; value.len()], +// Dimension::XYZ => vec![14; value.len()], +// }; +// let metadata = value.metadata.clone(); +// Self::new( +// ScalarBuffer::from(type_ids), +// ScalarBuffer::from_iter(0..value.len() as i32), +// Default::default(), +// Default::default(), +// Default::default(), +// value, +// Default::default(), +// Default::default(), +// metadata, +// ) +// } +// } + +// impl From for GeometryArray { +// fn from(value: MultiLineStringArray) -> Self { +// let type_ids = match value.dimension() { +// Dimension::XY => vec![5; value.len()], +// Dimension::XYZ => vec![15; value.len()], +// }; +// let metadata = value.metadata.clone(); +// Self::new( +// ScalarBuffer::from(type_ids), +// ScalarBuffer::from_iter(0..value.len() as i32), +// Default::default(), +// Default::default(), +// Default::default(), +// Default::default(), +// value, +// Default::default(), +// metadata, +// ) +// } +// } + +// impl From for GeometryArray { +// fn from(value: MultiPolygonArray) -> Self { +// let type_ids = match value.dimension() { +// Dimension::XY => vec![6; value.len()], +// Dimension::XYZ => vec![16; value.len()], +// }; +// let metadata = value.metadata.clone(); +// Self::new( +// ScalarBuffer::from(type_ids), +// ScalarBuffer::from_iter(0..value.len() as i32), +// Default::default(), +// Default::default(), +// Default::default(), +// Default::default(), +// Default::default(), +// value, +// metadata, +// ) +// } +// } + +// impl TryFrom for GeometryArray { +// type Error = GeoArrowError; + +// fn try_from(value: GeometryCollectionArray) -> std::result::Result { +// if !can_downcast_multi(&value.geom_offsets) { +// return Err(GeoArrowError::General("Unable to cast".to_string())); +// } + +// if value.null_count() > 0 { +// return Err(GeoArrowError::General( +// "Unable to cast with nulls".to_string(), +// )); +// } + +// Ok(value.array) +// } +// } + +/// Default to an empty array +impl Default for GeometryArray { + fn default() -> Self { + GeometryBuilder::default().into() } } -impl TryFrom> for GeometryArray { - type Error = GeoArrowError; - fn try_from(value: WKBArray) -> Result { - let type_ids = value.get_unique_type_ids(); +#[cfg(test)] +mod test { + use super::*; + use crate::test::{linestring, multilinestring, multipoint, multipolygon, point, polygon}; - if type_ids.is_empty() { - return Err(GeoArrowError::General( - "Input WKB array is empty.".to_string(), - )); - } + #[test] + fn geo_roundtrip_accurate_points() { + let geoms: Vec = vec![ + geo::Geometry::Point(point::p0()), + geo::Geometry::Point(point::p1()), + geo::Geometry::Point(point::p2()), + ]; - if type_ids.len() == 1 { - if type_ids.contains(&0) { - return Ok(GeometryArray::Point(value.try_into()?)); - } + let arr: GeometryArray = GeometryBuilder::from_geometries( + geoms.as_slice(), + Default::default(), + Default::default(), + false, + ) + .unwrap() + .finish(); - if type_ids.contains(&1) { - return Ok(GeometryArray::LineString(value.try_into()?)); - } + assert_eq!(arr.value_as_geo(0), geo::Geometry::Point(point::p0())); + assert_eq!(arr.value_as_geo(1), geo::Geometry::Point(point::p1())); + assert_eq!(arr.value_as_geo(2), geo::Geometry::Point(point::p2())); + } - if type_ids.contains(&3) { - return Ok(GeometryArray::Polygon(value.try_into()?)); - } + #[test] + fn geo_roundtrip_accurate_multi_points() { + let geoms: Vec = vec![ + geo::Geometry::Point(point::p0()), + geo::Geometry::Point(point::p1()), + geo::Geometry::Point(point::p2()), + ]; + let arr: GeometryArray = GeometryBuilder::from_geometries( + geoms.as_slice(), + Default::default(), + Default::default(), + true, + ) + .unwrap() + .finish(); - if type_ids.contains(&4) { - return Ok(GeometryArray::MultiPoint(value.try_into()?)); - } + assert_eq!( + arr.value_as_geo(0), + geo::Geometry::MultiPoint(geo::MultiPoint(vec![point::p0()])) + ); + assert_eq!( + arr.value_as_geo(1), + geo::Geometry::MultiPoint(geo::MultiPoint(vec![point::p1()])) + ); + assert_eq!( + arr.value_as_geo(2), + geo::Geometry::MultiPoint(geo::MultiPoint(vec![point::p2()])) + ); + } - if type_ids.contains(&5) { - return Ok(GeometryArray::MultiLineString(value.try_into()?)); - } + #[test] + fn geo_roundtrip_accurate_all() { + let geoms: Vec = vec![ + geo::Geometry::Point(point::p0()), + geo::Geometry::LineString(linestring::ls0()), + geo::Geometry::Polygon(polygon::p0()), + geo::Geometry::MultiPoint(multipoint::mp0()), + geo::Geometry::MultiLineString(multilinestring::ml0()), + geo::Geometry::MultiPolygon(multipolygon::mp0()), + ]; - if type_ids.contains(&6) { - return Ok(GeometryArray::MultiPolygon(value.try_into()?)); - } - } + let arr: GeometryArray = GeometryBuilder::from_geometries( + geoms.as_slice(), + Default::default(), + Default::default(), + false, + ) + .unwrap() + .finish(); - if type_ids.len() == 3 { - if type_ids.contains(&0) && type_ids.contains(&4) { - return Ok(GeometryArray::MultiPoint(value.try_into()?)); - } + assert_eq!(arr.value_as_geo(0), geoms[0]); + assert_eq!(arr.value_as_geo(1), geoms[1]); + assert_eq!(arr.value_as_geo(2), geoms[2]); + assert_eq!(arr.value_as_geo(3), geoms[3]); + assert_eq!(arr.value_as_geo(4), geoms[4]); + assert_eq!(arr.value_as_geo(5), geoms[5]); + } - if type_ids.contains(&1) && type_ids.contains(&5) { - return Ok(GeometryArray::MultiLineString(value.try_into()?)); - } + #[test] + fn arrow_roundtrip() { + let geoms: Vec = vec![ + geo::Geometry::Point(point::p0()), + geo::Geometry::LineString(linestring::ls0()), + geo::Geometry::Polygon(polygon::p0()), + geo::Geometry::MultiPoint(multipoint::mp0()), + geo::Geometry::MultiLineString(multilinestring::ml0()), + geo::Geometry::MultiPolygon(multipolygon::mp0()), + ]; - if type_ids.contains(&3) && type_ids.contains(&6) { - return Ok(GeometryArray::MultiPolygon(value.try_into()?)); - } - } + let arr: GeometryArray = GeometryBuilder::from_geometries( + geoms.as_slice(), + Default::default(), + Default::default(), + false, + ) + .unwrap() + .finish(); - Err(GeoArrowError::General( - "Mixed WKB parsing not yet implemented".to_string(), - )) + // Round trip to/from arrow-rs + let arrow_array = arr.into_arrow(); + let round_trip_arr: GeometryArray = (&arrow_array).try_into().unwrap(); + + assert_eq!(round_trip_arr.value_as_geo(0), geoms[0]); + assert_eq!(round_trip_arr.value_as_geo(1), geoms[1]); + assert_eq!(round_trip_arr.value_as_geo(2), geoms[2]); + assert_eq!(round_trip_arr.value_as_geo(3), geoms[3]); + assert_eq!(round_trip_arr.value_as_geo(4), geoms[4]); + assert_eq!(round_trip_arr.value_as_geo(5), geoms[5]); } -} -impl From> for GeometryArray { - fn from(value: GeometryArray) -> Self { - match value { - GeometryArray::Point(arr) => GeometryArray::Point(arr), - GeometryArray::LineString(arr) => GeometryArray::LineString(arr.into()), - GeometryArray::Polygon(arr) => GeometryArray::Polygon(arr.into()), - GeometryArray::MultiPoint(arr) => GeometryArray::MultiPoint(arr.into()), - GeometryArray::MultiLineString(arr) => GeometryArray::MultiLineString(arr.into()), - GeometryArray::MultiPolygon(arr) => GeometryArray::MultiPolygon(arr.into()), - GeometryArray::Rect(arr) => GeometryArray::Rect(arr), - } + #[test] + fn arrow_roundtrip_not_all_types() { + let geoms: Vec = vec![ + geo::Geometry::MultiPoint(multipoint::mp0()), + geo::Geometry::MultiLineString(multilinestring::ml0()), + geo::Geometry::MultiPolygon(multipolygon::mp0()), + ]; + + let arr: GeometryArray = GeometryBuilder::from_geometries( + geoms.as_slice(), + Default::default(), + Default::default(), + false, + ) + .unwrap() + .finish(); + + // Round trip to/from arrow-rs + let arrow_array = arr.into_arrow(); + let round_trip_arr: GeometryArray = (&arrow_array).try_into().unwrap(); + + assert_eq!(round_trip_arr.value_as_geo(0), geoms[0]); + assert_eq!(round_trip_arr.value_as_geo(1), geoms[1]); + assert_eq!(round_trip_arr.value_as_geo(2), geoms[2]); } -} -impl TryFrom> for GeometryArray { - type Error = GeoArrowError; + #[test] + fn arrow_roundtrip_not_all_types2() { + let geoms: Vec = vec![ + geo::Geometry::MultiPoint(multipoint::mp0()), + geo::Geometry::MultiPolygon(multipolygon::mp0()), + ]; + + let arr: GeometryArray = GeometryBuilder::from_geometries( + geoms.as_slice(), + Default::default(), + Default::default(), + false, + ) + .unwrap() + .finish(); + + // Round trip to/from arrow-rs + let arrow_array = arr.into_arrow(); + let round_trip_arr: GeometryArray = (&arrow_array).try_into().unwrap(); - fn try_from(value: GeometryArray) -> Result { - Ok(match value { - GeometryArray::Point(arr) => GeometryArray::Point(arr), - GeometryArray::LineString(arr) => GeometryArray::LineString(arr.try_into()?), - GeometryArray::Polygon(arr) => GeometryArray::Polygon(arr.try_into()?), - GeometryArray::MultiPoint(arr) => GeometryArray::MultiPoint(arr.try_into()?), - GeometryArray::MultiLineString(arr) => GeometryArray::MultiLineString(arr.try_into()?), - GeometryArray::MultiPolygon(arr) => GeometryArray::MultiPolygon(arr.try_into()?), - GeometryArray::Rect(arr) => GeometryArray::Rect(arr), - }) + assert_eq!(round_trip_arr.value_as_geo(0), geoms[0]); + assert_eq!(round_trip_arr.value_as_geo(1), geoms[1]); } } diff --git a/rust/geoarrow/src/array/unknown/builder.rs b/rust/geoarrow/src/array/geometry/builder.rs similarity index 96% rename from rust/geoarrow/src/array/unknown/builder.rs rename to rust/geoarrow/src/array/geometry/builder.rs index 586f61cc..ae20265b 100644 --- a/rust/geoarrow/src/array/unknown/builder.rs +++ b/rust/geoarrow/src/array/geometry/builder.rs @@ -1,8 +1,8 @@ use std::sync::Arc; +use crate::array::geometry::array::GeometryArray; +use crate::array::geometry::capacity::GeometryCapacity; use crate::array::metadata::ArrayMetadata; -use crate::array::unknown::array::UnknownGeometryArray; -use crate::array::unknown::capacity::UnknownCapacity; use crate::array::{ CoordType, GeometryCollectionBuilder, LineStringBuilder, MultiLineStringBuilder, MultiPointBuilder, MultiPolygonBuilder, PointBuilder, PolygonBuilder, WKBArray, @@ -25,13 +25,13 @@ pub(crate) const DEFAULT_PREFER_MULTI: bool = false; /// This currently has the caveat that these geometries must be a _primitive_ geometry type. This /// does not currently support nested GeometryCollection objects. /// -/// Converting an [`UnknownGeometryBuilder`] into a [`UnknownGeometryArray`] is `O(1)`. +/// Converting an [`GeometryBuilder`] into a [`GeometryArray`] is `O(1)`. /// /// # Invariants /// /// - All arrays must have the same coordinate layout (interleaved or separated) #[derive(Debug)] -pub struct UnknownGeometryBuilder { +pub struct GeometryBuilder { metadata: Arc, // Invariant: every item in `types` is `> 0 && < fields.len()` @@ -83,8 +83,8 @@ pub struct UnknownGeometryBuilder { deferred_nulls: usize, } -impl<'a> UnknownGeometryBuilder { - /// Creates a new empty [`UnknownGeometryBuilder`]. +impl<'a> GeometryBuilder { + /// Creates a new empty [`GeometryBuilder`]. pub fn new() -> Self { Self::new_with_options(Default::default(), Default::default(), DEFAULT_PREFER_MULTI) } @@ -98,7 +98,7 @@ impl<'a> UnknownGeometryBuilder { } /// Creates a new [`MixedGeometryBuilder`] with given capacity and no validity. - pub fn with_capacity(capacity: UnknownCapacity) -> Self { + pub fn with_capacity(capacity: GeometryCapacity) -> Self { Self::with_capacity_and_options( capacity, Default::default(), @@ -108,7 +108,7 @@ impl<'a> UnknownGeometryBuilder { } pub fn with_capacity_and_options( - capacity: UnknownCapacity, + capacity: GeometryCapacity, coord_type: CoordType, metadata: Arc, prefer_multi: bool, @@ -211,7 +211,7 @@ impl<'a> UnknownGeometryBuilder { } } - pub fn reserve(&mut self, capacity: UnknownCapacity) { + pub fn reserve(&mut self, capacity: GeometryCapacity) { let total_num_geoms = capacity.total_num_geoms(); self.types.reserve(total_num_geoms); self.offsets.reserve(total_num_geoms); @@ -233,7 +233,7 @@ impl<'a> UnknownGeometryBuilder { self.gc_xyz.reserve(capacity.gc_xyz()); } - pub fn reserve_exact(&mut self, capacity: UnknownCapacity) { + pub fn reserve_exact(&mut self, capacity: GeometryCapacity) { let total_num_geoms = capacity.total_num_geoms(); self.types.reserve_exact(total_num_geoms); @@ -288,7 +288,7 @@ impl<'a> UnknownGeometryBuilder { // }) // } - pub fn finish(self) -> UnknownGeometryArray { + pub fn finish(self) -> GeometryArray { self.into() } @@ -309,7 +309,7 @@ impl<'a> UnknownGeometryBuilder { metadata: Arc, prefer_multi: bool, ) -> Result { - let counter = UnknownCapacity::from_geometries(geoms, prefer_multi)?; + let counter = GeometryCapacity::from_geometries(geoms, prefer_multi)?; Ok(Self::with_capacity_and_options( counter, coord_type, @@ -323,7 +323,7 @@ impl<'a> UnknownGeometryBuilder { geoms: impl Iterator>, prefer_multi: bool, ) -> Result<()> { - let counter = UnknownCapacity::from_geometries(geoms, prefer_multi)?; + let counter = GeometryCapacity::from_geometries(geoms, prefer_multi)?; self.reserve(counter); Ok(()) } @@ -333,7 +333,7 @@ impl<'a> UnknownGeometryBuilder { geoms: impl Iterator>, prefer_multi: bool, ) -> Result<()> { - let counter = UnknownCapacity::from_geometries(geoms, prefer_multi)?; + let counter = GeometryCapacity::from_geometries(geoms, prefer_multi)?; self.reserve_exact(counter); Ok(()) } @@ -924,13 +924,13 @@ impl<'a> UnknownGeometryBuilder { } } -impl Default for UnknownGeometryBuilder { +impl Default for GeometryBuilder { fn default() -> Self { Self::new() } } -impl IntoArrow for UnknownGeometryBuilder { +impl IntoArrow for GeometryBuilder { type ArrowArray = UnionArray; fn into_arrow(self) -> Self::ArrowArray { @@ -938,8 +938,8 @@ impl IntoArrow for UnknownGeometryBuilder { } } -impl From for UnknownGeometryArray { - fn from(other: UnknownGeometryBuilder) -> Self { +impl From for GeometryArray { + fn from(other: GeometryBuilder) -> Self { Self::new( other.types.into(), other.offsets.into(), @@ -962,7 +962,7 @@ impl From for UnknownGeometryArray { } } -impl> TryFrom<&[G]> for UnknownGeometryBuilder { +impl> TryFrom<&[G]> for GeometryBuilder { type Error = GeoArrowError; fn try_from(geoms: &[G]) -> Result { @@ -970,7 +970,7 @@ impl> TryFrom<&[G]> for UnknownGeometryBuilder { } } -impl> TryFrom>> for UnknownGeometryBuilder { +impl> TryFrom>> for GeometryBuilder { type Error = GeoArrowError; fn try_from(geoms: Vec>) -> Result { @@ -978,7 +978,7 @@ impl> TryFrom>> for UnknownGeometryBuild } } -impl TryFrom> for UnknownGeometryBuilder { +impl TryFrom> for GeometryBuilder { type Error = GeoArrowError; fn try_from(value: WKBArray) -> std::result::Result { @@ -994,7 +994,7 @@ impl TryFrom> for UnknownGeometryBuilder { } } -impl GeometryArrayBuilder for UnknownGeometryBuilder { +impl GeometryArrayBuilder for GeometryBuilder { fn len(&self) -> usize { self.types.len() } diff --git a/rust/geoarrow/src/array/unknown/capacity.rs b/rust/geoarrow/src/array/geometry/capacity.rs similarity index 98% rename from rust/geoarrow/src/array/unknown/capacity.rs rename to rust/geoarrow/src/array/geometry/capacity.rs index 7fab91af..0836ffa4 100644 --- a/rust/geoarrow/src/array/unknown/capacity.rs +++ b/rust/geoarrow/src/array/geometry/capacity.rs @@ -9,11 +9,11 @@ use crate::array::GeometryCollectionCapacity; use crate::error::Result; use geo_traits::*; -/// A counter for the buffer sizes of a [`UnknownGeometryArray`][crate::array::UnknownGeometryArray]. +/// A counter for the buffer sizes of a [`GeometryArray`][crate::array::GeometryArray]. /// /// This can be used to reduce allocations by allocating once for exactly the array size you need. #[derive(Default, Debug, Clone, Copy)] -pub struct UnknownCapacity { +pub struct GeometryCapacity { /// The number of null geometries. Ideally the builder will assign these to any array that has /// already been allocated. Otherwise we don't know where to assign them. nulls: usize, @@ -39,7 +39,7 @@ pub struct UnknownCapacity { prefer_multi: bool, } -impl UnknownCapacity { +impl GeometryCapacity { #[allow(clippy::too_many_arguments)] pub fn new( nulls: usize, @@ -457,7 +457,7 @@ impl UnknownCapacity { } } -impl AddAssign for UnknownCapacity { +impl AddAssign for GeometryCapacity { fn add_assign(&mut self, rhs: Self) { self.nulls += rhs.nulls; diff --git a/rust/geoarrow/src/array/geometry/iterator.rs b/rust/geoarrow/src/array/geometry/iterator.rs deleted file mode 100644 index 8b137891..00000000 --- a/rust/geoarrow/src/array/geometry/iterator.rs +++ /dev/null @@ -1 +0,0 @@ - diff --git a/rust/geoarrow/src/array/geometry/mod.rs b/rust/geoarrow/src/array/geometry/mod.rs index 8fc7f204..523438f5 100644 --- a/rust/geoarrow/src/array/geometry/mod.rs +++ b/rust/geoarrow/src/array/geometry/mod.rs @@ -1,8 +1,7 @@ -//! Contains the [`GeometryArray`], which is an enum over all geometry array types. +mod array; +mod builder; +mod capacity; -// #[allow(deprecated)] -// pub use array::GeometryArray; - -// mod array; -// mod iterator; -// mod mutable; +pub use array::GeometryArray; +pub use builder::GeometryBuilder; +pub use capacity::GeometryCapacity; diff --git a/rust/geoarrow/src/array/mod.rs b/rust/geoarrow/src/array/mod.rs index fd22c959..595fc3a4 100644 --- a/rust/geoarrow/src/array/mod.rs +++ b/rust/geoarrow/src/array/mod.rs @@ -9,6 +9,7 @@ pub use coord::{ InterleavedCoordBufferBuilder, SeparatedCoordBuffer, SeparatedCoordBufferBuilder, }; pub use dynamic::{NativeArrayDyn, SerializedArrayDyn}; +pub use geometry::{GeometryArray, GeometryBuilder, GeometryCapacity}; pub use geometrycollection::{ GeometryCollectionArray, GeometryCollectionBuilder, GeometryCollectionCapacity, }; @@ -20,7 +21,6 @@ pub use multipolygon::{MultiPolygonArray, MultiPolygonBuilder, MultiPolygonCapac pub use point::{PointArray, PointBuilder}; pub use polygon::{PolygonArray, PolygonBuilder, PolygonCapacity}; pub use rect::{RectArray, RectBuilder}; -pub use unknown::{UnknownCapacity, UnknownGeometryArray, UnknownGeometryBuilder}; pub use wkt::WKTArray; pub use crate::trait_::{ArrayBase, NativeArray, SerializedArray}; @@ -41,7 +41,6 @@ pub(crate) mod offset_builder; pub(crate) mod point; pub(crate) mod polygon; pub(crate) mod rect; -pub(crate) mod unknown; pub(crate) mod util; pub(crate) mod wkt; diff --git a/rust/geoarrow/src/array/unknown/array.rs b/rust/geoarrow/src/array/unknown/array.rs deleted file mode 100644 index 0aa9e075..00000000 --- a/rust/geoarrow/src/array/unknown/array.rs +++ /dev/null @@ -1,1214 +0,0 @@ -use std::collections::HashSet; -use std::sync::Arc; - -use arrow_array::{Array, OffsetSizeTrait, UnionArray}; -use arrow_buffer::{NullBuffer, ScalarBuffer}; -use arrow_schema::{DataType, Field, UnionMode}; - -use crate::array::metadata::ArrayMetadata; -use crate::array::unknown::builder::UnknownGeometryBuilder; -use crate::array::unknown::capacity::UnknownCapacity; -use crate::array::{ - CoordType, GeometryCollectionArray, LineStringArray, MultiLineStringArray, MultiPointArray, - MultiPolygonArray, PointArray, PolygonArray, WKBArray, -}; -use crate::datatypes::{Dimension, NativeType}; -use crate::error::{GeoArrowError, Result}; -use crate::scalar::Geometry; -use crate::trait_::{ArrayAccessor, GeometryArraySelfMethods, IntoArrow, NativeGeometryAccessor}; -use crate::{ArrayBase, NativeArray}; -use geo_traits::GeometryTrait; - -/// # Invariants -/// -/// - All arrays must have the same dimension -/// - All arrays must have the same coordinate layout (interleaved or separated) -/// -/// - 1: Point -/// - 2: LineString -/// - 3: Polygon -/// - 4: MultiPoint -/// - 5: MultiLineString -/// - 6: MultiPolygon -/// - 7: GeometryCollection -/// - 11: Point Z -/// - 12: LineString Z -/// - 13: Polygon Z -/// - 14: MultiPoint Z -/// - 15: MultiLineString Z -/// - 16: MultiPolygon Z -/// - 17: GeometryCollection Z -/// - 21: Point M -/// - 22: LineString M -/// - 23: Polygon M -/// - 24: MultiPoint M -/// - 25: MultiLineString M -/// - 26: MultiPolygon M -/// - 27: GeometryCollection M -/// - 31: Point ZM -/// - 32: LineString ZM -/// - 33: Polygon ZM -/// - 34: MultiPoint ZM -/// - 35: MultiLineString ZM -/// - 36: MultiPolygon ZM -/// - 37: GeometryCollection ZM -#[derive(Debug, Clone, PartialEq)] -pub struct UnknownGeometryArray { - /// Always NativeType::Unknown - data_type: NativeType, - - pub(crate) metadata: Arc, - - /// Invariant: every item in `type_ids` is `> 0 && < fields.len()` if `type_ids` are not - /// provided. If `type_ids` exist in the NativeType, then every item in `type_ids` is `> 0 && ` - pub(crate) type_ids: ScalarBuffer, - - /// Invariant: `offsets.len() == type_ids.len()` - pub(crate) offsets: ScalarBuffer, - - // In the future we'll additionally have xym, xyzm array variants. - pub(crate) point_xy: PointArray, - pub(crate) line_string_xy: LineStringArray, - pub(crate) polygon_xy: PolygonArray, - pub(crate) mpoint_xy: MultiPointArray, - pub(crate) mline_string_xy: MultiLineStringArray, - pub(crate) mpolygon_xy: MultiPolygonArray, - pub(crate) gc_xy: GeometryCollectionArray, - - pub(crate) point_xyz: PointArray, - pub(crate) line_string_xyz: LineStringArray, - pub(crate) polygon_xyz: PolygonArray, - pub(crate) mpoint_xyz: MultiPointArray, - pub(crate) mline_string_xyz: MultiLineStringArray, - pub(crate) mpolygon_xyz: MultiPolygonArray, - pub(crate) gc_xyz: GeometryCollectionArray, - - /// An offset used for slicing into this array. The offset will be 0 if the array has not been - /// sliced. - /// - /// In order to slice this array efficiently (and zero-cost) we can't slice the underlying - /// fields directly. If this were always a _sparse_ union array, we could! We could then always - /// slice from offset to length of each underlying array. But we're under the assumption that - /// most or all of the time we have a dense union array, where the `offsets` buffer is defined. - /// In that case, to know how to slice each underlying array, we'd have to walk the `type_ids` - /// and `offsets` arrays (in O(N) time) to figure out how to slice the underlying arrays. - /// - /// Instead, we store the slice offset. - /// - /// Note that this offset is only for slicing into the **fields**, i.e. the geometry arrays. - /// The `type_ids` and `offsets` arrays are sliced as usual. - /// - /// TODO: when exporting this array, export to arrow2 and then slice from scratch because we - /// can't set the `offset` in a UnionArray constructor - pub(crate) slice_offset: usize, -} - -impl UnknownGeometryArray { - /// Create a new MixedGeometryArray from parts - /// - /// # Implementation - /// - /// This function is `O(1)`. - /// - /// # Panics - /// - /// - if the validity is not `None` and its length is different from the number of geometries - /// - if the largest geometry offset does not match the number of coordinates - #[allow(clippy::too_many_arguments)] - pub fn new( - type_ids: ScalarBuffer, - offsets: ScalarBuffer, - point_xy: PointArray, - line_string_xy: LineStringArray, - polygon_xy: PolygonArray, - mpoint_xy: MultiPointArray, - mline_string_xy: MultiLineStringArray, - mpolygon_xy: MultiPolygonArray, - gc_xy: GeometryCollectionArray, - point_xyz: PointArray, - line_string_xyz: LineStringArray, - polygon_xyz: PolygonArray, - mpoint_xyz: MultiPointArray, - mline_string_xyz: MultiLineStringArray, - mpolygon_xyz: MultiPolygonArray, - gc_xyz: GeometryCollectionArray, - metadata: Arc, - ) -> Self { - let mut coord_types = HashSet::new(); - coord_types.insert(point_xy.coord_type()); - coord_types.insert(line_string_xy.coord_type()); - coord_types.insert(polygon_xy.coord_type()); - coord_types.insert(mpoint_xy.coord_type()); - coord_types.insert(mline_string_xy.coord_type()); - coord_types.insert(mpolygon_xy.coord_type()); - coord_types.insert(gc_xy.coord_type()); - - coord_types.insert(point_xyz.coord_type()); - coord_types.insert(line_string_xyz.coord_type()); - coord_types.insert(polygon_xyz.coord_type()); - coord_types.insert(mpoint_xyz.coord_type()); - coord_types.insert(mline_string_xyz.coord_type()); - coord_types.insert(mpolygon_xyz.coord_type()); - coord_types.insert(gc_xyz.coord_type()); - assert_eq!(coord_types.len(), 1); - - let coord_type = coord_types.into_iter().next().unwrap(); - - let data_type = NativeType::Unknown(coord_type); - - Self { - data_type, - type_ids, - offsets, - point_xy, - line_string_xy, - polygon_xy, - mpoint_xy, - mline_string_xy, - mpolygon_xy, - gc_xy, - point_xyz, - line_string_xyz, - polygon_xyz, - mpoint_xyz, - mline_string_xyz, - mpolygon_xyz, - gc_xyz, - slice_offset: 0, - metadata, - } - } - - /// The lengths of each buffer contained in this array. - pub fn buffer_lengths(&self) -> UnknownCapacity { - UnknownCapacity::new( - 0, - self.point_xy.buffer_lengths(), - self.line_string_xy.buffer_lengths(), - self.polygon_xy.buffer_lengths(), - self.mpoint_xy.buffer_lengths(), - self.mline_string_xy.buffer_lengths(), - self.mpolygon_xy.buffer_lengths(), - self.gc_xy.buffer_lengths(), - self.point_xyz.buffer_lengths(), - self.line_string_xyz.buffer_lengths(), - self.polygon_xyz.buffer_lengths(), - self.mpoint_xyz.buffer_lengths(), - self.mline_string_xyz.buffer_lengths(), - self.mpolygon_xyz.buffer_lengths(), - self.gc_xyz.buffer_lengths(), - false, - ) - } - - pub fn has_points(&self, dim: Dimension) -> bool { - match dim { - Dimension::XY => !self.point_xy.is_empty(), - Dimension::XYZ => !self.point_xyz.is_empty(), - } - } - - pub fn has_line_strings(&self, dim: Dimension) -> bool { - match dim { - Dimension::XY => !self.line_string_xy.is_empty(), - Dimension::XYZ => !self.line_string_xyz.is_empty(), - } - } - - pub fn has_polygons(&self, dim: Dimension) -> bool { - match dim { - Dimension::XY => !self.polygon_xy.is_empty(), - Dimension::XYZ => !self.polygon_xyz.is_empty(), - } - } - - pub fn has_multi_points(&self, dim: Dimension) -> bool { - match dim { - Dimension::XY => !self.mpoint_xy.is_empty(), - Dimension::XYZ => !self.mpoint_xyz.is_empty(), - } - } - - pub fn has_multi_line_strings(&self, dim: Dimension) -> bool { - match dim { - Dimension::XY => !self.mline_string_xy.is_empty(), - Dimension::XYZ => !self.mline_string_xyz.is_empty(), - } - } - - pub fn has_multi_polygons(&self, dim: Dimension) -> bool { - match dim { - Dimension::XY => !self.mpolygon_xy.is_empty(), - Dimension::XYZ => !self.mpolygon_xyz.is_empty(), - } - } - - /// Return `true` if this array holds at least one geometry array of the given dimension - pub fn has_dimension(&self, dim: Dimension) -> bool { - use Dimension::*; - match dim { - XY => { - self.has_points(XY) - || self.has_line_strings(XY) - || self.has_polygons(XY) - || self.has_multi_points(XY) - || self.has_multi_line_strings(XY) - || self.has_multi_polygons(XY) - } - XYZ => { - self.has_points(XYZ) - || self.has_line_strings(XYZ) - || self.has_polygons(XYZ) - || self.has_multi_points(XYZ) - || self.has_multi_line_strings(XYZ) - || self.has_multi_polygons(XYZ) - } - } - } - - /// Return `true` if this array holds at least one geometry array of the given dimension and no - /// arrays of any other dimension. - pub fn has_only_dimension(&self, dim: Dimension) -> bool { - use Dimension::*; - match dim { - XY => self.has_dimension(XY) && !self.has_dimension(XYZ), - XYZ => self.has_dimension(XYZ) && !self.has_dimension(XY), - } - } - - // /// The number of non-empty child arrays - // fn num_non_empty_children(&self) -> usize { - // let mut count = 0; - - // if !self.point_xy.is_empty() { - // count += 1 - // }; - // if !self.line_string_xy.is_empty() { - // count += 1 - // }; - // if !self.polygon_xy.is_empty() { - // count += 1 - // }; - // if !self.mpoint_xy.is_empty() { - // count += 1 - // }; - // if !self.mline_string_xy.is_empty() { - // count += 1 - // }; - // if !self.mpolygon_xy.is_empty() { - // count += 1 - // }; - - // if !self.point_xyz.is_empty() { - // count += 1 - // }; - // if !self.line_string_xyz.is_empty() { - // count += 1 - // }; - // if !self.polygon_xyz.is_empty() { - // count += 1 - // }; - // if !self.mpoint_xyz.is_empty() { - // count += 1 - // }; - // if !self.mline_string_xyz.is_empty() { - // count += 1 - // }; - // if !self.mpolygon_xyz.is_empty() { - // count += 1 - // }; - - // count - // } - - // TODO: restore to enable downcasting - - // pub fn has_only_type(&self, typ: NativeType) -> bool { - // use Dimension::*; - - // if self.num_non_empty_children() == 0 { - // // Empty array - // false - // } - - // if self.num_non_empty_children() > 1 {} - - // match typ { - // NativeType::Point(_, dim) - // } - - // self.has_points(XY) - // && !self.has_line_strings(XY) - // && !self.has_polygons(XY) - // && !self.has_multi_points(XY) - // && !self.has_multi_line_strings(XY) - // && !self.has_multi_polygons(XY) - // && !self.has_points(XYZ) - // && !self.has_line_strings(XYZ) - // && !self.has_polygons(XYZ) - // && !self.has_multi_points(XYZ) - // && !self.has_multi_line_strings(XYZ) - // && !self.has_multi_polygons(XYZ) - // } - - // pub fn has_only_line_strings(&self) -> bool { - // !self.has_points() - // && self.has_line_strings() - // && !self.has_polygons() - // && !self.has_multi_points() - // && !self.has_multi_line_strings() - // && !self.has_multi_polygons() - // } - - // pub fn has_only_polygons(&self) -> bool { - // !self.has_points() - // && !self.has_line_strings() - // && self.has_polygons() - // && !self.has_multi_points() - // && !self.has_multi_line_strings() - // && !self.has_multi_polygons() - // } - - // pub fn has_only_multi_points(&self) -> bool { - // !self.has_points() - // && !self.has_line_strings() - // && !self.has_polygons() - // && self.has_multi_points() - // && !self.has_multi_line_strings() - // && !self.has_multi_polygons() - // } - - // pub fn has_only_multi_line_strings(&self) -> bool { - // !self.has_points() - // && !self.has_line_strings() - // && !self.has_polygons() - // && !self.has_multi_points() - // && self.has_multi_line_strings() - // && !self.has_multi_polygons() - // } - - // pub fn has_only_multi_polygons(&self) -> bool { - // !self.has_points() - // && !self.has_line_strings() - // && !self.has_polygons() - // && !self.has_multi_points() - // && !self.has_multi_line_strings() - // && self.has_multi_polygons() - // } - - /// The number of bytes occupied by this array. - pub fn num_bytes(&self) -> usize { - self.buffer_lengths().num_bytes() - } - - /// Slices this [`MixedGeometryArray`] in place. - /// - /// # Implementation - /// - /// This operation is `O(F)` where `F` is the number of fields. - /// - /// # Panic - /// - /// This function panics iff `offset + length > self.len()`. - #[inline] - pub fn slice(&self, offset: usize, length: usize) -> Self { - assert!( - offset + length <= self.len(), - "offset + length may not exceed length of array" - ); - Self { - data_type: self.data_type, - type_ids: self.type_ids.slice(offset, length), - offsets: self.offsets.slice(offset, length), - - point_xy: self.point_xy.clone(), - line_string_xy: self.line_string_xy.clone(), - polygon_xy: self.polygon_xy.clone(), - mpoint_xy: self.mpoint_xy.clone(), - mline_string_xy: self.mline_string_xy.clone(), - mpolygon_xy: self.mpolygon_xy.clone(), - gc_xy: self.gc_xy.clone(), - - point_xyz: self.point_xyz.clone(), - line_string_xyz: self.line_string_xyz.clone(), - polygon_xyz: self.polygon_xyz.clone(), - mpoint_xyz: self.mpoint_xyz.clone(), - mline_string_xyz: self.mline_string_xyz.clone(), - mpolygon_xyz: self.mpolygon_xyz.clone(), - gc_xyz: self.gc_xyz.clone(), - - slice_offset: self.slice_offset + offset, - metadata: self.metadata.clone(), - } - } - - pub fn to_coord_type(&self, coord_type: CoordType) -> Self { - self.clone().into_coord_type(coord_type) - } - - pub fn into_coord_type(self, coord_type: CoordType) -> Self { - Self::new( - self.type_ids, - self.offsets, - self.point_xy.into_coord_type(coord_type), - self.line_string_xy.into_coord_type(coord_type), - self.polygon_xy.into_coord_type(coord_type), - self.mpoint_xy.into_coord_type(coord_type), - self.mline_string_xy.into_coord_type(coord_type), - self.mpolygon_xy.into_coord_type(coord_type), - self.gc_xy.into_coord_type(coord_type), - self.point_xyz.into_coord_type(coord_type), - self.line_string_xyz.into_coord_type(coord_type), - self.polygon_xyz.into_coord_type(coord_type), - self.mpoint_xyz.into_coord_type(coord_type), - self.mline_string_xyz.into_coord_type(coord_type), - self.mpolygon_xyz.into_coord_type(coord_type), - self.gc_xyz.into_coord_type(coord_type), - self.metadata, - ) - } -} - -impl ArrayBase for UnknownGeometryArray { - fn as_any(&self) -> &dyn std::any::Any { - self - } - - fn storage_type(&self) -> DataType { - self.data_type.to_data_type() - } - - fn extension_field(&self) -> Arc { - Arc::new( - self.data_type - .to_field_with_metadata("geometry", true, &self.metadata), - ) - } - - fn extension_name(&self) -> &str { - self.data_type.extension_name() - } - - fn into_array_ref(self) -> Arc { - Arc::new(self.into_arrow()) - } - - fn to_array_ref(&self) -> arrow_array::ArrayRef { - self.clone().into_array_ref() - } - - fn metadata(&self) -> Arc { - self.metadata.clone() - } - - /// Returns the number of geometries in this array - #[inline] - fn len(&self) -> usize { - // Note that `type_ids` is sliced as usual, and thus always has the correct length. - self.type_ids.len() - } - - /// Returns the optional validity. - #[inline] - fn nulls(&self) -> Option<&NullBuffer> { - None - } -} - -impl NativeArray for UnknownGeometryArray { - fn data_type(&self) -> NativeType { - self.data_type - } - - fn coord_type(&self) -> crate::array::CoordType { - self.data_type.coord_type() - } - - fn to_coord_type(&self, coord_type: CoordType) -> Arc { - Arc::new(self.clone().into_coord_type(coord_type)) - } - - fn with_metadata(&self, metadata: Arc) -> crate::trait_::NativeArrayRef { - let mut arr = self.clone(); - arr.metadata = metadata; - Arc::new(arr) - } - - fn as_ref(&self) -> &dyn NativeArray { - self - } - - fn slice(&self, offset: usize, length: usize) -> Arc { - Arc::new(self.slice(offset, length)) - } -} - -impl GeometryArraySelfMethods for UnknownGeometryArray { - fn with_coords(self, _coords: crate::array::CoordBuffer) -> Self { - todo!(); - } - - fn into_coord_type(self, _coord_type: crate::array::CoordType) -> Self { - todo!(); - } -} - -impl NativeGeometryAccessor for UnknownGeometryArray { - unsafe fn value_as_geometry_unchecked(&self, index: usize) -> crate::scalar::Geometry { - let type_id = self.type_ids[index]; - let offset = self.offsets[index] as usize; - - match type_id { - 1 => Geometry::Point(self.point_xy.value(offset)), - 2 => Geometry::LineString(self.line_string_xy.value(offset)), - 3 => Geometry::Polygon(self.polygon_xy.value(offset)), - 4 => Geometry::MultiPoint(self.mpoint_xy.value(offset)), - 5 => Geometry::MultiLineString(self.mline_string_xy.value(offset)), - 6 => Geometry::MultiPolygon(self.mpolygon_xy.value(offset)), - 7 => { - panic!("nested geometry collections not supported") - } - 11 => Geometry::Point(self.point_xyz.value(offset)), - 12 => Geometry::LineString(self.line_string_xyz.value(offset)), - 13 => Geometry::Polygon(self.polygon_xyz.value(offset)), - 14 => Geometry::MultiPoint(self.mpoint_xyz.value(offset)), - 15 => Geometry::MultiLineString(self.mline_string_xyz.value(offset)), - 16 => Geometry::MultiPolygon(self.mpolygon_xyz.value(offset)), - 17 => { - panic!("nested geometry collections not supported") - } - _ => panic!("unknown type_id {}", type_id), - } - } -} - -#[cfg(feature = "geos")] -impl<'a> crate::trait_::NativeGEOSGeometryAccessor<'a> for UnknownGeometryArray { - unsafe fn value_as_geometry_unchecked( - &'a self, - index: usize, - ) -> std::result::Result { - let geom = NativeGeometryAccessor::value_as_geometry_unchecked(self, index); - (&geom).try_into() - } -} - -impl<'a> ArrayAccessor<'a> for UnknownGeometryArray { - type Item = Geometry<'a>; - type ItemGeo = geo::Geometry; - - unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item { - let type_id = self.type_ids[index]; - let offset = self.offsets[index] as usize; - - match type_id { - 1 => Geometry::Point(self.point_xy.value(offset)), - 2 => Geometry::LineString(self.line_string_xy.value(offset)), - 3 => Geometry::Polygon(self.polygon_xy.value(offset)), - 4 => Geometry::MultiPoint(self.mpoint_xy.value(offset)), - 5 => Geometry::MultiLineString(self.mline_string_xy.value(offset)), - 6 => Geometry::MultiPolygon(self.mpolygon_xy.value(offset)), - 7 => { - panic!("nested geometry collections not supported") - } - 11 => Geometry::Point(self.point_xyz.value(offset)), - 12 => Geometry::LineString(self.line_string_xyz.value(offset)), - 13 => Geometry::Polygon(self.polygon_xyz.value(offset)), - 14 => Geometry::MultiPoint(self.mpoint_xyz.value(offset)), - 15 => Geometry::MultiLineString(self.mline_string_xyz.value(offset)), - 16 => Geometry::MultiPolygon(self.mpolygon_xyz.value(offset)), - 17 => { - panic!("nested geometry collections not supported") - } - _ => panic!("unknown type_id {}", type_id), - } - } -} - -impl IntoArrow for UnknownGeometryArray { - type ArrowArray = UnionArray; - - fn into_arrow(self) -> Self::ArrowArray { - let union_fields = match self.data_type.to_data_type() { - DataType::Union(union_fields, _) => union_fields, - _ => unreachable!(), - }; - - let child_arrays = vec![ - self.point_xy.into_array_ref(), - self.line_string_xy.into_array_ref(), - self.polygon_xy.into_array_ref(), - self.mpoint_xy.into_array_ref(), - self.mline_string_xy.into_array_ref(), - self.mpolygon_xy.into_array_ref(), - self.point_xyz.into_array_ref(), - self.line_string_xyz.into_array_ref(), - self.polygon_xyz.into_array_ref(), - self.mpoint_xyz.into_array_ref(), - self.mline_string_xyz.into_array_ref(), - self.mpolygon_xyz.into_array_ref(), - ]; - - UnionArray::try_new( - union_fields, - self.type_ids, - Some(self.offsets), - child_arrays, - ) - .unwrap() - } -} - -impl TryFrom<&UnionArray> for UnknownGeometryArray { - type Error = GeoArrowError; - - fn try_from(value: &UnionArray) -> std::result::Result { - let mut point_xy: Option = None; - let mut line_string_xy: Option = None; - let mut polygon_xy: Option = None; - let mut mpoint_xy: Option = None; - let mut mline_string_xy: Option = None; - let mut mpolygon_xy: Option = None; - let mut gc_xy: Option = None; - - let mut point_xyz: Option = None; - let mut line_string_xyz: Option = None; - let mut polygon_xyz: Option = None; - let mut mpoint_xyz: Option = None; - let mut mline_string_xyz: Option = None; - let mut mpolygon_xyz: Option = None; - let mut gc_xyz: Option = None; - - match value.data_type() { - DataType::Union(fields, mode) => { - if !matches!(mode, UnionMode::Dense) { - return Err(GeoArrowError::General("Expected dense union".to_string())); - } - - for (type_id, _field) in fields.iter() { - let dimension = if type_id < 10 { - Dimension::XY - } else if type_id < 20 { - Dimension::XYZ - } else { - return Err(GeoArrowError::General(format!( - "Unsupported type_id: {}", - type_id - ))); - }; - - match type_id { - 1 => { - point_xy = Some( - (value.child(type_id).as_ref(), dimension) - .try_into() - .unwrap(), - ); - } - 2 => { - line_string_xy = Some( - (value.child(type_id).as_ref(), dimension) - .try_into() - .unwrap(), - ); - } - 3 => { - polygon_xy = Some( - (value.child(type_id).as_ref(), dimension) - .try_into() - .unwrap(), - ); - } - 4 => { - mpoint_xy = Some( - (value.child(type_id).as_ref(), dimension) - .try_into() - .unwrap(), - ); - } - 5 => { - mline_string_xy = Some( - (value.child(type_id).as_ref(), dimension) - .try_into() - .unwrap(), - ); - } - 6 => { - mpolygon_xy = Some( - (value.child(type_id).as_ref(), dimension) - .try_into() - .unwrap(), - ); - } - 7 => { - gc_xy = Some( - (value.child(type_id).as_ref(), dimension) - .try_into() - .unwrap(), - ); - } - 11 => { - point_xyz = Some( - (value.child(type_id).as_ref(), dimension) - .try_into() - .unwrap(), - ); - } - 12 => { - line_string_xyz = Some( - (value.child(type_id).as_ref(), dimension) - .try_into() - .unwrap(), - ); - } - 13 => { - polygon_xyz = Some( - (value.child(type_id).as_ref(), dimension) - .try_into() - .unwrap(), - ); - } - 14 => { - mpoint_xyz = Some( - (value.child(type_id).as_ref(), dimension) - .try_into() - .unwrap(), - ); - } - 15 => { - mline_string_xyz = Some( - (value.child(type_id).as_ref(), dimension) - .try_into() - .unwrap(), - ); - } - 16 => { - mpolygon_xyz = Some( - (value.child(type_id).as_ref(), dimension) - .try_into() - .unwrap(), - ); - } - 17 => { - gc_xyz = Some( - (value.child(type_id).as_ref(), dimension) - .try_into() - .unwrap(), - ); - } - _ => { - return Err(GeoArrowError::General(format!( - "Unexpected type_id {}", - type_id - ))) - } - } - } - } - _ => panic!("expected union type"), - }; - - let type_ids = value.type_ids().clone(); - // This is after checking for dense union - let offsets = value.offsets().unwrap().clone(); - - Ok(Self::new( - type_ids, - offsets, - point_xy.unwrap_or_default(), - line_string_xy.unwrap_or_default(), - polygon_xy.unwrap_or_default(), - mpoint_xy.unwrap_or_default(), - mline_string_xy.unwrap_or_default(), - mpolygon_xy.unwrap_or_default(), - gc_xy.unwrap_or_default(), - point_xyz.unwrap_or_default(), - line_string_xyz.unwrap_or_default(), - polygon_xyz.unwrap_or_default(), - mpoint_xyz.unwrap_or_default(), - mline_string_xyz.unwrap_or_default(), - mpolygon_xyz.unwrap_or_default(), - gc_xyz.unwrap_or_default(), - Default::default(), - )) - } -} - -impl TryFrom<&dyn Array> for UnknownGeometryArray { - type Error = GeoArrowError; - - fn try_from(value: &dyn Array) -> Result { - match value.data_type() { - DataType::Union(_, _) => { - let downcasted = value.as_any().downcast_ref::().unwrap(); - downcasted.try_into() - } - _ => Err(GeoArrowError::General(format!( - "Unexpected type: {:?}", - value.data_type() - ))), - } - } -} - -impl TryFrom<(&dyn Array, &Field)> for UnknownGeometryArray { - type Error = GeoArrowError; - - fn try_from((arr, field): (&dyn Array, &Field)) -> Result { - let mut arr: Self = arr.try_into()?; - arr.metadata = Arc::new(ArrayMetadata::try_from(field)?); - Ok(arr) - } -} - -impl> TryFrom<&[G]> for UnknownGeometryArray { - type Error = GeoArrowError; - - fn try_from(geoms: &[G]) -> Result { - let mut_arr: UnknownGeometryBuilder = geoms.try_into()?; - Ok(mut_arr.into()) - } -} - -impl> TryFrom>> for UnknownGeometryArray { - type Error = GeoArrowError; - - fn try_from(geoms: Vec>) -> Result { - let mut_arr: UnknownGeometryBuilder = geoms.try_into()?; - Ok(mut_arr.into()) - } -} - -impl TryFrom> for UnknownGeometryArray { - type Error = GeoArrowError; - - fn try_from(value: WKBArray) -> Result { - let mut_arr: UnknownGeometryBuilder = value.try_into()?; - Ok(mut_arr.into()) - } -} - -// impl From for UnknownGeometryArray { -// fn from(value: PointArray) -> Self { -// let type_ids = match value.dimension() { -// Dimension::XY => vec![1; value.len()], -// Dimension::XYZ => vec![11; value.len()], -// }; -// let metadata = value.metadata.clone(); -// Self::new( -// ScalarBuffer::from(type_ids), -// ScalarBuffer::from_iter(0..value.len() as i32), -// value, -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// metadata, -// ) -// } -// } - -// impl From for UnknownGeometryArray { -// fn from(value: LineStringArray) -> Self { -// let type_ids = match value.dimension() { -// Dimension::XY => vec![2; value.len()], -// Dimension::XYZ => vec![12; value.len()], -// }; -// let metadata = value.metadata.clone(); -// Self::new( -// ScalarBuffer::from(type_ids), -// ScalarBuffer::from_iter(0..value.len() as i32), -// Default::default(), -// value, -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// metadata, -// ) -// } -// } - -// impl From for UnknownGeometryArray { -// fn from(value: PolygonArray) -> Self { -// let type_ids = match value.dimension() { -// Dimension::XY => vec![3; value.len()], -// Dimension::XYZ => vec![13; value.len()], -// }; -// let metadata = value.metadata.clone(); -// Self::new( -// ScalarBuffer::from(type_ids), -// ScalarBuffer::from_iter(0..value.len() as i32), -// Default::default(), -// Default::default(), -// value, -// Default::default(), -// Default::default(), -// Default::default(), -// metadata, -// ) -// } -// } - -// impl From for UnknownGeometryArray { -// fn from(value: MultiPointArray) -> Self { -// let type_ids = match value.dimension() { -// Dimension::XY => vec![4; value.len()], -// Dimension::XYZ => vec![14; value.len()], -// }; -// let metadata = value.metadata.clone(); -// Self::new( -// ScalarBuffer::from(type_ids), -// ScalarBuffer::from_iter(0..value.len() as i32), -// Default::default(), -// Default::default(), -// Default::default(), -// value, -// Default::default(), -// Default::default(), -// metadata, -// ) -// } -// } - -// impl From for UnknownGeometryArray { -// fn from(value: MultiLineStringArray) -> Self { -// let type_ids = match value.dimension() { -// Dimension::XY => vec![5; value.len()], -// Dimension::XYZ => vec![15; value.len()], -// }; -// let metadata = value.metadata.clone(); -// Self::new( -// ScalarBuffer::from(type_ids), -// ScalarBuffer::from_iter(0..value.len() as i32), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// value, -// Default::default(), -// metadata, -// ) -// } -// } - -// impl From for UnknownGeometryArray { -// fn from(value: MultiPolygonArray) -> Self { -// let type_ids = match value.dimension() { -// Dimension::XY => vec![6; value.len()], -// Dimension::XYZ => vec![16; value.len()], -// }; -// let metadata = value.metadata.clone(); -// Self::new( -// ScalarBuffer::from(type_ids), -// ScalarBuffer::from_iter(0..value.len() as i32), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// value, -// metadata, -// ) -// } -// } - -// impl TryFrom for UnknownGeometryArray { -// type Error = GeoArrowError; - -// fn try_from(value: GeometryCollectionArray) -> std::result::Result { -// if !can_downcast_multi(&value.geom_offsets) { -// return Err(GeoArrowError::General("Unable to cast".to_string())); -// } - -// if value.null_count() > 0 { -// return Err(GeoArrowError::General( -// "Unable to cast with nulls".to_string(), -// )); -// } - -// Ok(value.array) -// } -// } - -/// Default to an empty array -impl Default for UnknownGeometryArray { - fn default() -> Self { - UnknownGeometryBuilder::default().into() - } -} - -#[cfg(test)] -mod test { - use super::*; - use crate::test::{linestring, multilinestring, multipoint, multipolygon, point, polygon}; - - #[test] - fn geo_roundtrip_accurate_points() { - let geoms: Vec = vec![ - geo::Geometry::Point(point::p0()), - geo::Geometry::Point(point::p1()), - geo::Geometry::Point(point::p2()), - ]; - - let arr: UnknownGeometryArray = UnknownGeometryBuilder::from_geometries( - geoms.as_slice(), - Default::default(), - Default::default(), - false, - ) - .unwrap() - .finish(); - - assert_eq!(arr.value_as_geo(0), geo::Geometry::Point(point::p0())); - assert_eq!(arr.value_as_geo(1), geo::Geometry::Point(point::p1())); - assert_eq!(arr.value_as_geo(2), geo::Geometry::Point(point::p2())); - } - - #[test] - fn geo_roundtrip_accurate_multi_points() { - let geoms: Vec = vec![ - geo::Geometry::Point(point::p0()), - geo::Geometry::Point(point::p1()), - geo::Geometry::Point(point::p2()), - ]; - let arr: UnknownGeometryArray = UnknownGeometryBuilder::from_geometries( - geoms.as_slice(), - Default::default(), - Default::default(), - true, - ) - .unwrap() - .finish(); - - assert_eq!( - arr.value_as_geo(0), - geo::Geometry::MultiPoint(geo::MultiPoint(vec![point::p0()])) - ); - assert_eq!( - arr.value_as_geo(1), - geo::Geometry::MultiPoint(geo::MultiPoint(vec![point::p1()])) - ); - assert_eq!( - arr.value_as_geo(2), - geo::Geometry::MultiPoint(geo::MultiPoint(vec![point::p2()])) - ); - } - - #[test] - fn geo_roundtrip_accurate_all() { - let geoms: Vec = vec![ - geo::Geometry::Point(point::p0()), - geo::Geometry::LineString(linestring::ls0()), - geo::Geometry::Polygon(polygon::p0()), - geo::Geometry::MultiPoint(multipoint::mp0()), - geo::Geometry::MultiLineString(multilinestring::ml0()), - geo::Geometry::MultiPolygon(multipolygon::mp0()), - ]; - - let arr: UnknownGeometryArray = UnknownGeometryBuilder::from_geometries( - geoms.as_slice(), - Default::default(), - Default::default(), - false, - ) - .unwrap() - .finish(); - - assert_eq!(arr.value_as_geo(0), geoms[0]); - assert_eq!(arr.value_as_geo(1), geoms[1]); - assert_eq!(arr.value_as_geo(2), geoms[2]); - assert_eq!(arr.value_as_geo(3), geoms[3]); - assert_eq!(arr.value_as_geo(4), geoms[4]); - assert_eq!(arr.value_as_geo(5), geoms[5]); - } - - #[test] - fn arrow_roundtrip() { - let geoms: Vec = vec![ - geo::Geometry::Point(point::p0()), - geo::Geometry::LineString(linestring::ls0()), - geo::Geometry::Polygon(polygon::p0()), - geo::Geometry::MultiPoint(multipoint::mp0()), - geo::Geometry::MultiLineString(multilinestring::ml0()), - geo::Geometry::MultiPolygon(multipolygon::mp0()), - ]; - - let arr: UnknownGeometryArray = UnknownGeometryBuilder::from_geometries( - geoms.as_slice(), - Default::default(), - Default::default(), - false, - ) - .unwrap() - .finish(); - - // Round trip to/from arrow-rs - let arrow_array = arr.into_arrow(); - let round_trip_arr: UnknownGeometryArray = (&arrow_array).try_into().unwrap(); - - assert_eq!(round_trip_arr.value_as_geo(0), geoms[0]); - assert_eq!(round_trip_arr.value_as_geo(1), geoms[1]); - assert_eq!(round_trip_arr.value_as_geo(2), geoms[2]); - assert_eq!(round_trip_arr.value_as_geo(3), geoms[3]); - assert_eq!(round_trip_arr.value_as_geo(4), geoms[4]); - assert_eq!(round_trip_arr.value_as_geo(5), geoms[5]); - } - - #[test] - fn arrow_roundtrip_not_all_types() { - let geoms: Vec = vec![ - geo::Geometry::MultiPoint(multipoint::mp0()), - geo::Geometry::MultiLineString(multilinestring::ml0()), - geo::Geometry::MultiPolygon(multipolygon::mp0()), - ]; - - let arr: UnknownGeometryArray = UnknownGeometryBuilder::from_geometries( - geoms.as_slice(), - Default::default(), - Default::default(), - false, - ) - .unwrap() - .finish(); - - // Round trip to/from arrow-rs - let arrow_array = arr.into_arrow(); - let round_trip_arr: UnknownGeometryArray = (&arrow_array).try_into().unwrap(); - - assert_eq!(round_trip_arr.value_as_geo(0), geoms[0]); - assert_eq!(round_trip_arr.value_as_geo(1), geoms[1]); - assert_eq!(round_trip_arr.value_as_geo(2), geoms[2]); - } - - #[test] - fn arrow_roundtrip_not_all_types2() { - let geoms: Vec = vec![ - geo::Geometry::MultiPoint(multipoint::mp0()), - geo::Geometry::MultiPolygon(multipolygon::mp0()), - ]; - - let arr: UnknownGeometryArray = UnknownGeometryBuilder::from_geometries( - geoms.as_slice(), - Default::default(), - Default::default(), - false, - ) - .unwrap() - .finish(); - - // Round trip to/from arrow-rs - let arrow_array = arr.into_arrow(); - let round_trip_arr: UnknownGeometryArray = (&arrow_array).try_into().unwrap(); - - assert_eq!(round_trip_arr.value_as_geo(0), geoms[0]); - assert_eq!(round_trip_arr.value_as_geo(1), geoms[1]); - } -} diff --git a/rust/geoarrow/src/array/unknown/mod.rs b/rust/geoarrow/src/array/unknown/mod.rs deleted file mode 100644 index 29af94a2..00000000 --- a/rust/geoarrow/src/array/unknown/mod.rs +++ /dev/null @@ -1,7 +0,0 @@ -mod array; -mod builder; -mod capacity; - -pub use array::UnknownGeometryArray; -pub use builder::UnknownGeometryBuilder; -pub use capacity::UnknownCapacity; diff --git a/rust/geoarrow/src/chunked_array/dynamic.rs b/rust/geoarrow/src/chunked_array/dynamic.rs index 16078e66..057945ec 100644 --- a/rust/geoarrow/src/chunked_array/dynamic.rs +++ b/rust/geoarrow/src/chunked_array/dynamic.rs @@ -69,7 +69,7 @@ impl ChunkedNativeArrayDyn { impl_downcast!(GeometryCollectionArray) } Rect(_) => impl_downcast!(RectArray), - Unknown(_) => impl_downcast!(UnknownGeometryArray), + Geometry(_) => impl_downcast!(GeometryArray), }; Ok(Self(ca)) } @@ -123,7 +123,7 @@ impl ChunkedNativeArrayDyn { Mixed(_, _) => impl_downcast!(as_mixed), GeometryCollection(_, _) => impl_downcast!(as_geometry_collection), Rect(_) => impl_downcast!(as_rect), - Unknown(_) => impl_downcast!(as_unknown), + Geometry(_) => impl_downcast!(as_geometry), }; Ok(Self(result)) } else { diff --git a/rust/geoarrow/src/chunked_array/mod.rs b/rust/geoarrow/src/chunked_array/mod.rs index 12eaa9ce..bff05268 100644 --- a/rust/geoarrow/src/chunked_array/mod.rs +++ b/rust/geoarrow/src/chunked_array/mod.rs @@ -598,7 +598,7 @@ pub type ChunkedGeometryCollectionArray = ChunkedGeometryArray; /// A chunked unknown geometry array. -pub type ChunkedUnknownGeometryArray = ChunkedGeometryArray; +pub type ChunkedUnknownGeometryArray = ChunkedGeometryArray; /// A chunked WKB array. pub type ChunkedWKBArray = ChunkedGeometryArray>; diff --git a/rust/geoarrow/src/datatypes.rs b/rust/geoarrow/src/datatypes.rs index e4b4fee9..de9e8408 100644 --- a/rust/geoarrow/src/datatypes.rs +++ b/rust/geoarrow/src/datatypes.rs @@ -144,7 +144,7 @@ pub enum NativeType { Rect(Dimension), /// Represents a mixed geometry array of unknown types or dimensions - Unknown(CoordType), + Geometry(CoordType), } /// A type enum representing "serialized" GeoArrow geometry types. @@ -394,7 +394,7 @@ impl NativeType { Mixed(ct, _) => *ct, GeometryCollection(ct, _) => *ct, Rect(_) => CoordType::Separated, - Unknown(ct) => *ct, + Geometry(ct) => *ct, } } @@ -413,7 +413,7 @@ impl NativeType { Mixed(_, dim) => Some(*dim), GeometryCollection(_, dim) => Some(*dim), Rect(dim) => Some(*dim), - Unknown(_) => None, + Geometry(_) => None, } } @@ -443,7 +443,7 @@ impl NativeType { Mixed(coord_type, dim) => mixed_data_type(*coord_type, *dim), GeometryCollection(coord_type, dim) => geometry_collection_data_type(*coord_type, *dim), Rect(dim) => rect_data_type(*dim), - Unknown(coord_type) => unknown_data_type(*coord_type), + Geometry(coord_type) => unknown_data_type(*coord_type), } } @@ -469,7 +469,7 @@ impl NativeType { Mixed(_, _) => "geoarrow.geometry", GeometryCollection(_, _) => "geoarrow.geometrycollection", Rect(_) => "geoarrow.box", - Unknown(_) => "geoarrow.unknown", + Geometry(_) => "geoarrow.unknown", } } @@ -554,7 +554,7 @@ impl NativeType { Mixed(_, dim) => Mixed(coord_type, dim), GeometryCollection(_, dim) => GeometryCollection(coord_type, dim), Rect(dim) => Rect(dim), - Unknown(_) => Unknown(coord_type), + Geometry(_) => Geometry(coord_type), } } @@ -580,7 +580,7 @@ impl NativeType { Mixed(coord_type, _) => Mixed(coord_type, dim), GeometryCollection(coord_type, _) => GeometryCollection(coord_type, dim), Rect(_) => Rect(dim), - Unknown(coord_type) => Unknown(coord_type), + Geometry(coord_type) => Geometry(coord_type), } } } @@ -1072,7 +1072,7 @@ fn parse_unknown(field: &Field) -> Result { } let coord_type = coord_types.drain().next().unwrap(); - Ok(NativeType::Unknown(coord_type)) + Ok(NativeType::Geometry(coord_type)) } else { Err(GeoArrowError::General("Expected union type".to_string())) } diff --git a/rust/geoarrow/src/io/flatgeobuf/writer.rs b/rust/geoarrow/src/io/flatgeobuf/writer.rs index 2a3fe010..dbf5e999 100644 --- a/rust/geoarrow/src/io/flatgeobuf/writer.rs +++ b/rust/geoarrow/src/io/flatgeobuf/writer.rs @@ -86,7 +86,7 @@ fn infer_flatgeobuf_geometry_type( matches!(dim, Dimension::XYZ), ), // We'll just claim that it does have 3d data. Not sure whether this is bad to lie here? - Unknown(_) => (flatgeobuf::GeometryType::Unknown, true), + Geometry(_) => (flatgeobuf::GeometryType::Unknown, true), }; Ok((geometry_type, has_z)) } diff --git a/rust/geoarrow/src/io/geozero/scalar/geometry_array.rs b/rust/geoarrow/src/io/geozero/scalar/geometry_array.rs index e704bd81..b2146ee6 100644 --- a/rust/geoarrow/src/io/geozero/scalar/geometry_array.rs +++ b/rust/geoarrow/src/io/geozero/scalar/geometry_array.rs @@ -62,7 +62,7 @@ pub fn process_geometry_scalar_array( // process_geometry(&wkb_object, geom_idx, processor) // } Rect(_) => todo!(), - Unknown(_) => impl_process!(process_geometry, as_unknown), + Geometry(_) => impl_process!(process_geometry, as_geometry), } } diff --git a/rust/geoarrow/src/io/geozero/table/data_source.rs b/rust/geoarrow/src/io/geozero/table/data_source.rs index fe1acbaa..76436a05 100644 --- a/rust/geoarrow/src/io/geozero/table/data_source.rs +++ b/rust/geoarrow/src/io/geozero/table/data_source.rs @@ -414,8 +414,8 @@ fn process_geometry_n( // let geom = arr.as_ref().as_rect::<2>().value(i); // process_rect } - Unknown(_) => { - let geom = arr.as_unknown().value(i); + Geometry(_) => { + let geom = arr.as_geometry().value(i); process_geometry(&geom, 0, processor)?; } } diff --git a/rust/geoarrow/src/io/parquet/writer/metadata.rs b/rust/geoarrow/src/io/parquet/writer/metadata.rs index 04553f6c..db3e7783 100644 --- a/rust/geoarrow/src/io/parquet/writer/metadata.rs +++ b/rust/geoarrow/src/io/parquet/writer/metadata.rs @@ -104,8 +104,8 @@ impl ColumnInfo { } } - if let NativeType::Unknown(_) = array_ref.data_type() { - let arr = array_ref.as_unknown(); + if let NativeType::Geometry(_) = array_ref.data_type() { + let arr = array_ref.as_geometry(); if arr.has_points(Dimension::XY) || arr.has_points(Dimension::XYZ) { self.geometry_types.insert(GeoParquetGeometryType::Point); } @@ -284,7 +284,7 @@ pub fn get_geometry_types(data_type: &NativeType) -> HashSet { geometry_types.insert(MultiPolygonZ); } - NativeType::Mixed(_, _) | NativeType::Unknown(_) => { + NativeType::Mixed(_, _) | NativeType::Geometry(_) => { // We don't have access to the actual data here, so we can't inspect better than this. } NativeType::GeometryCollection(_, Dimension::XY) => { diff --git a/rust/geoarrow/src/io/wkb/api.rs b/rust/geoarrow/src/io/wkb/api.rs index 70d04bc0..b446bc7f 100644 --- a/rust/geoarrow/src/io/wkb/api.rs +++ b/rust/geoarrow/src/io/wkb/api.rs @@ -1,8 +1,6 @@ use std::sync::Arc; use crate::algorithm::native::Downcast; -use crate::array::geometrycollection::GeometryCollectionBuilder; -use crate::array::unknown::UnknownGeometryBuilder; use crate::array::*; use crate::chunked_array::*; use crate::datatypes::{Dimension, NativeType}; @@ -250,8 +248,8 @@ pub fn from_wkb( "Unexpected data type {:?}", target_geo_data_type, ))), - Unknown(coord_type) => { - let builder = UnknownGeometryBuilder::from_wkb( + Geometry(coord_type) => { + let builder = GeometryBuilder::from_wkb( &wkb_objects, Some(coord_type), arr.metadata(), @@ -290,7 +288,7 @@ impl ToWKB for &dyn NativeArray { GeometryCollection(_, _) => self.as_geometry_collection().into(), Rect(_) => todo!(), - Unknown(_) => self.as_unknown().into(), + Geometry(_) => self.as_geometry().into(), } } } @@ -321,7 +319,7 @@ impl ToWKB for &dyn ChunkedNativeArray { ChunkedGeometryArray::new(self.as_geometry_collection().map(|chunk| chunk.into())) } Rect(_) => todo!(), - Unknown(_) => ChunkedGeometryArray::new(self.as_mixed().map(|chunk| chunk.into())), + Geometry(_) => ChunkedGeometryArray::new(self.as_mixed().map(|chunk| chunk.into())), } } } @@ -340,7 +338,7 @@ pub fn to_wkb(arr: &dyn NativeArray) -> WKBArray { Mixed(_, _) => arr.as_mixed().into(), GeometryCollection(_, _) => arr.as_geometry_collection().into(), Rect(_) => todo!(), - Unknown(_) => arr.as_unknown().into(), + Geometry(_) => arr.as_geometry().into(), } } diff --git a/rust/geoarrow/src/io/wkb/writer/geometry.rs b/rust/geoarrow/src/io/wkb/writer/geometry.rs index 55fe8e46..b02e2ce7 100644 --- a/rust/geoarrow/src/io/wkb/writer/geometry.rs +++ b/rust/geoarrow/src/io/wkb/writer/geometry.rs @@ -4,7 +4,7 @@ use wkb::writer::{geometry_wkb_size, write_geometry}; use wkb::Endianness; use crate::array::offset_builder::OffsetsBuilder; -use crate::array::{MixedGeometryArray, UnknownGeometryArray, WKBArray}; +use crate::array::{GeometryArray, MixedGeometryArray, WKBArray}; use crate::trait_::ArrayAccessor; use crate::ArrayBase; use std::io::Cursor; @@ -42,8 +42,8 @@ impl From<&MixedGeometryArray> for WKBArray { } } -impl From<&UnknownGeometryArray> for WKBArray { - fn from(value: &UnknownGeometryArray) -> Self { +impl From<&GeometryArray> for WKBArray { + fn from(value: &GeometryArray) -> Self { let mut offsets: OffsetsBuilder = OffsetsBuilder::with_capacity(value.len()); // First pass: calculate binary array offsets diff --git a/rust/geoarrow/src/io/wkt/reader.rs b/rust/geoarrow/src/io/wkt/reader.rs index 6ccab50f..2f52a7b7 100644 --- a/rust/geoarrow/src/io/wkt/reader.rs +++ b/rust/geoarrow/src/io/wkt/reader.rs @@ -4,13 +4,13 @@ use std::sync::Arc; use arrow_array::OffsetSizeTrait; use crate::array::metadata::ArrayMetadata; -use crate::array::{CoordType, UnknownGeometryArray, UnknownGeometryBuilder, WKTArray}; +use crate::array::{CoordType, GeometryArray, GeometryBuilder, WKTArray}; use crate::error::{GeoArrowError, Result}; use crate::{ArrayBase, NativeArray}; /// Parse a WKT array into a native GeoArrow array. /// -/// Currently, an [UnknownGeometryArray] is always returned. This may change in the future with the +/// Currently, a [GeometryArray] is always returned. This may change in the future with the /// addition of a `downcast` parameter, which would automatically downcast the result. pub fn read_wkt( arr: &WKTArray, @@ -27,8 +27,8 @@ fn from_str_iter<'a>( coord_type: CoordType, metadata: Arc, prefer_multi: bool, -) -> Result { - let mut builder = UnknownGeometryBuilder::new_with_options(coord_type, metadata, prefer_multi); +) -> Result { + let mut builder = GeometryBuilder::new_with_options(coord_type, metadata, prefer_multi); for wkt_str in iter { if let Some(s) = wkt_str { let wkt = wkt::Wkt::::from_str(s).map_err(GeoArrowError::WktStrError)?; @@ -61,7 +61,7 @@ mod test { let parsed = read_wkt(&arr, Default::default(), false).unwrap(); let parsed_ref = parsed.as_ref(); - let geom_arr = parsed_ref.as_unknown(); + let geom_arr = parsed_ref.as_geometry(); assert_eq!( geom_arr.value_as_geo(0), diff --git a/rust/geoarrow/src/io/wkt/writer.rs b/rust/geoarrow/src/io/wkt/writer.rs index 03ee304a..957d1727 100644 --- a/rust/geoarrow/src/io/wkt/writer.rs +++ b/rust/geoarrow/src/io/wkt/writer.rs @@ -64,7 +64,7 @@ impl ToWKT for &dyn NativeArray { impl_to_wkt!(as_geometry_collection, write_geometry_collection) } Rect(_) => impl_to_wkt!(as_rect, write_rect), - Unknown(_) => impl_to_wkt!(as_unknown, write_geometry), + Geometry(_) => impl_to_wkt!(as_geometry, write_geometry), } Ok(WKTArray::new(output_array.finish(), metadata)) @@ -94,7 +94,7 @@ impl ToWKT for &dyn ChunkedNativeArray { Mixed(_, _) => impl_to_wkt!(as_mixed), GeometryCollection(_, _) => impl_to_wkt!(as_geometry_collection), Rect(_) => impl_to_wkt!(as_rect), - Unknown(_) => impl_to_wkt!(as_unknown), + Geometry(_) => impl_to_wkt!(as_geometry), } } } diff --git a/rust/geoarrow/src/scalar/scalar.rs b/rust/geoarrow/src/scalar/scalar.rs index 57b6b643..16fd0973 100644 --- a/rust/geoarrow/src/scalar/scalar.rs +++ b/rust/geoarrow/src/scalar/scalar.rs @@ -1,7 +1,6 @@ use crate::array::{ - AsNativeArray, GeometryCollectionArray, LineStringArray, MixedGeometryArray, + AsNativeArray, GeometryArray, GeometryCollectionArray, LineStringArray, MixedGeometryArray, MultiLineStringArray, MultiPointArray, MultiPolygonArray, PointArray, PolygonArray, RectArray, - UnknownGeometryArray, }; use crate::datatypes::{Dimension, NativeType}; use crate::error::{GeoArrowError, Result}; @@ -52,24 +51,22 @@ impl GeometryScalar { | Mixed(_, dim) | GeometryCollection(_, dim) | Rect(dim) => dim, - Unknown(_) => todo!(), // WKB => { - // let arr = self.0.as_ref(); - // let wkb_arr = arr.as_wkb().value(0); - // let wkb_obj = wkb_arr.to_wkb_object(); - // wkb_obj.dimension() - // } - // LargeWKB => { - // let arr = self.0.as_ref(); - // let wkb_arr = arr.as_large_wkb().value(0); - // let wkb_obj = wkb_arr.to_wkb_object(); - // wkb_obj.dimension() - // } + Geometry(_) => todo!(), // WKB => { + // let arr = self.0.as_ref(); + // let wkb_arr = arr.as_wkb().value(0); + // let wkb_obj = wkb_arr.to_wkb_object(); + // wkb_obj.dimension() + // } + // LargeWKB => { + // let arr = self.0.as_ref(); + // let wkb_arr = arr.as_large_wkb().value(0); + // let wkb_obj = wkb_arr.to_wkb_object(); + // wkb_obj.dimension() + // } } } pub fn as_geometry(&self) -> Option> { - use NativeType::*; - // Note: we use `.downcast_ref` directly here because we need to pass in the generic // TODO: may be able to change this now that we don't have // @@ -79,23 +76,23 @@ impl GeometryScalar { // to work around that. match self.data_type() { - Point(_, _) => { + NativeType::Point(_, _) => { let arr = self.0.as_any().downcast_ref::().unwrap(); arr.get(0).map(Geometry::Point) } - LineString(_, _) => { + NativeType::LineString(_, _) => { let arr = self.0.as_any().downcast_ref::().unwrap(); arr.get(0).map(Geometry::LineString) } - Polygon(_, _) => { + NativeType::Polygon(_, _) => { let arr = self.0.as_any().downcast_ref::().unwrap(); arr.get(0).map(Geometry::Polygon) } - MultiPoint(_, _) => { + NativeType::MultiPoint(_, _) => { let arr = self.0.as_any().downcast_ref::().unwrap(); arr.get(0).map(Geometry::MultiPoint) } - MultiLineString(_, _) => { + NativeType::MultiLineString(_, _) => { let arr = self .0 .as_any() @@ -103,11 +100,11 @@ impl GeometryScalar { .unwrap(); arr.get(0).map(Geometry::MultiLineString) } - MultiPolygon(_, _) => { + NativeType::MultiPolygon(_, _) => { let arr = self.0.as_any().downcast_ref::().unwrap(); arr.get(0).map(Geometry::MultiPolygon) } - Mixed(_, _) => { + NativeType::Mixed(_, _) => { let arr = self .0 .as_any() @@ -115,7 +112,7 @@ impl GeometryScalar { .unwrap(); arr.get(0) } - GeometryCollection(_, _) => { + NativeType::GeometryCollection(_, _) => { let arr = self .0 .as_any() @@ -123,16 +120,12 @@ impl GeometryScalar { .unwrap(); arr.get(0).map(Geometry::GeometryCollection) } - Rect(_) => { + NativeType::Rect(_) => { let arr = self.0.as_any().downcast_ref::().unwrap(); arr.get(0).map(Geometry::Rect) } - Unknown(_) => { - let arr = self - .0 - .as_any() - .downcast_ref::() - .unwrap(); + NativeType::Geometry(_) => { + let arr = self.0.as_any().downcast_ref::().unwrap(); arr.get(0) } } @@ -157,7 +150,7 @@ impl GeometryScalar { Mixed(_, _) => impl_to_geo!(as_mixed), GeometryCollection(_, _) => impl_to_geo!(as_geometry_collection), Rect(_) => impl_to_geo!(as_rect), - Unknown(_) => impl_to_geo!(as_unknown), + Geometry(_) => impl_to_geo!(as_geometry), } }