diff --git a/rust/geoarrow/src/algorithm/native/cast.rs b/rust/geoarrow/src/algorithm/native/cast.rs index 22da66c9..33dce570 100644 --- a/rust/geoarrow/src/algorithm/native/cast.rs +++ b/rust/geoarrow/src/algorithm/native/cast.rs @@ -28,7 +28,7 @@ impl Default for CastOptions { /// Note: not currently used and outdated #[allow(dead_code)] -fn can_cast_types(from_type: &NativeType, to_type: &NativeType) -> bool { +fn can_cast_types(from_type: NativeType, to_type: NativeType) -> bool { if from_type == to_type { return true; } @@ -51,13 +51,13 @@ pub trait Cast { type Output; /// Note: **does not currently implement dimension casts** - fn cast(&self, to_type: &NativeType) -> Self::Output; + fn cast(&self, to_type: NativeType) -> Self::Output; } impl Cast for PointArray { type Output = Result>; - fn cast(&self, to_type: &NativeType) -> Self::Output { + fn cast(&self, to_type: NativeType) -> Self::Output { use NativeType::*; let array = self.to_coord_type(to_type.coord_type()); @@ -66,6 +66,7 @@ impl Cast for PointArray { MultiPoint(_, _) => Ok(Arc::new(MultiPointArray::from(array))), Mixed(_, _) => Ok(Arc::new(MixedGeometryArray::from(array))), GeometryCollection(_, _) => Ok(Arc::new(GeometryCollectionArray::from(array))), + Geometry(_) => Ok(Arc::new(GeometryArray::from(array))), dt => Err(GeoArrowError::General(format!( "invalid cast to type {dt:?}" ))), @@ -76,7 +77,7 @@ impl Cast for PointArray { impl Cast for LineStringArray { type Output = Result>; - fn cast(&self, to_type: &NativeType) -> Self::Output { + fn cast(&self, to_type: NativeType) -> Self::Output { use NativeType::*; let array = self.to_coord_type(to_type.coord_type()); @@ -86,6 +87,7 @@ impl Cast for LineStringArray { MultiLineString(_, _) => Ok(Arc::new(MultiLineStringArray::from(array))), Mixed(_, _) => Ok(Arc::new(MixedGeometryArray::from(array))), GeometryCollection(_, _) => Ok(Arc::new(GeometryCollectionArray::from(array))), + Geometry(_) => Ok(Arc::new(GeometryArray::from(array))), dt => Err(GeoArrowError::General(format!( "invalid cast to type {dt:?}" ))), @@ -96,7 +98,7 @@ impl Cast for LineStringArray { impl Cast for PolygonArray { type Output = Result>; - fn cast(&self, to_type: &NativeType) -> Self::Output { + fn cast(&self, to_type: NativeType) -> Self::Output { use NativeType::*; let array = self.to_coord_type(to_type.coord_type()); @@ -106,6 +108,7 @@ impl Cast for PolygonArray { MultiPolygon(_, _) => Ok(Arc::new(MultiPolygonArray::from(array))), Mixed(_, _) => Ok(Arc::new(MixedGeometryArray::from(array))), GeometryCollection(_, _) => Ok(Arc::new(GeometryCollectionArray::from(array))), + Geometry(_) => Ok(Arc::new(GeometryArray::from(array))), dt => Err(GeoArrowError::General(format!( "invalid cast to type {dt:?}" ))), @@ -116,7 +119,7 @@ impl Cast for PolygonArray { impl Cast for MultiPointArray { type Output = Result>; - fn cast(&self, to_type: &NativeType) -> Self::Output { + fn cast(&self, to_type: NativeType) -> Self::Output { use NativeType::*; let array = self.to_coord_type(to_type.coord_type()); @@ -126,6 +129,7 @@ impl Cast for MultiPointArray { MultiPoint(_, _) => Ok(Arc::new(array)), Mixed(_, _) => Ok(Arc::new(MixedGeometryArray::from(array))), GeometryCollection(_, _) => Ok(Arc::new(GeometryCollectionArray::from(array))), + Geometry(_) => Ok(Arc::new(GeometryArray::from(array))), dt => Err(GeoArrowError::General(format!( "invalid cast to type {dt:?}" ))), @@ -136,7 +140,7 @@ impl Cast for MultiPointArray { impl Cast for MultiLineStringArray { type Output = Result>; - fn cast(&self, to_type: &NativeType) -> Self::Output { + fn cast(&self, to_type: NativeType) -> Self::Output { use NativeType::*; let array = self.to_coord_type(to_type.coord_type()); @@ -145,6 +149,7 @@ impl Cast for MultiLineStringArray { LineString(_, _) => Ok(Arc::new(LineStringArray::try_from(array)?)), Mixed(_, _) => Ok(Arc::new(MixedGeometryArray::from(array))), GeometryCollection(_, _) => Ok(Arc::new(GeometryCollectionArray::from(array))), + Geometry(_) => Ok(Arc::new(GeometryArray::from(array))), dt => Err(GeoArrowError::General(format!( "invalid cast to type {dt:?}" ))), @@ -155,7 +160,7 @@ impl Cast for MultiLineStringArray { impl Cast for MultiPolygonArray { type Output = Result>; - fn cast(&self, to_type: &NativeType) -> Self::Output { + fn cast(&self, to_type: NativeType) -> Self::Output { use NativeType::*; let array = self.to_coord_type(to_type.coord_type()); @@ -164,6 +169,7 @@ impl Cast for MultiPolygonArray { Polygon(_, _) => Ok(Arc::new(PolygonArray::try_from(array)?)), Mixed(_, _) => Ok(Arc::new(MixedGeometryArray::from(array))), GeometryCollection(_, _) => Ok(Arc::new(GeometryCollectionArray::from(array))), + Geometry(_) => Ok(Arc::new(GeometryArray::from(array))), dt => Err(GeoArrowError::General(format!( "invalid cast to type {dt:?}" ))), @@ -174,7 +180,7 @@ impl Cast for MultiPolygonArray { impl Cast for MixedGeometryArray { type Output = Result>; - fn cast(&self, to_type: &NativeType) -> Self::Output { + fn cast(&self, to_type: NativeType) -> Self::Output { use NativeType::*; let array = self.to_coord_type(to_type.coord_type()); @@ -198,7 +204,7 @@ impl Cast for MixedGeometryArray { impl Cast for GeometryCollectionArray { type Output = Result>; - fn cast(&self, to_type: &NativeType) -> Self::Output { + fn cast(&self, to_type: NativeType) -> Self::Output { use NativeType::*; let array = self.to_coord_type(to_type.coord_type()); @@ -212,6 +218,7 @@ impl Cast for GeometryCollectionArray { MultiPolygon(_, _) => Ok(Arc::new(MultiPolygonArray::try_from(array)?)), Mixed(_, _) => Ok(Arc::new(MixedGeometryArray::try_from(array)?)), GeometryCollection(_, _) => Ok(Arc::new(array)), + Geometry(_) => Ok(Arc::new(GeometryArray::from(array))), dt => Err(GeoArrowError::General(format!( "invalid cast to type {dt:?}" ))), @@ -219,10 +226,21 @@ impl Cast for GeometryCollectionArray { } } +impl Cast for GeometryArray { + type Output = Result>; + + fn cast(&self, to_type: NativeType) -> Self::Output { + // TODO: validate dimension + let array = self.to_coord_type(to_type.coord_type()); + let mixed_array = MixedGeometryArray::try_from(array)?; + mixed_array.cast(to_type) + } +} + impl Cast for &dyn NativeArray { type Output = Result>; - fn cast(&self, to_type: &NativeType) -> Self::Output { + fn cast(&self, to_type: NativeType) -> Self::Output { // TODO: not working :/ // if self.data_type() == to_type { // return Ok(Arc::new(self.to_owned())); @@ -239,56 +257,18 @@ impl Cast for &dyn NativeArray { MultiPolygon(_, _) => self.as_ref().as_multi_polygon().cast(to_type), Mixed(_, _) => self.as_ref().as_mixed().cast(to_type), GeometryCollection(_, _) => self.as_ref().as_geometry_collection().cast(to_type), + Geometry(_) => self.as_ref().as_geometry().cast(to_type), _ => todo!(), } } } -macro_rules! impl_chunked_cast_non_generic { - ($chunked_array:ty) => { - impl Cast for $chunked_array { - type Output = Result>; - - fn cast(&self, to_type: &NativeType) -> Self::Output { - macro_rules! impl_cast { - ($method:ident) => { - Arc::new(ChunkedGeometryArray::new( - self.geometry_chunks() - .iter() - .map(|chunk| { - Ok(chunk.as_ref().cast(to_type)?.as_ref().$method().clone()) - }) - .collect::>>()?, - )) - }; - } - - use NativeType::*; - - let result: Arc = match to_type { - Point(_, _) => impl_cast!(as_point), - LineString(_, _) => impl_cast!(as_line_string), - Polygon(_, _) => impl_cast!(as_polygon), - MultiPoint(_, _) => impl_cast!(as_multi_point), - MultiLineString(_, _) => impl_cast!(as_multi_line_string), - MultiPolygon(_, _) => impl_cast!(as_multi_polygon), - Mixed(_, _) => impl_cast!(as_mixed), - GeometryCollection(_, _) => impl_cast!(as_geometry_collection), - Rect(_) => impl_cast!(as_rect), - Geometry(_) => todo!("cast to unknown"), - }; - Ok(result) - } - } - }; -} - -macro_rules! impl_chunked_cast_generic { +macro_rules! impl_chunked_cast { ($chunked_array:ty) => { impl Cast for $chunked_array { type Output = Result>; - fn cast(&self, to_type: &NativeType) -> Self::Output { + fn cast(&self, to_type: NativeType) -> Self::Output { macro_rules! impl_cast { ($method:ident) => { Arc::new(ChunkedGeometryArray::new( @@ -314,7 +294,7 @@ macro_rules! impl_chunked_cast_generic { Mixed(_, _) => impl_cast!(as_mixed), GeometryCollection(_, _) => impl_cast!(as_geometry_collection), Rect(_) => impl_cast!(as_rect), - Geometry(_) => todo!("cast to unknown"), + Geometry(_) => impl_cast!(as_geometry), }; Ok(result) } @@ -322,13 +302,14 @@ macro_rules! impl_chunked_cast_generic { }; } -impl_chunked_cast_non_generic!(ChunkedPointArray); -impl_chunked_cast_non_generic!(ChunkedRectArray); -impl_chunked_cast_non_generic!(&dyn ChunkedNativeArray); -impl_chunked_cast_generic!(ChunkedLineStringArray); -impl_chunked_cast_generic!(ChunkedPolygonArray); -impl_chunked_cast_generic!(ChunkedMultiPointArray); -impl_chunked_cast_generic!(ChunkedMultiLineStringArray); -impl_chunked_cast_generic!(ChunkedMultiPolygonArray); -impl_chunked_cast_generic!(ChunkedMixedGeometryArray); -impl_chunked_cast_generic!(ChunkedGeometryCollectionArray); +impl_chunked_cast!(ChunkedPointArray); +impl_chunked_cast!(ChunkedRectArray); +impl_chunked_cast!(&dyn ChunkedNativeArray); +impl_chunked_cast!(ChunkedLineStringArray); +impl_chunked_cast!(ChunkedPolygonArray); +impl_chunked_cast!(ChunkedMultiPointArray); +impl_chunked_cast!(ChunkedMultiLineStringArray); +impl_chunked_cast!(ChunkedMultiPolygonArray); +impl_chunked_cast!(ChunkedMixedGeometryArray); +impl_chunked_cast!(ChunkedGeometryCollectionArray); +impl_chunked_cast!(ChunkedUnknownGeometryArray); diff --git a/rust/geoarrow/src/algorithm/native/downcast.rs b/rust/geoarrow/src/algorithm/native/downcast.rs index cad3c47a..5704afe8 100644 --- a/rust/geoarrow/src/algorithm/native/downcast.rs +++ b/rust/geoarrow/src/algorithm/native/downcast.rs @@ -7,8 +7,6 @@ use arrow_array::OffsetSizeTrait; use arrow_buffer::OffsetBuffer; use crate::algorithm::native::cast::Cast; -use crate::array::offset_builder::OffsetsBuilder; -use crate::array::util::OffsetBufferUtils; use crate::array::*; use crate::chunked_array::*; use crate::datatypes::{Dimension, NativeType}; @@ -18,6 +16,8 @@ use crate::table::Table; use crate::NativeArray; /// Downcast will change between geometry types but will not affect the dimension of the data. +/// +/// Downcast will not change the coordinate type of the data. pub trait Downcast { type Output; @@ -49,61 +49,30 @@ impl Downcast for PointArray { } } -/// Returns `true` if this offsets buffer is type `i64` and would fit in an `i32` -/// -/// If the offset type `O` is already `i32`, will return false -#[allow(dead_code)] -fn can_downcast_offsets_i32(buffer: &OffsetBuffer) -> bool { - if O::IS_LARGE { - buffer.last().to_usize().unwrap() < i32::MAX as usize - } else { - false - } -} - -/// Downcast an i64 offset buffer to i32 -/// -/// This copies the buffer into an i32 -#[allow(dead_code)] -fn downcast_offsets(buffer: &OffsetBuffer) -> OffsetBuffer { - if O::IS_LARGE { - let mut builder = OffsetsBuilder::with_capacity(buffer.len_proxy()); - buffer - .iter() - .for_each(|x| builder.try_push(x.to_usize().unwrap() as i32).unwrap()); - builder.finish() - } else { - // This function should never be called when offsets are i32 - unreachable!() - } -} - /// Returns `true` if this Multi-geometry array can fit into a non-multi array /// /// Note that we can't just check the value of the last offset, because there could be a null /// element with length 0 and then a multi point of length 2. We need to check that every offset is /// <= 1. +/// +/// Also note that for now, we explicitly check `== 1` instead of `<= 1`. Having an offset of +/// length 0 means that the geometry is empty, and the cast functionality would need to handle +/// that. pub(crate) fn can_downcast_multi(buffer: &OffsetBuffer) -> bool { buffer .windows(2) - .all(|slice| *slice.get(1).unwrap() - *slice.first().unwrap() <= O::one()) + .all(|slice| *slice.get(1).unwrap() - *slice.first().unwrap() == O::one()) } impl Downcast for LineStringArray { type Output = Arc; fn downcasted_data_type(&self) -> NativeType { - match self.data_type() { - NativeType::LineString(ct, dim) => NativeType::LineString(ct, dim), - _ => unreachable!(), - } + self.data_type() } fn downcast(&self) -> Self::Output { - match (self.data_type(), self.downcasted_data_type()) { - (NativeType::LineString(_, _), NativeType::LineString(_, _)) => Arc::new(self.clone()), - _ => unreachable!(), - } + Arc::new(self.clone()) } } @@ -111,10 +80,7 @@ impl Downcast for PolygonArray { type Output = Arc; fn downcasted_data_type(&self) -> NativeType { - match self.data_type() { - NativeType::Polygon(ct, dim) => NativeType::Polygon(ct, dim), - _ => unreachable!(), - } + self.data_type() } fn downcast(&self) -> Self::Output { @@ -138,16 +104,11 @@ impl Downcast for MultiPointArray { } } fn downcast(&self) -> Self::Output { - // Note: this won't allow a downcast for empty MultiPoints - if *self.geom_offsets.last() as usize == self.len() { - return Arc::new(PointArray::new( - self.coords.clone(), - self.validity.clone(), - self.metadata(), - )); + if let Ok(array) = PointArray::try_from(self.clone()) { + Arc::new(array) + } else { + Arc::new(self.clone()) } - - Arc::new(self.clone()) } } @@ -168,16 +129,11 @@ impl Downcast for MultiLineStringArray { } fn downcast(&self) -> Self::Output { - if *self.geom_offsets.last() as usize == self.len() { - return Arc::new(LineStringArray::new( - self.coords.clone(), - self.ring_offsets.clone(), - self.validity.clone(), - self.metadata(), - )); + if let Ok(array) = LineStringArray::try_from(self.clone()) { + Arc::new(array) + } else { + Arc::new(self.clone()) } - - Arc::new(self.clone()) } } @@ -198,58 +154,36 @@ impl Downcast for MultiPolygonArray { } fn downcast(&self) -> Self::Output { - if *self.geom_offsets.last() as usize == self.len() { - return Arc::new(PolygonArray::new( - self.coords.clone(), - self.polygon_offsets.clone(), - self.ring_offsets.clone(), - self.validity.clone(), - self.metadata(), - )); + if let Ok(array) = PolygonArray::try_from(self.clone()) { + Arc::new(array) + } else { + Arc::new(self.clone()) } - - Arc::new(self.clone()) } } +// Note: this will not downcast on sliced data when it otherwise could, because the children +// haven't been sliced, just the offsets. So it still looks like the children have data. impl Downcast for MixedGeometryArray { - type Output = Arc; + type Output = Result>; fn downcasted_data_type(&self) -> NativeType { - let coord_type = self.coord_type(); - - if self.has_points() - && !self.has_line_strings() - && !self.has_polygons() - && !self.has_multi_points() - && !self.has_multi_line_strings() - && !self.has_multi_polygons() - { - return NativeType::Point(coord_type, Dimension::XY); - } - - if !self.has_points() - && self.has_line_strings() - && !self.has_polygons() - && !self.has_multi_points() - && !self.has_multi_line_strings() - && !self.has_multi_polygons() - { - return self.line_strings.downcasted_data_type(); - } - - if !self.has_points() - && !self.has_line_strings() - && self.has_polygons() - && !self.has_multi_points() - && !self.has_multi_line_strings() - && !self.has_multi_polygons() - { - return self.polygons.downcasted_data_type(); + let types = self.contained_types(); + if types.len() == 1 { + let typ = *types.iter().next().unwrap(); + + // Only has non-multi geometry children + if matches!(typ, NativeType::Point(_, _)) + || matches!(typ, NativeType::LineString(_, _)) + || matches!(typ, NativeType::Polygon(_, _)) + { + return typ; + } } - if !self.has_points() - && !self.has_line_strings() + // Whether or not we have the single-geom type, if we only otherwise have the multi-geom + // type, then we can downcast if we can downcast the multi-geom type. + if !self.has_line_strings() && !self.has_polygons() && self.has_multi_points() && !self.has_multi_line_strings() @@ -259,7 +193,6 @@ impl Downcast for MixedGeometryArray { } if !self.has_points() - && !self.has_line_strings() && !self.has_polygons() && !self.has_multi_points() && self.has_multi_line_strings() @@ -270,7 +203,6 @@ impl Downcast for MixedGeometryArray { if !self.has_points() && !self.has_line_strings() - && !self.has_polygons() && !self.has_multi_points() && !self.has_multi_line_strings() && self.has_multi_polygons() @@ -282,85 +214,24 @@ impl Downcast for MixedGeometryArray { } fn downcast(&self) -> Self::Output { - // TODO: do I need to handle the slice offset? - if self.has_points() - && !self.has_line_strings() - && !self.has_polygons() - && !self.has_multi_points() - && !self.has_multi_line_strings() - && !self.has_multi_polygons() - { - return Arc::new(self.points.clone()); - } - - if !self.has_points() - && self.has_line_strings() - && !self.has_polygons() - && !self.has_multi_points() - && !self.has_multi_line_strings() - && !self.has_multi_polygons() - { - return self.line_strings.downcast(); - } - - if !self.has_points() - && !self.has_line_strings() - && self.has_polygons() - && !self.has_multi_points() - && !self.has_multi_line_strings() - && !self.has_multi_polygons() - { - return self.polygons.downcast(); - } - - if !self.has_points() - && !self.has_line_strings() - && !self.has_polygons() - && self.has_multi_points() - && !self.has_multi_line_strings() - && !self.has_multi_polygons() - { - return self.multi_points.downcast(); - } - - if !self.has_points() - && !self.has_line_strings() - && !self.has_polygons() - && !self.has_multi_points() - && self.has_multi_line_strings() - && !self.has_multi_polygons() - { - return self.multi_line_strings.downcast(); - } - - if !self.has_points() - && !self.has_line_strings() - && !self.has_polygons() - && !self.has_multi_points() - && !self.has_multi_line_strings() - && self.has_multi_polygons() - { - return self.multi_polygons.downcast(); - } - - Arc::new(self.clone()) + self.cast(self.downcasted_data_type()) } } impl Downcast for GeometryCollectionArray { - type Output = Arc; + type Output = Result>; fn downcasted_data_type(&self) -> NativeType { - todo!() - } - fn downcast(&self) -> Self::Output { // TODO: support downcasting with null elements - if *self.geom_offsets.last() as usize == self.len() && self.null_count() == 0 { - // Call downcast on the mixed array - return self.array.downcast(); + if can_downcast_multi(&self.geom_offsets) && self.null_count() == 0 { + self.array.downcasted_data_type() + } else { + self.data_type() } + } - Arc::new(self.clone()) + fn downcast(&self) -> Self::Output { + self.cast(self.downcasted_data_type()) } } @@ -370,13 +241,30 @@ impl Downcast for RectArray { fn downcasted_data_type(&self) -> NativeType { self.data_type() } + fn downcast(&self) -> Self::Output { Arc::new(self.clone()) } } +impl Downcast for GeometryArray { + type Output = Result>; + + fn downcasted_data_type(&self) -> NativeType { + if let Ok(mixed_array) = MixedGeometryArray::try_from(self.clone()) { + mixed_array.downcasted_data_type() + } else { + self.data_type() + } + } + + fn downcast(&self) -> Self::Output { + self.cast(self.downcasted_data_type()) + } +} + impl Downcast for &dyn NativeArray { - type Output = Arc; + type Output = Result>; fn downcasted_data_type(&self) -> NativeType { use NativeType::*; @@ -391,7 +279,7 @@ impl Downcast for &dyn NativeArray { Mixed(_, _) => self.as_mixed().downcasted_data_type(), GeometryCollection(_, _) => self.as_geometry_collection().downcasted_data_type(), Rect(_) => self.as_rect().downcasted_data_type(), - _ => todo!("3d support"), + Geometry(_) => self.as_geometry().downcasted_data_type(), } } @@ -399,16 +287,16 @@ impl Downcast for &dyn NativeArray { use NativeType::*; match self.data_type() { - Point(_, _) => self.as_point().downcast(), - LineString(_, _) => self.as_line_string().downcast(), - Polygon(_, _) => self.as_polygon().downcast(), - MultiPoint(_, _) => self.as_multi_point().downcast(), - MultiLineString(_, _) => self.as_multi_line_string().downcast(), - MultiPolygon(_, _) => self.as_multi_polygon().downcast(), + Point(_, _) => Ok(self.as_point().downcast()), + LineString(_, _) => Ok(self.as_line_string().downcast()), + Polygon(_, _) => Ok(self.as_polygon().downcast()), + MultiPoint(_, _) => Ok(self.as_multi_point().downcast()), + MultiLineString(_, _) => Ok(self.as_multi_line_string().downcast()), + MultiPolygon(_, _) => Ok(self.as_multi_polygon().downcast()), Mixed(_, _) => self.as_mixed().downcast(), GeometryCollection(_, _) => self.as_geometry_collection().downcast(), - Rect(_) => self.as_rect().downcast(), - _ => todo!("3d support"), + Rect(_) => Ok(self.as_rect().downcast()), + Geometry(_) => self.as_geometry().downcast(), } } } @@ -416,35 +304,62 @@ impl Downcast for &dyn NativeArray { /// Given a set of types, return a single type that the result should be casted to fn resolve_types(types: &HashSet) -> NativeType { if types.is_empty() { + // TODO: error here panic!("empty types"); - } else if types.len() == 1 { - *types.iter().next().unwrap() - } else if types.len() == 2 { - let mut extension_name_set = HashSet::new(); - // let mut coord_types = HashSet::new(); - types.iter().for_each(|t| { - extension_name_set.insert(t.extension_name()); - }); - if extension_name_set.contains("geoarrow.point") - && extension_name_set.contains("geoarrow.multipoint") + } + + // If only one type, we can cast to that. + if types.len() == 1 { + return *types.iter().next().unwrap(); + } + + // If Geometry is in the type set, short circuit to that. + if types.contains(&NativeType::Geometry(CoordType::Interleaved)) { + return NativeType::Geometry(CoordType::Interleaved); + } else if types.contains(&NativeType::Geometry(CoordType::Separated)) { + return NativeType::Geometry(CoordType::Separated); + } + + // Since we don't have NativeType::Geometry, dimension should never be null + let dimensions: HashSet = + HashSet::from_iter(types.iter().map(|ty| ty.dimension().unwrap())); + let coord_types: HashSet = + HashSet::from_iter(types.iter().map(|ty| ty.coord_type())); + + // Just take the first one + let coord_type = *coord_types.iter().next().unwrap(); + + // For data with multiple dimensions, we must cast to GeometryArray + if dimensions.len() > 1 { + return NativeType::Geometry(coord_type); + } + // Otherwise, we have just one dimension + let dimension = *dimensions.iter().next().unwrap(); + + // We want to compare geometry types without looking at dimension or coord type. This is a + // slight hack but for now we do that by the string geometry type. + let geometry_type_names: HashSet<&str> = + HashSet::from_iter(types.iter().map(|x| x.extension_name())); + + if geometry_type_names.len() == 2 { + if geometry_type_names.contains("geoarrow.point") + && geometry_type_names.contains("geoarrow.multipoint") { - NativeType::MultiPoint(Default::default(), Dimension::XY) - } else if extension_name_set.contains("geoarrow.linestring") - && extension_name_set.contains("geoarrow.multilinestring") + return NativeType::MultiPoint(coord_type, dimension); + } else if geometry_type_names.contains("geoarrow.linestring") + && geometry_type_names.contains("geoarrow.multilinestring") { - NativeType::MultiLineString(Default::default(), Dimension::XY) - } else if extension_name_set.contains("geoarrow.polygon") - && extension_name_set.contains("geoarrow.multipolygon") + return NativeType::MultiLineString(coord_type, dimension); + } else if geometry_type_names.contains("geoarrow.polygon") + && geometry_type_names.contains("geoarrow.multipolygon") { - NativeType::MultiPolygon(Default::default(), Dimension::XY) - } else if extension_name_set.contains("geoarrow.geometrycollection") { - NativeType::GeometryCollection(Default::default(), Dimension::XY) - } else { - NativeType::Mixed(Default::default(), Dimension::XY) + return NativeType::MultiPolygon(coord_type, dimension); + } else if geometry_type_names.contains("geoarrow.geometrycollection") { + return NativeType::GeometryCollection(coord_type, dimension); } - } else { - NativeType::Mixed(Default::default(), Dimension::XY) } + + NativeType::Geometry(coord_type) } impl Downcast for ChunkedPointArray { @@ -477,7 +392,7 @@ macro_rules! impl_chunked_downcast { return Arc::new(self.clone()); } - self.cast(&to_data_type).unwrap() + self.cast(to_data_type).unwrap() } } }; @@ -490,6 +405,7 @@ impl_chunked_downcast!(ChunkedMultiLineStringArray); impl_chunked_downcast!(ChunkedMultiPolygonArray); impl_chunked_downcast!(ChunkedMixedGeometryArray); impl_chunked_downcast!(ChunkedGeometryCollectionArray); +impl_chunked_downcast!(ChunkedUnknownGeometryArray); impl Downcast for ChunkedRectArray { type Output = Arc; @@ -506,38 +422,36 @@ impl Downcast for &dyn ChunkedNativeArray { type Output = Arc; fn downcasted_data_type(&self) -> NativeType { - use Dimension::*; use NativeType::*; match self.data_type() { - Point(_, XY) => self.as_point().downcasted_data_type(), - LineString(_, XY) => self.as_line_string().downcasted_data_type(), - Polygon(_, XY) => self.as_polygon().downcasted_data_type(), - MultiPoint(_, XY) => self.as_multi_point().downcasted_data_type(), - MultiLineString(_, XY) => self.as_multi_line_string().downcasted_data_type(), - MultiPolygon(_, XY) => self.as_multi_polygon().downcasted_data_type(), - Mixed(_, XY) => self.as_mixed().downcasted_data_type(), - GeometryCollection(_, XY) => self.as_geometry_collection().downcasted_data_type(), - Rect(XY) => self.as_rect().downcasted_data_type(), - _ => todo!("3d support"), + Point(_, _) => self.as_point().downcasted_data_type(), + LineString(_, _) => self.as_line_string().downcasted_data_type(), + Polygon(_, _) => self.as_polygon().downcasted_data_type(), + MultiPoint(_, _) => self.as_multi_point().downcasted_data_type(), + MultiLineString(_, _) => self.as_multi_line_string().downcasted_data_type(), + MultiPolygon(_, _) => self.as_multi_polygon().downcasted_data_type(), + Mixed(_, _) => self.as_mixed().downcasted_data_type(), + GeometryCollection(_, _) => self.as_geometry_collection().downcasted_data_type(), + Rect(_) => self.as_rect().downcasted_data_type(), + Geometry(_) => self.as_geometry().downcasted_data_type(), } } fn downcast(&self) -> Self::Output { - use Dimension::*; use NativeType::*; match self.data_type() { - Point(_, XY) => self.as_point().downcast(), - LineString(_, XY) => self.as_line_string().downcast(), - Polygon(_, XY) => self.as_polygon().downcast(), - MultiPoint(_, XY) => self.as_multi_point().downcast(), - MultiLineString(_, XY) => self.as_multi_line_string().downcast(), - MultiPolygon(_, XY) => self.as_multi_polygon().downcast(), - Mixed(_, XY) => self.as_mixed().downcast(), - GeometryCollection(_, XY) => self.as_geometry_collection().downcast(), - Rect(XY) => self.as_rect().downcast(), - _ => todo!("3d support"), + Point(_, _) => self.as_point().downcast(), + LineString(_, _) => self.as_line_string().downcast(), + Polygon(_, _) => self.as_polygon().downcast(), + MultiPoint(_, _) => self.as_multi_point().downcast(), + MultiLineString(_, _) => self.as_multi_line_string().downcast(), + MultiPolygon(_, _) => self.as_multi_polygon().downcast(), + Mixed(_, _) => self.as_mixed().downcast(), + GeometryCollection(_, _) => self.as_geometry_collection().downcast(), + Rect(_) => self.as_rect().downcast(), + Geometry(_) => self.as_geometry().downcast(), } } } diff --git a/rust/geoarrow/src/array/geometry/array.rs b/rust/geoarrow/src/array/geometry/array.rs index 81a9ecc8..cd804ff6 100644 --- a/rust/geoarrow/src/array/geometry/array.rs +++ b/rust/geoarrow/src/array/geometry/array.rs @@ -9,8 +9,8 @@ use crate::array::geometry::GeometryBuilder; use crate::array::geometry::GeometryCapacity; use crate::array::metadata::ArrayMetadata; use crate::array::{ - CoordType, GeometryCollectionArray, LineStringArray, MultiLineStringArray, MultiPointArray, - MultiPolygonArray, PointArray, PolygonArray, WKBArray, + CoordType, GeometryCollectionArray, LineStringArray, MixedGeometryArray, MultiLineStringArray, + MultiPointArray, MultiPolygonArray, PointArray, PolygonArray, WKBArray, }; use crate::datatypes::{Dimension, NativeType}; use crate::error::{GeoArrowError, Result}; @@ -181,6 +181,7 @@ impl GeometryArray { ) } + // TODO: handle slicing pub fn has_points(&self, dim: Dimension) -> bool { match dim { Dimension::XY => !self.point_xy.is_empty(), @@ -223,6 +224,13 @@ impl GeometryArray { } } + pub fn has_geometry_collections(&self, dim: Dimension) -> bool { + match dim { + Dimension::XY => !self.gc_xy.is_empty(), + Dimension::XYZ => !self.gc_xyz.is_empty(), + } + } + /// Return `true` if this array holds at least one geometry array of the given dimension pub fn has_dimension(&self, dim: Dimension) -> bool { use Dimension::*; @@ -256,6 +264,9 @@ impl GeometryArray { } } + // Handle sliced data before downcasting. + // pub fn compact_children() + // /// The number of non-empty child arrays // fn num_non_empty_children(&self) -> usize { // let mut count = 0; @@ -446,6 +457,56 @@ impl GeometryArray { self.metadata, ) } + + // TODO: recursively expand the types from the geometry collection array + pub fn contained_types(&self) -> HashSet { + let mut types = HashSet::new(); + if self.has_points(Dimension::XY) { + types.insert(self.point_xy.data_type()); + } + if self.has_line_strings(Dimension::XY) { + types.insert(self.line_string_xy.data_type()); + } + if self.has_polygons(Dimension::XY) { + types.insert(self.polygon_xy.data_type()); + } + if self.has_multi_points(Dimension::XY) { + types.insert(self.mpoint_xy.data_type()); + } + if self.has_multi_line_strings(Dimension::XY) { + types.insert(self.mline_string_xy.data_type()); + } + if self.has_multi_polygons(Dimension::XY) { + types.insert(self.mpolygon_xy.data_type()); + } + if self.has_geometry_collections(Dimension::XY) { + types.insert(self.gc_xy.data_type()); + } + + if self.has_points(Dimension::XYZ) { + types.insert(self.point_xyz.data_type()); + } + if self.has_line_strings(Dimension::XYZ) { + types.insert(self.line_string_xyz.data_type()); + } + if self.has_polygons(Dimension::XYZ) { + types.insert(self.polygon_xyz.data_type()); + } + if self.has_multi_points(Dimension::XYZ) { + types.insert(self.mpoint_xyz.data_type()); + } + if self.has_multi_line_strings(Dimension::XYZ) { + types.insert(self.mline_string_xyz.data_type()); + } + if self.has_multi_polygons(Dimension::XYZ) { + types.insert(self.mpolygon_xyz.data_type()); + } + if self.has_geometry_collections(Dimension::XYZ) { + types.insert(self.gc_xyz.data_type()); + } + + types + } } impl ArrayBase for GeometryArray { @@ -867,155 +928,104 @@ impl TryFrom> for GeometryArray { } } -// impl From for GeometryArray { -// fn from(value: PointArray) -> Self { -// let type_ids = match value.dimension() { -// Dimension::XY => vec![1; value.len()], -// Dimension::XYZ => vec![11; value.len()], -// }; -// let metadata = value.metadata.clone(); -// Self::new( -// ScalarBuffer::from(type_ids), -// ScalarBuffer::from_iter(0..value.len() as i32), -// value, -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// metadata, -// ) -// } -// } - -// impl From for GeometryArray { -// fn from(value: LineStringArray) -> Self { -// let type_ids = match value.dimension() { -// Dimension::XY => vec![2; value.len()], -// Dimension::XYZ => vec![12; value.len()], -// }; -// let metadata = value.metadata.clone(); -// Self::new( -// ScalarBuffer::from(type_ids), -// ScalarBuffer::from_iter(0..value.len() as i32), -// Default::default(), -// value, -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// metadata, -// ) -// } -// } - -// impl From for GeometryArray { -// fn from(value: PolygonArray) -> Self { -// let type_ids = match value.dimension() { -// Dimension::XY => vec![3; value.len()], -// Dimension::XYZ => vec![13; value.len()], -// }; -// let metadata = value.metadata.clone(); -// Self::new( -// ScalarBuffer::from(type_ids), -// ScalarBuffer::from_iter(0..value.len() as i32), -// Default::default(), -// Default::default(), -// value, -// Default::default(), -// Default::default(), -// Default::default(), -// metadata, -// ) -// } -// } - -// impl From for GeometryArray { -// fn from(value: MultiPointArray) -> Self { -// let type_ids = match value.dimension() { -// Dimension::XY => vec![4; value.len()], -// Dimension::XYZ => vec![14; value.len()], -// }; -// let metadata = value.metadata.clone(); -// Self::new( -// ScalarBuffer::from(type_ids), -// ScalarBuffer::from_iter(0..value.len() as i32), -// Default::default(), -// Default::default(), -// Default::default(), -// value, -// Default::default(), -// Default::default(), -// metadata, -// ) -// } -// } - -// impl From for GeometryArray { -// fn from(value: MultiLineStringArray) -> Self { -// let type_ids = match value.dimension() { -// Dimension::XY => vec![5; value.len()], -// Dimension::XYZ => vec![15; value.len()], -// }; -// let metadata = value.metadata.clone(); -// Self::new( -// ScalarBuffer::from(type_ids), -// ScalarBuffer::from_iter(0..value.len() as i32), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// value, -// Default::default(), -// metadata, -// ) -// } -// } - -// impl From for GeometryArray { -// fn from(value: MultiPolygonArray) -> Self { -// let type_ids = match value.dimension() { -// Dimension::XY => vec![6; value.len()], -// Dimension::XYZ => vec![16; value.len()], -// }; -// let metadata = value.metadata.clone(); -// Self::new( -// ScalarBuffer::from(type_ids), -// ScalarBuffer::from_iter(0..value.len() as i32), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// Default::default(), -// value, -// metadata, -// ) -// } -// } - -// impl TryFrom for GeometryArray { -// type Error = GeoArrowError; - -// fn try_from(value: GeometryCollectionArray) -> std::result::Result { -// if !can_downcast_multi(&value.geom_offsets) { -// return Err(GeoArrowError::General("Unable to cast".to_string())); -// } - -// if value.null_count() > 0 { -// return Err(GeoArrowError::General( -// "Unable to cast with nulls".to_string(), -// )); -// } - -// Ok(value.array) -// } -// } +macro_rules! impl_to_geometry_array { + ($source_array:ty, $typeid_xy:expr, $typeid_xyz:expr, $child_xy:ident, $child_xyz:ident) => { + impl From<$source_array> for GeometryArray { + fn from(value: $source_array) -> Self { + let dim = value.dimension(); + let type_ids = match dim { + Dimension::XY => vec![$typeid_xy; value.len()], + Dimension::XYZ => vec![$typeid_xyz; value.len()], + }; + let mut slf = Self { + data_type: NativeType::Geometry(value.coord_type()), + metadata: value.metadata().clone(), + type_ids: type_ids.into(), + offsets: ScalarBuffer::from_iter(0..value.len() as i32), + ..Default::default() + }; + match dim { + Dimension::XY => { + slf.$child_xy = value; + } + Dimension::XYZ => { + slf.$child_xyz = value; + } + } + slf + } + } + }; +} + +impl_to_geometry_array!(PointArray, 1, 11, point_xy, point_xyz); +impl_to_geometry_array!(LineStringArray, 1, 11, line_string_xy, line_string_xy); +impl_to_geometry_array!(PolygonArray, 1, 11, polygon_xy, polygon_xyz); +impl_to_geometry_array!(MultiPointArray, 1, 11, mpoint_xy, mpoint_xyz); +impl_to_geometry_array!( + MultiLineStringArray, + 1, + 11, + mline_string_xy, + mline_string_xyz +); +impl_to_geometry_array!(MultiPolygonArray, 1, 11, mpolygon_xy, mpolygon_xyz); +impl_to_geometry_array!(GeometryCollectionArray, 1, 11, gc_xy, gc_xyz); + +impl TryFrom for MixedGeometryArray { + type Error = GeoArrowError; + + /// Will error if: + /// + /// - the contained geometries are not all of the same dimension + /// - any geometry collection child exists + fn try_from(value: GeometryArray) -> std::result::Result { + if value.has_only_dimension(Dimension::XY) { + if value.gc_xy.is_empty() { + Ok(MixedGeometryArray::new( + value.type_ids, + value.offsets, + value.point_xy, + value.line_string_xy, + value.polygon_xy, + value.mpoint_xy, + value.mline_string_xy, + value.mpolygon_xy, + value.metadata, + )) + } else { + Err(GeoArrowError::General( + "Cannot cast to MixedGeometryArray with non-empty GeometryCollection child." + .to_string(), + )) + } + } else if value.has_only_dimension(Dimension::XYZ) { + if value.gc_xyz.is_empty() { + Ok(MixedGeometryArray::new( + value.type_ids, + value.offsets, + value.point_xyz, + value.line_string_xyz, + value.polygon_xyz, + value.mpoint_xyz, + value.mline_string_xyz, + value.mpolygon_xyz, + value.metadata, + )) + } else { + Err(GeoArrowError::General( + "Cannot cast to MixedGeometryArray with non-empty GeometryCollection child." + .to_string(), + )) + } + } else { + Err(GeoArrowError::General( + "Cannot cast to MixedGeometryArray when GeometryArray contains multiple dimensions" + .to_string(), + )) + } + } +} /// Default to an empty array impl Default for GeometryArray { diff --git a/rust/geoarrow/src/array/linestring/array.rs b/rust/geoarrow/src/array/linestring/array.rs index 8c400c17..a08f73c1 100644 --- a/rust/geoarrow/src/array/linestring/array.rs +++ b/rust/geoarrow/src/array/linestring/array.rs @@ -477,12 +477,13 @@ impl TryFrom for LineStringArray { return Err(GeoArrowError::General("Unable to cast".to_string())); } + let (offset, length) = value.slice_offset_length(); if value.has_only_line_strings() { - return Ok(value.line_strings); + return Ok(value.line_strings.slice(offset, length)); } if value.has_only_multi_line_strings() { - return value.multi_line_strings.try_into(); + return value.multi_line_strings.slice(offset, length).try_into(); } let mut capacity = value.line_strings.buffer_lengths(); diff --git a/rust/geoarrow/src/array/mixed/array.rs b/rust/geoarrow/src/array/mixed/array.rs index cbd1d49c..0f2d4be3 100644 --- a/rust/geoarrow/src/array/mixed/array.rs +++ b/rust/geoarrow/src/array/mixed/array.rs @@ -73,6 +73,10 @@ pub struct MixedGeometryArray { pub(crate) multi_points: MultiPointArray, pub(crate) multi_line_strings: MultiLineStringArray, pub(crate) multi_polygons: MultiPolygonArray, + + /// We don't need a separate slice_length, because that's the length of the full + /// MixedGeometryArray + slice_offset: usize, } impl MixedGeometryArray { @@ -121,7 +125,6 @@ impl MixedGeometryArray { let dim = dimensions.into_iter().next().unwrap(); let data_type = NativeType::Mixed(coord_type, dim); - Self { data_type, type_ids, @@ -133,6 +136,7 @@ impl MixedGeometryArray { multi_line_strings, multi_polygons, metadata, + slice_offset: 0, } } @@ -148,28 +152,143 @@ impl MixedGeometryArray { ) } + /// Return `true` if this array has been sliced. + pub(crate) fn is_sliced(&self) -> bool { + // Note this is still not a valid check, because it could've been sliced with start 0 but + // length less than the full length. + // self.slice_offset > 0 || self.slice_length + + let mut child_lengths = 0; + child_lengths += self.points.len(); + child_lengths += self.line_strings.len(); + child_lengths += self.polygons.len(); + child_lengths += self.multi_points.len(); + child_lengths += self.multi_line_strings.len(); + child_lengths += self.multi_polygons.len(); + + child_lengths > self.len() + } + + /// The offset and length by which this array has been sliced. + /// + /// If this array has not been sliced, the slice offset will be `0`. The length will always be + /// equal to `self.len()`. + pub(crate) fn slice_offset_length(&self) -> (usize, usize) { + (self.slice_offset, self.len()) + } + pub fn has_points(&self) -> bool { - !self.points.is_empty() + if self.points.is_empty() { + return false; + } + + // If the array has been sliced, check a point type id still exists + if self.is_sliced() { + for t in self.type_ids.iter() { + if *t % 10 == 1 { + return true; + } + } + + return false; + } + + true } pub fn has_line_strings(&self) -> bool { - !self.line_strings.is_empty() + if self.line_strings.is_empty() { + return false; + } + + // If the array has been sliced, check a point type id still exists + if self.is_sliced() { + for t in self.type_ids.iter() { + if *t % 10 == 2 { + return true; + } + } + + return false; + } + + true } pub fn has_polygons(&self) -> bool { - !self.polygons.is_empty() + if self.polygons.is_empty() { + return false; + } + + // If the array has been sliced, check a point type id still exists + if self.is_sliced() { + for t in self.type_ids.iter() { + if *t % 10 == 3 { + return true; + } + } + + return false; + } + + true } pub fn has_multi_points(&self) -> bool { - !self.multi_points.is_empty() + if self.multi_points.is_empty() { + return false; + } + + // If the array has been sliced, check a point type id still exists + if self.is_sliced() { + for t in self.type_ids.iter() { + if *t % 10 == 4 { + return true; + } + } + + return false; + } + + true } pub fn has_multi_line_strings(&self) -> bool { - !self.multi_line_strings.is_empty() + if self.multi_line_strings.is_empty() { + return false; + } + + // If the array has been sliced, check a point type id still exists + if self.is_sliced() { + for t in self.type_ids.iter() { + if *t % 10 == 5 { + return true; + } + } + + return false; + } + + true } pub fn has_multi_polygons(&self) -> bool { - !self.multi_polygons.is_empty() + if self.multi_polygons.is_empty() { + return false; + } + + // If the array has been sliced, check a point type id still exists + if self.is_sliced() { + for t in self.type_ids.iter() { + if *t % 10 == 6 { + return true; + } + } + + return false; + } + + true } pub fn has_only_points(&self) -> bool { @@ -257,6 +376,7 @@ impl MixedGeometryArray { multi_line_strings: self.multi_line_strings.clone(), multi_polygons: self.multi_polygons.clone(), metadata: self.metadata.clone(), + slice_offset: self.slice_offset + offset, } } @@ -277,6 +397,30 @@ impl MixedGeometryArray { self.metadata, ) } + + pub fn contained_types(&self) -> HashSet { + let mut types = HashSet::new(); + if self.has_points() { + types.insert(self.points.data_type()); + } + if self.has_line_strings() { + types.insert(self.line_strings.data_type()); + } + if self.has_polygons() { + types.insert(self.polygons.data_type()); + } + if self.has_multi_points() { + types.insert(self.multi_points.data_type()); + } + if self.has_multi_line_strings() { + types.insert(self.multi_line_strings.data_type()); + } + if self.has_multi_polygons() { + types.insert(self.multi_polygons.data_type()); + } + + types + } } impl ArrayBase for MixedGeometryArray { diff --git a/rust/geoarrow/src/array/multilinestring/array.rs b/rust/geoarrow/src/array/multilinestring/array.rs index d9ae018e..517df2e3 100644 --- a/rust/geoarrow/src/array/multilinestring/array.rs +++ b/rust/geoarrow/src/array/multilinestring/array.rs @@ -513,12 +513,13 @@ impl TryFrom for MultiLineStringArray { return Err(GeoArrowError::General("Unable to cast".to_string())); } + let (offset, length) = value.slice_offset_length(); if value.has_only_line_strings() { - return Ok(value.line_strings.into()); + return Ok(value.line_strings.slice(offset, length).into()); } if value.has_only_multi_line_strings() { - return Ok(value.multi_line_strings); + return Ok(value.multi_line_strings.slice(offset, length)); } let mut capacity = value.multi_line_strings.buffer_lengths(); diff --git a/rust/geoarrow/src/array/multipoint/array.rs b/rust/geoarrow/src/array/multipoint/array.rs index 1722470e..06fe7862 100644 --- a/rust/geoarrow/src/array/multipoint/array.rs +++ b/rust/geoarrow/src/array/multipoint/array.rs @@ -457,12 +457,13 @@ impl TryFrom for MultiPointArray { return Err(GeoArrowError::General("Unable to cast".to_string())); } + let (offset, length) = value.slice_offset_length(); if value.has_only_points() { - return Ok(value.points.into()); + return Ok(value.points.slice(offset, length).into()); } if value.has_only_multi_points() { - return Ok(value.multi_points); + return Ok(value.multi_points.slice(offset, length)); } let mut capacity = value.multi_points.buffer_lengths(); diff --git a/rust/geoarrow/src/array/multipolygon/array.rs b/rust/geoarrow/src/array/multipolygon/array.rs index 5390ed2d..7d2d247b 100644 --- a/rust/geoarrow/src/array/multipolygon/array.rs +++ b/rust/geoarrow/src/array/multipolygon/array.rs @@ -595,12 +595,13 @@ impl TryFrom for MultiPolygonArray { return Err(GeoArrowError::General("Unable to cast".to_string())); } + let (offset, length) = value.slice_offset_length(); if value.has_only_polygons() { - return Ok(value.polygons.into()); + return Ok(value.polygons.slice(offset, length).into()); } if value.has_only_multi_polygons() { - return Ok(value.multi_polygons); + return Ok(value.multi_polygons.slice(offset, length)); } let mut capacity = value.multi_polygons.buffer_lengths(); diff --git a/rust/geoarrow/src/array/point/array.rs b/rust/geoarrow/src/array/point/array.rs index 24798f80..4fa11225 100644 --- a/rust/geoarrow/src/array/point/array.rs +++ b/rust/geoarrow/src/array/point/array.rs @@ -412,12 +412,13 @@ impl TryFrom for PointArray { return Err(GeoArrowError::General("Unable to cast".to_string())); } + let (offset, length) = value.slice_offset_length(); if value.has_only_points() { - return Ok(value.points); + return Ok(value.points.slice(offset, length)); } if value.has_only_multi_points() { - return value.multi_points.try_into(); + return value.multi_points.slice(offset, length).try_into(); } let mut builder = PointBuilder::with_capacity_and_options( diff --git a/rust/geoarrow/src/array/polygon/array.rs b/rust/geoarrow/src/array/polygon/array.rs index a1660c72..a254750f 100644 --- a/rust/geoarrow/src/array/polygon/array.rs +++ b/rust/geoarrow/src/array/polygon/array.rs @@ -552,12 +552,13 @@ impl TryFrom for PolygonArray { return Err(GeoArrowError::General("Unable to cast".to_string())); } + let (offset, length) = value.slice_offset_length(); if value.has_only_polygons() { - return Ok(value.polygons); + return Ok(value.polygons.slice(offset, length)); } if value.has_only_multi_polygons() { - return value.multi_polygons.try_into(); + return value.multi_polygons.slice(offset, length).try_into(); } let mut capacity = value.polygons.buffer_lengths(); diff --git a/rust/geoarrow/src/datatypes.rs b/rust/geoarrow/src/datatypes.rs index de9e8408..666756f3 100644 --- a/rust/geoarrow/src/datatypes.rs +++ b/rust/geoarrow/src/datatypes.rs @@ -469,7 +469,7 @@ impl NativeType { Mixed(_, _) => "geoarrow.geometry", GeometryCollection(_, _) => "geoarrow.geometrycollection", Rect(_) => "geoarrow.box", - Geometry(_) => "geoarrow.unknown", + Geometry(_) => "geoarrow.geometry", } } @@ -794,7 +794,7 @@ fn parse_multi_polygon(field: &Field) -> Result { } } -fn parse_geometry(field: &Field) -> Result { +fn parse_mixed(field: &Field) -> Result { match field.data_type() { DataType::Union(fields, _) => { let mut coord_types: HashSet = HashSet::new(); @@ -927,13 +927,13 @@ fn parse_geometry_collection(field: &Field) -> Result { // We need to parse the _inner_ type of the geometry collection as a union so that we can check // what coordinate type it's using. match field.data_type() { - DataType::List(inner_field) => match parse_geometry(inner_field)? { + DataType::List(inner_field) => match parse_mixed(inner_field)? { NativeType::Mixed(coord_type, dim) => { Ok(NativeType::GeometryCollection(coord_type, dim)) } _ => panic!(), }, - DataType::LargeList(inner_field) => match parse_geometry(inner_field)? { + DataType::LargeList(inner_field) => match parse_mixed(inner_field)? { NativeType::Mixed(coord_type, dim) => { Ok(NativeType::GeometryCollection(coord_type, dim)) } @@ -970,7 +970,7 @@ fn parse_rect(field: &Field) -> NativeType { } } -fn parse_unknown(field: &Field) -> Result { +fn parse_geometry(field: &Field) -> Result { if let DataType::Union(fields, _mode) = field.data_type() { let mut coord_types: HashSet = HashSet::new(); @@ -1090,10 +1090,11 @@ impl TryFrom<&Field> for NativeType { "geoarrow.multipoint" => parse_multi_point(field)?, "geoarrow.multilinestring" => parse_multi_linestring(field)?, "geoarrow.multipolygon" => parse_multi_polygon(field)?, - "geoarrow.geometry" => parse_geometry(field)?, "geoarrow.geometrycollection" => parse_geometry_collection(field)?, "geoarrow.box" => parse_rect(field), - "geoarrow.unknown" => parse_unknown(field)?, + "geoarrow.geometry" => parse_geometry(field)?, + // We always parse geoarrow.geometry to a GeometryArray + // "geoarrow.geometry" => parse_mixed(field)?, name => return Err(GeoArrowError::General(format!("Expected GeoArrow native type, got '{}'.\nIf you're passing a serialized GeoArrow type like 'geoarrow.wkb' or 'geoarrow.wkt', you need to parse to a native representation.", name))), }; Ok(data_type) @@ -1162,7 +1163,7 @@ impl TryFrom<&Field> for AnyType { #[cfg(test)] mod test { use super::*; - use crate::array::MixedGeometryBuilder; + use crate::array::GeometryBuilder; use crate::{ArrayBase, NativeArray}; #[test] @@ -1177,7 +1178,7 @@ mod test { let data_type: NativeType = field.as_ref().try_into().unwrap(); assert_eq!(ml_array.data_type(), data_type); - let mut builder = MixedGeometryBuilder::new(Dimension::XY); + let mut builder = GeometryBuilder::new(); builder.push_point(Some(&crate::test::point::p0())).unwrap(); builder.push_point(Some(&crate::test::point::p1())).unwrap(); builder.push_point(Some(&crate::test::point::p2())).unwrap(); @@ -1187,9 +1188,9 @@ mod test { builder .push_multi_line_string(Some(&crate::test::multilinestring::ml1())) .unwrap(); - let mixed_array = builder.finish(); - let field = mixed_array.extension_field(); + let geom_array = builder.finish(); + let field = geom_array.extension_field(); let data_type: NativeType = field.as_ref().try_into().unwrap(); - assert_eq!(mixed_array.data_type(), data_type); + assert_eq!(geom_array.data_type(), data_type); } } diff --git a/rust/geoarrow/src/io/wkb/api.rs b/rust/geoarrow/src/io/wkb/api.rs index b55b713c..95e4e0ea 100644 --- a/rust/geoarrow/src/io/wkb/api.rs +++ b/rust/geoarrow/src/io/wkb/api.rs @@ -116,7 +116,7 @@ impl FromWKB for Arc { arr.metadata(), true, )?; - Ok(builder.finish().downcast()) + builder.finish().downcast() } } @@ -303,7 +303,7 @@ impl ToWKB for &dyn ChunkedNativeArray { ChunkedGeometryArray::new(self.as_geometry_collection().map(|chunk| chunk.into())) } Rect(_) => todo!(), - Geometry(_) => ChunkedGeometryArray::new(self.as_mixed().map(|chunk| chunk.into())), + Geometry(_) => ChunkedGeometryArray::new(self.as_geometry().map(|chunk| chunk.into())), } } } @@ -356,9 +356,10 @@ mod test { true, ) .unwrap(); + let rt_ref = roundtrip.as_ref(); let rt_mixed_arr = rt_ref.as_mixed(); - let downcasted = rt_mixed_arr.downcast(); + let downcasted = rt_mixed_arr.downcast().unwrap(); let downcasted_ref = downcasted.as_ref(); let rt_point_arr = downcasted_ref.as_point(); assert_eq!(&arr, rt_point_arr); diff --git a/rust/geoarrow/src/table.rs b/rust/geoarrow/src/table.rs index 9bc87bb6..6052483f 100644 --- a/rust/geoarrow/src/table.rs +++ b/rust/geoarrow/src/table.rs @@ -29,7 +29,6 @@ pub(crate) static GEOARROW_EXTENSION_NAMES: Set<&'static str> = phf_set! { "geoarrow.geometrycollection", "geoarrow.wkb", "geoarrow.wkt", - "geoarrow.unknown", "ogc.wkb", }; @@ -153,10 +152,10 @@ impl Table { /// let index = table.default_geometry_column_idx().unwrap(); /// /// // Change to separated storage of coordinates - /// table.cast_geometry(index, &NativeType::LineString(CoordType::Separated, Dimension::XY)).unwrap(); + /// table.cast_geometry(index, NativeType::LineString(CoordType::Separated, Dimension::XY)).unwrap(); /// # } /// ``` - pub fn cast_geometry(&mut self, index: usize, to_type: &NativeType) -> Result<()> { + pub fn cast_geometry(&mut self, index: usize, to_type: NativeType) -> Result<()> { let orig_field = self.schema().field(index); let array_slices = self