From faf51d9c04da14f89b43ca55768dc329689c6f91 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Wed, 11 Dec 2024 11:34:19 -0500 Subject: [PATCH 01/11] bump geos to 9.1.1 (#937) Should fix a memory leak in GEOS bindings. Closes https://github.com/geoarrow/geoarrow-rs/issues/936, ref https://github.com/georust/geos/issues/160#issuecomment-2516122859 --- Cargo.lock | 18 ++---------------- rust/geoarrow/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d27eca86..f00042c2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1358,16 +1358,14 @@ dependencies = [ [[package]] name = "geos" -version = "9.0.0" +version = "9.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "468b7ac7233c0f417b2c161d28b6d1e88c025eaf79303b69e6e1aa40a2ac1367" +checksum = "56d199db00644057267a8a68ee72df92aa59a32036b487b2a2b76fd0b3fca32b" dependencies = [ "c_vec", - "geo-types", "geos-sys", "libc", "num", - "wkt 0.10.3", ] [[package]] @@ -4308,18 +4306,6 @@ dependencies = [ "thiserror", ] -[[package]] -name = "wkt" -version = "0.10.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3c2252781f8927974e8ba6a67c965a759a2b88ea2b1825f6862426bbb1c8f41" -dependencies = [ - "geo-types", - "log", - "num-traits", - "thiserror", -] - [[package]] name = "wkt" version = "0.11.0" diff --git a/rust/geoarrow/Cargo.toml b/rust/geoarrow/Cargo.toml index 6648c1bd..6582f52e 100644 --- a/rust/geoarrow/Cargo.toml +++ b/rust/geoarrow/Cargo.toml @@ -67,7 +67,7 @@ gdal = { version = "0.17", optional = true } geo = "0.29.3" geo-index = "0.1.1" geo-traits = "0.2" -geos = { version = "9.0", features = ["v3_10_0", "geo"], optional = true } +geos = { version = "9.1.1", features = ["v3_10_0"], optional = true } geozero = { version = "0.14", features = ["with-wkb"] } half = { version = "2.4.1" } http-range-client = { version = "0.8", optional = true } From f307960bc68bd9f93eabc84e57a9eeff9206a665 Mon Sep 17 00:00:00 
2001 From: Kyle Barron Date: Wed, 11 Dec 2024 18:14:07 -0500 Subject: [PATCH 02/11] Fix type_ids in GeometryArray::from (#940) --- rust/geoarrow/src/array/geometry/array.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/rust/geoarrow/src/array/geometry/array.rs b/rust/geoarrow/src/array/geometry/array.rs index 72b52146..c060245c 100644 --- a/rust/geoarrow/src/array/geometry/array.rs +++ b/rust/geoarrow/src/array/geometry/array.rs @@ -956,18 +956,18 @@ macro_rules! impl_to_geometry_array { } impl_to_geometry_array!(PointArray, 1, 11, point_xy, point_xyz); -impl_to_geometry_array!(LineStringArray, 1, 11, line_string_xy, line_string_xy); -impl_to_geometry_array!(PolygonArray, 1, 11, polygon_xy, polygon_xyz); -impl_to_geometry_array!(MultiPointArray, 1, 11, mpoint_xy, mpoint_xyz); +impl_to_geometry_array!(LineStringArray, 2, 12, line_string_xy, line_string_xy); +impl_to_geometry_array!(PolygonArray, 3, 13, polygon_xy, polygon_xyz); +impl_to_geometry_array!(MultiPointArray, 4, 14, mpoint_xy, mpoint_xyz); impl_to_geometry_array!( MultiLineStringArray, - 1, - 11, + 5, + 15, mline_string_xy, mline_string_xyz ); -impl_to_geometry_array!(MultiPolygonArray, 1, 11, mpolygon_xy, mpolygon_xyz); -impl_to_geometry_array!(GeometryCollectionArray, 1, 11, gc_xy, gc_xyz); +impl_to_geometry_array!(MultiPolygonArray, 6, 16, mpolygon_xy, mpolygon_xyz); +impl_to_geometry_array!(GeometryCollectionArray, 7, 17, gc_xy, gc_xyz); impl From for GeometryArray { fn from(value: MixedGeometryArray) -> Self { From a726afda54d21bbd8a61e6c2718bcf4d7f0b61dc Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Thu, 12 Dec 2024 11:30:31 -0500 Subject: [PATCH 03/11] Fix GeometryArray and MixedArray constructors for separated coordinates (#941) While exploring https://github.com/geoarrow/geoarrow-rs/pull/939, I realized that it's easy to create an invalid `GeometryArray` or `MixedArray` (or one that panics in `new()`) because `Default::default()` chooses the default coord 
type. So if you're using the separated coord type, you'll have a mix of user-provided arrays that use separated coords, but the `Default::default` generated arrays will have interleaved coords. --- rust/geoarrow/src/array/geometry/array.rs | 298 ++++++++++-------- rust/geoarrow/src/array/geometry/builder.rs | 28 +- rust/geoarrow/src/array/mixed/array.rs | 192 ++++++----- rust/geoarrow/src/array/mixed/builder.rs | 12 +- .../src/io/geozero/table/builder/anyvalue.rs | 1 + .../io/geozero/table/builder/properties.rs | 1 + 6 files changed, 307 insertions(+), 225 deletions(-) diff --git a/rust/geoarrow/src/array/geometry/array.rs b/rust/geoarrow/src/array/geometry/array.rs index c060245c..0703b31f 100644 --- a/rust/geoarrow/src/array/geometry/array.rs +++ b/rust/geoarrow/src/array/geometry/array.rs @@ -96,62 +96,133 @@ impl GeometryArray { pub fn new( type_ids: ScalarBuffer, offsets: ScalarBuffer, - point_xy: PointArray, - line_string_xy: LineStringArray, - polygon_xy: PolygonArray, - mpoint_xy: MultiPointArray, - mline_string_xy: MultiLineStringArray, - mpolygon_xy: MultiPolygonArray, - gc_xy: GeometryCollectionArray, - point_xyz: PointArray, - line_string_xyz: LineStringArray, - polygon_xyz: PolygonArray, - mpoint_xyz: MultiPointArray, - mline_string_xyz: MultiLineStringArray, - mpolygon_xyz: MultiPolygonArray, - gc_xyz: GeometryCollectionArray, + point_xy: Option, + line_string_xy: Option, + polygon_xy: Option, + mpoint_xy: Option, + mline_string_xy: Option, + mpolygon_xy: Option, + gc_xy: Option, + point_xyz: Option, + line_string_xyz: Option, + polygon_xyz: Option, + mpoint_xyz: Option, + mline_string_xyz: Option, + mpolygon_xyz: Option, + gc_xyz: Option, metadata: Arc, ) -> Self { let mut coord_types = HashSet::new(); - coord_types.insert(point_xy.coord_type()); - coord_types.insert(line_string_xy.coord_type()); - coord_types.insert(polygon_xy.coord_type()); - coord_types.insert(mpoint_xy.coord_type()); - coord_types.insert(mline_string_xy.coord_type()); - 
coord_types.insert(mpolygon_xy.coord_type()); - coord_types.insert(gc_xy.coord_type()); - - coord_types.insert(point_xyz.coord_type()); - coord_types.insert(line_string_xyz.coord_type()); - coord_types.insert(polygon_xyz.coord_type()); - coord_types.insert(mpoint_xyz.coord_type()); - coord_types.insert(mline_string_xyz.coord_type()); - coord_types.insert(mpolygon_xyz.coord_type()); - coord_types.insert(gc_xyz.coord_type()); - assert_eq!(coord_types.len(), 1); - - let coord_type = coord_types.into_iter().next().unwrap(); + if let Some(point_xy) = &point_xy { + coord_types.insert(point_xy.coord_type()); + } + if let Some(line_string_xy) = &line_string_xy { + coord_types.insert(line_string_xy.coord_type()); + } + if let Some(polygon_xy) = &polygon_xy { + coord_types.insert(polygon_xy.coord_type()); + } + if let Some(mpoint_xy) = &mpoint_xy { + coord_types.insert(mpoint_xy.coord_type()); + } + if let Some(mline_string_xy) = &mline_string_xy { + coord_types.insert(mline_string_xy.coord_type()); + } + if let Some(mpolygon_xy) = &mpolygon_xy { + coord_types.insert(mpolygon_xy.coord_type()); + } + if let Some(gc_xy) = &gc_xy { + coord_types.insert(gc_xy.coord_type()); + } + + if let Some(point_xyz) = &point_xyz { + coord_types.insert(point_xyz.coord_type()); + } + if let Some(line_string_xyz) = &line_string_xyz { + coord_types.insert(line_string_xyz.coord_type()); + } + if let Some(polygon_xyz) = &polygon_xyz { + coord_types.insert(polygon_xyz.coord_type()); + } + if let Some(mpoint_xyz) = &mpoint_xyz { + coord_types.insert(mpoint_xyz.coord_type()); + } + if let Some(mline_string_xyz) = &mline_string_xyz { + coord_types.insert(mline_string_xyz.coord_type()); + } + if let Some(mpolygon_xyz) = &mpolygon_xyz { + coord_types.insert(mpolygon_xyz.coord_type()); + } + if let Some(gc_xyz) = &gc_xyz { + coord_types.insert(gc_xyz.coord_type()); + } + assert!(coord_types.len() <= 1); + + let coord_type = coord_types.into_iter().next().unwrap_or_default(); let data_type = 
NativeType::Geometry(coord_type); + use Dimension::*; Self { data_type, type_ids, offsets, - point_xy, - line_string_xy, - polygon_xy, - mpoint_xy, - mline_string_xy, - mpolygon_xy, - gc_xy, - point_xyz, - line_string_xyz, - polygon_xyz, - mpoint_xyz, - mline_string_xyz, - mpolygon_xyz, - gc_xyz, + point_xy: point_xy.unwrap_or( + PointBuilder::new_with_options(XY, coord_type, Default::default()).finish(), + ), + line_string_xy: line_string_xy.unwrap_or( + LineStringBuilder::new_with_options(XY, coord_type, Default::default()).finish(), + ), + polygon_xy: polygon_xy.unwrap_or( + PolygonBuilder::new_with_options(XY, coord_type, Default::default()).finish(), + ), + mpoint_xy: mpoint_xy.unwrap_or( + MultiPointBuilder::new_with_options(XY, coord_type, Default::default()).finish(), + ), + mline_string_xy: mline_string_xy.unwrap_or( + MultiLineStringBuilder::new_with_options(XY, coord_type, Default::default()) + .finish(), + ), + mpolygon_xy: mpolygon_xy.unwrap_or( + MultiPolygonBuilder::new_with_options(XY, coord_type, Default::default()).finish(), + ), + gc_xy: gc_xy.unwrap_or( + GeometryCollectionBuilder::new_with_options( + XY, + coord_type, + Default::default(), + false, + ) + .finish(), + ), + point_xyz: point_xyz.unwrap_or( + PointBuilder::new_with_options(XYZ, coord_type, Default::default()).finish(), + ), + line_string_xyz: line_string_xyz.unwrap_or( + LineStringBuilder::new_with_options(XYZ, coord_type, Default::default()).finish(), + ), + polygon_xyz: polygon_xyz.unwrap_or( + PolygonBuilder::new_with_options(XYZ, coord_type, Default::default()).finish(), + ), + mpoint_xyz: mpoint_xyz.unwrap_or( + MultiPointBuilder::new_with_options(XYZ, coord_type, Default::default()).finish(), + ), + mline_string_xyz: mline_string_xyz.unwrap_or( + MultiLineStringBuilder::new_with_options(XYZ, coord_type, Default::default()) + .finish(), + ), + mpolygon_xyz: mpolygon_xyz.unwrap_or( + MultiPolygonBuilder::new_with_options(XYZ, coord_type, Default::default()).finish(), + ), + 
gc_xyz: gc_xyz.unwrap_or( + GeometryCollectionBuilder::new_with_options( + XYZ, + coord_type, + Default::default(), + false, + ) + .finish(), + ), metadata, } } @@ -437,20 +508,20 @@ impl GeometryArray { Self::new( self.type_ids, self.offsets, - self.point_xy.into_coord_type(coord_type), - self.line_string_xy.into_coord_type(coord_type), - self.polygon_xy.into_coord_type(coord_type), - self.mpoint_xy.into_coord_type(coord_type), - self.mline_string_xy.into_coord_type(coord_type), - self.mpolygon_xy.into_coord_type(coord_type), - self.gc_xy.into_coord_type(coord_type), - self.point_xyz.into_coord_type(coord_type), - self.line_string_xyz.into_coord_type(coord_type), - self.polygon_xyz.into_coord_type(coord_type), - self.mpoint_xyz.into_coord_type(coord_type), - self.mline_string_xyz.into_coord_type(coord_type), - self.mpolygon_xyz.into_coord_type(coord_type), - self.gc_xyz.into_coord_type(coord_type), + Some(self.point_xy.into_coord_type(coord_type)), + Some(self.line_string_xy.into_coord_type(coord_type)), + Some(self.polygon_xy.into_coord_type(coord_type)), + Some(self.mpoint_xy.into_coord_type(coord_type)), + Some(self.mline_string_xy.into_coord_type(coord_type)), + Some(self.mpolygon_xy.into_coord_type(coord_type)), + Some(self.gc_xy.into_coord_type(coord_type)), + Some(self.point_xyz.into_coord_type(coord_type)), + Some(self.line_string_xyz.into_coord_type(coord_type)), + Some(self.polygon_xyz.into_coord_type(coord_type)), + Some(self.mpoint_xyz.into_coord_type(coord_type)), + Some(self.mline_string_xyz.into_coord_type(coord_type)), + Some(self.mpolygon_xyz.into_coord_type(coord_type)), + Some(self.gc_xyz.into_coord_type(coord_type)), self.metadata, ) } @@ -852,20 +923,20 @@ impl TryFrom<&UnionArray> for GeometryArray { Ok(Self::new( type_ids, offsets, - point_xy.unwrap_or_default(), - line_string_xy.unwrap_or_default(), - polygon_xy.unwrap_or_default(), - mpoint_xy.unwrap_or_default(), - mline_string_xy.unwrap_or_default(), - mpolygon_xy.unwrap_or_default(), - 
gc_xy.unwrap_or_default(), - point_xyz.unwrap_or_default(), - line_string_xyz.unwrap_or_default(), - polygon_xyz.unwrap_or_default(), - mpoint_xyz.unwrap_or_default(), - mline_string_xyz.unwrap_or_default(), - mpolygon_xyz.unwrap_or_default(), - gc_xyz.unwrap_or_default(), + point_xy, + line_string_xy, + polygon_xy, + mpoint_xy, + mline_string_xy, + mpolygon_xy, + gc_xy, + point_xyz, + line_string_xyz, + polygon_xyz, + mpoint_xyz, + mline_string_xyz, + mpolygon_xyz, + gc_xyz, Default::default(), )) } @@ -988,7 +1059,6 @@ impl From for GeometryArray { let mut mline_string_xyz: Option = None; let mut mpolygon_xyz: Option = None; - let coord_type = value.coord_type(); match value.dimension() { XY => { point_xy = Some(value.points); @@ -1011,46 +1081,20 @@ impl From for GeometryArray { Self::new( value.type_ids, value.offsets, - point_xy.unwrap_or( - PointBuilder::new_with_options(XY, coord_type, Default::default()).finish(), - ), - line_string_xy.unwrap_or( - LineStringBuilder::new_with_options(XY, coord_type, Default::default()).finish(), - ), - polygon_xy.unwrap_or( - PolygonBuilder::new_with_options(XY, coord_type, Default::default()).finish(), - ), - mpoint_xy.unwrap_or( - MultiPointBuilder::new_with_options(XY, coord_type, Default::default()).finish(), - ), - mline_string_xy.unwrap_or( - MultiLineStringBuilder::new_with_options(XY, coord_type, Default::default()) - .finish(), - ), - mpolygon_xy.unwrap_or( - MultiPolygonBuilder::new_with_options(XY, coord_type, Default::default()).finish(), - ), - Default::default(), - point_xyz.unwrap_or( - PointBuilder::new_with_options(XYZ, coord_type, Default::default()).finish(), - ), - line_string_xyz.unwrap_or( - LineStringBuilder::new_with_options(XYZ, coord_type, Default::default()).finish(), - ), - polygon_xyz.unwrap_or( - PolygonBuilder::new_with_options(XYZ, coord_type, Default::default()).finish(), - ), - mpoint_xyz.unwrap_or( - MultiPointBuilder::new_with_options(XYZ, coord_type, Default::default()).finish(), - ), - 
mline_string_xyz.unwrap_or( - MultiLineStringBuilder::new_with_options(XYZ, coord_type, Default::default()) - .finish(), - ), - mpolygon_xyz.unwrap_or( - MultiPolygonBuilder::new_with_options(XYZ, coord_type, Default::default()).finish(), - ), - Default::default(), + point_xy, + line_string_xy, + polygon_xy, + mpoint_xy, + mline_string_xy, + mpolygon_xy, + None, + point_xyz, + line_string_xyz, + polygon_xyz, + mpoint_xyz, + mline_string_xyz, + mpolygon_xyz, + None, value.metadata, ) } @@ -1069,12 +1113,12 @@ impl TryFrom for MixedGeometryArray { Ok(MixedGeometryArray::new( value.type_ids, value.offsets, - value.point_xy, - value.line_string_xy, - value.polygon_xy, - value.mpoint_xy, - value.mline_string_xy, - value.mpolygon_xy, + Some(value.point_xy), + Some(value.line_string_xy), + Some(value.polygon_xy), + Some(value.mpoint_xy), + Some(value.mline_string_xy), + Some(value.mpolygon_xy), value.metadata, )) } else { @@ -1088,12 +1132,12 @@ impl TryFrom for MixedGeometryArray { Ok(MixedGeometryArray::new( value.type_ids, value.offsets, - value.point_xyz, - value.line_string_xyz, - value.polygon_xyz, - value.mpoint_xyz, - value.mline_string_xyz, - value.mpolygon_xyz, + Some(value.point_xyz), + Some(value.line_string_xyz), + Some(value.polygon_xyz), + Some(value.mpoint_xyz), + Some(value.mline_string_xyz), + Some(value.mpolygon_xyz), value.metadata, )) } else { diff --git a/rust/geoarrow/src/array/geometry/builder.rs b/rust/geoarrow/src/array/geometry/builder.rs index 01eabc7c..5455596d 100644 --- a/rust/geoarrow/src/array/geometry/builder.rs +++ b/rust/geoarrow/src/array/geometry/builder.rs @@ -943,20 +943,20 @@ impl From for GeometryArray { Self::new( other.types.into(), other.offsets.into(), - other.point_xy.into(), - other.line_string_xy.into(), - other.polygon_xy.into(), - other.mpoint_xy.into(), - other.mline_string_xy.into(), - other.mpolygon_xy.into(), - other.gc_xy.into(), - other.point_xyz.into(), - other.line_string_xyz.into(), - other.polygon_xyz.into(), - 
other.mpoint_xyz.into(), - other.mline_string_xyz.into(), - other.mpolygon_xyz.into(), - other.gc_xyz.into(), + Some(other.point_xy.into()), + Some(other.line_string_xy.into()), + Some(other.polygon_xy.into()), + Some(other.mpoint_xy.into()), + Some(other.mline_string_xy.into()), + Some(other.mpolygon_xy.into()), + Some(other.gc_xy.into()), + Some(other.point_xyz.into()), + Some(other.line_string_xyz.into()), + Some(other.polygon_xyz.into()), + Some(other.mpoint_xyz.into()), + Some(other.mline_string_xyz.into()), + Some(other.mpolygon_xyz.into()), + Some(other.gc_xyz.into()), other.metadata, ) } diff --git a/rust/geoarrow/src/array/mixed/array.rs b/rust/geoarrow/src/array/mixed/array.rs index f50bc1f0..c893a37c 100644 --- a/rust/geoarrow/src/array/mixed/array.rs +++ b/rust/geoarrow/src/array/mixed/array.rs @@ -10,8 +10,9 @@ use crate::array::metadata::ArrayMetadata; use crate::array::mixed::builder::MixedGeometryBuilder; use crate::array::mixed::MixedCapacity; use crate::array::{ - CoordType, GeometryCollectionArray, LineStringArray, MultiLineStringArray, MultiPointArray, - MultiPolygonArray, PointArray, PolygonArray, WKBArray, + CoordType, GeometryCollectionArray, LineStringArray, LineStringBuilder, MultiLineStringArray, + MultiLineStringBuilder, MultiPointArray, MultiPointBuilder, MultiPolygonArray, + MultiPolygonBuilder, PointArray, PointBuilder, PolygonArray, PolygonBuilder, WKBArray, }; use crate::datatypes::{mixed_data_type, Dimension, NativeType}; use crate::error::{GeoArrowError, Result}; @@ -96,34 +97,56 @@ impl MixedGeometryArray { pub fn new( type_ids: ScalarBuffer, offsets: ScalarBuffer, - points: PointArray, - line_strings: LineStringArray, - polygons: PolygonArray, - multi_points: MultiPointArray, - multi_line_strings: MultiLineStringArray, - multi_polygons: MultiPolygonArray, + points: Option, + line_strings: Option, + polygons: Option, + multi_points: Option, + multi_line_strings: Option, + multi_polygons: Option, metadata: Arc, ) -> Self { let mut 
coord_types = HashSet::new(); - coord_types.insert(points.coord_type()); - coord_types.insert(line_strings.coord_type()); - coord_types.insert(polygons.coord_type()); - coord_types.insert(multi_points.coord_type()); - coord_types.insert(multi_line_strings.coord_type()); - coord_types.insert(multi_polygons.coord_type()); - assert_eq!(coord_types.len(), 1); - - let coord_type = coord_types.into_iter().next().unwrap(); + if let Some(points) = &points { + coord_types.insert(points.coord_type()); + } + if let Some(line_strings) = &line_strings { + coord_types.insert(line_strings.coord_type()); + } + if let Some(polygons) = &polygons { + coord_types.insert(polygons.coord_type()); + } + if let Some(multi_points) = &multi_points { + coord_types.insert(multi_points.coord_type()); + } + if let Some(multi_line_strings) = &multi_line_strings { + coord_types.insert(multi_line_strings.coord_type()); + } + if let Some(multi_polygons) = &multi_polygons { + coord_types.insert(multi_polygons.coord_type()); + } + assert!(coord_types.len() <= 1); + let coord_type = coord_types.into_iter().next().unwrap_or_default(); let mut dimensions = HashSet::new(); - dimensions.insert(points.dimension()); - dimensions.insert(line_strings.dimension()); - dimensions.insert(polygons.dimension()); - dimensions.insert(multi_points.dimension()); - dimensions.insert(multi_line_strings.dimension()); - dimensions.insert(multi_polygons.dimension()); + if let Some(points) = &points { + dimensions.insert(points.dimension()); + } + if let Some(line_strings) = &line_strings { + dimensions.insert(line_strings.dimension()); + } + if let Some(polygons) = &polygons { + dimensions.insert(polygons.dimension()); + } + if let Some(multi_points) = &multi_points { + dimensions.insert(multi_points.dimension()); + } + if let Some(multi_line_strings) = &multi_line_strings { + dimensions.insert(multi_line_strings.dimension()); + } + if let Some(multi_polygons) = &multi_polygons { + 
dimensions.insert(multi_polygons.dimension()); + } assert_eq!(dimensions.len(), 1); - let dim = dimensions.into_iter().next().unwrap(); Self { @@ -131,12 +154,25 @@ impl MixedGeometryArray { dim, type_ids, offsets, - points, - line_strings, - polygons, - multi_points, - multi_line_strings, - multi_polygons, + points: points.unwrap_or( + PointBuilder::new_with_options(dim, coord_type, Default::default()).finish(), + ), + line_strings: line_strings.unwrap_or( + LineStringBuilder::new_with_options(dim, coord_type, Default::default()).finish(), + ), + polygons: polygons.unwrap_or( + PolygonBuilder::new_with_options(dim, coord_type, Default::default()).finish(), + ), + multi_points: multi_points.unwrap_or( + MultiPointBuilder::new_with_options(dim, coord_type, Default::default()).finish(), + ), + multi_line_strings: multi_line_strings.unwrap_or( + MultiLineStringBuilder::new_with_options(dim, coord_type, Default::default()) + .finish(), + ), + multi_polygons: multi_polygons.unwrap_or( + MultiPolygonBuilder::new_with_options(dim, coord_type, Default::default()).finish(), + ), metadata, slice_offset: 0, } @@ -391,12 +427,12 @@ impl MixedGeometryArray { Self::new( self.type_ids, self.offsets, - self.points.into_coord_type(coord_type), - self.line_strings.into_coord_type(coord_type), - self.polygons.into_coord_type(coord_type), - self.multi_points.into_coord_type(coord_type), - self.multi_line_strings.into_coord_type(coord_type), - self.multi_polygons.into_coord_type(coord_type), + Some(self.points.into_coord_type(coord_type)), + Some(self.line_strings.into_coord_type(coord_type)), + Some(self.polygons.into_coord_type(coord_type)), + Some(self.multi_points.into_coord_type(coord_type)), + Some(self.multi_line_strings.into_coord_type(coord_type)), + Some(self.multi_polygons.into_coord_type(coord_type)), self.metadata, ) } @@ -706,12 +742,12 @@ impl TryFrom<(&UnionArray, Dimension)> for MixedGeometryArray { Ok(Self::new( type_ids, offsets, - points.unwrap_or_default(), - 
line_strings.unwrap_or_default(), - polygons.unwrap_or_default(), - multi_points.unwrap_or_default(), - multi_line_strings.unwrap_or_default(), - multi_polygons.unwrap_or_default(), + points, + line_strings, + polygons, + multi_points, + multi_line_strings, + multi_polygons, Default::default(), )) } @@ -786,12 +822,12 @@ impl From for MixedGeometryArray { Self::new( ScalarBuffer::from(type_ids), ScalarBuffer::from_iter(0..value.len() as i32), - value, - Default::default(), - Default::default(), - Default::default(), - Default::default(), - Default::default(), + Some(value), + None, + None, + None, + None, + None, metadata, ) } @@ -807,12 +843,12 @@ impl From for MixedGeometryArray { Self::new( ScalarBuffer::from(type_ids), ScalarBuffer::from_iter(0..value.len() as i32), - Default::default(), - value, - Default::default(), - Default::default(), - Default::default(), - Default::default(), + None, + Some(value), + None, + None, + None, + None, metadata, ) } @@ -828,12 +864,12 @@ impl From for MixedGeometryArray { Self::new( ScalarBuffer::from(type_ids), ScalarBuffer::from_iter(0..value.len() as i32), - Default::default(), - Default::default(), - value, - Default::default(), - Default::default(), - Default::default(), + None, + None, + Some(value), + None, + None, + None, metadata, ) } @@ -849,12 +885,12 @@ impl From for MixedGeometryArray { Self::new( ScalarBuffer::from(type_ids), ScalarBuffer::from_iter(0..value.len() as i32), - Default::default(), - Default::default(), - Default::default(), - value, - Default::default(), - Default::default(), + None, + None, + None, + Some(value), + None, + None, metadata, ) } @@ -870,12 +906,12 @@ impl From for MixedGeometryArray { Self::new( ScalarBuffer::from(type_ids), ScalarBuffer::from_iter(0..value.len() as i32), - Default::default(), - Default::default(), - Default::default(), - Default::default(), - value, - Default::default(), + None, + None, + None, + None, + Some(value), + None, metadata, ) } @@ -891,12 +927,12 @@ impl 
From for MixedGeometryArray { Self::new( ScalarBuffer::from(type_ids), ScalarBuffer::from_iter(0..value.len() as i32), - Default::default(), - Default::default(), - Default::default(), - Default::default(), - Default::default(), - value, + None, + None, + None, + None, + None, + Some(value), metadata, ) } diff --git a/rust/geoarrow/src/array/mixed/builder.rs b/rust/geoarrow/src/array/mixed/builder.rs index 196bd1b8..2ce259c5 100644 --- a/rust/geoarrow/src/array/mixed/builder.rs +++ b/rust/geoarrow/src/array/mixed/builder.rs @@ -529,12 +529,12 @@ impl From for MixedGeometryArray { Self::new( other.types.into(), other.offsets.into(), - other.points.into(), - other.line_strings.into(), - other.polygons.into(), - other.multi_points.into(), - other.multi_line_strings.into(), - other.multi_polygons.into(), + Some(other.points.into()), + Some(other.line_strings.into()), + Some(other.polygons.into()), + Some(other.multi_points.into()), + Some(other.multi_line_strings.into()), + Some(other.multi_polygons.into()), other.metadata, ) } diff --git a/rust/geoarrow/src/io/geozero/table/builder/anyvalue.rs b/rust/geoarrow/src/io/geozero/table/builder/anyvalue.rs index 2ae55327..7a978719 100644 --- a/rust/geoarrow/src/io/geozero/table/builder/anyvalue.rs +++ b/rust/geoarrow/src/io/geozero/table/builder/anyvalue.rs @@ -49,6 +49,7 @@ impl AnyBuilder { /// /// This is a relative hack around the geozero type system because we have an already-parsed /// datetime value and geozero only supports string-formatted timestamps. 
+ #[allow(dead_code)] pub(crate) fn from_timestamp_value_prefill( value: DateTime, row_index: usize, diff --git a/rust/geoarrow/src/io/geozero/table/builder/properties.rs b/rust/geoarrow/src/io/geozero/table/builder/properties.rs index 8e233fac..a5bd1316 100644 --- a/rust/geoarrow/src/io/geozero/table/builder/properties.rs +++ b/rust/geoarrow/src/io/geozero/table/builder/properties.rs @@ -55,6 +55,7 @@ impl PropertiesBatchBuilder { /// /// This is a relative hack around the geozero type system because we have an already-parsed /// datetime value and geozero only supports string-formatted timestamps. + #[allow(dead_code)] pub(crate) fn add_timestamp_property( &mut self, name: &str, From b61286935f941c34cc1dd30ba33a6952797d924e Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Fri, 13 Dec 2024 00:36:28 -0500 Subject: [PATCH 04/11] Initial DataFusion extension crate (#939) ### Change list - Initial UDFs that wrap native Rust code - Array types for Geometry, Box2D, Box3D, Point2D, and Point3D. 
- Function support matrix in the README compared to PostGIS --- Cargo.lock | 664 +++++++++++++++++- Cargo.toml | 2 +- .../src/algorithm/native/bounding_rect.rs | 78 ++ rust/geoarrow/src/algorithm/native/mod.rs | 1 + rust/geoarrow/src/array/geometry/array.rs | 407 +++++++++-- rust/geoarrow/src/array/rect/builder.rs | 19 + rust/geoarrow/src/datatypes.rs | 6 + rust/geodatafusion/Cargo.toml | 32 + rust/geodatafusion/README.md | 365 ++++++++++ rust/geodatafusion/src/data_types.rs | 52 ++ rust/geodatafusion/src/error.rs | 33 + rust/geodatafusion/src/lib.rs | 3 + rust/geodatafusion/src/udf/geos/mod.rs | 1 + rust/geodatafusion/src/udf/mod.rs | 2 + .../src/udf/native/accessors/coord_dim.rs | 96 +++ .../src/udf/native/accessors/envelope.rs | 73 ++ .../src/udf/native/accessors/line_string.rs | 99 +++ .../src/udf/native/accessors/mod.rs | 12 + .../src/udf/native/bounding_box/box.rs | 70 ++ .../src/udf/native/bounding_box/extrema.rs | 288 ++++++++ .../src/udf/native/bounding_box/mod.rs | 13 + .../src/udf/native/constructors/mod.rs | 9 + .../src/udf/native/constructors/point.rs | 234 ++++++ rust/geodatafusion/src/udf/native/io/mod.rs | 14 + rust/geodatafusion/src/udf/native/io/wkb.rs | 137 ++++ rust/geodatafusion/src/udf/native/io/wkt.rs | 137 ++++ .../src/udf/native/measurement/area.rs | 118 ++++ .../src/udf/native/measurement/mod.rs | 8 + rust/geodatafusion/src/udf/native/mod.rs | 20 + .../src/udf/native/processing/centroid.rs | 125 ++++ .../src/udf/native/processing/convex_hull.rs | 75 ++ .../src/udf/native/processing/mod.rs | 10 + 32 files changed, 3157 insertions(+), 46 deletions(-) create mode 100644 rust/geodatafusion/Cargo.toml create mode 100644 rust/geodatafusion/README.md create mode 100644 rust/geodatafusion/src/data_types.rs create mode 100644 rust/geodatafusion/src/error.rs create mode 100644 rust/geodatafusion/src/lib.rs create mode 100644 rust/geodatafusion/src/udf/geos/mod.rs create mode 100644 rust/geodatafusion/src/udf/mod.rs create mode 100644 
rust/geodatafusion/src/udf/native/accessors/coord_dim.rs create mode 100644 rust/geodatafusion/src/udf/native/accessors/envelope.rs create mode 100644 rust/geodatafusion/src/udf/native/accessors/line_string.rs create mode 100644 rust/geodatafusion/src/udf/native/accessors/mod.rs create mode 100644 rust/geodatafusion/src/udf/native/bounding_box/box.rs create mode 100644 rust/geodatafusion/src/udf/native/bounding_box/extrema.rs create mode 100644 rust/geodatafusion/src/udf/native/bounding_box/mod.rs create mode 100644 rust/geodatafusion/src/udf/native/constructors/mod.rs create mode 100644 rust/geodatafusion/src/udf/native/constructors/point.rs create mode 100644 rust/geodatafusion/src/udf/native/io/mod.rs create mode 100644 rust/geodatafusion/src/udf/native/io/wkb.rs create mode 100644 rust/geodatafusion/src/udf/native/io/wkt.rs create mode 100644 rust/geodatafusion/src/udf/native/measurement/area.rs create mode 100644 rust/geodatafusion/src/udf/native/measurement/mod.rs create mode 100644 rust/geodatafusion/src/udf/native/mod.rs create mode 100644 rust/geodatafusion/src/udf/native/processing/centroid.rs create mode 100644 rust/geodatafusion/src/udf/native/processing/convex_hull.rs create mode 100644 rust/geodatafusion/src/udf/native/processing/mod.rs diff --git a/Cargo.lock b/Cargo.lock index f00042c2..e63c7c98 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -97,6 +97,18 @@ dependencies = [ "num-traits", ] +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + [[package]] name = "arrow" version = "53.3.0" @@ -175,6 +187,7 @@ dependencies = [ "atoi", "base64 0.22.1", "chrono", + "comfy-table", "half", "lexical-core 1.0.2", "num", @@ 
-317,6 +330,24 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "async-compression" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df895a515f70646414f4b45c0b79082783b80552b373a68283012928df56f522" +dependencies = [ + "bzip2", + "flate2", + "futures-core", + "futures-io", + "memchr", + "pin-project-lite", + "tokio", + "xz2", + "zstd", + "zstd-safe", +] + [[package]] name = "async-stream" version = "0.3.6" @@ -465,6 +496,28 @@ dependencies = [ "serde", ] +[[package]] +name = "blake2" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +dependencies = [ + "digest", +] + +[[package]] +name = "blake3" +version = "1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8ee0c1824c4dea5b5f81736aff91bae041d2c07ee1192bec91054e10e3e601e" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -476,9 +529,9 @@ dependencies = [ [[package]] name = "brotli" -version = "6.0.0" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74f7971dbd9326d58187408ab83117d8ac1bb9c17b085fdacd1cf2f598719b6b" +checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -519,6 +572,27 @@ version = "1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "428d9aa8fbc0670b7b8d6030a7fadd0f86151cae55e4dbbece15f3780a3dfaf3" +[[package]] +name = "bzip2" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" +dependencies = [ + "bzip2-sys", + "libc", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.11+1.0.8" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "c_vec" version = "2.0.0" @@ -665,6 +739,17 @@ dependencies = [ "cc", ] +[[package]] +name = "comfy-table" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24f165e7b643266ea80cb858aed492ad9280e3e05ce24d4a99d7d7b889b6a4d9" +dependencies = [ + "strum", + "strum_macros", + "unicode-width", +] + [[package]] name = "const-oid" version = "0.9.6" @@ -691,6 +776,12 @@ dependencies = [ "tiny-keccak", ] +[[package]] +name = "constant_time_eq" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" + [[package]] name = "core-foundation" version = "0.9.4" @@ -847,6 +938,425 @@ dependencies = [ "memchr", ] +[[package]] +name = "dashmap" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] + +[[package]] +name = "datafusion" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbba0799cf6913b456ed07a94f0f3b6e12c62a5d88b10809e2284a0f2b915c05" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-ipc", + "arrow-schema", + "async-compression", + "async-trait", + "bytes", + "bzip2", + "chrono", + "dashmap", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-nested", + "datafusion-functions-window", + "datafusion-optimizer", + "datafusion-physical-expr", + 
"datafusion-physical-expr-common", + "datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-sql", + "flate2", + "futures", + "glob", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools 0.13.0", + "log", + "num_cpus", + "object_store", + "parking_lot", + "parquet", + "paste", + "pin-project-lite", + "rand", + "sqlparser", + "tempfile", + "tokio", + "tokio-util", + "url", + "uuid", + "xz2", + "zstd", +] + +[[package]] +name = "datafusion-catalog" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7493c5c2d40eec435b13d92e5703554f4efc7059451fcb8d3a79580ff0e45560" +dependencies = [ + "arrow-schema", + "async-trait", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-plan", + "parking_lot", +] + +[[package]] +name = "datafusion-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24953049ebbd6f8964f91f60aa3514e121b5e81e068e33b60e77815ab369b25c" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-schema", + "chrono", + "half", + "hashbrown 0.14.5", + "indexmap", + "instant", + "libc", + "num_cpus", + "object_store", + "parquet", + "paste", + "sqlparser", + "tokio", +] + +[[package]] +name = "datafusion-common-runtime" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f06df4ef76872e11c924d3c814fd2a8dd09905ed2e2195f71c857d78abd19685" +dependencies = [ + "log", + "tokio", +] + +[[package]] +name = "datafusion-execution" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bbdcb628d690f3ce5fea7de81642b514486d58ff9779a51f180a69a4eadb361" +dependencies = [ + "arrow", + "chrono", + "dashmap", + "datafusion-common", + "datafusion-expr", + "futures", + "hashbrown 0.14.5", + "log", + "object_store", + "parking_lot", + "rand", + "tempfile", + "url", +] + +[[package]] +name = 
"datafusion-expr" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8036495980e3131f706b7d33ab00b4492d73dc714e3cb74d11b50f9602a73246" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "chrono", + "datafusion-common", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr-common", + "indexmap", + "paste", + "serde_json", + "sqlparser", + "strum", + "strum_macros", +] + +[[package]] +name = "datafusion-expr-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4da0f3cb4669f9523b403d6b5a0ec85023e0ab3bf0183afd1517475b3e64fdd2" +dependencies = [ + "arrow", + "datafusion-common", + "itertools 0.13.0", + "paste", +] + +[[package]] +name = "datafusion-functions" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52c4012648b34853e40a2c6bcaa8772f837831019b68aca384fb38436dba162" +dependencies = [ + "arrow", + "arrow-buffer", + "base64 0.22.1", + "blake2", + "blake3", + "chrono", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "hashbrown 0.14.5", + "hex", + "itertools 0.13.0", + "log", + "md-5", + "rand", + "regex", + "sha2", + "unicode-segmentation", + "uuid", +] + +[[package]] +name = "datafusion-functions-aggregate" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5b8bb624597ba28ed7446df4a9bd7c7a7bde7c578b6b527da3f47371d5f6741" +dependencies = [ + "ahash", + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "half", + "indexmap", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-aggregate-common" +version = "43.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fb06208fc470bc8cf1ce2d9a1159d42db591f2c7264a8c1776b53ad8f675143" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "datafusion-physical-expr-common", + "rand", +] + +[[package]] +name = "datafusion-functions-nested" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fca25bbb87323716d05e54114666e942172ccca23c5a507e9c7851db6e965317" +dependencies = [ + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-physical-expr-common", + "itertools 0.13.0", + "log", + "paste", + "rand", +] + +[[package]] +name = "datafusion-functions-window" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ae23356c634e54c59f7c51acb7a5b9f6240ffb2cf997049a1a24a8a88598dbe" +dependencies = [ + "datafusion-common", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-window-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4b3d6ff7794acea026de36007077a06b18b89e4f9c3fea7f2215f9f7dd9059b" +dependencies = [ + "datafusion-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = "datafusion-optimizer" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec6241eb80c595fa0e1a8a6b69686b5cf3bd5fdacb8319582a0943b0bd788aa" +dependencies = [ + "arrow", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr", + "hashbrown 0.14.5", + "indexmap", + "itertools 0.13.0", + "log", + "paste", + "regex-syntax", +] + +[[package]] +name = 
"datafusion-physical-expr" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3370357b8fc75ec38577700644e5d1b0bc78f38babab99c0b8bd26bafb3e4335" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "arrow-string", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr-common", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools 0.13.0", + "log", + "paste", + "petgraph", +] + +[[package]] +name = "datafusion-physical-expr-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8b7734d94bf2fa6f6e570935b0ddddd8421179ce200065be97874e13d46a47b" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "hashbrown 0.14.5", + "rand", +] + +[[package]] +name = "datafusion-physical-optimizer" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eee8c479522df21d7b395640dff88c5ed05361852dce6544d7c98e9dbcebffe" +dependencies = [ + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-plan", + "itertools 0.13.0", +] + +[[package]] +name = "datafusion-physical-plan" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17e1fc2e2c239d14e8556f2622b19a726bf6bc6962cc00c71fc52626274bee24" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "futures", + "half", + 
"hashbrown 0.14.5", + "indexmap", + "itertools 0.13.0", + "log", + "once_cell", + "parking_lot", + "pin-project-lite", + "rand", + "tokio", +] + +[[package]] +name = "datafusion-sql" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e3a4ed41dbee20a5d947a59ca035c225d67dc9cbe869c10f66dcdf25e7ce51" +dependencies = [ + "arrow", + "arrow-array", + "arrow-schema", + "datafusion-common", + "datafusion-expr", + "indexmap", + "log", + "regex", + "sqlparser", + "strum", +] + [[package]] name = "dbase" version = "0.5.0" @@ -992,6 +1502,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + [[package]] name = "flatbuffers" version = "24.3.25" @@ -1335,6 +1851,28 @@ dependencies = [ "wkt 0.12.0", ] +[[package]] +name = "geodatafusion" +version = "0.1.0-dev" +dependencies = [ + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-csv", + "arrow-data", + "arrow-ipc", + "arrow-schema", + "async-stream", + "async-trait", + "datafusion", + "geo 0.29.3", + "geo-traits", + "geoarrow", + "thiserror", + "tokio", +] + [[package]] name = "geographiclib-rs" version = "0.2.4" @@ -1772,6 +2310,18 @@ dependencies = [ "hashbrown 0.15.2", ] +[[package]] +name = "instant" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "integer-encoding" version = "3.0.4" @@ -2073,6 +2623,17 @@ dependencies = [ "twox-hash", ] +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + 
"libc", + "pkg-config", +] + [[package]] name = "md-5" version = "0.10.6" @@ -2246,6 +2807,16 @@ dependencies = [ "libm", ] +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi 0.3.9", + "libc", +] + [[package]] name = "num_enum" version = "0.7.3" @@ -2396,9 +2967,9 @@ dependencies = [ [[package]] name = "parquet" -version = "53.1.0" +version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "310c46a70a3ba90d98fec39fa2da6d9d731e544191da6fb56c9d199484d0dd3e" +checksum = "2b449890367085eb65d7d3321540abc3d7babbd179ce31df0016e90719114191" dependencies = [ "ahash", "arrow-array", @@ -2415,7 +2986,7 @@ dependencies = [ "flate2", "futures", "half", - "hashbrown 0.14.5", + "hashbrown 0.15.2", "lz4_flex", "num", "num-bigint", @@ -2466,6 +3037,16 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "petgraph" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +dependencies = [ + "fixedbitset", + "indexmap", +] + [[package]] name = "phf" version = "0.11.2" @@ -3055,6 +3636,12 @@ dependencies = [ "untrusted", ] +[[package]] +name = "rustversion" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" + [[package]] name = "ryu" version = "1.0.18" @@ -3327,6 +3914,27 @@ dependencies = [ "unicode_categories", ] +[[package]] +name = "sqlparser" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fe11944a61da0da3f592e19a45ebe5ab92dc14a779907ff1f08fbb797bfefc7" +dependencies = [ 
+ "log", + "sqlparser_derive", +] + +[[package]] +name = "sqlparser_derive" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.79", +] + [[package]] name = "sqlx" version = "0.7.4" @@ -3551,6 +4159,28 @@ dependencies = [ "unicode-properties", ] +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.79", +] + [[package]] name = "subtle" version = "2.6.1" @@ -3899,6 +4529,12 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +[[package]] +name = "unicode-width" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" + [[package]] name = "unicode_categories" version = "0.1.1" @@ -3928,6 +4564,15 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" +[[package]] +name = "uuid" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" +dependencies = [ + "getrandom", +] + [[package]] name = "vcpkg" version = "0.2.15" @@ -4342,6 +4987,15 @@ dependencies = [ "rustix", ] +[[package]] +name = "xz2" +version = 
"0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + [[package]] name = "zerocopy" version = "0.7.35" diff --git a/Cargo.toml b/Cargo.toml index e9d56a31..6f0487d7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = ["rust/geoarrow"] +members = ["rust/geoarrow", "rust/geodatafusion"] exclude = ["js"] resolver = "2" diff --git a/rust/geoarrow/src/algorithm/native/bounding_rect.rs b/rust/geoarrow/src/algorithm/native/bounding_rect.rs index 12c64b49..9d8198ee 100644 --- a/rust/geoarrow/src/algorithm/native/bounding_rect.rs +++ b/rust/geoarrow/src/algorithm/native/bounding_rect.rs @@ -6,6 +6,11 @@ use geo_traits::{ MultiLineStringTrait, MultiPointTrait, MultiPolygonTrait, PointTrait, PolygonTrait, RectTrait, }; +use crate::array::*; +use crate::datatypes::{Dimension, NativeType}; +use crate::error::Result; +use crate::trait_::ArrayAccessor; + #[derive(Debug, Clone, Copy)] pub struct BoundingRect { minx: f64, @@ -301,4 +306,77 @@ pub fn bounding_rect_rect(geom: &impl RectTrait) -> ([f64; 2], [f64; 2] rect.into() } +/// Calculation of the bounding rectangle of a geometry. +pub trait BoundingRectArray { + type Output; + + fn bounding_rect(&self) -> Self::Output; +} + +/// Implementation that iterates over geo objects +macro_rules! 
array_impl { + ($type:ty, $bounding_rect_fn:ident) => { + impl BoundingRectArray for $type { + type Output = RectArray; + + fn bounding_rect(&self) -> Self::Output { + let mut builder = RectBuilder::with_capacity_and_options( + Dimension::XY, + self.len(), + self.metadata().clone(), + ); + for geom in self.iter() { + if let Some(geom) = geom { + let ([minx, miny], [maxx, maxy]) = $bounding_rect_fn(&geom); + builder.push_box2d(Some([minx, miny, maxx, maxy])); + } else { + builder.push_null(); + } + } + + builder.finish() + } + } + }; +} + +array_impl!(PointArray, bounding_rect_point); +array_impl!(LineStringArray, bounding_rect_linestring); +array_impl!(PolygonArray, bounding_rect_polygon); +array_impl!(MultiPointArray, bounding_rect_multipoint); +array_impl!(MultiLineStringArray, bounding_rect_multilinestring); +array_impl!(MultiPolygonArray, bounding_rect_multipolygon); +array_impl!(MixedGeometryArray, bounding_rect_geometry); +array_impl!(GeometryCollectionArray, bounding_rect_geometry_collection); +array_impl!(GeometryArray, bounding_rect_geometry); + +impl BoundingRectArray for RectArray { + type Output = RectArray; + + fn bounding_rect(&self) -> Self::Output { + self.clone() + } +} + +impl BoundingRectArray for &dyn NativeArray { + type Output = Result; + + fn bounding_rect(&self) -> Self::Output { + use NativeType::*; + + let result = match self.data_type() { + Point(_, _) => self.as_point().bounding_rect(), + LineString(_, _) => self.as_line_string().bounding_rect(), + Polygon(_, _) => self.as_polygon().bounding_rect(), + MultiPoint(_, _) => self.as_multi_point().bounding_rect(), + MultiLineString(_, _) => self.as_multi_line_string().bounding_rect(), + MultiPolygon(_, _) => self.as_multi_polygon().bounding_rect(), + GeometryCollection(_, _) => self.as_geometry_collection().bounding_rect(), + Geometry(_) => self.as_geometry().bounding_rect(), + Rect(_) => self.as_rect().bounding_rect(), + }; + Ok(result) + } +} + // TODO: add tests from geo diff --git 
a/rust/geoarrow/src/algorithm/native/mod.rs b/rust/geoarrow/src/algorithm/native/mod.rs index 9509e355..f6a896fe 100644 --- a/rust/geoarrow/src/algorithm/native/mod.rs +++ b/rust/geoarrow/src/algorithm/native/mod.rs @@ -19,6 +19,7 @@ pub(crate) mod type_id; mod unary; pub use binary::Binary; +pub use bounding_rect::BoundingRectArray; pub use cast::Cast; pub use concatenate::Concatenate; pub use downcast::{Downcast, DowncastTable}; diff --git a/rust/geoarrow/src/array/geometry/array.rs b/rust/geoarrow/src/array/geometry/array.rs index 0703b31f..b1c0fce6 100644 --- a/rust/geoarrow/src/array/geometry/array.rs +++ b/rust/geoarrow/src/array/geometry/array.rs @@ -996,49 +996,376 @@ impl TryFrom> for GeometryArray { } } -macro_rules! impl_to_geometry_array { - ($source_array:ty, $typeid_xy:expr, $typeid_xyz:expr, $child_xy:ident, $child_xyz:ident) => { - impl From<$source_array> for GeometryArray { - fn from(value: $source_array) -> Self { - let dim = value.dimension(); - let type_ids = match dim { - Dimension::XY => vec![$typeid_xy; value.len()], - Dimension::XYZ => vec![$typeid_xyz; value.len()], - }; - let mut slf = Self { - data_type: NativeType::Geometry(value.coord_type()), - metadata: value.metadata().clone(), - type_ids: type_ids.into(), - offsets: ScalarBuffer::from_iter(0..value.len() as i32), - ..Default::default() - }; - match dim { - Dimension::XY => { - slf.$child_xy = value; - } - Dimension::XYZ => { - slf.$child_xyz = value; - } - } - slf - } +impl From for GeometryArray { + fn from(value: PointArray) -> Self { + let dim = value.dimension(); + let type_ids = match dim { + Dimension::XY => vec![1; value.len()], + Dimension::XYZ => vec![11; value.len()], + } + .into(); + let offsets = ScalarBuffer::from_iter(0..value.len() as i32); + let metadata = value.metadata().clone(); + match dim { + Dimension::XY => Self::new( + type_ids, + offsets, + Some(value), + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + 
metadata, + ), + Dimension::XYZ => Self::new( + type_ids, + offsets, + None, + None, + None, + None, + None, + None, + None, + Some(value), + None, + None, + None, + None, + None, + None, + metadata, + ), + } + } +} + +impl From for GeometryArray { + fn from(value: LineStringArray) -> Self { + let dim = value.dimension(); + let type_ids = match dim { + Dimension::XY => vec![2; value.len()], + Dimension::XYZ => vec![12; value.len()], + } + .into(); + let offsets = ScalarBuffer::from_iter(0..value.len() as i32); + let metadata = value.metadata().clone(); + match dim { + Dimension::XY => Self::new( + type_ids, + offsets, + None, + Some(value), + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + metadata, + ), + Dimension::XYZ => Self::new( + type_ids, + offsets, + None, + None, + None, + None, + None, + None, + None, + None, + Some(value), + None, + None, + None, + None, + None, + metadata, + ), + } + } +} + +impl From for GeometryArray { + fn from(value: PolygonArray) -> Self { + let dim = value.dimension(); + let type_ids = match dim { + Dimension::XY => vec![3; value.len()], + Dimension::XYZ => vec![13; value.len()], + } + .into(); + let offsets = ScalarBuffer::from_iter(0..value.len() as i32); + let metadata = value.metadata().clone(); + match dim { + Dimension::XY => Self::new( + type_ids, + offsets, + None, + None, + Some(value), + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + metadata, + ), + Dimension::XYZ => Self::new( + type_ids, + offsets, + None, + None, + None, + None, + None, + None, + None, + None, + None, + Some(value), + None, + None, + None, + None, + metadata, + ), + } + } +} + +impl From for GeometryArray { + fn from(value: MultiPointArray) -> Self { + let dim = value.dimension(); + let type_ids = match dim { + Dimension::XY => vec![4; value.len()], + Dimension::XYZ => vec![14; value.len()], } - }; + .into(); + let offsets = 
ScalarBuffer::from_iter(0..value.len() as i32); + let metadata = value.metadata().clone(); + match dim { + Dimension::XY => Self::new( + type_ids, + offsets, + None, + None, + None, + Some(value), + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + metadata, + ), + Dimension::XYZ => Self::new( + type_ids, + offsets, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + Some(value), + None, + None, + None, + metadata, + ), + } + } +} + +impl From for GeometryArray { + fn from(value: MultiLineStringArray) -> Self { + let dim = value.dimension(); + let type_ids = match dim { + Dimension::XY => vec![5; value.len()], + Dimension::XYZ => vec![15; value.len()], + } + .into(); + let offsets = ScalarBuffer::from_iter(0..value.len() as i32); + let metadata = value.metadata().clone(); + match dim { + Dimension::XY => Self::new( + type_ids, + offsets, + None, + None, + None, + None, + Some(value), + None, + None, + None, + None, + None, + None, + None, + None, + None, + metadata, + ), + Dimension::XYZ => Self::new( + type_ids, + offsets, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + Some(value), + None, + None, + metadata, + ), + } + } +} + +impl From for GeometryArray { + fn from(value: MultiPolygonArray) -> Self { + let dim = value.dimension(); + let type_ids = match dim { + Dimension::XY => vec![6; value.len()], + Dimension::XYZ => vec![16; value.len()], + } + .into(); + let offsets = ScalarBuffer::from_iter(0..value.len() as i32); + let metadata = value.metadata().clone(); + match dim { + Dimension::XY => Self::new( + type_ids, + offsets, + None, + None, + None, + None, + None, + Some(value), + None, + None, + None, + None, + None, + None, + None, + None, + metadata, + ), + Dimension::XYZ => Self::new( + type_ids, + offsets, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + Some(value), + None, + metadata, + ), + } + } } 
-impl_to_geometry_array!(PointArray, 1, 11, point_xy, point_xyz); -impl_to_geometry_array!(LineStringArray, 2, 12, line_string_xy, line_string_xy); -impl_to_geometry_array!(PolygonArray, 3, 13, polygon_xy, polygon_xyz); -impl_to_geometry_array!(MultiPointArray, 4, 14, mpoint_xy, mpoint_xyz); -impl_to_geometry_array!( - MultiLineStringArray, - 5, - 15, - mline_string_xy, - mline_string_xyz -); -impl_to_geometry_array!(MultiPolygonArray, 6, 16, mpolygon_xy, mpolygon_xyz); -impl_to_geometry_array!(GeometryCollectionArray, 7, 17, gc_xy, gc_xyz); +impl From for GeometryArray { + fn from(value: GeometryCollectionArray) -> Self { + let dim = value.dimension(); + let type_ids = match dim { + Dimension::XY => vec![7; value.len()], + Dimension::XYZ => vec![17; value.len()], + } + .into(); + let offsets = ScalarBuffer::from_iter(0..value.len() as i32); + let metadata = value.metadata().clone(); + match dim { + Dimension::XY => Self::new( + type_ids, + offsets, + None, + None, + None, + None, + None, + None, + Some(value), + None, + None, + None, + None, + None, + None, + None, + metadata, + ), + Dimension::XYZ => Self::new( + type_ids, + offsets, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + Some(value), + metadata, + ), + } + } +} impl From for GeometryArray { fn from(value: MixedGeometryArray) -> Self { diff --git a/rust/geoarrow/src/array/rect/builder.rs b/rust/geoarrow/src/array/rect/builder.rs index 4136021d..4f589156 100644 --- a/rust/geoarrow/src/array/rect/builder.rs +++ b/rust/geoarrow/src/array/rect/builder.rs @@ -149,6 +149,25 @@ impl RectBuilder { self.push_rect(None::<&Rect>); } + /// Push a 2D box to the builder. + /// + /// The array should be `[minx, miny, maxx, maxy]`. + #[inline] + pub fn push_box2d(&mut self, value: Option<[f64; 4]>) { + if let Some(value) = value { + self.lower + .push_coord(&geo::coord! { x: value[0], y: value[1] }); + self.upper + .push_coord(&geo::coord! 
{ x: value[2], y: value[3] }); + self.validity.append_non_null() + } else { + // Since it's a struct, we still need to push coords when null + self.lower.push_nan_coord(); + self.upper.push_nan_coord(); + self.validity.append_null(); + } + } + /// Create this builder from a iterator of Rects. pub fn from_rects<'a>( geoms: impl ExactSizeIterator + 'a)>, diff --git a/rust/geoarrow/src/datatypes.rs b/rust/geoarrow/src/datatypes.rs index 8857648a..4258ec08 100644 --- a/rust/geoarrow/src/datatypes.rs +++ b/rust/geoarrow/src/datatypes.rs @@ -146,6 +146,12 @@ pub enum NativeType { Geometry(CoordType), } +impl From for DataType { + fn from(value: NativeType) -> Self { + value.to_data_type() + } +} + /// A type enum representing "serialized" GeoArrow geometry types. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum SerializedType { diff --git a/rust/geodatafusion/Cargo.toml b/rust/geodatafusion/Cargo.toml new file mode 100644 index 00000000..dcbcfc1e --- /dev/null +++ b/rust/geodatafusion/Cargo.toml @@ -0,0 +1,32 @@ + +[package] +name = "geodatafusion" +version = "0.1.0-dev" +authors = ["Kyle Barron "] +edition = "2021" +license = "MIT OR Apache-2.0" +repository = "https://github.com/geoarrow/geoarrow-rs" +description = "Rust implementation of GeoArrow" +categories = ["science::geo"] +rust-version = "1.82" + + +[dependencies] +datafusion = "43" +arrow = { version = "53.3", features = ["ffi"] } +arrow-array = { version = "53.3", features = ["chrono-tz"] } +arrow-buffer = "53.3" +arrow-cast = { version = "53.3" } +arrow-csv = { version = "53", optional = true } +arrow-data = "53.3" +arrow-ipc = "53.3" +arrow-schema = "53.3" +async-stream = { version = "0.3", optional = true } +async-trait = { version = "0.1", optional = true } +geo = "0.29.3" +geo-traits = "0.2" +geoarrow = { path = "../geoarrow", features = ["flatgeobuf"] } +thiserror = "1" + +[dev-dependencies] +tokio = { version = "1.9", features = ["macros", "fs", "rt-multi-thread"] } diff --git 
a/rust/geodatafusion/README.md b/rust/geodatafusion/README.md new file mode 100644 index 00000000..c8f1cde1 --- /dev/null +++ b/rust/geodatafusion/README.md @@ -0,0 +1,365 @@ +# `geodatafusion` + +Spatial extensions for [Apache DataFusion](https://datafusion.apache.org/), an extensible query engine written in Rust that uses Apache Arrow as its in-memory format. + +## Functions supported + +### Geometry Constructors + +| Name | Implemented | Description | +| --------------------- | ----------- | -------------------------------------------------------------------------------------------------------------------------- | +| ST_Collect | | Creates a GeometryCollection or Multi\* geometry from a set of geometries. | +| ST_LineFromMultiPoint | | Creates a LineString from a MultiPoint geometry. | +| ST_MakeEnvelope | | Creates a rectangular Polygon from minimum and maximum coordinates. | +| ST_MakeLine | | Creates a LineString from Point, MultiPoint, or LineString geometries. | +| ST_MakePoint | ✅ | Creates a 2D, 3DZ or 4D Point. | +| ST_MakePointM | | Creates a Point from X, Y and M values. | +| ST_MakePolygon | | Creates a Polygon from a shell and optional list of holes. | +| ST_Point | ✅ | Creates a Point with X, Y and SRID values. | +| ST_PointZ | | Creates a Point with X, Y, Z and SRID values. | +| ST_PointM | | Creates a Point with X, Y, M and SRID values. | +| ST_PointZM | | Creates a Point with X, Y, Z, M and SRID values. | +| ST_Polygon | | Creates a Polygon from a LineString with a specified SRID. | +| ST_TileEnvelope | | Creates a rectangular Polygon in Web Mercator (SRID:3857) using the XYZ tile system. | +| ST_HexagonGrid | | Returns a set of hexagons and cell indices that completely cover the bounds of the geometry argument. | +| ST_Hexagon | | Returns a single hexagon, using the provided edge size and cell coordinate within the hexagon grid space. 
| +| ST_SquareGrid | | Returns a set of grid squares and cell indices that completely cover the bounds of the geometry argument. | +| ST_Square | | Returns a single square, using the provided edge size and cell coordinate within the square grid space. | +| ST_Letters | | Returns the input letters rendered as geometry with a default start position at the origin and default text height of 100. | + +### Geometry Accessors + +| Name | Implemented | Description | +| ------------------- | ----------- | ------------------------------------------------------------------------------------------------------------------------- | +| GeometryType | | Returns the type of a geometry as text. | +| ST_Boundary | | Returns the boundary of a geometry. | +| ST_BoundingDiagonal | | Returns the diagonal of a geometry's bounding box. | +| ST_CoordDim | ✅ | Return the coordinate dimension of a geometry. | +| ST_Dimension | | Returns the topological dimension of a geometry. | +| ST_Dump | | Returns a set of geometry_dump rows for the components of a geometry. | +| ST_DumpPoints | | Returns a set of geometry_dump rows for the coordinates in a geometry. | +| ST_DumpSegments | | Returns a set of geometry_dump rows for the segments in a geometry. | +| ST_DumpRings | | Returns a set of geometry_dump rows for the exterior and interior rings of a Polygon. | +| ST_EndPoint | | Returns the last point of a LineString or CircularLineString. | +| ST_Envelope | ✅ | Returns a geometry representing the bounding box of a geometry. | +| ST_ExteriorRing | | Returns a LineString representing the exterior ring of a Polygon. | +| ST_GeometryN | | Return an element of a geometry collection. | +| ST_GeometryType | | Returns the SQL-MM type of a geometry as text. | +| ST_HasArc | | Tests if a geometry contains a circular arc | +| ST_InteriorRingN | | Returns the Nth interior ring (hole) of a Polygon. | +| ST_NumCurves | | Return the number of component curves in a CompoundCurve. 
| +| ST_CurveN | | Returns the Nth component curve geometry of a CompoundCurve. | +| ST_IsClosed | | Tests if a LineString's start and end points are coincident. For a PolyhedralSurface tests if it is closed (volumetric). | +| ST_IsCollection | | Tests if a geometry is a geometry collection type. | +| ST_IsEmpty | | Tests if a geometry is empty. | +| ST_IsPolygonCCW | | Tests if Polygons have exterior rings oriented counter-clockwise and interior rings oriented clockwise. | +| ST_IsPolygonCW | | Tests if Polygons have exterior rings oriented clockwise and interior rings oriented counter-clockwise. | +| ST_IsRing | | Tests if a LineString is closed and simple. | +| ST_IsSimple | | Tests if a geometry has no points of self-intersection or self-tangency. | +| ST_M | | Returns the M coordinate of a Point. | +| ST_MemSize | | Returns the amount of memory space a geometry takes. | +| ST_NDims | | Returns the coordinate dimension of a geometry. | +| ST_NPoints | | Returns the number of points (vertices) in a geometry. | +| ST_NRings | | Returns the number of rings in a polygonal geometry. | +| ST_NumGeometries | | Returns the number of elements in a geometry collection. | +| ST_NumInteriorRings | | Returns the number of interior rings (holes) of a Polygon. | +| ST_NumInteriorRing | | Returns the number of interior rings (holes) of a Polygon. Alias for ST_NumInteriorRings | +| ST_NumPatches | | Return the number of faces on a Polyhedral Surface. Will return null for non-polyhedral geometries. | +| ST_NumPoints | | Returns the number of points in a LineString or CircularString. | +| ST_PatchN | | Returns the Nth geometry (face) of a PolyhedralSurface. | +| ST_PointN | | Returns the Nth point in the first LineString or circular LineString in a geometry. | +| ST_Points | | Returns a MultiPoint containing the coordinates of a geometry. | +| ST_StartPoint | ✅ | Returns the first point of a LineString. | +| ST_Summary | | Returns a text summary of the contents of a geometry. 
| +| ST_X | | Returns the X coordinate of a Point. | +| ST_Y | | Returns the Y coordinate of a Point. | +| ST_Z | | Returns the Z coordinate of a Point. | +| ST_Zmflag | | Returns a code indicating the ZM coordinate dimension of a geometry. | +| ST_HasZ | | Checks if a geometry has a Z dimension. | +| ST_HasM | | Checks if a geometry has an M (measure) dimension. | + +### Geometry Editors + +| Name | Implemented | Description | +| -------------------------------- | ----------- | --------------------------------------------------------------------------------------------------- | +| ST_AddPoint | | Add a point to a LineString. | +| ST_CollectionExtract | | Given a geometry collection, returns a multi-geometry containing only elements of a specified type. | +| ST_CollectionHomogenize | | Returns the simplest representation of a geometry collection. | +| ST_CurveToLine | | Converts a geometry containing curves to a linear geometry. | +| ST_Scroll | | Change start point of a closed LineString. | +| ST_FlipCoordinates | | Returns a version of a geometry with X and Y axis flipped. | +| ST_Force2D | | Force the geometries into a "2-dimensional mode". | +| ST_Force3D | | Force the geometries into XYZ mode. This is an alias for ST_Force3DZ. | +| ST_Force3DZ | | Force the geometries into XYZ mode. | +| ST_Force3DM | | Force the geometries into XYM mode. | +| ST_Force4D | | Force the geometries into XYZM mode. | +| ST_ForceCollection | | Convert the geometry into a GEOMETRYCOLLECTION. | +| ST_ForceCurve | | Upcast a geometry into its curved type, if applicable. | +| ST_ForcePolygonCCW | | Orients all exterior rings counter-clockwise and all interior rings clockwise. | +| ST_ForcePolygonCW | | Orients all exterior rings clockwise and all interior rings counter-clockwise. | +| ST_ForceSFS | | Force the geometries to use SFS 1.1 geometry types only. | +| ST_ForceRHR | | Force the orientation of the vertices in a polygon to follow the Right-Hand-Rule. 
| +| ST_LineExtend | | Returns a line extended forwards and backwards by specified distances. | +| ST_LineToCurve | | Converts a linear geometry to a curved geometry. | +| ST_Multi | | Return the geometry as a MULTI\* geometry. | +| ST_Normalize | | Return the geometry in its canonical form. | +| ST_Project | | Returns a point projected from a start point by a distance and bearing (azimuth). | +| ST_QuantizeCoordinates | | Sets least significant bits of coordinates to zero | +| ST_RemovePoint | | Remove a point from a linestring. | +| ST_RemoveRepeatedPoints | | Returns a version of a geometry with duplicate points removed. | +| ST_RemoveIrrelevantPointsForView | | Removes points that are irrelevant for rendering a specific rectangular view of a geometry. | +| ST_RemoveSmallParts | | Removes small parts (polygon rings or linestrings) of a geometry. | +| ST_Reverse | | Return the geometry with vertex order reversed. | +| ST_Segmentize | | Returns a modified geometry/geography having no segment longer than a given distance. | +| ST_SetPoint | | Replace point of a linestring with a given point. | +| ST_ShiftLongitude | | Shifts the longitude coordinates of a geometry between -180..180 and 0..360. | +| ST_WrapX | | Wrap a geometry around an X value. | +| ST_SnapToGrid | | Snap all points of the input geometry to a regular grid. | +| ST_Snap | | Snap segments and vertices of input geometry to vertices of a reference geometry. | +| ST_SwapOrdinates | | Returns a version of the given geometry with given ordinate values swapped. | + +### Geometry Validation + +| Name | Implemented | Description | +| ---------------- | ----------- | -------------------------------------------------------------------------------------------- | +| ST_IsValid | | Tests if a geometry is well-formed in 2D. | +| ST_IsValidDetail | | Returns a valid_detail row stating if a geometry is valid or if not a reason and a location. 
| +| ST_IsValidReason | | Returns text stating if a geometry is valid, or a reason for invalidity. | +| ST_MakeValid | | Attempts to make an invalid geometry valid without losing vertices. | + +### Geometry Input + +#### Well-Known Text (WKT) + +| Name | Implemented | Description | +| -------------------- | ----------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- | +| ST_BdPolyFromText | | Construct a Polygon given an arbitrary collection of closed linestrings as a MultiLineString Well-Known text representation. | +| ST_BdMPolyFromText | | Construct a MultiPolygon given an arbitrary collection of closed linestrings as a MultiLineString text representation Well-Known text representation. | +| ST_GeogFromText | | Return a specified geography value from Well-Known Text representation or extended (WKT). | +| ST_GeographyFromText | | Return a specified geography value from Well-Known Text representation or extended (WKT). | +| ST_GeomCollFromText | | Makes a collection Geometry from collection WKT with the given SRID. If SRID is not given, it defaults to 0. | +| ST_GeomFromEWKT | | Return a specified ST_Geometry value from Extended Well-Known Text representation (EWKT). | +| ST_GeomFromMARC21 | | Takes MARC21/XML geographic data as input and returns a PostGIS geometry object. | +| ST_GeometryFromText | | Return a specified ST_Geometry value from Well-Known Text representation (WKT). This is an alias name for ST_GeomFromText | +| ST_GeomFromText | ✅ | Return a specified ST_Geometry value from Well-Known Text representation (WKT). | +| ST_LineFromText | | Makes a Geometry from WKT representation with the given SRID. If SRID is not given, it defaults to 0. | +| ST_MLineFromText | | Return a specified ST_MultiLineString value from WKT representation. | +| ST_MPointFromText | | Makes a Geometry from WKT with the given SRID. If SRID is not given, it defaults to 0. 
| +| ST_MPolyFromText | | Makes a MultiPolygon Geometry from WKT with the given SRID. If SRID is not given, it defaults to 0. | +| ST_PointFromText | | Makes a point Geometry from WKT with the given SRID. If SRID is not given, it defaults to unknown. | +| ST_PolygonFromText | | Makes a Geometry from WKT with the given SRID. If SRID is not given, it defaults to 0. | +| ST_WKTToSQL | | Return a specified ST_Geometry value from Well-Known Text representation (WKT). This is an alias name for ST_GeomFromText | + +#### Well-Known Binary (WKB) + +| Name | Implemented | Description | +| -------------------- | ----------- | --------------------------------------------------------------------------------------------------------------------------------------------- | +| ST_GeogFromWKB | | Creates a geography instance from a Well-Known Binary geometry representation (WKB) or extended Well Known Binary (EWKB). | +| ST_GeomFromEWKB | | Return a specified ST_Geometry value from Extended Well-Known Binary representation (EWKB). | +| ST_GeomFromWKB | ✅ | Creates a geometry instance from a Well-Known Binary geometry representation (WKB) and optional SRID. | +| ST_LineFromWKB | | Makes a LINESTRING from WKB with the given SRID | +| ST_LinestringFromWKB | | Makes a geometry from WKB with the given SRID. | +| ST_PointFromWKB | | Makes a geometry from WKB with the given SRID | +| ST_WKBToSQL | | Return a specified ST_Geometry value from Well-Known Binary representation (WKB). This is an alias name for ST_GeomFromWKB that takes no srid | + +#### Other Formats + +| Name | Implemented | Description | +| -------------------------- | ----------- | ------------------------------------------------------------------------------------------------------ | +| ST_Box2dFromGeoHash | | Return a BOX2D from a GeoHash string. | +| ST_GeomFromGeoHash | | Return a geometry from a GeoHash string. 
| +| ST_GeomFromGML | | Takes as input GML representation of geometry and outputs a PostGIS geometry object | +| ST_GeomFromGeoJSON | | Takes as input a geojson representation of a geometry and outputs a PostGIS geometry object | +| ST_GeomFromKML | | Takes as input KML representation of geometry and outputs a PostGIS geometry object | +| ST_GeomFromTWKB | | Creates a geometry instance from a TWKB ("Tiny Well-Known Binary") geometry representation. | +| ST_GMLToSQL | | Return a specified ST_Geometry value from GML representation. This is an alias name for ST_GeomFromGML | +| ST_LineFromEncodedPolyline | | Creates a LineString from an Encoded Polyline. | +| ST_PointFromGeoHash | | Return a point from a GeoHash string. | +| ST_FromFlatGeobufToTable | | Creates a table based on the structure of FlatGeobuf data. | +| ST_FromFlatGeobuf | | Reads FlatGeobuf data. | + +### Geometry Output + +#### Well-Known Text (WKT) + +| Name | Implemented | Description | +| --------- | ----------- | ------------------------------------------------------------------------------------------------ | +| ST_AsEWKT | | Return the Well-Known Text (WKT) representation of the geometry with SRID meta data. | +| ST_AsText | ✅ | Return the Well-Known Text (WKT) representation of the geometry/geography without SRID metadata. | + +#### Well-Known Binary (WKB) + +| Name | Implemented | Description | +| ------------ | ----------- | ------------------------------------------------------------------------------------------------------------- | +| ST_AsBinary | ✅ | Return the OGC/ISO Well-Known Binary (WKB) representation of the geometry/geography without SRID meta data. | +| ST_AsEWKB | | Return the Extended Well-Known Binary (EWKB) representation of the geometry with SRID meta data. | +| ST_AsHEXEWKB | | Returns a Geometry in HEXEWKB format (as text) using either little-endian (NDR) or big-endian (XDR) encoding. 
| + +#### Other Formats + +| Name | Implemented | Description | +| -------------------- | ----------- | ------------------------------------------------------------------------------------- | +| ST_AsEncodedPolyline | | Returns an Encoded Polyline from a LineString geometry. | +| ST_AsFlatGeobuf | | Return a FlatGeobuf representation of a set of rows. | +| ST_AsGeobuf | | Return a Geobuf representation of a set of rows. | +| ST_AsGeoJSON | | Return a geometry or feature in GeoJSON format. | +| ST_AsGML | | Return the geometry as a GML version 2 or 3 element. | +| ST_AsKML | | Return the geometry as a KML element. | +| ST_AsLatLonText | | Return the Degrees, Minutes, Seconds representation of the given point. | +| ST_AsMARC21 | | Returns geometry as a MARC21/XML record with a geographic datafield (034). | +| ST_AsMVTGeom | | Transforms a geometry into the coordinate space of a MVT tile. | +| ST_AsMVT | | Aggregate function returning a MVT representation of a set of rows. | +| ST_AsSVG | | Returns SVG path data for a geometry. | +| ST_AsTWKB | | Returns the geometry as TWKB, aka "Tiny Well-Known Binary" | +| ST_AsX3D | | Returns a Geometry in X3D xml node element format: ISO-IEC-19776-1.2-X3DEncodings-XML | +| ST_GeoHash | | Return a GeoHash representation of the geometry. | + +### Operators + +### Spatial Relationships + +### Measurement Functions + +| Name | Implemented | Description | +| ----------------------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------ | +| ST_Area | | Returns the area of a polygonal geometry. | +| ST_Azimuth | | Returns the north-based azimuth of a line between two points. | +| ST_Angle | | Returns the angle between two vectors defined by 3 or 4 points, or 2 lines. | +| ST_ClosestPoint | | Returns the 2D point on g1 that is closest to g2. This is the first point of the shortest line from one geometry to the other. 
| +| ST_3DClosestPoint | | Returns the 3D point on g1 that is closest to g2. This is the first point of the 3D shortest line. | +| ST_Distance | | Returns the distance between two geometry or geography values. | +| ST_3DDistance | | Returns the 3D cartesian minimum distance (based on spatial ref) between two geometries in projected units. | +| ST_DistanceSphere | | Returns minimum distance in meters between two lon/lat geometries using a spherical earth model. | +| ST_DistanceSpheroid | | Returns the minimum distance between two lon/lat geometries using a spheroidal earth model. | +| ST_FrechetDistance | | Returns the Fréchet distance between two geometries. | +| ST_HausdorffDistance | | Returns the Hausdorff distance between two geometries. | +| ST_Length | | Returns the 2D length of a linear geometry. | +| ST_Length2D | | Returns the 2D length of a linear geometry. Alias for ST_Length | +| ST_3DLength | | Returns the 3D length of a linear geometry. | +| ST_LengthSpheroid | | Returns the 2D or 3D length/perimeter of a lon/lat geometry on a spheroid. | +| ST_LongestLine | | Returns the 2D longest line between two geometries. | +| ST_3DLongestLine | | Returns the 3D longest line between two geometries | +| ST_MaxDistance | | Returns the 2D largest distance between two geometries in projected units. | +| ST_3DMaxDistance | | Returns the 3D cartesian maximum distance (based on spatial ref) between two geometries in projected units. | +| ST_MinimumClearance | | Returns the minimum clearance of a geometry, a measure of a geometry's robustness. | +| ST_MinimumClearanceLine | | Returns the two-point LineString spanning a geometry's minimum clearance. | +| ST_Perimeter | | Returns the length of the boundary of a polygonal geometry or geography. | +| ST_Perimeter2D | | Returns the 2D perimeter of a polygonal geometry. Alias for ST_Perimeter. | +| ST_3DPerimeter | | Returns the 3D perimeter of a polygonal geometry. 
| +| ST_ShortestLine | | Returns the 2D shortest line between two geometries | +| ST_3DShortestLine | | Returns the 3D shortest line between two geometries | + +### Overlay Functions + +| Name | Implemented | Description | +| ---------------- | ----------- | ------------------------------------------------------------------------------------------- | +| ST_ClipByBox2D | | Computes the portion of a geometry falling within a rectangle. | +| ST_Difference | | Computes a geometry representing the part of geometry A that does not intersect geometry B. | +| ST_Intersection | | Computes a geometry representing the shared portion of geometries A and B. | +| ST_MemUnion | | Aggregate function which unions geometries in a memory-efficient but slower way | +| ST_Node | | Nodes a collection of lines. | +| ST_Split | | Returns a collection of geometries created by splitting a geometry by another geometry. | +| ST_Subdivide | | Computes a rectilinear subdivision of a geometry. | +| ST_SymDifference | | Computes a geometry representing the portions of geometries A and B that do not intersect. | +| ST_UnaryUnion | | Computes the union of the components of a single geometry. | +| ST_Union | | Computes a geometry representing the point-set union of the input geometries. | + +### Geometry Processing + +| Name | Implemented | Description | +| --------------------------- | ----------- | ------------------------------------------------------------------------------------------------- | +| ST_Buffer | | Computes a geometry covering all points within a given distance from a geometry. | +| ST_BuildArea | | Creates a polygonal geometry formed by the linework of a geometry. | +| ST_Centroid | ✅ | Returns the geometric center of a geometry. 
| +| ST_ChaikinSmoothing | | Returns a smoothed version of a geometry, using the Chaikin algorithm | +| ST_ConcaveHull | | Computes a possibly concave geometry that contains all input geometry vertices | +| ST_ConvexHull | ✅ | Computes the convex hull of a geometry. | +| ST_DelaunayTriangles | | Returns the Delaunay triangulation of the vertices of a geometry. | +| ST_FilterByM | | Removes vertices based on their M value | +| ST_GeneratePoints | | Generates a multipoint of random points contained in a Polygon or MultiPolygon. | +| ST_GeometricMedian | | Returns the geometric median of a MultiPoint. | +| ST_LineMerge | | Return the lines formed by sewing together a MultiLineString. | +| ST_MaximumInscribedCircle | | Computes the largest circle contained within a geometry. | +| ST_LargestEmptyCircle | | Computes the largest circle not overlapping a geometry. | +| ST_MinimumBoundingCircle | | Returns the smallest circle polygon that contains a geometry. | +| ST_MinimumBoundingRadius | | Returns the center point and radius of the smallest circle that contains a geometry. | +| ST_OrientedEnvelope | | Returns a minimum-area rectangle containing a geometry. | +| ST_OffsetCurve | | Returns an offset line at a given distance and side from an input line. | +| ST_PointOnSurface | | Computes a point guaranteed to lie in a polygon, or on a geometry. | +| ST_Polygonize | | Computes a collection of polygons formed from the linework of a set of geometries. | +| ST_ReducePrecision | | Returns a valid geometry with points rounded to a grid tolerance. | +| ST_SharedPaths | | Returns a collection containing paths shared by the two input linestrings/multilinestrings. | +| ST_Simplify | | Returns a simplified representation of a geometry, using the Douglas-Peucker algorithm. | +| ST_SimplifyPreserveTopology | | Returns a simplified and valid representation of a geometry, using the Douglas-Peucker algorithm. 
| +| ST_SimplifyPolygonHull | | Computes a simplified topology-preserving outer or inner hull of a polygonal geometry. | +| ST_SimplifyVW | | Returns a simplified representation of a geometry, using the Visvalingam-Whyatt algorithm | +| ST_SetEffectiveArea | | Sets the effective area for each vertex, using the Visvalingam-Whyatt algorithm. | +| ST_TriangulatePolygon | | Computes the constrained Delaunay triangulation of polygons | +| ST_VoronoiLines | | Returns the boundaries of the Voronoi diagram of the vertices of a geometry. | +| ST_VoronoiPolygons | | Returns the cells of the Voronoi diagram of the vertices of a geometry. | + +### Coverages + +| Name | Implemented | Description | +| ----------------------- | ----------- | ------------------------------------------------------------------------------------ | +| ST_CoverageInvalidEdges | | Window function that finds locations where polygons fail to form a valid coverage. | +| ST_CoverageSimplify | | Window function that simplifies the edges of a polygonal coverage. | +| ST_CoverageUnion | | Computes the union of a set of polygons forming a coverage by removing shared edges. | + +### Affine Transformations + +| Name | Implemented | Description | +| ------------- | ----------- | -------------------------------------------------------------- | +| ST_Affine | | Apply a 3D affine transformation to a geometry. | +| ST_Rotate | | Rotates a geometry about an origin point. | +| ST_RotateX | | Rotates a geometry about the X axis. | +| ST_RotateY | | Rotates a geometry about the Y axis. | +| ST_RotateZ | | Rotates a geometry about the Z axis. | +| ST_Scale | | Scales a geometry by given factors. | +| ST_Translate | | Translates a geometry by given offsets. | +| ST_TransScale | | Translates and scales a geometry by given offsets and factors. 
| + +### Clustering Functions + +| Name | Implemented | Description | +| ------------------------- | ----------- | ------------------------------------------------------------------------------------------------------------------- | +| ST_ClusterDBSCAN | | Window function that returns a cluster id for each input geometry using the DBSCAN algorithm. | +| ST_ClusterIntersecting | | Aggregate function that clusters input geometries into connected sets. | +| ST_ClusterIntersectingWin | | Window function that returns a cluster id for each input geometry, clustering input geometries into connected sets. | +| ST_ClusterKMeans | | Window function that returns a cluster id for each input geometry using the K-means algorithm. | +| ST_ClusterWithin | | Aggregate function that clusters geometries by separation distance. | +| ST_ClusterWithinWin | | Window function that returns a cluster id for each input geometry, clustering using separation distance. | + +### Bounding Box Functions + +| Name | Implemented | Description | +| ------------------ | ----------- | ------------------------------------------------------------------------ | +| Box2D | ✅ | Returns a BOX2D representing the 2D extent of a geometry. | +| Box3D | | Returns a BOX3D representing the 3D extent of a geometry. | +| ST_EstimatedExtent | | Returns the estimated extent of a spatial table. | +| ST_Expand | | Returns a bounding box expanded from another bounding box or a geometry. | +| ST_Extent | | Aggregate function that returns the bounding box of geometries. | +| ST_3DExtent | | Aggregate function that returns the 3D bounding box of geometries. | +| ST_MakeBox2D | | Creates a BOX2D defined by two 2D point geometries. | +| ST_3DMakeBox | | Creates a BOX3D defined by two 3D point geometries. | +| ST_XMax | ✅ | Returns the X maxima of a 2D or 3D bounding box or a geometry. | +| ST_XMin | ✅ | Returns the X minima of a 2D or 3D bounding box or a geometry. 
| +| ST_YMax | ✅ | Returns the Y maxima of a 2D or 3D bounding box or a geometry. | +| ST_YMin | ✅ | Returns the Y minima of a 2D or 3D bounding box or a geometry. | +| ST_ZMax | | Returns the Z maxima of a 2D or 3D bounding box or a geometry. | +| ST_ZMin | | Returns the Z minima of a 2D or 3D bounding box or a geometry. | + +### Linear Referencing + +| Name | Implemented | Description | +| -------------------------- | ----------- | -------------------------------------------------------------------------- | +| ST_LineInterpolatePoint | | Returns a point interpolated along a line at a fractional location. | +| ST_3DLineInterpolatePoint | | Returns a point interpolated along a 3D line at a fractional location. | +| ST_LineInterpolatePoints | | Returns points interpolated along a line at a fractional interval. | +| ST_LineLocatePoint | | Returns the fractional location of the closest point on a line to a point. | +| ST_LineSubstring | | Returns the part of a line between two fractional locations. | +| ST_LocateAlong | | Returns the point(s) on a geometry that match a measure value. | +| ST_LocateBetween | | Returns the portions of a geometry that match a measure range. | +| ST_LocateBetweenElevations | | Returns the portions of a geometry that lie in an elevation (Z) range. | +| ST_InterpolatePoint | | Returns the interpolated measure of a geometry closest to a point. | +| ST_AddMeasure | | Interpolates measures along a linear geometry. 
| diff --git a/rust/geodatafusion/src/data_types.rs b/rust/geodatafusion/src/data_types.rs new file mode 100644 index 00000000..70959788 --- /dev/null +++ b/rust/geodatafusion/src/data_types.rs @@ -0,0 +1,52 @@ +use std::sync::Arc; + +use arrow_array::ArrayRef; +use datafusion::error::DataFusionError; +use datafusion::logical_expr::{Signature, Volatility}; +use geoarrow::array::{CoordType, GeometryArray, PointArray, RectArray}; +use geoarrow::datatypes::{Dimension, NativeType}; +use geoarrow::NativeArray; + +use crate::error::GeoDataFusionResult; + +pub const POINT2D_TYPE: NativeType = NativeType::Point(CoordType::Separated, Dimension::XY); +pub const POINT3D_TYPE: NativeType = NativeType::Point(CoordType::Separated, Dimension::XYZ); +pub const BOX2D_TYPE: NativeType = NativeType::Rect(Dimension::XY); +pub const BOX3D_TYPE: NativeType = NativeType::Rect(Dimension::XYZ); +pub const GEOMETRY_TYPE: NativeType = NativeType::Geometry(CoordType::Separated); + +pub(crate) fn any_single_geometry_type_input() -> Signature { + Signature::uniform( + 1, + vec![ + POINT2D_TYPE.into(), + POINT3D_TYPE.into(), + BOX2D_TYPE.into(), + BOX3D_TYPE.into(), + GEOMETRY_TYPE.into(), + ], + Volatility::Immutable, + ) +} + +/// This will not cast a PointArray to a GeometryArray +pub(crate) fn parse_to_native_array(array: ArrayRef) -> GeoDataFusionResult<Arc<dyn NativeArray>> { + let data_type = array.data_type(); + if data_type.equals_datatype(&POINT2D_TYPE.into()) { + let point_array = PointArray::try_from((array.as_ref(), Dimension::XY))?; + Ok(Arc::new(point_array)) + } else if data_type.equals_datatype(&POINT3D_TYPE.into()) { + let point_array = PointArray::try_from((array.as_ref(), Dimension::XYZ))?; + Ok(Arc::new(point_array)) + } else if data_type.equals_datatype(&BOX2D_TYPE.into()) { + let rect_array = RectArray::try_from((array.as_ref(), Dimension::XY))?; + Ok(Arc::new(rect_array)) + } else if data_type.equals_datatype(&BOX3D_TYPE.into()) { + let rect_array = RectArray::try_from((array.as_ref(), 
Dimension::XYZ))?; + Ok(Arc::new(rect_array)) + } else if data_type.equals_datatype(&GEOMETRY_TYPE.into()) { + Ok(Arc::new(GeometryArray::try_from(array.as_ref())?)) + } else { + Err(DataFusionError::Execution(format!("Unexpected input data type: {}", data_type)).into()) + } +} diff --git a/rust/geodatafusion/src/error.rs b/rust/geodatafusion/src/error.rs new file mode 100644 index 00000000..a219e01e --- /dev/null +++ b/rust/geodatafusion/src/error.rs @@ -0,0 +1,33 @@ +//! Defines [`GeoDataFusionError`], representing all errors returned by this crate. + +use arrow_schema::ArrowError; +use datafusion::error::DataFusionError; +use geoarrow::error::GeoArrowError; +use std::fmt::Debug; +use thiserror::Error; + +/// Enum with all errors in this crate. +#[derive(Error, Debug)] +pub(crate) enum GeoDataFusionError { + #[error(transparent)] + Arrow(#[from] ArrowError), + + #[error(transparent)] + DataFusion(#[from] DataFusionError), + + #[error(transparent)] + GeoArrow(#[from] GeoArrowError), +} + +/// Crate-specific result type. +pub(crate) type GeoDataFusionResult<T> = std::result::Result<T, GeoDataFusionError>; + +impl From<GeoDataFusionError> for DataFusionError { + fn from(value: GeoDataFusionError) -> Self { + match value { + GeoDataFusionError::Arrow(err) => DataFusionError::ArrowError(err, None), + GeoDataFusionError::DataFusion(err) => err, + GeoDataFusionError::GeoArrow(err) => DataFusionError::External(Box::new(err)), + } + } +} diff --git a/rust/geodatafusion/src/lib.rs b/rust/geodatafusion/src/lib.rs new file mode 100644 index 00000000..7581c8a9 --- /dev/null +++ b/rust/geodatafusion/src/lib.rs @@ -0,0 +1,3 @@ +pub(crate) mod data_types; +pub(crate) mod error; +pub mod udf; diff --git a/rust/geodatafusion/src/udf/geos/mod.rs b/rust/geodatafusion/src/udf/geos/mod.rs new file mode 100644 index 00000000..2491e36c --- /dev/null +++ b/rust/geodatafusion/src/udf/geos/mod.rs @@ -0,0 +1 @@ +//! User-defined functions that wrap the [geos] crate. 
diff --git a/rust/geodatafusion/src/udf/mod.rs b/rust/geodatafusion/src/udf/mod.rs new file mode 100644 index 00000000..1a7a9bd7 --- /dev/null +++ b/rust/geodatafusion/src/udf/mod.rs @@ -0,0 +1,2 @@ +pub mod geos; +pub mod native; diff --git a/rust/geodatafusion/src/udf/native/accessors/coord_dim.rs b/rust/geodatafusion/src/udf/native/accessors/coord_dim.rs new file mode 100644 index 00000000..531c25d2 --- /dev/null +++ b/rust/geodatafusion/src/udf/native/accessors/coord_dim.rs @@ -0,0 +1,96 @@ +use std::any::Any; +use std::sync::{Arc, OnceLock}; + +use arrow::array::UInt8Builder; +use arrow_schema::DataType; +use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER; +use datafusion::logical_expr::{ColumnarValue, Documentation, ScalarUDFImpl, Signature}; +use datafusion::scalar::ScalarValue; +use geo_traits::{GeometryTrait, PointTrait}; +use geoarrow::array::AsNativeArray; +use geoarrow::datatypes::NativeType; +use geoarrow::trait_::ArrayAccessor; + +use crate::data_types::{any_single_geometry_type_input, parse_to_native_array}; +use crate::error::GeoDataFusionResult; + +#[derive(Debug)] +pub(super) struct CoordDim { + signature: Signature, +} + +impl CoordDim { + pub fn new() -> Self { + Self { + signature: any_single_geometry_type_input(), + } + } +} + +static DOCUMENTATION: OnceLock = OnceLock::new(); + +impl ScalarUDFImpl for CoordDim { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "st_coorddim" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result { + Ok(DataType::UInt8) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result { + Ok(coord_dim_impl(args)?) 
+ } + + fn documentation(&self) -> Option<&Documentation> { + Some(DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_OTHER) + .with_description("Return the coordinate dimension of the ST_Geometry value.") + .with_argument("g1", "geometry") + .build() + .unwrap() + })) + } +} + +fn coord_dim_impl(args: &[ColumnarValue]) -> GeoDataFusionResult<ColumnarValue> { + let array = ColumnarValue::values_to_arrays(args)? + .into_iter() + .next() + .unwrap(); + let native_array = parse_to_native_array(array)?; + + match native_array.data_type() { + NativeType::Point(_, _) => { + let array_ref = native_array.as_ref(); + let arr = array_ref.as_point(); + let mut output_array = UInt8Builder::with_capacity(native_array.len()); + for geom in arr.iter() { + output_array.append_option(geom.map(|g| g.dim().size().try_into().unwrap())); + } + Ok(ColumnarValue::Array(Arc::new(output_array.finish()))) + } + NativeType::Rect(dim) => Ok(ColumnarValue::Scalar(ScalarValue::UInt8(Some( + dim.size().try_into().unwrap(), + )))), + NativeType::Geometry(_) => { + let array_ref = native_array.as_ref(); + let arr = array_ref.as_geometry(); + let mut output_array = UInt8Builder::with_capacity(native_array.len()); + for geom in arr.iter() { + output_array.append_option(geom.map(|g| g.dim().size().try_into().unwrap())); + } + Ok(ColumnarValue::Array(Arc::new(output_array.finish()))) + } + _ => unreachable!(), + } +} diff --git a/rust/geodatafusion/src/udf/native/accessors/envelope.rs b/rust/geodatafusion/src/udf/native/accessors/envelope.rs new file mode 100644 index 00000000..29b7438b --- /dev/null +++ b/rust/geodatafusion/src/udf/native/accessors/envelope.rs @@ -0,0 +1,73 @@ +use std::any::Any; +use std::sync::OnceLock; + +use arrow_schema::DataType; +use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER; +use datafusion::logical_expr::{ColumnarValue, Documentation, ScalarUDFImpl, Signature}; +use geoarrow::algorithm::native::BoundingRectArray; +use 
geoarrow::ArrayBase; + +use crate::data_types::{any_single_geometry_type_input, parse_to_native_array, GEOMETRY_TYPE}; +use crate::error::GeoDataFusionResult; + +#[derive(Debug)] +pub(super) struct Envelope { + signature: Signature, +} + +impl Envelope { + pub fn new() -> Self { + Self { + signature: any_single_geometry_type_input(), + } + } +} + +static DOCUMENTATION: OnceLock = OnceLock::new(); + +impl ScalarUDFImpl for Envelope { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "st_envelope" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result { + Ok(GEOMETRY_TYPE.into()) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result { + Ok(envelope_impl(args)?) + } + + fn documentation(&self) -> Option<&Documentation> { + Some(DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_OTHER) + .with_description( + "Computes the bounding box (envelope) of a geometry.", + ) + .with_argument("g1", "geometry") + .build() + .unwrap() + })) + } +} + +fn envelope_impl(args: &[ColumnarValue]) -> GeoDataFusionResult { + let array = ColumnarValue::values_to_arrays(args)? + .into_iter() + .next() + .unwrap(); + let native_array = parse_to_native_array(array)?; + // Since a RectArray is a valid normal geometry type for us, we don't have to cast it to a + // Geometry array. That just has overhead. + let output = native_array.as_ref().bounding_rect()?; + Ok(output.into_array_ref().into()) +} diff --git a/rust/geodatafusion/src/udf/native/accessors/line_string.rs b/rust/geodatafusion/src/udf/native/accessors/line_string.rs new file mode 100644 index 00000000..7375fbbb --- /dev/null +++ b/rust/geodatafusion/src/udf/native/accessors/line_string.rs @@ -0,0 +1,99 @@ +//! 
Accessors from LineString geometries + +use std::any::Any; +use std::sync::OnceLock; + +use arrow_schema::DataType; +use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER; +use datafusion::logical_expr::{ColumnarValue, Documentation, ScalarUDFImpl, Signature}; +use geo_traits::LineStringTrait; +use geoarrow::array::{AsNativeArray, CoordType, PointBuilder}; +use geoarrow::datatypes::Dimension; +use geoarrow::error::GeoArrowError; +use geoarrow::scalar::Geometry; +use geoarrow::trait_::ArrayAccessor; +use geoarrow::ArrayBase; + +use crate::data_types::{any_single_geometry_type_input, parse_to_native_array, POINT2D_TYPE}; +use crate::error::GeoDataFusionResult; + +#[derive(Debug)] +pub(super) struct StartPoint { + signature: Signature, +} + +impl StartPoint { + pub fn new() -> Self { + Self { + signature: any_single_geometry_type_input(), + } + } +} + +static START_POINT_DOCUMENTATION: OnceLock = OnceLock::new(); + +impl ScalarUDFImpl for StartPoint { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "st_startpoint" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result { + Ok(POINT2D_TYPE.into()) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result { + Ok(start_point_impl(args)?) + } + + fn documentation(&self) -> Option<&Documentation> { + Some(START_POINT_DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_OTHER) + .with_description( + "Returns the first point of a LINESTRING geometry as a POINT. Returns NULL if the input is not a LINESTRING", + ) + .with_argument("g1", "geometry") + .build() + .unwrap() + })) + } +} + +fn start_point_impl(args: &[ColumnarValue]) -> GeoDataFusionResult { + let array = ColumnarValue::values_to_arrays(args)? 
+ .into_iter() + .next() + .unwrap(); + let native_array = parse_to_native_array(array)?; + let native_array_ref = native_array.as_ref(); + let geometry_array = native_array_ref + .as_geometry_opt() + .ok_or(GeoArrowError::General( + "Expected Geometry-typed array in ST_StartPoint".to_string(), + ))?; + + let mut output_builder = PointBuilder::with_capacity_and_options( + Dimension::XY, + geometry_array.len(), + CoordType::Separated, + Default::default(), + ); + + for geom in geometry_array.iter() { + if let Some(Geometry::LineString(line_string)) = geom { + output_builder.push_coord(line_string.coord(0).as_ref()); + } else { + output_builder.push_null(); + } + } + + Ok(output_builder.finish().into_array_ref().into()) +} diff --git a/rust/geodatafusion/src/udf/native/accessors/mod.rs b/rust/geodatafusion/src/udf/native/accessors/mod.rs new file mode 100644 index 00000000..8118b6d0 --- /dev/null +++ b/rust/geodatafusion/src/udf/native/accessors/mod.rs @@ -0,0 +1,12 @@ +mod coord_dim; +mod envelope; +mod line_string; + +use datafusion::prelude::SessionContext; + +/// Register all provided functions for accessing geometry properties +pub fn register_udfs(ctx: &SessionContext) { + ctx.register_udf(coord_dim::CoordDim::new().into()); + ctx.register_udf(envelope::Envelope::new().into()); + ctx.register_udf(line_string::StartPoint::new().into()); +} diff --git a/rust/geodatafusion/src/udf/native/bounding_box/box.rs b/rust/geodatafusion/src/udf/native/bounding_box/box.rs new file mode 100644 index 00000000..c5d2c6ae --- /dev/null +++ b/rust/geodatafusion/src/udf/native/bounding_box/box.rs @@ -0,0 +1,70 @@ +use std::any::Any; +use std::sync::OnceLock; + +use arrow_schema::DataType; +use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER; +use datafusion::logical_expr::{ColumnarValue, Documentation, ScalarUDFImpl, Signature}; +use geoarrow::algorithm::native::BoundingRectArray; +use geoarrow::ArrayBase; + +use crate::data_types::{any_single_geometry_type_input, 
parse_to_native_array, BOX2D_TYPE}; +use crate::error::GeoDataFusionResult; + +#[derive(Debug)] +pub(super) struct Box2D { + signature: Signature, +} + +impl Box2D { + pub fn new() -> Self { + Self { + signature: any_single_geometry_type_input(), + } + } +} + +static DOCUMENTATION: OnceLock = OnceLock::new(); + +impl ScalarUDFImpl for Box2D { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "st_box2d" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result { + Ok(BOX2D_TYPE.into()) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result { + Ok(box2d_impl(args)?) + } + + fn documentation(&self) -> Option<&Documentation> { + Some(DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_OTHER) + .with_description("Returns a box2d representing the 2D extent of the geometry.") + .with_argument("geom", "geometry") + .build() + .unwrap() + })) + } +} + +// Note: this is exactly the same impl as ST_Envelope. Perhaps we should use an alias instead +fn box2d_impl(args: &[ColumnarValue]) -> GeoDataFusionResult { + let array = ColumnarValue::values_to_arrays(args)? 
+ .into_iter() + .next() + .unwrap(); + let native_array = parse_to_native_array(array)?; + let output = native_array.as_ref().bounding_rect()?; + Ok(output.into_array_ref().into()) +} diff --git a/rust/geodatafusion/src/udf/native/bounding_box/extrema.rs b/rust/geodatafusion/src/udf/native/bounding_box/extrema.rs new file mode 100644 index 00000000..62dbb361 --- /dev/null +++ b/rust/geodatafusion/src/udf/native/bounding_box/extrema.rs @@ -0,0 +1,288 @@ +use std::any::Any; +use std::sync::{Arc, OnceLock}; + +use arrow::array::Float64Builder; +use arrow_array::ArrayRef; +use arrow_schema::DataType; +use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER; +use datafusion::logical_expr::{ColumnarValue, Documentation, ScalarUDFImpl, Signature}; +use geo_traits::{CoordTrait, RectTrait}; +use geoarrow::algorithm::native::BoundingRectArray; +use geoarrow::array::RectArray; +use geoarrow::trait_::ArrayAccessor; + +use crate::data_types::{any_single_geometry_type_input, parse_to_native_array}; +use crate::error::GeoDataFusionResult; + +fn rect_array_from_array_ref(array: ArrayRef) -> GeoDataFusionResult { + let native_arr = parse_to_native_array(array)?; + Ok(native_arr.as_ref().bounding_rect()?) +} + +#[derive(Debug)] +pub(super) struct XMin { + signature: Signature, +} + +impl XMin { + pub(super) fn new() -> Self { + Self { + signature: any_single_geometry_type_input(), + } + } +} + +static XMIN_DOC: OnceLock = OnceLock::new(); + +impl ScalarUDFImpl for XMin { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "st_xmin" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result { + Ok(DataType::Float64) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result { + let arg = ColumnarValue::values_to_arrays(args)? 
+ .into_iter() + .next() + .unwrap(); + let mut output_array = Float64Builder::with_capacity(arg.len()); + + let rect_array = rect_array_from_array_ref(arg)?; + + for rect in rect_array.iter() { + output_array.append_option(rect.map(|r| r.min().x())); + } + Ok(ColumnarValue::from( + Arc::new(output_array.finish()) as ArrayRef + )) + } + + fn documentation(&self) -> Option<&Documentation> { + Some(XMIN_DOC.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_OTHER) + .with_description("Returns X minima of a bounding box 2d or 3d or a geometry") + .with_syntax_example("ST_XMin(geometry)") + .with_argument("box", "The geometry or box input") + .with_related_udf("st_xmin") + .with_related_udf("st_ymin") + .with_related_udf("st_zmin") + .with_related_udf("st_xmax") + .with_related_udf("st_ymax") + .with_related_udf("st_zmax") + .build() + .unwrap() + })) + } +} + +#[derive(Debug)] +pub(super) struct YMin { + signature: Signature, +} + +impl YMin { + pub(super) fn new() -> Self { + Self { + signature: any_single_geometry_type_input(), + } + } +} + +static YMIN_DOC: OnceLock = OnceLock::new(); + +impl ScalarUDFImpl for YMin { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "st_ymin" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result { + Ok(DataType::Float64) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result { + let arg = ColumnarValue::values_to_arrays(args)? 
+ .into_iter() + .next() + .unwrap(); + let mut output_array = Float64Builder::with_capacity(arg.len()); + + let rect_array = rect_array_from_array_ref(arg)?; + + for rect in rect_array.iter() { + output_array.append_option(rect.map(|r| r.min().y())); + } + Ok(ColumnarValue::from( + Arc::new(output_array.finish()) as ArrayRef + )) + } + + fn documentation(&self) -> Option<&Documentation> { + Some(YMIN_DOC.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_OTHER) + .with_description("Returns Y minima of a bounding box 2d or 3d or a geometry") + .with_syntax_example("ST_YMin(geometry)") + .with_argument("box", "The geometry or box input") + .with_related_udf("st_xmin") + .with_related_udf("st_ymin") + .with_related_udf("st_zmin") + .with_related_udf("st_xmax") + .with_related_udf("st_ymax") + .with_related_udf("st_zmax") + .build() + .unwrap() + })) + } +} + +#[derive(Debug)] +pub(super) struct XMax { + signature: Signature, +} + +impl XMax { + pub(super) fn new() -> Self { + Self { + signature: any_single_geometry_type_input(), + } + } +} + +static XMAX_DOC: OnceLock = OnceLock::new(); + +impl ScalarUDFImpl for XMax { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "st_xmax" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result { + Ok(DataType::Float64) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result { + let arg = ColumnarValue::values_to_arrays(args)? 
+ .into_iter() + .next() + .unwrap(); + let mut output_array = Float64Builder::with_capacity(arg.len()); + let rect_array = rect_array_from_array_ref(arg)?; + for rect in rect_array.iter() { + output_array.append_option(rect.map(|r| r.max().x())); + } + Ok(ColumnarValue::from( + Arc::new(output_array.finish()) as ArrayRef + )) + } + + fn documentation(&self) -> Option<&Documentation> { + Some(XMAX_DOC.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_OTHER) + .with_description("Returns X maxima of a bounding box 2d or 3d or a geometry") + .with_syntax_example("ST_XMax(geometry)") + .with_argument("box", "The geometry or box input") + .with_related_udf("st_xmin") + .with_related_udf("st_ymin") + .with_related_udf("st_zmin") + .with_related_udf("st_xmax") + .with_related_udf("st_ymax") + .with_related_udf("st_zmax") + .build() + .unwrap() + })) + } +} + +#[derive(Debug)] +pub(super) struct YMax { + signature: Signature, +} + +impl YMax { + pub(super) fn new() -> Self { + Self { + signature: any_single_geometry_type_input(), + } + } +} + +static YMAX_DOC: OnceLock = OnceLock::new(); + +impl ScalarUDFImpl for YMax { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "st_ymax" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result { + Ok(DataType::Float64) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result { + let arg = ColumnarValue::values_to_arrays(args)? 
+ .into_iter() + .next() + .unwrap(); + let mut output_array = Float64Builder::with_capacity(arg.len()); + let rect_array = rect_array_from_array_ref(arg)?; + for rect in rect_array.iter() { + output_array.append_option(rect.map(|r| r.max().y())); + } + Ok(ColumnarValue::from( + Arc::new(output_array.finish()) as ArrayRef + )) + } + + fn documentation(&self) -> Option<&Documentation> { + Some(YMAX_DOC.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_OTHER) + .with_description("Returns Y maxima of a bounding box 2d or 3d or a geometry") + .with_syntax_example("ST_YMax(geometry)") + .with_argument("box", "The geometry or box input") + .with_related_udf("st_xmin") + .with_related_udf("st_ymin") + .with_related_udf("st_zmin") + .with_related_udf("st_xmax") + .with_related_udf("st_ymax") + .with_related_udf("st_zmax") + .build() + .unwrap() + })) + } +} diff --git a/rust/geodatafusion/src/udf/native/bounding_box/mod.rs b/rust/geodatafusion/src/udf/native/bounding_box/mod.rs new file mode 100644 index 00000000..962ec875 --- /dev/null +++ b/rust/geodatafusion/src/udf/native/bounding_box/mod.rs @@ -0,0 +1,13 @@ +mod r#box; +mod extrema; + +use datafusion::prelude::SessionContext; + +/// Register all provided bounding box functions +pub fn register_udfs(ctx: &SessionContext) { + ctx.register_udf(extrema::XMin::new().into()); + ctx.register_udf(extrema::YMin::new().into()); + ctx.register_udf(extrema::XMax::new().into()); + ctx.register_udf(extrema::YMax::new().into()); + ctx.register_udf(r#box::Box2D::new().into()); +} diff --git a/rust/geodatafusion/src/udf/native/constructors/mod.rs b/rust/geodatafusion/src/udf/native/constructors/mod.rs new file mode 100644 index 00000000..7111a584 --- /dev/null +++ b/rust/geodatafusion/src/udf/native/constructors/mod.rs @@ -0,0 +1,9 @@ +mod point; + +use datafusion::prelude::SessionContext; + +/// Register all provided [geo] functions for constructing geometries +pub fn register_udfs(ctx: &SessionContext) { + 
ctx.register_udf(point::Point::new().into()); + ctx.register_udf(point::MakePoint::new().into()); +} diff --git a/rust/geodatafusion/src/udf/native/constructors/point.rs b/rust/geodatafusion/src/udf/native/constructors/point.rs new file mode 100644 index 00000000..c4e9b560 --- /dev/null +++ b/rust/geodatafusion/src/udf/native/constructors/point.rs @@ -0,0 +1,234 @@ +//! Point constructors + +use std::any::Any; +use std::sync::OnceLock; + +use arrow::array::AsArray; +use arrow::datatypes::Float64Type; +use arrow_schema::DataType; +use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER; +use datafusion::logical_expr::{ + ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature, Volatility, +}; +use geo_traits::CoordTrait; +use geoarrow::array::{CoordType, GeometryArray, PointBuilder}; +use geoarrow::datatypes::Dimension; +use geoarrow::ArrayBase; + +use crate::data_types::{POINT2D_TYPE, POINT3D_TYPE}; + +#[derive(Debug)] +pub(super) struct Point { + signature: Signature, +} + +impl Point { + pub fn new() -> Self { + Self { + signature: Signature::exact( + vec![DataType::Float64, DataType::Float64], + Volatility::Immutable, + ), + } + } +} + +static POINT_DOC: OnceLock = OnceLock::new(); + +impl ScalarUDFImpl for Point { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "st_point" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result { + Ok(POINT2D_TYPE.into()) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result { + let mut args = ColumnarValue::values_to_arrays(args)?.into_iter(); + let x = args.next().unwrap(); + let y = args.next().unwrap(); + + let x = x.as_primitive::(); + let y = y.as_primitive::(); + + let mut builder = PointBuilder::with_capacity_and_options( + Dimension::XY, + x.len(), + CoordType::Separated, + Default::default(), + ); + for (x, y) in x.iter().zip(y.iter()) { + if let 
(Some(x), Some(y)) = (x, y) { + builder.push_coord(Some(&geo::coord! { x: x, y: y})); + } else { + builder.push_null(); + } + } + + Ok(builder.finish().into_array_ref().into()) + } + + fn documentation(&self) -> Option<&Documentation> { + Some(POINT_DOC.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_OTHER) + .with_description("Returns a Point with the given X and Y coordinate values.") + .with_syntax_example("ST_Point(-71.104, 42.315)") + .with_argument("x", "x value") + .with_argument("y", "y value") + .with_related_udf("st_makepoint") + .with_related_udf("st_pointz") + .build() + .unwrap() + })) + } +} + +#[derive(Debug)] +pub(super) struct MakePoint { + signature: Signature, +} + +impl MakePoint { + pub fn new() -> Self { + Self { + signature: Signature::one_of( + vec![ + TypeSignature::Exact(vec![DataType::Float64, DataType::Float64]), + TypeSignature::Exact(vec![ + DataType::Float64, + DataType::Float64, + DataType::Float64, + ]), + ], + Volatility::Immutable, + ), + } + } +} + +static MAKE_POINT_DOC: OnceLock = OnceLock::new(); + +struct PointZ { + x: f64, + y: f64, + z: f64, +} + +impl CoordTrait for PointZ { + type T = f64; + + fn dim(&self) -> geo_traits::Dimensions { + geo_traits::Dimensions::Xyz + } + + fn x(&self) -> Self::T { + self.x + } + + fn y(&self) -> Self::T { + self.y + } + + fn nth_or_panic(&self, n: usize) -> Self::T { + match n { + 0 => self.x, + 1 => self.y, + 2 => self.z, + _ => panic!("invalid dimension index"), + } + } +} + +impl ScalarUDFImpl for MakePoint { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "st_makepoint" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> datafusion::error::Result { + match arg_types.len() { + 2 => Ok(POINT2D_TYPE.into()), + 3 => Ok(POINT3D_TYPE.into()), + _ => unreachable!(), + } + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result { + let mut args = 
ColumnarValue::values_to_arrays(args)?.into_iter(); + let x = args.next().unwrap(); + let y = args.next().unwrap(); + let z = args.next(); + + let x = x.as_primitive::(); + let y = y.as_primitive::(); + + let dim = if z.is_some() { + Dimension::XYZ + } else { + Dimension::XY + }; + let mut builder = PointBuilder::with_capacity_and_options( + dim, + x.len(), + CoordType::Separated, + Default::default(), + ); + + if let Some(z) = z { + let z = z.as_primitive::(); + + for ((x, y), z) in x.iter().zip(y.iter()).zip(z.iter()) { + if let (Some(x), Some(y), Some(z)) = (x, y, z) { + builder.push_coord(Some(&PointZ { x, y, z })); + } else { + builder.push_null(); + } + } + } else { + for (x, y) in x.iter().zip(y.iter()) { + if let (Some(x), Some(y)) = (x, y) { + builder.push_coord(Some(&geo::coord! { x: x, y: y})); + } else { + builder.push_null(); + } + } + } + + Ok(GeometryArray::from(builder.finish()) + .into_array_ref() + .into()) + } + + fn documentation(&self) -> Option<&Documentation> { + Some(MAKE_POINT_DOC.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_OTHER) + .with_description("Creates a 2D XY or 3D XYZ Point geometry.") + .with_syntax_example("ST_MakePoint(-71.104, 42.315)") + .with_argument("x", "x value") + .with_argument("y", "y value") + .with_argument("z", "z value") + .with_related_udf("st_point") + .with_related_udf("st_pointz") + .build() + .unwrap() + })) + } +} diff --git a/rust/geodatafusion/src/udf/native/io/mod.rs b/rust/geodatafusion/src/udf/native/io/mod.rs new file mode 100644 index 00000000..341beca7 --- /dev/null +++ b/rust/geodatafusion/src/udf/native/io/mod.rs @@ -0,0 +1,14 @@ +//! 
Geometry Input and Output + +mod wkb; +mod wkt; + +use datafusion::prelude::SessionContext; + +/// Register all provided functions for geometry input and output +pub fn register_udfs(ctx: &SessionContext) { + ctx.register_udf(wkb::AsBinary::new().into()); + ctx.register_udf(wkb::GeomFromWKB::new().into()); + ctx.register_udf(wkt::AsText::new().into()); + ctx.register_udf(wkt::GeomFromText::new().into()); +} diff --git a/rust/geodatafusion/src/udf/native/io/wkb.rs b/rust/geodatafusion/src/udf/native/io/wkb.rs new file mode 100644 index 00000000..7bda68fa --- /dev/null +++ b/rust/geodatafusion/src/udf/native/io/wkb.rs @@ -0,0 +1,137 @@ +use std::any::Any; +use std::sync::OnceLock; + +use arrow::array::AsArray; +use arrow_schema::DataType; +use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER; +use datafusion::logical_expr::{ + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, +}; +use geoarrow::array::{CoordType, WKBArray}; +use geoarrow::datatypes::NativeType; +use geoarrow::io::wkb::{from_wkb, to_wkb}; +use geoarrow::ArrayBase; + +use crate::data_types::{any_single_geometry_type_input, parse_to_native_array, GEOMETRY_TYPE}; +use crate::error::GeoDataFusionResult; + +#[derive(Debug)] +pub(super) struct AsBinary { + signature: Signature, +} + +impl AsBinary { + pub fn new() -> Self { + // TODO: extend to allow specifying little/big endian + Self { + signature: any_single_geometry_type_input(), + } + } +} + +static AS_BINARY_DOC: OnceLock = OnceLock::new(); + +impl ScalarUDFImpl for AsBinary { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "st_asbinary" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result { + Ok(DataType::Binary) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result { + Ok(as_binary_impl(args)?) 
+ } + + fn documentation(&self) -> Option<&Documentation> { + Some(AS_BINARY_DOC.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_OTHER) + .with_description( + "Returns the OGC/ISO Well-Known Binary (WKB) representation of the geometry.", + ) + .with_argument("g1", "geometry") + .build() + .unwrap() + })) + } +} + +fn as_binary_impl(args: &[ColumnarValue]) -> GeoDataFusionResult { + let array = ColumnarValue::values_to_arrays(args)? + .into_iter() + .next() + .unwrap(); + let native_array = parse_to_native_array(array)?; + let wkb_arr = to_wkb::(native_array.as_ref()); + Ok(wkb_arr.into_array_ref().into()) +} + +#[derive(Debug)] +pub(super) struct GeomFromWKB { + signature: Signature, +} + +impl GeomFromWKB { + pub fn new() -> Self { + Self { + signature: Signature::coercible(vec![DataType::Binary], Volatility::Immutable), + } + } +} + +static GEOM_FROM_WKB_DOC: OnceLock = OnceLock::new(); + +impl ScalarUDFImpl for GeomFromWKB { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "st_geomfromwkb" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result { + Ok(GEOMETRY_TYPE.into()) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result { + Ok(geom_from_wkb_impl(args)?) + } + + fn documentation(&self) -> Option<&Documentation> { + Some(GEOM_FROM_WKB_DOC.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_OTHER) + .with_description( + "Takes a well-known binary representation of a geometry and creates an instance of the appropriate geometry type", + ) + .with_argument("geom", "WKB buffers") + .build() + .unwrap() + })) + } +} + +fn geom_from_wkb_impl(args: &[ColumnarValue]) -> GeoDataFusionResult { + let array = ColumnarValue::values_to_arrays(args)? 
+ .into_iter() + .next() + .unwrap(); + let wkb_arr = WKBArray::new(array.as_binary::().clone(), Default::default()); + let native_arr = from_wkb(&wkb_arr, NativeType::Geometry(CoordType::Separated), false)?; + Ok(native_arr.to_array_ref().into()) +} diff --git a/rust/geodatafusion/src/udf/native/io/wkt.rs b/rust/geodatafusion/src/udf/native/io/wkt.rs new file mode 100644 index 00000000..1d46fb85 --- /dev/null +++ b/rust/geodatafusion/src/udf/native/io/wkt.rs @@ -0,0 +1,137 @@ +use std::any::Any; +use std::sync::OnceLock; + +use arrow::array::AsArray; +use arrow_schema::DataType; +use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER; +use datafusion::logical_expr::{ + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, +}; +use geoarrow::array::{CoordType, WKTArray}; +use geoarrow::io::wkt::{read_wkt, ToWKT}; +use geoarrow::ArrayBase; + +use crate::data_types::{any_single_geometry_type_input, parse_to_native_array, GEOMETRY_TYPE}; +use crate::error::GeoDataFusionResult; + +#[derive(Debug)] +pub(super) struct AsText { + signature: Signature, +} + +impl AsText { + pub fn new() -> Self { + // TODO: extend to allow specifying little/big endian + Self { + signature: any_single_geometry_type_input(), + } + } +} + +static AS_TEXT_DOC: OnceLock = OnceLock::new(); + +impl ScalarUDFImpl for AsText { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "st_astext" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result { + Ok(DataType::Utf8) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result { + Ok(as_text_impl(args)?) 
+ } + + fn documentation(&self) -> Option<&Documentation> { + Some(AS_TEXT_DOC.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_OTHER) + .with_description( + "Returns the OGC Well-Known Text (WKT) representation of the geometry/geography.", + ) + .with_argument("g1", "geometry") + .build() + .unwrap() + })) + } +} + +fn as_text_impl(args: &[ColumnarValue]) -> GeoDataFusionResult { + let array = ColumnarValue::values_to_arrays(args)? + .into_iter() + .next() + .unwrap(); + let native_array = parse_to_native_array(array)?; + let wkt_arr = native_array.as_ref().to_wkt::()?; + Ok(wkt_arr.into_array_ref().into()) +} + +#[derive(Debug)] +pub(super) struct GeomFromText { + signature: Signature, +} + +impl GeomFromText { + pub fn new() -> Self { + // Takes a single WKT string argument (anything coercible to Utf8) + Self { + signature: Signature::coercible(vec![DataType::Utf8], Volatility::Immutable), + } + } +} + +static GEOM_FROM_TEXT_DOC: OnceLock = OnceLock::new(); + +impl ScalarUDFImpl for GeomFromText { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "st_geomfromtext" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result { + Ok(GEOMETRY_TYPE.into()) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result { + Ok(geom_from_text_impl(args)?) + } + + fn documentation(&self) -> Option<&Documentation> { + Some(GEOM_FROM_TEXT_DOC.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_OTHER) + .with_description( + "Constructs a geometry object from the OGC Well-Known text representation.", + ) + .with_argument("wkt", "WKT strings") + .build() + .unwrap() + })) + } +} + +fn geom_from_text_impl(args: &[ColumnarValue]) -> GeoDataFusionResult { + let array = ColumnarValue::values_to_arrays(args)? 
+ .into_iter() + .next() + .unwrap(); + let wkt_arr = WKTArray::new(array.as_string::().clone(), Default::default()); + let native_arr = read_wkt(&wkt_arr, CoordType::Separated, false)?; + Ok(native_arr.to_array_ref().into()) +} diff --git a/rust/geodatafusion/src/udf/native/measurement/area.rs b/rust/geodatafusion/src/udf/native/measurement/area.rs new file mode 100644 index 00000000..0738105c --- /dev/null +++ b/rust/geodatafusion/src/udf/native/measurement/area.rs @@ -0,0 +1,118 @@ +use std::any::Any; +use std::sync::{Arc, OnceLock}; + +use arrow_schema::DataType; +use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER; +use datafusion::logical_expr::{ColumnarValue, Documentation, ScalarUDFImpl, Signature}; +use geoarrow::algorithm::geo::Area as _Area; + +use crate::data_types::{any_single_geometry_type_input, parse_to_native_array}; +use crate::error::GeoDataFusionResult; + +#[derive(Debug)] +pub(super) struct Area { + signature: Signature, +} + +impl Area { + pub fn new() -> Self { + Self { + signature: any_single_geometry_type_input(), + } + } +} + +static DOCUMENTATION: OnceLock = OnceLock::new(); + +impl ScalarUDFImpl for Area { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "st_area" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result { + Ok(DataType::Float64) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result { + Ok(area_impl(args)?) + } + + fn documentation(&self) -> Option<&Documentation> { + Some(DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_OTHER) + .with_description("Returns the area of a polygonal geometry.") + .with_argument("geom", "geometry") + .build() + .unwrap() + })) + } +} + +fn area_impl(args: &[ColumnarValue]) -> GeoDataFusionResult { + let array = ColumnarValue::values_to_arrays(args)? 
+ .into_iter() + .next() + .unwrap(); + let native_array = parse_to_native_array(array)?; + let area = native_array.as_ref().unsigned_area()?; + Ok(ColumnarValue::Array(Arc::new(area))) +} + +#[cfg(test)] +mod test { + use arrow_array::RecordBatch; + use arrow_schema::Schema; + use datafusion::error::Result; + use datafusion::prelude::SessionContext; + use geoarrow::algorithm::native::Cast; + use geoarrow::array::CoordType; + use geoarrow::datatypes::NativeType; + use geoarrow::io::flatgeobuf::read_flatgeobuf; + use std::fs::File; + use std::sync::Arc; + + use super::*; + + fn load_file() -> RecordBatch { + let mut file = File::open("../../fixtures/flatgeobuf/countries.fgb").unwrap(); + let table = read_flatgeobuf(&mut file, Default::default()).unwrap(); + let geometry = table.geometry_column(None).unwrap(); + let geometry = geometry + .as_ref() + .cast(NativeType::Geometry(CoordType::Separated)) + .unwrap(); + let field = geometry.extension_field(); + let chunk = geometry.array_refs()[0].clone(); + RecordBatch::try_new(Arc::new(Schema::new(vec![field])), vec![chunk]).unwrap() + } + + fn create_context() -> Result { + let ctx = SessionContext::new(); + + let batch = load_file(); + + ctx.register_batch("t", batch).unwrap(); + Ok(ctx) + } + + #[tokio::test] + async fn test() -> Result<()> { + let ctx = create_context()?; + ctx.register_udf(Area::new().into()); + + let sql_df = ctx.sql("SELECT ST_Area(geometry) FROM t;").await?; + // print the results + sql_df.show().await?; + + Ok(()) + } +} diff --git a/rust/geodatafusion/src/udf/native/measurement/mod.rs b/rust/geodatafusion/src/udf/native/measurement/mod.rs new file mode 100644 index 00000000..026bf7ad --- /dev/null +++ b/rust/geodatafusion/src/udf/native/measurement/mod.rs @@ -0,0 +1,8 @@ +mod area; + +use datafusion::prelude::SessionContext; + +/// Register all provided measurement functions +pub fn register_udfs(ctx: &SessionContext) { + ctx.register_udf(area::Area::new().into()); +} diff 
--git a/rust/geodatafusion/src/udf/native/mod.rs b/rust/geodatafusion/src/udf/native/mod.rs new file mode 100644 index 00000000..15ac8b46 --- /dev/null +++ b/rust/geodatafusion/src/udf/native/mod.rs @@ -0,0 +1,20 @@ +//! User-defined functions that wrap native Rust implementations. + +mod accessors; +mod bounding_box; +mod constructors; +mod io; +mod measurement; +mod processing; + +use datafusion::prelude::SessionContext; + +/// Register all provided [geo] functions +pub fn register_geo(ctx: &SessionContext) { + accessors::register_udfs(ctx); + bounding_box::register_udfs(ctx); + constructors::register_udfs(ctx); + io::register_udfs(ctx); + measurement::register_udfs(ctx); + processing::register_udfs(ctx); +} diff --git a/rust/geodatafusion/src/udf/native/processing/centroid.rs b/rust/geodatafusion/src/udf/native/processing/centroid.rs new file mode 100644 index 00000000..f5136bd3 --- /dev/null +++ b/rust/geodatafusion/src/udf/native/processing/centroid.rs @@ -0,0 +1,125 @@ +use std::any::Any; +use std::sync::OnceLock; + +use arrow_schema::DataType; +use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER; +use datafusion::logical_expr::{ColumnarValue, Documentation, ScalarUDFImpl, Signature}; +use geoarrow::algorithm::geo::Centroid as _Centroid; +use geoarrow::array::CoordType; +use geoarrow::ArrayBase; + +use crate::data_types::{any_single_geometry_type_input, parse_to_native_array, POINT2D_TYPE}; +use crate::error::GeoDataFusionResult; + +#[derive(Debug)] +pub(super) struct Centroid { + signature: Signature, +} + +impl Centroid { + pub fn new() -> Self { + Self { + signature: any_single_geometry_type_input(), + } + } +} + +static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new(); + +impl ScalarUDFImpl for Centroid { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "st_centroid" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result<DataType> { +
Ok(POINT2D_TYPE.into()) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result<ColumnarValue> { + Ok(centroid_impl(args)?) + } + + fn documentation(&self) -> Option<&Documentation> { + Some(DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_OTHER) + .with_description( + "Computes a point which is the geometric center of mass of a geometry.", + ) + .with_argument("g1", "geometry") + .build() + .unwrap() + })) + } +} + +fn centroid_impl(args: &[ColumnarValue]) -> GeoDataFusionResult<ColumnarValue> { + let array = ColumnarValue::values_to_arrays(args)? + .into_iter() + .next() + .unwrap(); + let native_array = parse_to_native_array(array)?; + let output = native_array.as_ref().centroid()?; + Ok(output + .into_coord_type(CoordType::Separated) + .into_array_ref() + .into()) +} + +#[cfg(test)] +mod test { + use arrow_array::RecordBatch; + use arrow_schema::Schema; + use datafusion::error::Result; + use datafusion::prelude::SessionContext; + use geoarrow::algorithm::native::Cast; + use geoarrow::array::CoordType; + use geoarrow::datatypes::NativeType; + use geoarrow::io::flatgeobuf::read_flatgeobuf; + use std::fs::File; + use std::sync::Arc; + + use super::*; + + fn load_file() -> RecordBatch { + let mut file = File::open("../../fixtures/flatgeobuf/countries.fgb").unwrap(); + let table = read_flatgeobuf(&mut file, Default::default()).unwrap(); + let geometry = table.geometry_column(None).unwrap(); + let geometry = geometry + .as_ref() + .cast(NativeType::Geometry(CoordType::Separated)) + .unwrap(); + let field = geometry.extension_field(); + let chunk = geometry.array_refs()[0].clone(); + RecordBatch::try_new(Arc::new(Schema::new(vec![field])), vec![chunk]).unwrap() + } + + fn create_context() -> Result<SessionContext> { + let ctx = SessionContext::new(); + + let batch = load_file(); + + ctx.register_batch("t", batch).unwrap(); + Ok(ctx) + } + + #[tokio::test] + async fn test() -> Result<()> { + let ctx = create_context()?; +
ctx.register_udf(Centroid::new().into()); + + let sql_df = ctx.sql("SELECT ST_centroid(geometry) FROM t;").await?; + // print the results + sql_df.show().await?; + + Ok(()) + } +} diff --git a/rust/geodatafusion/src/udf/native/processing/convex_hull.rs b/rust/geodatafusion/src/udf/native/processing/convex_hull.rs new file mode 100644 index 00000000..4b66b4c0 --- /dev/null +++ b/rust/geodatafusion/src/udf/native/processing/convex_hull.rs @@ -0,0 +1,75 @@ +use std::any::Any; +use std::sync::OnceLock; + +use arrow_schema::DataType; +use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER; +use datafusion::logical_expr::{ColumnarValue, Documentation, ScalarUDFImpl, Signature}; +use geoarrow::algorithm::geo::ConvexHull as _ConvexHull; +use geoarrow::array::{CoordType, GeometryArray}; +use geoarrow::ArrayBase; + +use crate::data_types::{any_single_geometry_type_input, parse_to_native_array, GEOMETRY_TYPE}; +use crate::error::GeoDataFusionResult; + +#[derive(Debug)] +pub(super) struct ConvexHull { + signature: Signature, +} + +impl ConvexHull { + pub fn new() -> Self { + Self { + signature: any_single_geometry_type_input(), + } + } +} + +static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new(); + +impl ScalarUDFImpl for ConvexHull { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "st_convexhull" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result<DataType> { + Ok(GEOMETRY_TYPE.into()) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result<ColumnarValue> { + Ok(convex_hull_impl(args)?) + } + + fn documentation(&self) -> Option<&Documentation> { + Some(DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_OTHER) + .with_description( + "Computes the convex hull of a geometry.
The convex hull is the smallest convex geometry that encloses all geometries in the input.", + ) + .with_argument("g1", "geometry") + .build() + .unwrap() + })) + } +} + +fn convex_hull_impl(args: &[ColumnarValue]) -> GeoDataFusionResult<ColumnarValue> { + let array = ColumnarValue::values_to_arrays(args)? + .into_iter() + .next() + .unwrap(); + let native_array = parse_to_native_array(array)?; + let output = native_array + .as_ref() + .convex_hull()? + .into_coord_type(CoordType::Separated); + Ok(GeometryArray::from(output).into_array_ref().into()) +} diff --git a/rust/geodatafusion/src/udf/native/processing/mod.rs b/rust/geodatafusion/src/udf/native/processing/mod.rs new file mode 100644 index 00000000..e6bbe9cb --- /dev/null +++ b/rust/geodatafusion/src/udf/native/processing/mod.rs @@ -0,0 +1,10 @@ +mod centroid; +mod convex_hull; + +use datafusion::prelude::SessionContext; + +/// Register all provided [geo] functions for processing geometries +pub fn register_udfs(ctx: &SessionContext) { + ctx.register_udf(centroid::Centroid::new().into()); + ctx.register_udf(convex_hull::ConvexHull::new().into()); +} From b999078f84bb373e7b18a017838ce2e356925745 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Fri, 13 Dec 2024 15:41:24 -0500 Subject: [PATCH 05/11] Update to latest datafusion main (#945) Uses latest datafusion main in an attempt to solve https://github.com/apache/datafusion/issues/13762. The example in https://github.com/apache/datafusion/issues/13762 is still failing (the tests added in this branch were switched to using `unwrap_err()` instead of `unwrap()`). I'll still merge this PR because there are other breaking changes in the upcoming release of datafusion, and I'd like to make other PRs here that use the latest API.
--- .github/workflows/ci.yml | 49 ++-- Cargo.lock | 217 ++++++++++-------- rust/geodatafusion/Cargo.toml | 2 +- .../src/udf/native/accessors/coord_dim.rs | 13 +- .../src/udf/native/accessors/envelope.rs | 15 +- .../src/udf/native/accessors/line_string.rs | 7 +- .../src/udf/native/bounding_box/box.rs | 13 +- .../src/udf/native/bounding_box/extrema.rs | 104 ++++----- .../src/udf/native/constructors/point.rs | 42 ++-- rust/geodatafusion/src/udf/native/io/mod.rs | 1 + .../src/udf/native/io/union_example.rs | 95 ++++++++ rust/geodatafusion/src/udf/native/io/wkb.rs | 24 +- rust/geodatafusion/src/udf/native/io/wkt.rs | 63 +++-- .../src/udf/native/measurement/area.rs | 13 +- rust/geodatafusion/src/udf/native/mod.rs | 4 +- .../src/udf/native/processing/centroid.rs | 15 +- .../native/processing/chaikin_smoothing.rs | 77 +++++++ .../src/udf/native/processing/convex_hull.rs | 7 +- .../src/udf/native/processing/mod.rs | 1 + 19 files changed, 483 insertions(+), 279 deletions(-) create mode 100644 rust/geodatafusion/src/udf/native/io/union_example.rs create mode 100644 rust/geodatafusion/src/udf/native/processing/chaikin_smoothing.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b4aaf395..f822e484 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -96,27 +96,28 @@ jobs: - name: Test run: cargo check ${{ matrix.args }} - build-benchmarks: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - with: - submodules: "recursive" - # We use nightly for now so that we can pass RUSTFLAGS below to work around - # https://github.com/geoarrow/geoarrow-rs/issues/716 - - uses: dtolnay/rust-toolchain@nightly - - uses: Swatinem/rust-cache@v2 - - uses: prefix-dev/setup-pixi@v0.8.1 - with: - activate-environment: true - cache: true - cache-write: ${{ github.event_name == 'push' && github.ref_name == 'main' }} - manifest-path: build/pixi.toml - - name: Tweak environment to find GDAL - run: | - echo 
"PKG_CONFIG_PATH=$(pwd)/build/.pixi/envs/default/lib/pkgconfig" >> "$GITHUB_ENV" - echo "LD_LIBRARY_PATH=$(pwd)/build/.pixi/envs/default/lib" >> "$GITHUB_ENV" - - name: Build benchmarks with no features - run: RUSTFLAGS="-Zinline-mir=no" cargo bench --no-run - - name: Build benchmarks with all features - run: RUSTFLAGS="-Zinline-mir=no" cargo bench --no-run --all-features + # We don't build benchmarks on CI because they're quite slow to compile + # build-benchmarks: + # runs-on: ubuntu-latest + # steps: + # - uses: actions/checkout@v4 + # with: + # submodules: "recursive" + # # We use nightly for now so that we can pass RUSTFLAGS below to work around + # # https://github.com/geoarrow/geoarrow-rs/issues/716 + # - uses: dtolnay/rust-toolchain@nightly + # - uses: Swatinem/rust-cache@v2 + # - uses: prefix-dev/setup-pixi@v0.8.1 + # with: + # activate-environment: true + # cache: true + # cache-write: ${{ github.event_name == 'push' && github.ref_name == 'main' }} + # manifest-path: build/pixi.toml + # - name: Tweak environment to find GDAL + # run: | + # echo "PKG_CONFIG_PATH=$(pwd)/build/.pixi/envs/default/lib/pkgconfig" >> "$GITHUB_ENV" + # echo "LD_LIBRARY_PATH=$(pwd)/build/.pixi/envs/default/lib" >> "$GITHUB_ENV" + # - name: Build benchmarks with no features + # run: RUSTFLAGS="-Zinline-mir=no" cargo bench --no-run + # - name: Build benchmarks with all features + # run: RUSTFLAGS="-Zinline-mir=no" cargo bench --no-run --all-features diff --git a/Cargo.lock b/Cargo.lock index e63c7c98..6b98331e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -955,10 +955,8 @@ dependencies = [ [[package]] name = "datafusion" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbba0799cf6913b456ed07a94f0f3b6e12c62a5d88b10809e2284a0f2b915c05" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ - "ahash", "arrow", "arrow-array", "arrow-ipc", @@ 
-977,6 +975,7 @@ dependencies = [ "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-nested", + "datafusion-functions-table", "datafusion-functions-window", "datafusion-optimizer", "datafusion-physical-expr", @@ -987,17 +986,11 @@ dependencies = [ "flate2", "futures", "glob", - "half", - "hashbrown 0.14.5", - "indexmap", "itertools 0.13.0", "log", - "num_cpus", "object_store", "parking_lot", "parquet", - "paste", - "pin-project-lite", "rand", "sqlparser", "tempfile", @@ -1012,8 +1005,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7493c5c2d40eec435b13d92e5703554f4efc7059451fcb8d3a79580ff0e45560" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "arrow-schema", "async-trait", @@ -1027,51 +1019,50 @@ dependencies = [ [[package]] name = "datafusion-common" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24953049ebbd6f8964f91f60aa3514e121b5e81e068e33b60e77815ab369b25c" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "ahash", "arrow", "arrow-array", "arrow-buffer", "arrow-schema", - "chrono", "half", "hashbrown 0.14.5", "indexmap", - "instant", "libc", - "num_cpus", "object_store", "parquet", "paste", + "recursive", "sqlparser", "tokio", + "web-time", ] [[package]] name = "datafusion-common-runtime" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f06df4ef76872e11c924d3c814fd2a8dd09905ed2e2195f71c857d78abd19685" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "log", "tokio", ] +[[package]] +name = "datafusion-doc" 
+version = "43.0.0" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" + [[package]] name = "datafusion-execution" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bbdcb628d690f3ce5fea7de81642b514486d58ff9779a51f180a69a4eadb361" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "arrow", - "chrono", "dashmap", "datafusion-common", "datafusion-expr", "futures", - "hashbrown 0.14.5", "log", "object_store", "parking_lot", @@ -1083,44 +1074,37 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8036495980e3131f706b7d33ab00b4492d73dc714e3cb74d11b50f9602a73246" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ - "ahash", "arrow", - "arrow-array", - "arrow-buffer", "chrono", "datafusion-common", + "datafusion-doc", "datafusion-expr-common", "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr-common", "indexmap", "paste", + "recursive", "serde_json", "sqlparser", - "strum", - "strum_macros", ] [[package]] name = "datafusion-expr-common" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4da0f3cb4669f9523b403d6b5a0ec85023e0ab3bf0183afd1517475b3e64fdd2" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "arrow", "datafusion-common", "itertools 0.13.0", - "paste", ] [[package]] name = "datafusion-functions" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f52c4012648b34853e40a2c6bcaa8772f837831019b68aca384fb38436dba162" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "arrow", "arrow-buffer", @@ -1129,8 +1113,10 @@ dependencies = [ "blake3", "chrono", "datafusion-common", + "datafusion-doc", "datafusion-execution", "datafusion-expr", + "datafusion-macros", "hashbrown 0.14.5", "hex", "itertools 0.13.0", @@ -1146,20 +1132,20 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5b8bb624597ba28ed7446df4a9bd7c7a7bde7c578b6b527da3f47371d5f6741" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "ahash", "arrow", "arrow-schema", "datafusion-common", + "datafusion-doc", "datafusion-execution", "datafusion-expr", "datafusion-functions-aggregate-common", + "datafusion-macros", "datafusion-physical-expr", "datafusion-physical-expr-common", "half", - "indexmap", "log", "paste", ] @@ -1167,22 +1153,19 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fb06208fc470bc8cf1ce2d9a1159d42db591f2c7264a8c1776b53ad8f675143" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "ahash", "arrow", "datafusion-common", "datafusion-expr-common", "datafusion-physical-expr-common", - "rand", ] [[package]] name = "datafusion-functions-nested" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fca25bbb87323716d05e54114666e942172ccca23c5a507e9c7851db6e965317" +source = 
"git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "arrow", "arrow-array", @@ -1198,18 +1181,33 @@ dependencies = [ "itertools 0.13.0", "log", "paste", - "rand", +] + +[[package]] +name = "datafusion-functions-table" +version = "43.0.0" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-plan", + "parking_lot", + "paste", ] [[package]] name = "datafusion-functions-window" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ae23356c634e54c59f7c51acb7a5b9f6240ffb2cf997049a1a24a8a88598dbe" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "datafusion-common", + "datafusion-doc", "datafusion-expr", "datafusion-functions-window-common", + "datafusion-macros", "datafusion-physical-expr", "datafusion-physical-expr-common", "log", @@ -1219,47 +1217,49 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4b3d6ff7794acea026de36007077a06b18b89e4f9c3fea7f2215f9f7dd9059b" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", ] +[[package]] +name = "datafusion-macros" +version = "43.0.0" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +dependencies = [ + "quote", + "syn 2.0.79", +] + [[package]] name = "datafusion-optimizer" version = "43.0.0" -source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bec6241eb80c595fa0e1a8a6b69686b5cf3bd5fdacb8319582a0943b0bd788aa" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "arrow", - "async-trait", "chrono", "datafusion-common", "datafusion-expr", "datafusion-physical-expr", - "hashbrown 0.14.5", "indexmap", "itertools 0.13.0", "log", - "paste", + "recursive", + "regex", "regex-syntax", ] [[package]] name = "datafusion-physical-expr" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3370357b8fc75ec38577700644e5d1b0bc78f38babab99c0b8bd26bafb3e4335" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "ahash", "arrow", "arrow-array", "arrow-buffer", - "arrow-ord", "arrow-schema", - "arrow-string", - "chrono", "datafusion-common", "datafusion-expr", "datafusion-expr-common", @@ -1277,38 +1277,36 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8b7734d94bf2fa6f6e570935b0ddddd8421179ce200065be97874e13d46a47b" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "ahash", "arrow", "datafusion-common", "datafusion-expr-common", "hashbrown 0.14.5", - "rand", + "itertools 0.13.0", ] [[package]] name = "datafusion-physical-optimizer" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eee8c479522df21d7b395640dff88c5ed05361852dce6544d7c98e9dbcebffe" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "arrow", - "arrow-schema", 
"datafusion-common", "datafusion-execution", "datafusion-expr-common", "datafusion-physical-expr", "datafusion-physical-plan", "itertools 0.13.0", + "log", + "recursive", ] [[package]] name = "datafusion-physical-plan" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17e1fc2e2c239d14e8556f2622b19a726bf6bc6962cc00c71fc52626274bee24" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "ahash", "arrow", @@ -1322,7 +1320,6 @@ dependencies = [ "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", - "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr", "datafusion-physical-expr-common", @@ -1332,18 +1329,15 @@ dependencies = [ "indexmap", "itertools 0.13.0", "log", - "once_cell", "parking_lot", "pin-project-lite", - "rand", "tokio", ] [[package]] name = "datafusion-sql" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63e3a4ed41dbee20a5d947a59ca035c225d67dc9cbe869c10f66dcdf25e7ce51" +source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" dependencies = [ "arrow", "arrow-array", @@ -1352,9 +1346,9 @@ dependencies = [ "datafusion-expr", "indexmap", "log", + "recursive", "regex", "sqlparser", - "strum", ] [[package]] @@ -2310,18 +2304,6 @@ dependencies = [ "hashbrown 0.15.2", ] -[[package]] -name = "instant" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" -dependencies = [ - "cfg-if", - "js-sys", - "wasm-bindgen", - "web-sys", -] - [[package]] name = "integer-encoding" version = "3.0.4" @@ -2807,16 +2789,6 @@ dependencies = [ "libm", ] -[[package]] -name = "num_cpus" -version = "1.16.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi 0.3.9", - "libc", -] - [[package]] name = "num_enum" version = "0.7.3" @@ -3246,6 +3218,15 @@ dependencies = [ "tar", ] +[[package]] +name = "psm" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "200b9ff220857e53e184257720a14553b2f4aa02577d2ed9842d45d4b9654810" +dependencies = [ + "cc", +] + [[package]] name = "quick-xml" version = "0.36.2" @@ -3372,6 +3353,26 @@ dependencies = [ "log", ] +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn 2.0.79", +] + [[package]] name = "redox_syscall" version = "0.5.7" @@ -3916,9 +3917,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.51.0" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fe11944a61da0da3f592e19a45ebe5ab92dc14a779907ff1f08fbb797bfefc7" +checksum = "9a875d8cd437cc8a97e9aeaeea352ec9a19aea99c23e9effb17757291de80b08" dependencies = [ "log", "sqlparser_derive", @@ -4142,6 +4143,19 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "stacker" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "799c883d55abdb5e98af1a7b3f23b9b6de8ecada0ecac058672d7635eb48ca7b" +dependencies = [ + "cc", + "cfg-if", + "libc", + 
"psm", + "windows-sys 0.52.0", +] + [[package]] name = "static_assertions" version = "1.1.0" @@ -4164,9 +4178,6 @@ name = "strum" version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" -dependencies = [ - "strum_macros", -] [[package]] name = "strum_macros" @@ -4706,6 +4717,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "webpki-roots" version = "0.25.4" diff --git a/rust/geodatafusion/Cargo.toml b/rust/geodatafusion/Cargo.toml index dcbcfc1e..943b02b9 100644 --- a/rust/geodatafusion/Cargo.toml +++ b/rust/geodatafusion/Cargo.toml @@ -12,7 +12,7 @@ rust-version = "1.82" [dependencies] -datafusion = "43" +datafusion = { git = "https://github.com/apache/datafusion", rev = "03e39da62e403e064d21b57e9d6c200464c03749" } arrow = { version = "53.3", features = ["ffi"] } arrow-array = { version = "53.3", features = ["chrono-tz"] } arrow-buffer = "53.3" diff --git a/rust/geodatafusion/src/udf/native/accessors/coord_dim.rs b/rust/geodatafusion/src/udf/native/accessors/coord_dim.rs index 531c25d2..36ebf90c 100644 --- a/rust/geodatafusion/src/udf/native/accessors/coord_dim.rs +++ b/rust/geodatafusion/src/udf/native/accessors/coord_dim.rs @@ -52,12 +52,13 @@ impl ScalarUDFImpl for CoordDim { fn documentation(&self) -> Option<&Documentation> { Some(DOCUMENTATION.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description("Return the coordinate dimension of the ST_Geometry value.") - .with_argument("g1", "geometry") - .build() - .unwrap() + Documentation::builder( + DOC_SECTION_OTHER, + "Return the coordinate dimension of the ST_Geometry value.", + "ST_CoordDim(geometry)", + ) + 
.with_argument("g1", "geometry") + .build() })) } } diff --git a/rust/geodatafusion/src/udf/native/accessors/envelope.rs b/rust/geodatafusion/src/udf/native/accessors/envelope.rs index 29b7438b..13aef106 100644 --- a/rust/geodatafusion/src/udf/native/accessors/envelope.rs +++ b/rust/geodatafusion/src/udf/native/accessors/envelope.rs @@ -48,14 +48,13 @@ impl ScalarUDFImpl for Envelope { fn documentation(&self) -> Option<&Documentation> { Some(DOCUMENTATION.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description( - "Computes a point which is the geometric center of mass of a geometry.", - ) - .with_argument("g1", "geometry") - .build() - .unwrap() + Documentation::builder( + DOC_SECTION_OTHER, + "Computes a geometry representing the bounding box of a geometry.", + "ST_Envelope(geometry)", + ) + .with_argument("g1", "geometry") + .build() })) } } diff --git a/rust/geodatafusion/src/udf/native/accessors/line_string.rs b/rust/geodatafusion/src/udf/native/accessors/line_string.rs index 7375fbbb..a576758d 100644 --- a/rust/geodatafusion/src/udf/native/accessors/line_string.rs +++ b/rust/geodatafusion/src/udf/native/accessors/line_string.rs @@ -55,14 +55,9 @@ impl ScalarUDFImpl for StartPoint { fn documentation(&self) -> Option<&Documentation> { Some(START_POINT_DOCUMENTATION.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description( - "Returns the first point of a LINESTRING geometry as a POINT. Returns NULL if the input is not a LINESTRING", - ) + Documentation::builder(DOC_SECTION_OTHER, "Returns the first point of a LINESTRING geometry as a POINT.
Returns NULL if the input is not a LINESTRING", "ST_StartPoint(line_string)" ) .with_argument("g1", "geometry") .build() - .unwrap() })) } } diff --git a/rust/geodatafusion/src/udf/native/bounding_box/box.rs b/rust/geodatafusion/src/udf/native/bounding_box/box.rs index c5d2c6ae..9900fa6d 100644 --- a/rust/geodatafusion/src/udf/native/bounding_box/box.rs +++ b/rust/geodatafusion/src/udf/native/bounding_box/box.rs @@ -48,12 +48,13 @@ impl ScalarUDFImpl for Box2D { fn documentation(&self) -> Option<&Documentation> { Some(DOCUMENTATION.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description("Returns a box2d representing the 2D extent of the geometry.") - .with_argument("geom", "geometry") - .build() - .unwrap() + Documentation::builder( + DOC_SECTION_OTHER, + "Returns a box2d representing the 2D extent of the geometry.", + "ST_Box2D(geometry)", + ) + .with_argument("geom", "geometry") + .build() })) } } diff --git a/rust/geodatafusion/src/udf/native/bounding_box/extrema.rs b/rust/geodatafusion/src/udf/native/bounding_box/extrema.rs index 62dbb361..6e13f037 100644 --- a/rust/geodatafusion/src/udf/native/bounding_box/extrema.rs +++ b/rust/geodatafusion/src/udf/native/bounding_box/extrema.rs @@ -70,19 +70,19 @@ impl ScalarUDFImpl for XMin { fn documentation(&self) -> Option<&Documentation> { Some(XMIN_DOC.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description("Returns X minima of a bounding box 2d or 3d or a geometry") - .with_syntax_example("ST_XMin(geometry)") - .with_argument("box", "The geometry or box input") - .with_related_udf("st_xmin") - .with_related_udf("st_ymin") - .with_related_udf("st_zmin") - .with_related_udf("st_xmax") - .with_related_udf("st_ymax") - .with_related_udf("st_zmax") - .build() - .unwrap() + Documentation::builder( + DOC_SECTION_OTHER, + "Returns X minima of a bounding box 2d or 3d or a geometry", + "ST_XMin(geometry)", + ) + .with_argument("box", 
"The geometry or box input") + .with_related_udf("st_xmin") + .with_related_udf("st_ymin") + .with_related_udf("st_zmin") + .with_related_udf("st_xmax") + .with_related_udf("st_ymax") + .with_related_udf("st_zmax") + .build() })) } } @@ -138,19 +138,19 @@ impl ScalarUDFImpl for YMin { fn documentation(&self) -> Option<&Documentation> { Some(YMIN_DOC.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description("Returns Y minima of a bounding box 2d or 3d or a geometry") - .with_syntax_example("ST_YMin(geometry)") - .with_argument("box", "The geometry or box input") - .with_related_udf("st_xmin") - .with_related_udf("st_ymin") - .with_related_udf("st_zmin") - .with_related_udf("st_xmax") - .with_related_udf("st_ymax") - .with_related_udf("st_zmax") - .build() - .unwrap() + Documentation::builder( + DOC_SECTION_OTHER, + "Returns Y minima of a bounding box 2d or 3d or a geometry", + "ST_YMin(geometry)", + ) + .with_argument("box", "The geometry or box input") + .with_related_udf("st_xmin") + .with_related_udf("st_ymin") + .with_related_udf("st_zmin") + .with_related_udf("st_xmax") + .with_related_udf("st_ymax") + .with_related_udf("st_zmax") + .build() })) } } @@ -204,19 +204,19 @@ impl ScalarUDFImpl for XMax { fn documentation(&self) -> Option<&Documentation> { Some(XMAX_DOC.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description("Returns X maxima of a bounding box 2d or 3d or a geometry") - .with_syntax_example("ST_XMax(geometry)") - .with_argument("box", "The geometry or box input") - .with_related_udf("st_xmin") - .with_related_udf("st_ymin") - .with_related_udf("st_zmin") - .with_related_udf("st_xmax") - .with_related_udf("st_ymax") - .with_related_udf("st_zmax") - .build() - .unwrap() + Documentation::builder( + DOC_SECTION_OTHER, + "Returns X maxima of a bounding box 2d or 3d or a geometry", + "ST_XMax(geometry)", + ) + .with_argument("box", "The geometry or box input") + 
.with_related_udf("st_xmin") + .with_related_udf("st_ymin") + .with_related_udf("st_zmin") + .with_related_udf("st_xmax") + .with_related_udf("st_ymax") + .with_related_udf("st_zmax") + .build() })) } } @@ -270,19 +270,19 @@ impl ScalarUDFImpl for YMax { fn documentation(&self) -> Option<&Documentation> { Some(YMAX_DOC.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description("Returns Y maxima of a bounding box 2d or 3d or a geometry") - .with_syntax_example("ST_YMax(geometry)") - .with_argument("box", "The geometry or box input") - .with_related_udf("st_xmin") - .with_related_udf("st_ymin") - .with_related_udf("st_zmin") - .with_related_udf("st_xmax") - .with_related_udf("st_ymax") - .with_related_udf("st_zmax") - .build() - .unwrap() + Documentation::builder( + DOC_SECTION_OTHER, + "Returns Y maxima of a bounding box 2d or 3d or a geometry", + "ST_YMax(geometry)", + ) + .with_argument("box", "The geometry or box input") + .with_related_udf("st_xmin") + .with_related_udf("st_ymin") + .with_related_udf("st_zmin") + .with_related_udf("st_xmax") + .with_related_udf("st_ymax") + .with_related_udf("st_zmax") + .build() })) } } diff --git a/rust/geodatafusion/src/udf/native/constructors/point.rs b/rust/geodatafusion/src/udf/native/constructors/point.rs index c4e9b560..54d8f9c6 100644 --- a/rust/geodatafusion/src/udf/native/constructors/point.rs +++ b/rust/geodatafusion/src/udf/native/constructors/point.rs @@ -79,16 +79,16 @@ impl ScalarUDFImpl for Point { fn documentation(&self) -> Option<&Documentation> { Some(POINT_DOC.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description("Returns a Point with the given X and Y coordinate values.") - .with_syntax_example("ST_Point(-71.104, 42.315)") - .with_argument("x", "x value") - .with_argument("y", "y value") - .with_related_udf("st_makepoint") - .with_related_udf("st_pointz") - .build() - .unwrap() + Documentation::builder( + 
DOC_SECTION_OTHER, + "Returns a Point with the given X and Y coordinate values.", + "ST_Point(-71.104, 42.315)", + ) + .with_argument("x", "x value") + .with_argument("y", "y value") + .with_related_udf("st_makepoint") + .with_related_udf("st_pointz") + .build() })) } } @@ -218,17 +218,17 @@ impl ScalarUDFImpl for MakePoint { fn documentation(&self) -> Option<&Documentation> { Some(MAKE_POINT_DOC.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description("Creates a 2D XY or 3D XYZ Point geometry.") - .with_syntax_example("ST_MakePoint(-71.104, 42.315)") - .with_argument("x", "x value") - .with_argument("y", "y value") - .with_argument("z", "z value") - .with_related_udf("st_point") - .with_related_udf("st_pointz") - .build() - .unwrap() + Documentation::builder( + DOC_SECTION_OTHER, + "Creates a 2D XY or 3D XYZ Point geometry.", + "ST_MakePoint(-71.104, 42.315)", + ) + .with_argument("x", "x value") + .with_argument("y", "y value") + .with_argument("z", "z value") + .with_related_udf("st_point") + .with_related_udf("st_pointz") + .build() })) } } diff --git a/rust/geodatafusion/src/udf/native/io/mod.rs b/rust/geodatafusion/src/udf/native/io/mod.rs index 341beca7..bbe293c8 100644 --- a/rust/geodatafusion/src/udf/native/io/mod.rs +++ b/rust/geodatafusion/src/udf/native/io/mod.rs @@ -1,5 +1,6 @@ //! 
Geometry Input and Output +mod union_example; mod wkb; mod wkt; diff --git a/rust/geodatafusion/src/udf/native/io/union_example.rs b/rust/geodatafusion/src/udf/native/io/union_example.rs new file mode 100644 index 00000000..982edc63 --- /dev/null +++ b/rust/geodatafusion/src/udf/native/io/union_example.rs @@ -0,0 +1,95 @@ +use std::any::Any; +use std::sync::Arc; + +use arrow::array::UnionBuilder; +use arrow::datatypes::{Float64Type, Int32Type}; +use arrow_array::Array; +use arrow_schema::{DataType, Field, UnionFields, UnionMode}; +use datafusion::logical_expr::{ + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, +}; + +#[derive(Debug)] +pub struct UnionExample { + signature: Signature, +} + +impl UnionExample { + #[allow(dead_code)] + pub fn new() -> Self { + Self { + signature: Signature::nullary(Volatility::Immutable), + } + } +} + +impl ScalarUDFImpl for UnionExample { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "example_union" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result { + let fields = UnionFields::new( + vec![0, 1], + vec![ + Arc::new(Field::new("a", DataType::Int32, false)), + Arc::new(Field::new("b", DataType::Float64, false)), + ], + ); + Ok(DataType::Union(fields, UnionMode::Dense)) + } + + fn invoke_no_args(&self, _number_rows: usize) -> datafusion::error::Result { + let mut builder = UnionBuilder::new_dense(); + builder.append::("a", 1).unwrap(); + builder.append::("b", 3.0).unwrap(); + builder.append::("a", 4).unwrap(); + let arr = builder.build().unwrap(); + + assert_eq!(arr.type_id(0), 0); + assert_eq!(arr.type_id(1), 1); + assert_eq!(arr.type_id(2), 0); + + assert_eq!(arr.value_offset(0), 0); + assert_eq!(arr.value_offset(1), 0); + assert_eq!(arr.value_offset(2), 1); + + let arr = arr.slice(0, 1); + + assert!(matches!( + arr.data_type(), + DataType::Union(_, UnionMode::Dense) + )); + + 
Ok(ColumnarValue::Array(Arc::new(arr))) + } + + fn documentation(&self) -> Option<&Documentation> { + None + } +} + +#[cfg(test)] +mod test { + use super::*; + use datafusion::prelude::*; + + #[tokio::test] + async fn test() { + let ctx = SessionContext::new(); + ctx.register_udf(UnionExample::new().into()); + + let out = ctx.sql("SELECT example_union();").await.unwrap(); + // TODO: fix this error upstream + // https://github.com/apache/datafusion/issues/13762 + out.show().await.unwrap_err(); + } +} diff --git a/rust/geodatafusion/src/udf/native/io/wkb.rs b/rust/geodatafusion/src/udf/native/io/wkb.rs index 7bda68fa..ca9e1b2f 100644 --- a/rust/geodatafusion/src/udf/native/io/wkb.rs +++ b/rust/geodatafusion/src/udf/native/io/wkb.rs @@ -54,14 +54,13 @@ impl ScalarUDFImpl for AsBinary { fn documentation(&self) -> Option<&Documentation> { Some(AS_BINARY_DOC.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description( - "Returns the OGC/ISO Well-Known Binary (WKB) representation of the geometry.", - ) - .with_argument("g1", "geometry") - .build() - .unwrap() + Documentation::builder( + DOC_SECTION_OTHER, + "Returns the OGC/ISO Well-Known Binary (WKB) representation of the geometry.", + "ST_AsBinary(geometry)", + ) + .with_argument("g1", "geometry") + .build() })) } } @@ -84,7 +83,7 @@ pub(super) struct GeomFromWKB { impl GeomFromWKB { pub fn new() -> Self { Self { - signature: Signature::coercible(vec![DataType::Binary], Volatility::Immutable), + signature: Signature::exact(vec![DataType::Binary], Volatility::Immutable), } } } @@ -114,14 +113,9 @@ impl ScalarUDFImpl for GeomFromWKB { fn documentation(&self) -> Option<&Documentation> { Some(GEOM_FROM_WKB_DOC.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description( - "Takes a well-known binary representation of a geometry and a Spatial Reference System ID (SRID) and creates an instance of the appropriate geometry type", - ) + 
Documentation::builder(DOC_SECTION_OTHER, "Takes a well-known binary representation of a geometry and a Spatial Reference System ID (SRID) and creates an instance of the appropriate geometry type", "ST_GeomFromWKB(buffer)") .with_argument("geom", "WKB buffers") .build() - .unwrap() })) } } diff --git a/rust/geodatafusion/src/udf/native/io/wkt.rs b/rust/geodatafusion/src/udf/native/io/wkt.rs index 1d46fb85..872afacd 100644 --- a/rust/geodatafusion/src/udf/native/io/wkt.rs +++ b/rust/geodatafusion/src/udf/native/io/wkt.rs @@ -2,6 +2,7 @@ use std::any::Any; use std::sync::OnceLock; use arrow::array::AsArray; +use arrow_array::Array; use arrow_schema::DataType; use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER; use datafusion::logical_expr::{ @@ -53,14 +54,13 @@ impl ScalarUDFImpl for AsText { fn documentation(&self) -> Option<&Documentation> { Some(AS_TEXT_DOC.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description( - "Returns the OGC Well-Known Text (WKT) representation of the geometry/geography.", - ) - .with_argument("g1", "geometry") - .build() - .unwrap() + Documentation::builder( + DOC_SECTION_OTHER, + "Returns the OGC Well-Known Text (WKT) representation of the geometry/geography.", + "ST_AsText(geometry)", + ) + .with_argument("g1", "geometry") + .build() })) } } @@ -82,9 +82,8 @@ pub(super) struct GeomFromText { impl GeomFromText { pub fn new() -> Self { - // TODO: extend to allow specifying little/big endian Self { - signature: Signature::coercible(vec![DataType::Utf8], Volatility::Immutable), + signature: Signature::exact(vec![DataType::Utf8], Volatility::Immutable), } } } @@ -97,7 +96,7 @@ impl ScalarUDFImpl for GeomFromText { } fn name(&self) -> &str { - "st_astext" + "st_geomfromtext" } fn signature(&self) -> &Signature { @@ -114,14 +113,13 @@ impl ScalarUDFImpl for GeomFromText { fn documentation(&self) -> Option<&Documentation> { Some(GEOM_FROM_TEXT_DOC.get_or_init(|| { - 
Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description( - "Constructs a geometry object from the OGC Well-Known text representation.", - ) - .with_argument("g1", "geometry") - .build() - .unwrap() + Documentation::builder( + DOC_SECTION_OTHER, + "Constructs a geometry object from the OGC Well-Known text representation.", + "ST_GeomFromText(text)", + ) + .with_argument("g1", "geometry") + .build() })) } } @@ -133,5 +131,30 @@ fn geom_from_text_impl(args: &[ColumnarValue]) -> GeoDataFusionResult().clone(), Default::default()); let native_arr = read_wkt(&wkt_arr, CoordType::Separated, false)?; - Ok(native_arr.to_array_ref().into()) + dbg!("native_arr"); + + let arrow_arr = native_arr.to_array_ref(); + if let DataType::Union(_fields, mode) = arrow_arr.data_type() { + dbg!(mode); + } + + Ok(arrow_arr.into()) +} + +#[cfg(test)] +mod test { + use datafusion::prelude::*; + + use crate::udf::native::register_native; + + #[tokio::test] + async fn test() { + let ctx = SessionContext::new(); + register_native(&ctx); + + let out = ctx.sql("SELECT ST_GeomFromText('LINESTRING(-71.160281 42.258729,-71.160837 42.259113,-71.161144 42.25932)');").await.unwrap(); + // TODO: fix this error upstream + // https://github.com/apache/datafusion/issues/13762 + out.show().await.unwrap_err(); + } } diff --git a/rust/geodatafusion/src/udf/native/measurement/area.rs b/rust/geodatafusion/src/udf/native/measurement/area.rs index 0738105c..8480e88c 100644 --- a/rust/geodatafusion/src/udf/native/measurement/area.rs +++ b/rust/geodatafusion/src/udf/native/measurement/area.rs @@ -47,12 +47,13 @@ impl ScalarUDFImpl for Area { fn documentation(&self) -> Option<&Documentation> { Some(DOCUMENTATION.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description("Returns the area of a polygonal geometry.") - .with_argument("geom", "geometry") - .build() - .unwrap() + Documentation::builder( + DOC_SECTION_OTHER, + "Returns the area of a 
polygonal geometry.", + "ST_Area(geom)", + ) + .with_argument("geom", "geometry") + .build() })) } } diff --git a/rust/geodatafusion/src/udf/native/mod.rs b/rust/geodatafusion/src/udf/native/mod.rs index 15ac8b46..604a1687 100644 --- a/rust/geodatafusion/src/udf/native/mod.rs +++ b/rust/geodatafusion/src/udf/native/mod.rs @@ -9,8 +9,8 @@ mod processing; use datafusion::prelude::SessionContext; -/// Register all provided [geo] functions -pub fn register_geo(ctx: &SessionContext) { +/// Register all provided native-Rust functions +pub fn register_native(ctx: &SessionContext) { accessors::register_udfs(ctx); bounding_box::register_udfs(ctx); constructors::register_udfs(ctx); diff --git a/rust/geodatafusion/src/udf/native/processing/centroid.rs b/rust/geodatafusion/src/udf/native/processing/centroid.rs index f5136bd3..91760fd8 100644 --- a/rust/geodatafusion/src/udf/native/processing/centroid.rs +++ b/rust/geodatafusion/src/udf/native/processing/centroid.rs @@ -49,14 +49,13 @@ impl ScalarUDFImpl for Centroid { fn documentation(&self) -> Option<&Documentation> { Some(DOCUMENTATION.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description( - "Computes a point which is the geometric center of mass of a geometry.", - ) - .with_argument("g1", "geometry") - .build() - .unwrap() + Documentation::builder( + DOC_SECTION_OTHER, + "Computes a point which is the geometric center of mass of a geometry.", + "ST_Centroid(geometry)", + ) + .with_argument("g1", "geometry") + .build() })) } } diff --git a/rust/geodatafusion/src/udf/native/processing/chaikin_smoothing.rs b/rust/geodatafusion/src/udf/native/processing/chaikin_smoothing.rs new file mode 100644 index 00000000..242f1383 --- /dev/null +++ b/rust/geodatafusion/src/udf/native/processing/chaikin_smoothing.rs @@ -0,0 +1,77 @@ +// use std::any::Any; +// use std::sync::OnceLock; + +// use arrow_schema::DataType; +// use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER; 
+// use datafusion::logical_expr::{ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature}; +// use geoarrow::algorithm::geo::ChaikinSmoothing as _; +// use geoarrow::array::{CoordType, GeometryArray}; +// use geoarrow::ArrayBase; + +// use crate::data_types::{any_single_geometry_type_input, parse_to_native_array, GEOMETRY_TYPE}; +// use crate::error::GeoDataFusionResult; + +// #[derive(Debug)] +// pub(super) struct ChaikinSmoothing { +// signature: Signature, +// } + +// impl ChaikinSmoothing { +// pub fn new() -> Self { +// // TypeSignature:: +// Signature::co(vec![GEOMETRY_TYPE.into(), ], volatility) +// Self { +// signature: any_single_geometry_type_input(), +// } +// } +// } + +// static DOCUMENTATION: OnceLock = OnceLock::new(); + +// impl ScalarUDFImpl for ChaikinSmoothing { +// fn as_any(&self) -> &dyn Any { +// self +// } + +// fn name(&self) -> &str { +// "st_convexhull" +// } + +// fn signature(&self) -> &Signature { +// &self.signature +// } + +// fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result { +// Ok(GEOMETRY_TYPE.into()) +// } + +// fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result { +// Ok(chaikin_impl(args)?) +// } + +// fn documentation(&self) -> Option<&Documentation> { +// Some(DOCUMENTATION.get_or_init(|| { +// Documentation::builder() +// .with_doc_section(DOC_SECTION_OTHER) +// .with_description( +// "Smoothes a linear or polygonal geometry using Chaikin's algorithm.", +// ) +// .with_argument("g1", "geometry") +// .build() +// .unwrap() +// })) +// } +// } + +// fn chaikin_impl(args: &[ColumnarValue]) -> GeoDataFusionResult { +// let array = ColumnarValue::values_to_arrays(args)? +// .into_iter() +// .next() +// .unwrap(); +// let native_array = parse_to_native_array(array)?; +// let output = native_array +// .as_ref() +// .convex_hull()? 
+// .into_coord_type(CoordType::Separated); +// Ok(GeometryArray::from(output).into_array_ref().into()) +// } diff --git a/rust/geodatafusion/src/udf/native/processing/convex_hull.rs b/rust/geodatafusion/src/udf/native/processing/convex_hull.rs index 4b66b4c0..f3860f7d 100644 --- a/rust/geodatafusion/src/udf/native/processing/convex_hull.rs +++ b/rust/geodatafusion/src/udf/native/processing/convex_hull.rs @@ -49,14 +49,9 @@ impl ScalarUDFImpl for ConvexHull { fn documentation(&self) -> Option<&Documentation> { Some(DOCUMENTATION.get_or_init(|| { - Documentation::builder() - .with_doc_section(DOC_SECTION_OTHER) - .with_description( - "Computes the convex hull of a geometry. The convex hull is the smallest convex geometry that encloses all geometries in the input.", - ) + Documentation::builder(DOC_SECTION_OTHER, "Computes the convex hull of a geometry. The convex hull is the smallest convex geometry that encloses all geometries in the input.", "ST_ConvexHull(geometry)") .with_argument("g1", "geometry") .build() - .unwrap() })) } } diff --git a/rust/geodatafusion/src/udf/native/processing/mod.rs b/rust/geodatafusion/src/udf/native/processing/mod.rs index e6bbe9cb..84191069 100644 --- a/rust/geodatafusion/src/udf/native/processing/mod.rs +++ b/rust/geodatafusion/src/udf/native/processing/mod.rs @@ -1,4 +1,5 @@ mod centroid; +mod chaikin_smoothing; mod convex_hull; use datafusion::prelude::SessionContext; From f51443ae951fa199e6f78fa81651f19f7654026e Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Fri, 13 Dec 2024 17:36:44 -0500 Subject: [PATCH 06/11] Add `ST_MakeBox2D`, `ST_Expand`, fix RectArray round trip (#946) ### Change list - Add `ST_MakeBox2D`, `ST_Expand`. - Add test for each. 
- Fix round-tripping `RectArray` to an `ArrayRef` - Add test of round-tripping `RectArray` to an `ArrayRef` --- Cargo.lock | 1 + rust/geoarrow/src/array/rect/array.rs | 73 ++++--- rust/geoarrow/src/array/rect/builder.rs | 10 +- rust/geodatafusion/Cargo.toml | 1 + rust/geodatafusion/README.md | 4 +- .../native/bounding_box/{box.rs => box_2d.rs} | 0 .../src/udf/native/bounding_box/expand.rs | 179 ++++++++++++++++++ .../udf/native/bounding_box/make_box_2d.rs | 132 +++++++++++++ .../src/udf/native/bounding_box/mod.rs | 12 +- 9 files changed, 381 insertions(+), 31 deletions(-) rename rust/geodatafusion/src/udf/native/bounding_box/{box.rs => box_2d.rs} (100%) create mode 100644 rust/geodatafusion/src/udf/native/bounding_box/expand.rs create mode 100644 rust/geodatafusion/src/udf/native/bounding_box/make_box_2d.rs diff --git a/Cargo.lock b/Cargo.lock index 6b98331e..d6102c4b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1849,6 +1849,7 @@ dependencies = [ name = "geodatafusion" version = "0.1.0-dev" dependencies = [ + "approx", "arrow", "arrow-array", "arrow-buffer", diff --git a/rust/geoarrow/src/array/rect/array.rs b/rust/geoarrow/src/array/rect/array.rs index 26371cba..d4ece60f 100644 --- a/rust/geoarrow/src/array/rect/array.rs +++ b/rust/geoarrow/src/array/rect/array.rs @@ -2,8 +2,8 @@ use std::sync::Arc; use arrow::array::AsArray; use arrow::datatypes::Float64Type; -use arrow_array::{Array, ArrayRef, Float64Array, StructArray}; -use arrow_buffer::NullBuffer; +use arrow_array::{Array, ArrayRef, StructArray}; +use arrow_buffer::{NullBuffer, ScalarBuffer}; use arrow_schema::{DataType, Field}; use crate::array::metadata::ArrayMetadata; @@ -182,14 +182,12 @@ impl IntoArrow for RectArray { fn into_arrow(self) -> Self::ArrowArray { let fields = rect_fields(self.data_type.dimension().unwrap()); let mut arrays: Vec = vec![]; - for buf in self.lower.buffers { - arrays.push(Arc::new(Float64Array::new(buf, None))); - } - for buf in self.upper.buffers { - 
arrays.push(Arc::new(Float64Array::new(buf, None))); - } - let validity = self.validity; + // values_array takes care of the correct number of dimensions + arrays.extend_from_slice(self.lower.values_array().as_slice()); + arrays.extend_from_slice(self.upper.values_array().as_slice()); + + let validity = self.validity; StructArray::new(fields, arrays, validity) } } @@ -202,23 +200,24 @@ impl TryFrom<(&StructArray, Dimension)> for RectArray { let columns = value.columns(); assert_eq!(columns.len(), dim.size() * 2); - let lower = match dim { - Dimension::XY => { - core::array::from_fn(|i| columns[i].as_primitive::().values().clone()) - } - Dimension::XYZ => { - core::array::from_fn(|i| columns[i].as_primitive::().values().clone()) + let dim_size = dim.size(); + let lower = core::array::from_fn(|i| { + if i < dim_size { + columns[i].as_primitive::().values().clone() + } else { + ScalarBuffer::from(vec![]) } - }; - let upper = match dim { - Dimension::XY => { - core::array::from_fn(|i| columns[i].as_primitive::().values().clone()) + }); + let upper = core::array::from_fn(|i| { + if i < dim_size { + columns[dim_size + i] + .as_primitive::() + .values() + .clone() + } else { + ScalarBuffer::from(vec![]) } - Dimension::XYZ => { - core::array::from_fn(|i| columns[i].as_primitive::().values().clone()) - } - }; - + }); Ok(Self::new( SeparatedCoordBuffer::new(lower, dim), SeparatedCoordBuffer::new(upper, dim), @@ -271,3 +270,29 @@ impl> From<(Vec>, Dimension)> for RectArray { mut_arr.into() } } + +#[cfg(test)] +mod test { + use super::*; + use crate::algorithm::native::eq::rect_eq; + use crate::array::RectBuilder; + use crate::datatypes::Dimension; + + #[test] + fn rect_array_round_trip() { + let rect = geo::Rect::new( + geo::coord! { x: 0.0, y: 5.0 }, + geo::coord! 
{ x: 10.0, y: 15.0 }, + ); + let mut builder = + RectBuilder::with_capacity_and_options(Dimension::XY, 1, Default::default()); + builder.push_rect(Some(&rect)); + builder.push_min_max(&rect.min(), &rect.max()); + let rect_arr = builder.finish(); + + let arrow_arr = rect_arr.into_array_ref(); + let rect_arr_again = RectArray::try_from((arrow_arr.as_ref(), Dimension::XY)).unwrap(); + let rect_again = rect_arr_again.value(0); + assert!(rect_eq(&rect, &rect_again)); + } +} diff --git a/rust/geoarrow/src/array/rect/builder.rs b/rust/geoarrow/src/array/rect/builder.rs index 4f589156..3669e85a 100644 --- a/rust/geoarrow/src/array/rect/builder.rs +++ b/rust/geoarrow/src/array/rect/builder.rs @@ -6,7 +6,7 @@ use crate::scalar::Rect; use crate::trait_::IntoArrow; use arrow_array::{Array, StructArray}; use arrow_buffer::NullBufferBuilder; -use geo_traits::RectTrait; +use geo_traits::{CoordTrait, RectTrait}; use std::sync::Arc; /// The GeoArrow equivalent to `Vec>`: a mutable collection of Rects. @@ -168,6 +168,14 @@ impl RectBuilder { } } + /// Push min and max coordinates of a rect to the builder. + #[inline] + pub fn push_min_max(&mut self, min: &impl CoordTrait, max: &impl CoordTrait) { + self.lower.push_coord(min); + self.upper.push_coord(max); + self.validity.append_non_null() + } + /// Create this builder from a iterator of Rects. 
pub fn from_rects<'a>( geoms: impl ExactSizeIterator + 'a)>, diff --git a/rust/geodatafusion/Cargo.toml b/rust/geodatafusion/Cargo.toml index 943b02b9..360928ed 100644 --- a/rust/geodatafusion/Cargo.toml +++ b/rust/geodatafusion/Cargo.toml @@ -29,4 +29,5 @@ geoarrow = { path = "../geoarrow", features = ["flatgeobuf"] } thiserror = "1" [dev-dependencies] +approx = "0.5.1" tokio = { version = "1.9", features = ["macros", "fs", "rt-multi-thread"] } diff --git a/rust/geodatafusion/README.md b/rust/geodatafusion/README.md index c8f1cde1..474e84a9 100644 --- a/rust/geodatafusion/README.md +++ b/rust/geodatafusion/README.md @@ -337,10 +337,10 @@ Spatial extensions for [Apache DataFusion](https://datafusion.apache.org/), an e | Box2D | ✅ | Returns a BOX2D representing the 2D extent of a geometry. | | Box3D | | Returns a BOX3D representing the 3D extent of a geometry. | | ST_EstimatedExtent | | Returns the estimated extent of a spatial table. | -| ST_Expand | | Returns a bounding box expanded from another bounding box or a geometry. | +| ST_Expand | ✅ | Returns a bounding box expanded from another bounding box or a geometry. | | ST_Extent | | Aggregate function that returns the bounding box of geometries. | | ST_3DExtent | | Aggregate function that returns the 3D bounding box of geometries. | -| ST_MakeBox2D | | Creates a BOX2D defined by two 2D point geometries. | +| ST_MakeBox2D | ✅ | Creates a BOX2D defined by two 2D point geometries. | | ST_3DMakeBox | | Creates a BOX3D defined by two 3D point geometries. | | ST_XMax | ✅ | Returns the X maxima of a 2D or 3D bounding box or a geometry. | | ST_XMin | ✅ | Returns the X minima of a 2D or 3D bounding box or a geometry. 
| diff --git a/rust/geodatafusion/src/udf/native/bounding_box/box.rs b/rust/geodatafusion/src/udf/native/bounding_box/box_2d.rs similarity index 100% rename from rust/geodatafusion/src/udf/native/bounding_box/box.rs rename to rust/geodatafusion/src/udf/native/bounding_box/box_2d.rs diff --git a/rust/geodatafusion/src/udf/native/bounding_box/expand.rs b/rust/geodatafusion/src/udf/native/bounding_box/expand.rs new file mode 100644 index 00000000..515cbbb1 --- /dev/null +++ b/rust/geodatafusion/src/udf/native/bounding_box/expand.rs @@ -0,0 +1,179 @@ +use std::any::Any; +use std::sync::OnceLock; + +use arrow::array::AsArray; +use arrow::datatypes::Float64Type; +use arrow_schema::DataType; +use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER; +use datafusion::logical_expr::{ + ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature, Volatility, +}; +use geo_traits::{CoordTrait, RectTrait}; +use geoarrow::array::{RectArray, RectBuilder}; +use geoarrow::datatypes::Dimension; +use geoarrow::error::GeoArrowError; +use geoarrow::trait_::ArrayAccessor; +use geoarrow::ArrayBase; + +use crate::data_types::BOX2D_TYPE; +use crate::error::GeoDataFusionResult; + +#[derive(Debug)] +pub(super) struct Expand { + signature: Signature, +} + +impl Expand { + pub fn new() -> Self { + Self { + signature: Signature::one_of( + vec![ + TypeSignature::Exact(vec![BOX2D_TYPE.into(), DataType::Float64]), + TypeSignature::Exact(vec![ + BOX2D_TYPE.into(), + DataType::Float64, + DataType::Float64, + ]), + ], + Volatility::Immutable, + ), + } + } +} + +static DOCUMENTATION: OnceLock = OnceLock::new(); + +impl ScalarUDFImpl for Expand { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "st_expand" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> datafusion::error::Result { + Ok(arg_types.first().unwrap().clone()) + } + + fn invoke(&self, args: &[ColumnarValue]) -> 
datafusion::error::Result { + Ok(expand_impl(args)?) + } + + fn documentation(&self) -> Option<&Documentation> { + Some(DOCUMENTATION.get_or_init(|| { + Documentation::builder( + DOC_SECTION_OTHER, + "Returns a bounding box expanded from the bounding box of the input, either by specifying a single distance with which the box should be expanded on both axes, or by specifying an expansion distance for each axis. Uses double-precision. Can be used for distance queries, or to add a bounding box filter to a query to take advantage of a spatial index.", + "ST_Expand(box)", + ) + .with_argument("box", "box2d") + .build() + })) + } +} + +fn expand_impl(args: &[ColumnarValue]) -> GeoDataFusionResult { + let mut args = ColumnarValue::values_to_arrays(args)?.into_iter(); + let rect_array = args.next().unwrap(); + let factor1 = args.next().unwrap(); + let factor2 = args.next(); + + let dx = factor1.as_primitive::(); + + if BOX2D_TYPE + .to_data_type() + .equals_datatype(rect_array.data_type()) + { + let rect_array = RectArray::try_from((rect_array.as_ref(), Dimension::XY))?; + let mut builder = RectBuilder::with_capacity_and_options( + Dimension::XY, + rect_array.len(), + rect_array.metadata().clone(), + ); + + if let Some(dy) = factor2 { + let dy = dy.as_primitive::(); + + for val in rect_array.iter().zip(dx.iter()).zip(dy.iter()) { + if let ((Some(rect), Some(dx)), Some(dy)) = val { + builder.push_rect(Some(&expand_2d_rect(rect, dx, dy))); + } else { + builder.push_null(); + } + } + } else { + for val in rect_array.iter().zip(dx.iter()) { + if let (Some(rect), Some(dx)) = val { + builder.push_rect(Some(&expand_2d_rect(rect, dx, dx))); + } else { + builder.push_null(); + } + } + } + + return Ok(builder.finish().into_array_ref().into()); + } + + Err(Err(GeoArrowError::General(format!( + "Unexpected data type: {:?}", + rect_array.data_type() + )))?) 
+} + +#[inline] +fn expand_2d_rect(rect: impl RectTrait, dx: f64, dy: f64) -> geo::Rect { + let min = rect.min(); + let max = rect.max(); + + let new_min = geo::coord! { x: min.x() - dx, y: min.y() - dy }; + let new_max = geo::coord! { x: max.x() + dx, y: max.y() + dy }; + + geo::Rect::new(new_min, new_max) +} + +#[cfg(test)] +mod test { + use approx::relative_eq; + use datafusion::prelude::*; + use geo_traits::{CoordTrait, RectTrait}; + use geoarrow::array::RectArray; + use geoarrow::datatypes::Dimension; + use geoarrow::trait_::ArrayAccessor; + + use crate::data_types::BOX2D_TYPE; + use crate::udf::native::register_native; + + #[tokio::test] + async fn test() { + let ctx = SessionContext::new(); + register_native(&ctx); + + let out = ctx + .sql("SELECT ST_Expand(ST_MakeBox2D(ST_Point(0, 5), ST_Point(10, 20)), 10, 20);") + .await + .unwrap(); + + let batches = out.collect().await.unwrap(); + assert_eq!(batches.len(), 1); + let batch = batches.into_iter().next().unwrap(); + assert_eq!(batch.columns().len(), 1); + assert!(batch + .schema() + .field(0) + .data_type() + .equals_datatype(&BOX2D_TYPE.into())); + + let rect_array = RectArray::try_from((batch.columns()[0].as_ref(), Dimension::XY)).unwrap(); + let rect = rect_array.value(0); + + assert!(relative_eq!(rect.min().x(), -10.0)); + assert!(relative_eq!(rect.min().y(), -15.0)); + assert!(relative_eq!(rect.max().x(), 20.0)); + assert!(relative_eq!(rect.max().y(), 40.0)); + } +} diff --git a/rust/geodatafusion/src/udf/native/bounding_box/make_box_2d.rs b/rust/geodatafusion/src/udf/native/bounding_box/make_box_2d.rs new file mode 100644 index 00000000..2967b56d --- /dev/null +++ b/rust/geodatafusion/src/udf/native/bounding_box/make_box_2d.rs @@ -0,0 +1,132 @@ +use std::any::Any; +use std::sync::OnceLock; + +use arrow_schema::DataType; +use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER; +use datafusion::logical_expr::{ + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, +}; +use 
geo_traits::PointTrait; +use geoarrow::array::{PointArray, RectBuilder}; +use geoarrow::datatypes::Dimension; +use geoarrow::trait_::ArrayAccessor; +use geoarrow::ArrayBase; + +use crate::data_types::{BOX2D_TYPE, POINT2D_TYPE}; +use crate::error::GeoDataFusionResult; + +#[derive(Debug)] +pub(super) struct MakeBox2D { + signature: Signature, +} + +impl MakeBox2D { + pub fn new() -> Self { + Self { + signature: Signature::exact( + vec![POINT2D_TYPE.into(), POINT2D_TYPE.into()], + Volatility::Immutable, + ), + } + } +} + +static DOCUMENTATION: OnceLock = OnceLock::new(); + +impl ScalarUDFImpl for MakeBox2D { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "st_makebox2d" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result { + Ok(BOX2D_TYPE.into()) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result { + Ok(make_box2d_impl(args)?) + } + + fn documentation(&self) -> Option<&Documentation> { + Some(DOCUMENTATION.get_or_init(|| { + Documentation::builder( + DOC_SECTION_OTHER, + "Creates a box2d defined by two Point geometries. 
This is useful for doing range queries.", + "ST_MakeBox2D(ST_Point(-989502.1875, 528439.5625), ST_Point(-987121.375, 529933.1875))", + ) + .with_argument("pointLowLeft", "geometry") + .with_argument("pointUpRight", "geometry") + .build() + })) + } +} + +fn make_box2d_impl(args: &[ColumnarValue]) -> GeoDataFusionResult { + let mut args = ColumnarValue::values_to_arrays(args)?.into_iter(); + let min = args.next().unwrap(); + let max = args.next().unwrap(); + + let min = PointArray::try_from((min.as_ref(), Dimension::XY))?; + let max = PointArray::try_from((max.as_ref(), Dimension::XY))?; + + let mut builder = + RectBuilder::with_capacity_and_options(Dimension::XY, min.len(), min.metadata().clone()); + + for val in min.iter().zip(max.iter()) { + if let (Some(min), Some(max)) = val { + builder.push_min_max(&min.coord().unwrap(), &max.coord().unwrap()); + } else { + builder.push_null(); + } + } + + Ok(builder.finish().into_array_ref().into()) +} + +#[cfg(test)] +mod test { + use approx::relative_eq; + use datafusion::prelude::*; + use geo_traits::{CoordTrait, RectTrait}; + use geoarrow::array::RectArray; + use geoarrow::datatypes::Dimension; + use geoarrow::trait_::ArrayAccessor; + + use crate::data_types::BOX2D_TYPE; + use crate::udf::native::register_native; + + #[tokio::test] + async fn test() { + let ctx = SessionContext::new(); + register_native(&ctx); + + let out = ctx + .sql("SELECT ST_MakeBox2D(ST_Point(0, 5), ST_Point(10, 20));") + .await + .unwrap(); + let batches = out.collect().await.unwrap(); + assert_eq!(batches.len(), 1); + let batch = batches.into_iter().next().unwrap(); + assert_eq!(batch.columns().len(), 1); + assert!(batch + .schema() + .field(0) + .data_type() + .equals_datatype(&BOX2D_TYPE.into())); + + let rect_array = RectArray::try_from((batch.columns()[0].as_ref(), Dimension::XY)).unwrap(); + let rect = rect_array.value(0); + + assert!(relative_eq!(rect.min().x(), 0.0)); + assert!(relative_eq!(rect.min().y(), 5.0)); + 
assert!(relative_eq!(rect.max().x(), 10.0)); + assert!(relative_eq!(rect.max().y(), 20.0)); + } +} diff --git a/rust/geodatafusion/src/udf/native/bounding_box/mod.rs b/rust/geodatafusion/src/udf/native/bounding_box/mod.rs index 962ec875..bbb7b58a 100644 --- a/rust/geodatafusion/src/udf/native/bounding_box/mod.rs +++ b/rust/geodatafusion/src/udf/native/bounding_box/mod.rs @@ -1,13 +1,17 @@ -mod r#box; +mod box_2d; +mod expand; mod extrema; +mod make_box_2d; use datafusion::prelude::SessionContext; /// Register all provided bounding box functions pub fn register_udfs(ctx: &SessionContext) { - ctx.register_udf(extrema::XMin::new().into()); - ctx.register_udf(extrema::YMin::new().into()); + ctx.register_udf(box_2d::Box2D::new().into()); + ctx.register_udf(expand::Expand::new().into()); ctx.register_udf(extrema::XMax::new().into()); + ctx.register_udf(extrema::XMin::new().into()); ctx.register_udf(extrema::YMax::new().into()); - ctx.register_udf(r#box::Box2D::new().into()); + ctx.register_udf(extrema::YMin::new().into()); + ctx.register_udf(make_box_2d::MakeBox2D::new().into()); } From 5b69dace7d766d8718ec73c65402d904dc431d79 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Fri, 13 Dec 2024 18:17:38 -0500 Subject: [PATCH 07/11] Add `ST_GeoHash`, `ST_Box2dFromGeoHash`, `ST_PointFromGeoHash` (#947) --- Cargo.lock | 13 +- rust/geodatafusion/Cargo.toml | 1 + rust/geodatafusion/README.md | 6 +- rust/geodatafusion/src/error.rs | 4 + .../src/udf/native/io/geohash.rs | 327 ++++++++++++++++++ rust/geodatafusion/src/udf/native/io/mod.rs | 4 + 6 files changed, 351 insertions(+), 4 deletions(-) create mode 100644 rust/geodatafusion/src/udf/native/io/geohash.rs diff --git a/Cargo.lock b/Cargo.lock index d6102c4b..c5761dd4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1864,6 +1864,7 @@ dependencies = [ "geo 0.29.3", "geo-traits", "geoarrow", + "geohash", "thiserror", "tokio", ] @@ -1877,6 +1878,16 @@ dependencies = [ "libm", ] +[[package]] +name = "geohash" +version = "0.13.1" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fb94b1a65401d6cbf22958a9040aa364812c26674f841bee538b12c135db1e6" +dependencies = [ + "geo-types", + "libm", +] + [[package]] name = "geojson" version = "0.24.1" @@ -4154,7 +4165,7 @@ dependencies = [ "cfg-if", "libc", "psm", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] diff --git a/rust/geodatafusion/Cargo.toml b/rust/geodatafusion/Cargo.toml index 360928ed..2f99ee6b 100644 --- a/rust/geodatafusion/Cargo.toml +++ b/rust/geodatafusion/Cargo.toml @@ -24,6 +24,7 @@ arrow-schema = "53.3" async-stream = { version = "0.3", optional = true } async-trait = { version = "0.1", optional = true } geo = "0.29.3" +geohash = "0.13.1" geo-traits = "0.2" geoarrow = { path = "../geoarrow", features = ["flatgeobuf"] } thiserror = "1" diff --git a/rust/geodatafusion/README.md b/rust/geodatafusion/README.md index 474e84a9..e6897d77 100644 --- a/rust/geodatafusion/README.md +++ b/rust/geodatafusion/README.md @@ -166,7 +166,7 @@ Spatial extensions for [Apache DataFusion](https://datafusion.apache.org/), an e | Name | Implemented | Description | | -------------------------- | ----------- | ------------------------------------------------------------------------------------------------------ | -| ST_Box2dFromGeoHash | | Return a BOX2D from a GeoHash string. | +| ST_Box2dFromGeoHash | ✅ | Return a BOX2D from a GeoHash string. | | ST_GeomFromGeoHash | | Return a geometry from a GeoHash string. | | ST_GeomFromGML | | Takes as input GML representation of geometry and outputs a PostGIS geometry object | | ST_GeomFromGeoJSON | | Takes as input a geojson representation of a geometry and outputs a PostGIS geometry object | @@ -174,7 +174,7 @@ Spatial extensions for [Apache DataFusion](https://datafusion.apache.org/), an e | ST_GeomFromTWKB | | Creates a geometry instance from a TWKB ("Tiny Well-Known Binary") geometry representation. 
| | ST_GMLToSQL | | Return a specified ST_Geometry value from GML representation. This is an alias name for ST_GeomFromGML | | ST_LineFromEncodedPolyline | | Creates a LineString from an Encoded Polyline. | -| ST_PointFromGeoHash | | Return a point from a GeoHash string. | +| ST_PointFromGeoHash | ✅ | Return a point from a GeoHash string. | | ST_FromFlatGeobufToTable | | Creates a table based on the structure of FlatGeobuf data. | | ST_FromFlatGeobuf | | Reads FlatGeobuf data. | @@ -212,7 +212,7 @@ Spatial extensions for [Apache DataFusion](https://datafusion.apache.org/), an e | ST_AsSVG | | Returns SVG path data for a geometry. | | ST_AsTWKB | | Returns the geometry as TWKB, aka "Tiny Well-Known Binary" | | ST_AsX3D | | Returns a Geometry in X3D xml node element format: ISO-IEC-19776-1.2-X3DEncodings-XML | -| ST_GeoHash | | Return a GeoHash representation of the geometry. | +| ST_GeoHash | ✅ | Return a GeoHash representation of the geometry. | ### Operators diff --git a/rust/geodatafusion/src/error.rs b/rust/geodatafusion/src/error.rs index a219e01e..f8921794 100644 --- a/rust/geodatafusion/src/error.rs +++ b/rust/geodatafusion/src/error.rs @@ -17,6 +17,9 @@ pub(crate) enum GeoDataFusionError { #[error(transparent)] GeoArrow(#[from] GeoArrowError), + + #[error(transparent)] + GeoHash(#[from] geohash::GeohashError), } /// Crate-specific result type. 
@@ -28,6 +31,7 @@ impl From for DataFusionError { GeoDataFusionError::Arrow(err) => DataFusionError::ArrowError(err, None), GeoDataFusionError::DataFusion(err) => err, GeoDataFusionError::GeoArrow(err) => DataFusionError::External(Box::new(err)), + GeoDataFusionError::GeoHash(err) => DataFusionError::External(Box::new(err)), } } } diff --git a/rust/geodatafusion/src/udf/native/io/geohash.rs b/rust/geodatafusion/src/udf/native/io/geohash.rs new file mode 100644 index 00000000..b9e2e58a --- /dev/null +++ b/rust/geodatafusion/src/udf/native/io/geohash.rs @@ -0,0 +1,327 @@ +use std::any::Any; +use std::sync::{Arc, OnceLock}; + +use arrow::array::{AsArray, StringBuilder}; +use arrow_schema::DataType; +use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER; +use datafusion::logical_expr::{ + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, +}; +use geo_traits::PointTrait; +use geoarrow::array::{CoordType, PointArray, PointBuilder, RectBuilder}; +use geoarrow::datatypes::Dimension; +use geoarrow::trait_::{ArrayAccessor, NativeScalar}; +use geoarrow::ArrayBase; + +use crate::data_types::{BOX2D_TYPE, POINT2D_TYPE}; +use crate::error::GeoDataFusionResult; + +#[derive(Debug)] +pub(super) struct Box2DFromGeoHash { + signature: Signature, +} + +impl Box2DFromGeoHash { + pub fn new() -> Self { + Self { + signature: Signature::exact(vec![DataType::Utf8], Volatility::Immutable), + } + } +} + +static BOX_FROM_GEOHASH_DOC: OnceLock = OnceLock::new(); + +impl ScalarUDFImpl for Box2DFromGeoHash { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "st_box2dfromgeohash" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result { + Ok(BOX2D_TYPE.into()) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result { + Ok(box_from_geohash_impl(args)?) 
+ } + + fn documentation(&self) -> Option<&Documentation> { + Some(BOX_FROM_GEOHASH_DOC.get_or_init(|| { + Documentation::builder( + DOC_SECTION_OTHER, + "Return a BOX2D from a GeoHash string.", + "ST_Box2dFromGeoHash(geohash)", + ) + .with_argument("text", "geohash") + .build() + })) + } +} + +fn box_from_geohash_impl(args: &[ColumnarValue]) -> GeoDataFusionResult { + let array = ColumnarValue::values_to_arrays(args)? + .into_iter() + .next() + .unwrap(); + + let string_array = array.as_string::(); + let mut builder = + RectBuilder::with_capacity_and_options(Dimension::XY, array.len(), Default::default()); + + for s in string_array.iter() { + builder.push_rect(s.map(geohash::decode_bbox).transpose()?.as_ref()); + } + + Ok(builder.finish().into_array_ref().into()) +} + +#[derive(Debug)] +pub(super) struct PointFromGeoHash { + signature: Signature, +} + +impl PointFromGeoHash { + pub fn new() -> Self { + Self { + signature: Signature::exact(vec![DataType::Utf8], Volatility::Immutable), + } + } +} + +static POINT_FROM_GEOHASH_DOC: OnceLock = OnceLock::new(); + +impl ScalarUDFImpl for PointFromGeoHash { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "st_pointfromgeohash" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result { + Ok(POINT2D_TYPE.into()) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result { + Ok(point_from_geohash_impl(args)?) + } + + fn documentation(&self) -> Option<&Documentation> { + Some(POINT_FROM_GEOHASH_DOC.get_or_init(|| { + Documentation::builder( + DOC_SECTION_OTHER, + "Return a point from a GeoHash string. The point represents the center point of the GeoHash.", + "ST_PointFromGeoHash(geohash)", + ) + .with_argument("text", "geohash") + .build() + })) + } +} + +fn point_from_geohash_impl(args: &[ColumnarValue]) -> GeoDataFusionResult { + let array = ColumnarValue::values_to_arrays(args)? 
+ .into_iter() + .next() + .unwrap(); + + let string_array = array.as_string::(); + let mut builder = PointBuilder::with_capacity_and_options( + Dimension::XY, + array.len(), + CoordType::Separated, + Default::default(), + ); + + for s in string_array.iter() { + if let Some(s) = s { + let (coord, _, _) = geohash::decode(s)?; + builder.push_coord(Some(&coord)); + } else { + builder.push_null(); + } + } + + Ok(builder.finish().into_array_ref().into()) +} + +#[derive(Debug)] +pub(super) struct GeoHash { + signature: Signature, +} + +impl GeoHash { + pub fn new() -> Self { + Self { + signature: Signature::exact(vec![POINT2D_TYPE.into()], Volatility::Immutable), + } + } +} + +static GEOHASH_DOC: OnceLock = OnceLock::new(); + +impl ScalarUDFImpl for GeoHash { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "st_geohash" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result { + Ok(DataType::Utf8) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result { + Ok(geohash_impl(args)?) + } + + fn documentation(&self) -> Option<&Documentation> { + Some(GEOHASH_DOC.get_or_init(|| { + Documentation::builder( + DOC_SECTION_OTHER, + "Computes a GeoHash representation of a geometry. A GeoHash encodes a geographic Point into a text form that is sortable and searchable based on prefixing. A shorter GeoHash is a less precise representation of a point. It can be thought of as a box that contains the point.", + "ST_GeoHash(point)", + ) + .with_argument("geom", "geometry") + .build() + })) + } +} + +fn geohash_impl(args: &[ColumnarValue]) -> GeoDataFusionResult { + let array = ColumnarValue::values_to_arrays(args)? 
+ .into_iter() + .next() + .unwrap(); + let point_array = PointArray::try_from((array.as_ref(), Dimension::XY))?; + + let mut builder = StringBuilder::with_capacity(array.len(), 0); + + for point in point_array.iter() { + if let Some(point) = point { + let coord = point.coord().unwrap(); + // TODO: make arg + // 12 is the max length supported by rust geohash. We should document this and maybe + // clamp numbers to 12. + let s = geohash::encode(coord.to_geo(), 12)?; + builder.append_value(s); + } else { + builder.append_null(); + } + } + + Ok(ColumnarValue::Array(Arc::new(builder.finish()))) +} + +#[cfg(test)] +mod test { + use approx::relative_eq; + use arrow::array::AsArray; + use datafusion::prelude::*; + use geo_traits::{CoordTrait, PointTrait, RectTrait}; + use geoarrow::array::{PointArray, RectArray}; + use geoarrow::datatypes::Dimension; + use geoarrow::trait_::ArrayAccessor; + + use crate::data_types::{BOX2D_TYPE, POINT2D_TYPE}; + use crate::udf::native::register_native; + + #[tokio::test] + async fn test_box2d_from_geohash() { + let ctx = SessionContext::new(); + register_native(&ctx); + + let out = ctx + .sql("SELECT ST_Box2dFromGeoHash('ww8p1r4t8');") + .await + .unwrap(); + + let batches = out.collect().await.unwrap(); + assert_eq!(batches.len(), 1); + + let batch = batches.into_iter().next().unwrap(); + assert_eq!(batch.columns().len(), 1); + + assert!(batch + .schema() + .field(0) + .data_type() + .equals_datatype(&BOX2D_TYPE.into())); + + let rect_array = RectArray::try_from((batch.columns()[0].as_ref(), Dimension::XY)).unwrap(); + let rect = rect_array.value(0); + + assert!(relative_eq!(rect.min().x(), 112.55836486816406)); + assert!(relative_eq!(rect.min().y(), 37.83236503601074)); + assert!(relative_eq!(rect.max().x(), 112.5584077835083)); + assert!(relative_eq!(rect.max().y(), 37.83240795135498)); + } + + #[tokio::test] + async fn test_point_from_geohash() { + let ctx = SessionContext::new(); + register_native(&ctx); + + let out = ctx + 
.sql("SELECT ST_PointFromGeoHash('9qqj');") + .await + .unwrap(); + + let batches = out.collect().await.unwrap(); + assert_eq!(batches.len(), 1); + + let batch = batches.into_iter().next().unwrap(); + assert_eq!(batch.columns().len(), 1); + + assert!(batch + .schema() + .field(0) + .data_type() + .equals_datatype(&POINT2D_TYPE.into())); + + let point_array = + PointArray::try_from((batch.columns()[0].as_ref(), Dimension::XY)).unwrap(); + let point = point_array.value(0); + + assert!(relative_eq!(point.coord().unwrap().x(), -115.13671875)); + assert!(relative_eq!(point.coord().unwrap().y(), 36.123046875)); + } + + #[tokio::test] + async fn test_geohash() { + let ctx = SessionContext::new(); + register_native(&ctx); + + let out = ctx + .sql("SELECT ST_GeoHash( ST_Point(-126,48) );") + .await + .unwrap(); + + let batches = out.collect().await.unwrap(); + assert_eq!(batches.len(), 1); + + let batch = batches.into_iter().next().unwrap(); + assert_eq!(batch.columns().len(), 1); + + let arr = batch.columns()[0].as_string::(); + assert_eq!(arr.value(0), "c0w3hf1s70w3"); + } +} diff --git a/rust/geodatafusion/src/udf/native/io/mod.rs b/rust/geodatafusion/src/udf/native/io/mod.rs index bbe293c8..63d63aad 100644 --- a/rust/geodatafusion/src/udf/native/io/mod.rs +++ b/rust/geodatafusion/src/udf/native/io/mod.rs @@ -1,5 +1,6 @@ //! 
Geometry Input and Output +mod geohash; mod union_example; mod wkb; mod wkt; @@ -8,6 +9,9 @@ use datafusion::prelude::SessionContext; /// Register all provided functions for geometry input and output pub fn register_udfs(ctx: &SessionContext) { + ctx.register_udf(geohash::Box2DFromGeoHash::new().into()); + ctx.register_udf(geohash::GeoHash::new().into()); + ctx.register_udf(geohash::PointFromGeoHash::new().into()); ctx.register_udf(wkb::AsBinary::new().into()); ctx.register_udf(wkb::GeomFromWKB::new().into()); ctx.register_udf(wkt::AsText::new().into()); From 815f62c94b498ed4edcb200b6280edfb6f3669a0 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 16 Dec 2024 12:41:45 -0500 Subject: [PATCH 08/11] Update datafusion git tag to fix DenseUnion (#951) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ref https://github.com/apache/datafusion/pull/13797. This is now working for geometries! ``` SELECT ST_GeomFromText('LINESTRING(-71.160281 42.258729,-71.160837 42.259113,-71.161144 42.25932)'); ``` ``` ---- udf::native::io::wkt::test::test stdout ---- +----------------------------------------------------------------------------------------------------+ | st_geomfromtext(Utf8("LINESTRING(-71.160281 42.258729,-71.160837 42.259113,-71.161144 42.25932)")) | +----------------------------------------------------------------------------------------------------+ | {=[{x: -71.160281, y: 42.258729}, {x: -71.160837, y: 42.259113}, {x: -71.161144, y: 42.25932}]} | +----------------------------------------------------------------------------------------------------+ ``` 🚀 --- Cargo.lock | 45 ++++----- rust/geodatafusion/Cargo.toml | 2 +- rust/geodatafusion/src/udf/native/io/mod.rs | 1 - .../src/udf/native/io/union_example.rs | 95 ------------------- rust/geodatafusion/src/udf/native/io/wkt.rs | 14 +-- 5 files changed, 26 insertions(+), 131 deletions(-) delete mode 100644 rust/geodatafusion/src/udf/native/io/union_example.rs diff --git 
a/Cargo.lock b/Cargo.lock index c5761dd4..aea52987 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -955,7 +955,7 @@ dependencies = [ [[package]] name = "datafusion" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "arrow", "arrow-array", @@ -1005,7 +1005,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "arrow-schema", "async-trait", @@ -1019,7 +1019,7 @@ dependencies = [ [[package]] name = "datafusion-common" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "ahash", "arrow", @@ -1042,7 +1042,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "log", "tokio", @@ -1051,12 +1051,12 @@ dependencies = [ [[package]] name = "datafusion-doc" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" 
+source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" [[package]] name = "datafusion-execution" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "arrow", "dashmap", @@ -1074,7 +1074,7 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "arrow", "chrono", @@ -1094,7 +1094,7 @@ dependencies = [ [[package]] name = "datafusion-expr-common" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "arrow", "datafusion-common", @@ -1104,7 +1104,7 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "arrow", "arrow-buffer", @@ -1116,6 +1116,7 @@ dependencies = [ "datafusion-doc", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", "datafusion-macros", "hashbrown 0.14.5", "hex", @@ -1132,7 +1133,7 @@ 
dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "ahash", "arrow", @@ -1153,7 +1154,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "ahash", "arrow", @@ -1165,7 +1166,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "arrow", "arrow-array", @@ -1186,7 +1187,7 @@ dependencies = [ [[package]] name = "datafusion-functions-table" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "arrow", "async-trait", @@ -1201,7 +1202,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = 
"git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "datafusion-common", "datafusion-doc", @@ -1217,7 +1218,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1226,7 +1227,7 @@ dependencies = [ [[package]] name = "datafusion-macros" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "quote", "syn 2.0.79", @@ -1235,7 +1236,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "arrow", "chrono", @@ -1253,7 +1254,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "ahash", "arrow", @@ -1277,7 +1278,7 @@ dependencies = [ [[package]] name = 
"datafusion-physical-expr-common" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "ahash", "arrow", @@ -1290,7 +1291,7 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "arrow", "datafusion-common", @@ -1306,7 +1307,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "ahash", "arrow", @@ -1337,7 +1338,7 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "43.0.0" -source = "git+https://github.com/apache/datafusion?rev=03e39da62e403e064d21b57e9d6c200464c03749#03e39da62e403e064d21b57e9d6c200464c03749" +source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "arrow", "arrow-array", diff --git a/rust/geodatafusion/Cargo.toml b/rust/geodatafusion/Cargo.toml index 2f99ee6b..ef471815 100644 --- a/rust/geodatafusion/Cargo.toml +++ b/rust/geodatafusion/Cargo.toml @@ -12,7 +12,7 @@ rust-version = "1.82" [dependencies] -datafusion = { git = "https://github.com/apache/datafusion", rev = "03e39da62e403e064d21b57e9d6c200464c03749" } 
+datafusion = { git = "https://github.com/kylebarron/datafusion", rev = "170432e3179ed72f413ffcd4d7edfe0007db296d" } arrow = { version = "53.3", features = ["ffi"] } arrow-array = { version = "53.3", features = ["chrono-tz"] } arrow-buffer = "53.3" diff --git a/rust/geodatafusion/src/udf/native/io/mod.rs b/rust/geodatafusion/src/udf/native/io/mod.rs index 63d63aad..ea85c047 100644 --- a/rust/geodatafusion/src/udf/native/io/mod.rs +++ b/rust/geodatafusion/src/udf/native/io/mod.rs @@ -1,7 +1,6 @@ //! Geometry Input and Output mod geohash; -mod union_example; mod wkb; mod wkt; diff --git a/rust/geodatafusion/src/udf/native/io/union_example.rs b/rust/geodatafusion/src/udf/native/io/union_example.rs deleted file mode 100644 index 982edc63..00000000 --- a/rust/geodatafusion/src/udf/native/io/union_example.rs +++ /dev/null @@ -1,95 +0,0 @@ -use std::any::Any; -use std::sync::Arc; - -use arrow::array::UnionBuilder; -use arrow::datatypes::{Float64Type, Int32Type}; -use arrow_array::Array; -use arrow_schema::{DataType, Field, UnionFields, UnionMode}; -use datafusion::logical_expr::{ - ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, -}; - -#[derive(Debug)] -pub struct UnionExample { - signature: Signature, -} - -impl UnionExample { - #[allow(dead_code)] - pub fn new() -> Self { - Self { - signature: Signature::nullary(Volatility::Immutable), - } - } -} - -impl ScalarUDFImpl for UnionExample { - fn as_any(&self) -> &dyn Any { - self - } - - fn name(&self) -> &str { - "example_union" - } - - fn signature(&self) -> &Signature { - &self.signature - } - - fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result { - let fields = UnionFields::new( - vec![0, 1], - vec![ - Arc::new(Field::new("a", DataType::Int32, false)), - Arc::new(Field::new("b", DataType::Float64, false)), - ], - ); - Ok(DataType::Union(fields, UnionMode::Dense)) - } - - fn invoke_no_args(&self, _number_rows: usize) -> datafusion::error::Result { - let mut builder = 
UnionBuilder::new_dense(); - builder.append::("a", 1).unwrap(); - builder.append::("b", 3.0).unwrap(); - builder.append::("a", 4).unwrap(); - let arr = builder.build().unwrap(); - - assert_eq!(arr.type_id(0), 0); - assert_eq!(arr.type_id(1), 1); - assert_eq!(arr.type_id(2), 0); - - assert_eq!(arr.value_offset(0), 0); - assert_eq!(arr.value_offset(1), 0); - assert_eq!(arr.value_offset(2), 1); - - let arr = arr.slice(0, 1); - - assert!(matches!( - arr.data_type(), - DataType::Union(_, UnionMode::Dense) - )); - - Ok(ColumnarValue::Array(Arc::new(arr))) - } - - fn documentation(&self) -> Option<&Documentation> { - None - } -} - -#[cfg(test)] -mod test { - use super::*; - use datafusion::prelude::*; - - #[tokio::test] - async fn test() { - let ctx = SessionContext::new(); - ctx.register_udf(UnionExample::new().into()); - - let out = ctx.sql("SELECT example_union();").await.unwrap(); - // TODO: fix this error upstream - // https://github.com/apache/datafusion/issues/13762 - out.show().await.unwrap_err(); - } -} diff --git a/rust/geodatafusion/src/udf/native/io/wkt.rs b/rust/geodatafusion/src/udf/native/io/wkt.rs index 872afacd..d3d23c21 100644 --- a/rust/geodatafusion/src/udf/native/io/wkt.rs +++ b/rust/geodatafusion/src/udf/native/io/wkt.rs @@ -2,7 +2,6 @@ use std::any::Any; use std::sync::OnceLock; use arrow::array::AsArray; -use arrow_array::Array; use arrow_schema::DataType; use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER; use datafusion::logical_expr::{ @@ -131,14 +130,7 @@ fn geom_from_text_impl(args: &[ColumnarValue]) -> GeoDataFusionResult().clone(), Default::default()); let native_arr = read_wkt(&wkt_arr, CoordType::Separated, false)?; - dbg!("native_arr"); - - let arrow_arr = native_arr.to_array_ref(); - if let DataType::Union(_fields, mode) = arrow_arr.data_type() { - dbg!(mode); - } - - Ok(arrow_arr.into()) + Ok(native_arr.to_array_ref().into()) } #[cfg(test)] @@ -153,8 +145,6 @@ mod test { register_native(&ctx); let out = ctx.sql("SELECT 
ST_GeomFromText('LINESTRING(-71.160281 42.258729,-71.160837 42.259113,-71.161144 42.25932)');").await.unwrap(); - // TODO: fix this error upstream - // https://github.com/apache/datafusion/issues/13762 - out.show().await.unwrap_err(); + out.show().await.unwrap(); } } From 4cbad81fef5993bf06f963b301a9b2f13fac790f Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 16 Dec 2024 20:20:33 -0500 Subject: [PATCH 09/11] Add ST_Simplify (#952) --- js/Cargo.lock | 2 +- js/src/algorithm/geo/mod.rs | 2 +- js/src/algorithm/geo/simplify.rs | 2 +- js/src/algorithm/geo/simplify_vw.rs | 2 +- python/Cargo.lock | 190 +++++++++--------- .../src/algorithm/geo/simplify.rs | 12 +- .../geoarrow-compute/src/broadcasting/mod.rs | 3 - .../src/broadcasting/primitive.rs | 67 ------ python/geoarrow-compute/src/lib.rs | 1 - .../src/algorithm/broadcasting/primitive.rs | 10 +- rust/geoarrow/src/algorithm/geo/simplify.rs | 145 +++++++++---- .../geoarrow/src/algorithm/geo/simplify_vw.rs | 143 +++++++++---- .../src/algorithm/geo/simplify_vw_preserve.rs | 141 +++++++++---- rust/geodatafusion/README.md | 12 +- .../src/udf/native/processing/mod.rs | 6 + .../src/udf/native/processing/simplify.rs | 124 ++++++++++++ .../processing/simplify_preserve_topology.rs | 95 +++++++++ .../src/udf/native/processing/simplify_vw.rs | 122 +++++++++++ 18 files changed, 783 insertions(+), 296 deletions(-) delete mode 100644 python/geoarrow-compute/src/broadcasting/mod.rs delete mode 100644 python/geoarrow-compute/src/broadcasting/primitive.rs create mode 100644 rust/geodatafusion/src/udf/native/processing/simplify.rs create mode 100644 rust/geodatafusion/src/udf/native/processing/simplify_preserve_topology.rs create mode 100644 rust/geodatafusion/src/udf/native/processing/simplify_vw.rs diff --git a/js/Cargo.lock b/js/Cargo.lock index 8695cdfa..85b40204 100644 --- a/js/Cargo.lock +++ b/js/Cargo.lock @@ -1093,7 +1093,7 @@ dependencies = [ [[package]] name = "geoarrow" -version = "0.4.0-beta.1" +version = "0.4.0-beta.2" 
dependencies = [ "arrow", "arrow-array", diff --git a/js/src/algorithm/geo/mod.rs b/js/src/algorithm/geo/mod.rs index fc403992..eff20889 100644 --- a/js/src/algorithm/geo/mod.rs +++ b/js/src/algorithm/geo/mod.rs @@ -13,7 +13,7 @@ pub mod euclidean_length; pub mod geodesic_area; pub mod geodesic_length; pub mod haversine_length; -pub mod rotate; +// pub mod rotate; pub mod scale; pub mod simplify; pub mod simplify_vw; diff --git a/js/src/algorithm/geo/simplify.rs b/js/src/algorithm/geo/simplify.rs index 3d3ea5a5..0db772af 100644 --- a/js/src/algorithm/geo/simplify.rs +++ b/js/src/algorithm/geo/simplify.rs @@ -21,7 +21,7 @@ macro_rules! impl_simplify { #[wasm_bindgen] pub fn simplify(&self, epsilon: f64) -> Self { use geoarrow::algorithm::geo::Simplify; - Simplify::simplify(&self.0, &epsilon).into() + Simplify::simplify(&self.0, &epsilon.into()).into() } } }; diff --git a/js/src/algorithm/geo/simplify_vw.rs b/js/src/algorithm/geo/simplify_vw.rs index a2dfdd14..33ec3480 100644 --- a/js/src/algorithm/geo/simplify_vw.rs +++ b/js/src/algorithm/geo/simplify_vw.rs @@ -21,7 +21,7 @@ macro_rules! 
impl_simplify_vw { #[wasm_bindgen(js_name = simplifyVw)] pub fn simplify_vw(&self, epsilon: f64) -> Self { use geoarrow::algorithm::geo::SimplifyVw; - SimplifyVw::simplify_vw(&self.0, &epsilon).into() + SimplifyVw::simplify_vw(&self.0, &epsilon.into()).into() } } }; diff --git a/python/Cargo.lock b/python/Cargo.lock index 42f27f83..aed8a9a7 100644 --- a/python/Cargo.lock +++ b/python/Cargo.lock @@ -170,7 +170,7 @@ dependencies = [ "base64 0.22.1", "chrono", "half", - "lexical-core 1.0.2", + "lexical-core 1.0.5", "num", "ryu", ] @@ -190,7 +190,7 @@ dependencies = [ "csv", "csv-core", "lazy_static", - "lexical-core 1.0.2", + "lexical-core 1.0.5", "regex", ] @@ -236,7 +236,7 @@ dependencies = [ "chrono", "half", "indexmap", - "lexical-core 1.0.2", + "lexical-core 1.0.5", "num", "serde", "serde_json", @@ -492,9 +492,9 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.38" +version = "0.4.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825" dependencies = [ "android-tzdata", "iana-time-zone", @@ -613,9 +613,9 @@ dependencies = [ [[package]] name = "crossbeam-deque" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" dependencies = [ "crossbeam-epoch", "crossbeam-utils", @@ -632,18 +632,18 @@ dependencies = [ [[package]] name = "crossbeam-queue" -version = "0.3.11" +version = "0.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df0346b5d5e76ac2fe4e327c5fd1118d6be7c51dfb18f9b7922923f287471e35" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" dependencies = [ 
"crossbeam-utils", ] [[package]] name = "crossbeam-utils" -version = "0.8.20" +version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "crunchy" @@ -822,9 +822,9 @@ checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" [[package]] name = "fastrand" -version = "2.2.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "flatbuffers" @@ -1384,11 +1384,11 @@ dependencies = [ [[package]] name = "home" -version = "0.5.9" +version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -1453,9 +1453,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "1.5.1" +version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97818827ef4f364230e16705d4706e2897df2bb60617d6ca15d598025a3c481f" +checksum = "256fb8d4bd6413123cc9d91832d78325c48ff41677595be797d90f42969beae0" dependencies = [ "bytes", "futures-channel", @@ -1481,7 +1481,7 @@ dependencies = [ "http", "hyper", "hyper-util", - "rustls 0.23.19", + "rustls 0.23.20", "rustls-native-certs", "rustls-pki-types", "tokio", @@ -1793,9 +1793,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.74" +version = "0.3.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a865e038f7f6ed956f788f0d7d60c541fff74c7bd74272c5d4cf15c63743e705" +checksum = "6717b6b5b077764fb5966237269cb3c64edddde4b14ce42647430a78ced9e7b7" dependencies = [ "once_cell", "wasm-bindgen", @@ -1825,15 +1825,15 @@ dependencies = [ [[package]] name = "lexical-core" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0431c65b318a590c1de6b8fd6e72798c92291d27762d94c9e6c37ed7a73d8458" +checksum = "b765c31809609075565a70b4b71402281283aeda7ecaf4818ac14a7b2ade8958" dependencies = [ - "lexical-parse-float 1.0.2", - "lexical-parse-integer 1.0.2", - "lexical-util 1.0.3", - "lexical-write-float 1.0.2", - "lexical-write-integer 1.0.2", + "lexical-parse-float 1.0.5", + "lexical-parse-integer 1.0.5", + "lexical-util 1.0.6", + "lexical-write-float 1.0.5", + "lexical-write-integer 1.0.5", ] [[package]] @@ -1849,12 +1849,12 @@ dependencies = [ [[package]] name = "lexical-parse-float" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb17a4bdb9b418051aa59d41d65b1c9be5affab314a872e5ad7f06231fb3b4e0" +checksum = "de6f9cb01fb0b08060209a057c048fcbab8717b4c1ecd2eac66ebfe39a65b0f2" dependencies = [ - "lexical-parse-integer 1.0.2", - "lexical-util 1.0.3", + "lexical-parse-integer 1.0.5", + "lexical-util 1.0.6", "static_assertions", ] @@ -1870,11 +1870,11 @@ dependencies = [ [[package]] name = "lexical-parse-integer" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5df98f4a4ab53bf8b175b363a34c7af608fe31f93cc1fb1bf07130622ca4ef61" +checksum = "72207aae22fc0a121ba7b6d479e42cbfea549af1479c3f3a4f12c70dd66df12e" dependencies = [ - "lexical-util 1.0.3", + "lexical-util 1.0.6", "static_assertions", ] @@ -1889,9 +1889,9 @@ dependencies = [ [[package]] name = "lexical-util" -version = "1.0.3" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"85314db53332e5c192b6bca611fb10c114a80d1b831ddac0af1e9be1b9232ca0" +checksum = "5a82e24bf537fd24c177ffbbdc6ebcc8d54732c35b50a3f28cc3f4e4c949a0b3" dependencies = [ "static_assertions", ] @@ -1909,12 +1909,12 @@ dependencies = [ [[package]] name = "lexical-write-float" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e7c3ad4e37db81c1cbe7cf34610340adc09c322871972f74877a712abc6c809" +checksum = "c5afc668a27f460fb45a81a757b6bf2f43c2d7e30cb5a2dcd3abf294c78d62bd" dependencies = [ - "lexical-util 1.0.3", - "lexical-write-integer 1.0.2", + "lexical-util 1.0.6", + "lexical-write-integer 1.0.5", "static_assertions", ] @@ -1930,19 +1930,19 @@ dependencies = [ [[package]] name = "lexical-write-integer" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb89e9f6958b83258afa3deed90b5de9ef68eef090ad5086c791cd2345610162" +checksum = "629ddff1a914a836fb245616a7888b62903aae58fa771e1d83943035efa0f978" dependencies = [ - "lexical-util 1.0.3", + "lexical-util 1.0.6", "static_assertions", ] [[package]] name = "libc" -version = "0.2.167" +version = "0.2.168" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc" +checksum = "5aaeb2981e0606ca11d79718f8bb01164f1d6ed75080182d3abf017e6d244b6d" [[package]] name = "libm" @@ -2749,9 +2749,9 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls 0.23.19", + "rustls 0.23.20", "socket2", - "thiserror 2.0.4", + "thiserror 2.0.7", "tokio", "tracing", ] @@ -2767,10 +2767,10 @@ dependencies = [ "rand", "ring", "rustc-hash", - "rustls 0.23.19", + "rustls 0.23.20", "rustls-pki-types", "slab", - "thiserror 2.0.4", + "thiserror 2.0.7", "tinyvec", "tracing", "web-time", @@ -2778,9 +2778,9 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.7" +version = "0.5.8" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d5a626c6807713b15cac82a6acaccd6043c9a5408c24baae07611fec3f243da" +checksum = "52cd4b1eff68bf27940dd39811292c49e007f4d0b4c357358dc9b0197be6b527" dependencies = [ "cfg_aliases", "libc", @@ -2866,9 +2866,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.7" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" +checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" dependencies = [ "bitflags 2.6.0", ] @@ -2931,7 +2931,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.19", + "rustls 0.23.20", "rustls-native-certs", "rustls-pemfile 2.2.0", "rustls-pki-types", @@ -3028,15 +3028,15 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.41" +version = "0.38.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6" +checksum = "f93dc38ecbab2eb790ff964bb77fa94faf256fd3e73285fd7ba0903b76bedb85" dependencies = [ "bitflags 2.6.0", "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -3052,9 +3052,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.19" +version = "0.23.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "934b404430bb06b3fae2cba809eb45a1ab1aecd64491213d7c3301b88393f8d1" +checksum = "5065c3f250cbd332cd894be57c40fa52387247659b14a2d6041d121547903b1b" dependencies = [ "once_cell", "ring", @@ -3096,9 +3096,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.10.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" +checksum = "d2bf47e6ff922db3825eb750c4e2ff784c6ff8fb9e13046ef6a1d1c5401b0b37" 
dependencies = [ "web-time", ] @@ -3208,9 +3208,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.23" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" +checksum = "3cb6eb87a131f756572d7fb904f6e7b68633f09cca868c5df1c4b8d1a694bbba" [[package]] name = "seq-macro" @@ -3220,18 +3220,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.215" +version = "1.0.216" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" +checksum = "0b9781016e935a97e8beecf0c933758c97a5520d32930e460142b4cd80c6338e" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.215" +version = "1.0.216" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" +checksum = "46f859dbbf73865c6627ed570e78961cd3ac92407a2d117204c49232485da55e" dependencies = [ "proc-macro2", "quote", @@ -3732,11 +3732,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.4" +version = "2.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f49a1853cf82743e3b7950f77e0f4d622ca36cf4317cba00c767838bac8d490" +checksum = "93605438cbd668185516ab499d589afb7ee1859ea3d5fc8f6b0755e1c7443767" dependencies = [ - "thiserror-impl 2.0.4", + "thiserror-impl 2.0.7", ] [[package]] @@ -3752,9 +3752,9 @@ dependencies = [ [[package]] name = "thiserror-impl" -version = "2.0.4" +version = "2.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8381894bb3efe0c4acac3ded651301ceee58a15d47c2e34885ed1908ad667061" +checksum = "e1d8749b4531af2117677a5fcd12b1348a3fe2b81e36e61ffeac5c4aa3273e36" dependencies = [ "proc-macro2", "quote", @@ -3864,20 +3864,19 @@ dependencies = [ 
[[package]] name = "tokio-rustls" -version = "0.26.0" +version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" +checksum = "5f6d0975eaace0cf0fcadee4e4aaa5da15b5c079146f2cffb67c113be122bf37" dependencies = [ - "rustls 0.23.19", - "rustls-pki-types", + "rustls 0.23.20", "tokio", ] [[package]] name = "tokio-stream" -version = "0.1.16" +version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f4e6ce100d0eb49a2734f8c0812bcd324cf357d21810932c5df6b96ef2b86f1" +checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" dependencies = [ "futures-core", "pin-project-lite", @@ -3976,9 +3975,9 @@ checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "unicode-bidi" -version = "0.3.17" +version = "0.3.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ab17db44d7388991a428b2ee655ce0c212e862eff1768a455c58f9aad6e7893" +checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" [[package]] name = "unicode-ident" @@ -4099,9 +4098,9 @@ checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" [[package]] name = "wasm-bindgen" -version = "0.2.97" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d15e63b4482863c109d70a7b8706c1e364eb6ea449b201a76c5b89cedcec2d5c" +checksum = "a474f6281d1d70c17ae7aa6a613c87fce69a127e2624002df63dcb39d6cf6396" dependencies = [ "cfg-if", "once_cell", @@ -4110,13 +4109,12 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.97" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d36ef12e3aaca16ddd3f67922bc63e48e953f126de60bd33ccc0101ef9998cd" +checksum = "5f89bb38646b4f81674e8f5c3fb81b562be1fd936d84320f3264486418519c79" dependencies = [ "bumpalo", "log", - 
"once_cell", "proc-macro2", "quote", "syn 2.0.90", @@ -4125,9 +4123,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.47" +version = "0.4.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dfaf8f50e5f293737ee323940c7d8b08a66a95a419223d9f41610ca08b0833d" +checksum = "38176d9b44ea84e9184eff0bc34cc167ed044f816accfe5922e54d84cf48eca2" dependencies = [ "cfg-if", "js-sys", @@ -4138,9 +4136,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.97" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "705440e08b42d3e4b36de7d66c944be628d579796b8090bfa3471478a2260051" +checksum = "2cc6181fd9a7492eef6fef1f33961e3695e4579b9872a6f7c83aee556666d4fe" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -4148,9 +4146,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.97" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98c9ae5a76e46f4deecd0f0255cc223cfa18dc9b261213b8aa0c7b36f61b3f1d" +checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2" dependencies = [ "proc-macro2", "quote", @@ -4161,9 +4159,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.97" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ee99da9c5ba11bd675621338ef6fa52296b76b83305e9b6e5c77d4c286d6d49" +checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6" [[package]] name = "wasm-streams" @@ -4180,9 +4178,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.74" +version = "0.3.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a98bc3c33f0fe7e59ad7cd041b89034fa82a7c2d4365ca538dda6cdaf513863c" +checksum = "04dd7223427d52553d3702c004d3b2fe07c148165faa56313cb00211e31c12bc" dependencies = [ "js-sys", "wasm-bindgen", diff --git 
a/python/geoarrow-compute/src/algorithm/geo/simplify.rs b/python/geoarrow-compute/src/algorithm/geo/simplify.rs index 29743553..22928f6e 100644 --- a/python/geoarrow-compute/src/algorithm/geo/simplify.rs +++ b/python/geoarrow-compute/src/algorithm/geo/simplify.rs @@ -37,17 +37,17 @@ pub fn simplify( match input { AnyNativeInput::Array(arr) => { let out = match method { - SimplifyMethod::Rdp => arr.as_ref().simplify(&epsilon)?, - SimplifyMethod::Vw => arr.as_ref().simplify_vw(&epsilon)?, - SimplifyMethod::VwPreserve => arr.as_ref().simplify_vw_preserve(&epsilon)?, + SimplifyMethod::Rdp => arr.as_ref().simplify(&epsilon.into())?, + SimplifyMethod::Vw => arr.as_ref().simplify_vw(&epsilon.into())?, + SimplifyMethod::VwPreserve => arr.as_ref().simplify_vw_preserve(&epsilon.into())?, }; return_geometry_array(py, out) } AnyNativeInput::Chunked(arr) => { let out = match method { - SimplifyMethod::Rdp => arr.as_ref().simplify(&epsilon)?, - SimplifyMethod::Vw => arr.as_ref().simplify_vw(&epsilon)?, - SimplifyMethod::VwPreserve => arr.as_ref().simplify_vw_preserve(&epsilon)?, + SimplifyMethod::Rdp => arr.as_ref().simplify(&epsilon.into())?, + SimplifyMethod::Vw => arr.as_ref().simplify_vw(&epsilon.into())?, + SimplifyMethod::VwPreserve => arr.as_ref().simplify_vw_preserve(&epsilon.into())?, }; return_chunked_geometry_array(py, out) } diff --git a/python/geoarrow-compute/src/broadcasting/mod.rs b/python/geoarrow-compute/src/broadcasting/mod.rs deleted file mode 100644 index fdfafee2..00000000 --- a/python/geoarrow-compute/src/broadcasting/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -mod primitive; - -pub use primitive::BroadcastableFloat; diff --git a/python/geoarrow-compute/src/broadcasting/primitive.rs b/python/geoarrow-compute/src/broadcasting/primitive.rs deleted file mode 100644 index b83d64d5..00000000 --- a/python/geoarrow-compute/src/broadcasting/primitive.rs +++ /dev/null @@ -1,67 +0,0 @@ -use arrow_array::types::Float64Type; -// use arrow_array::{Float64Array, UInt32Array}; 
-use geoarrow::algorithm::broadcasting::BroadcastablePrimitive; -use pyo3::prelude::*; - -// pub struct BroadcastableUint32(pub(crate) BroadcastablePrimitive); - -// impl<'a> FromPyObject<'a> for BroadcastableUint32 { -// fn extract(_ob: &'a PyAny) -> PyResult { -// todo!() -// // Python::with_gil(|py| { -// // let pa = py.import("pyarrow")?; -// // let array = pa.getattr("Array")?; -// // if ob.hasattr("__arrow_c_array__")? { -// // let arr = from_py_array(ob)?; -// // let arr = arr.as_any().downcast_ref::().unwrap(); -// // Ok(BroadcastableUint32(BroadcastablePrimitive::Array( -// // arr.clone(), -// // ))) -// // } else { -// // let val: u32 = ob.extract()?; -// // Ok(BroadcastableUint32(BroadcastablePrimitive::Scalar(val))) -// // } -// // }) -// } -// } - -// // impl IntoPy> for BroadcastableUint32 { -// // fn into_py(self, py: Python<'_>) -> PyResult { -// // match self.0 { -// // BroadcastablePrimitive::Array(arr) => to_py_array(py, todo!()), -// // BroadcastablePrimitive::Scalar(scalar) => Ok(scalar.into_py(py)), -// // } -// // } -// // } - -#[allow(dead_code)] -pub struct BroadcastableFloat(pub(crate) BroadcastablePrimitive); - -impl<'a> FromPyObject<'a> for BroadcastableFloat { - fn extract_bound(_ob: &Bound<'a, PyAny>) -> PyResult { - todo!() - // Python::with_gil(|py| { - // let pa = py.import("pyarrow")?; - // let array = pa.getattr("Array")?; - // if ob.hasattr("__arrow_c_array__")? 
{ - // let arr = from_py_array(ob)?; - // let arr = arr.as_any().downcast_ref::().unwrap(); - // Ok(BroadcastableFloat(BroadcastablePrimitive::Array( - // arr.clone(), - // ))) - // } else { - // let val: f64 = ob.extract()?; - // Ok(BroadcastableFloat(BroadcastablePrimitive::Scalar(val))) - // } - // }) - } -} - -// impl IntoPy> for BroadcastableFloat { -// fn into_py(self, py: Python<'_>) -> PyResult { -// match self.0 { -// BroadcastablePrimitive::Array(arr) => to_py_array(py, todo!()), -// BroadcastablePrimitive::Scalar(scalar) => Ok(scalar.into_py(py)), -// } -// } -// } diff --git a/python/geoarrow-compute/src/lib.rs b/python/geoarrow-compute/src/lib.rs index a024e656..8d9a780c 100644 --- a/python/geoarrow-compute/src/lib.rs +++ b/python/geoarrow-compute/src/lib.rs @@ -3,7 +3,6 @@ use pyo3::intern; use pyo3::prelude::*; use pyo3::types::PyTuple; mod algorithm; -pub mod broadcasting; pub mod ffi; mod util; diff --git a/rust/geoarrow/src/algorithm/broadcasting/primitive.rs b/rust/geoarrow/src/algorithm/broadcasting/primitive.rs index 52daf8d8..06346170 100644 --- a/rust/geoarrow/src/algorithm/broadcasting/primitive.rs +++ b/rust/geoarrow/src/algorithm/broadcasting/primitive.rs @@ -51,12 +51,18 @@ where } } -impl> From for BroadcastablePrimitive

{ - fn from(value: N) -> Self { +impl> From for BroadcastablePrimitive

{ + fn from(value: T) -> Self { BroadcastablePrimitive::Scalar(value) } } +// impl<'a, T: ArrowPrimitiveType> From<&'a PrimitiveArray> for BroadcastablePrimitive<'_, T> { +// fn from(value: &'a PrimitiveArray) -> Self { +// BroadcastablePrimitive::Array(value) +// } +// } + #[cfg(test)] mod tests { use crate::algorithm::broadcasting::BroadcastablePrimitive; diff --git a/rust/geoarrow/src/algorithm/geo/simplify.rs b/rust/geoarrow/src/algorithm/geo/simplify.rs index e2da1060..cedb01a9 100644 --- a/rust/geoarrow/src/algorithm/geo/simplify.rs +++ b/rust/geoarrow/src/algorithm/geo/simplify.rs @@ -1,11 +1,13 @@ use std::sync::Arc; +use crate::algorithm::broadcasting::BroadcastablePrimitive; use crate::array::*; use crate::chunked_array::{ChunkedGeometryArray, ChunkedNativeArray}; use crate::datatypes::{Dimension, NativeType}; use crate::error::{GeoArrowError, Result}; use crate::trait_::ArrayAccessor; use crate::NativeArray; +use arrow::datatypes::Float64Type; use geo::Simplify as _Simplify; /// Simplifies a geometry. @@ -41,7 +43,7 @@ pub trait Simplify { /// ]; /// let line_string_array: LineStringArray = (vec![line_string].as_slice(), Dimension::XY).into(); /// - /// let simplified_array = line_string_array.simplify(&1.0); + /// let simplified_array = line_string_array.simplify(&1.0.into()); /// /// let expected = line_string![ /// (x: 0.0, y: 0.0), @@ -52,7 +54,7 @@ pub trait Simplify { /// /// assert_eq!(expected, simplified_array.value_as_geo(0)) /// ``` - fn simplify(&self, epsilon: &f64) -> Self::Output; + fn simplify(&self, epsilon: &BroadcastablePrimitive) -> Self::Output; } /// Implementation that returns the identity @@ -61,7 +63,7 @@ macro_rules! identity_impl { impl Simplify for $type { type Output = Self; - fn simplify(&self, _epsilon: &f64) -> Self { + fn simplify(&self, _epsilon: &BroadcastablePrimitive) -> Self { self.clone() } } @@ -73,46 +75,114 @@ identity_impl!(MultiPointArray); /// Implementation that iterates over geo objects macro_rules! 
iter_geo_impl { - ($type:ty, $geo_type:ty) => { + ($type:ty, $builder_type:ty, $method:ident, $geo_type:ty) => { impl Simplify for $type { type Output = Self; - fn simplify(&self, epsilon: &f64) -> Self { + fn simplify(&self, epsilon: &BroadcastablePrimitive) -> Self { let output_geoms: Vec> = self .iter_geo() - .map(|maybe_g| maybe_g.map(|geom| geom.simplify(epsilon))) + .zip(epsilon) + .map(|(maybe_g, epsilon)| { + if let (Some(geom), Some(eps)) = (maybe_g, epsilon) { + Some(geom.simplify(&eps)) + } else { + None + } + }) .collect(); - (output_geoms, Dimension::XY).into() + <$builder_type>::$method( + output_geoms.as_slice(), + Dimension::XY, + self.coord_type(), + self.metadata.clone(), + ) + .finish() } } }; } -iter_geo_impl!(LineStringArray, geo::LineString); -iter_geo_impl!(PolygonArray, geo::Polygon); -iter_geo_impl!(MultiLineStringArray, geo::MultiLineString); -iter_geo_impl!(MultiPolygonArray, geo::MultiPolygon); -// iter_geo_impl!(MixedGeometryArray, geo::Geometry); -// iter_geo_impl!(GeometryCollectionArray, geo::GeometryCollection); +iter_geo_impl!( + LineStringArray, + LineStringBuilder, + from_nullable_line_strings, + geo::LineString +); +iter_geo_impl!( + PolygonArray, + PolygonBuilder, + from_nullable_polygons, + geo::Polygon +); +iter_geo_impl!( + MultiLineStringArray, + MultiLineStringBuilder, + from_nullable_multi_line_strings, + geo::MultiLineString +); +iter_geo_impl!( + MultiPolygonArray, + MultiPolygonBuilder, + from_nullable_multi_polygons, + geo::MultiPolygon +); + +impl Simplify for GeometryArray { + type Output = Result; + + fn simplify(&self, epsilon: &BroadcastablePrimitive) -> Self::Output { + let output_geoms: Vec> = self + .iter_geo() + .zip(epsilon) + .map(|(maybe_g, epsilon)| { + if let (Some(geom), Some(eps)) = (maybe_g, epsilon) { + let out = match geom { + geo::Geometry::LineString(g) => geo::Geometry::LineString(g.simplify(&eps)), + geo::Geometry::Polygon(g) => geo::Geometry::Polygon(g.simplify(&eps)), + 
geo::Geometry::MultiLineString(g) => { + geo::Geometry::MultiLineString(g.simplify(&eps)) + } + geo::Geometry::MultiPolygon(g) => { + geo::Geometry::MultiPolygon(g.simplify(&eps)) + } + g => g, + }; + Some(out) + } else { + None + } + }) + .collect(); + + let builder = GeometryBuilder::from_nullable_geometries( + output_geoms.as_slice(), + self.coord_type(), + self.metadata().clone(), + false, + )?; + Ok(builder.finish()) + } +} impl Simplify for &dyn NativeArray { type Output = Result>; - fn simplify(&self, epsilon: &f64) -> Self::Output { - use Dimension::*; + fn simplify(&self, epsilon: &BroadcastablePrimitive) -> Self::Output { use NativeType::*; let result: Arc = match self.data_type() { - Point(_, XY) => Arc::new(self.as_point().simplify(epsilon)), - LineString(_, XY) => Arc::new(self.as_line_string().simplify(epsilon)), - Polygon(_, XY) => Arc::new(self.as_polygon().simplify(epsilon)), - MultiPoint(_, XY) => Arc::new(self.as_multi_point().simplify(epsilon)), - MultiLineString(_, XY) => Arc::new(self.as_multi_line_string().simplify(epsilon)), - MultiPolygon(_, XY) => Arc::new(self.as_multi_polygon().simplify(epsilon)), - // Mixed(_, XY) => self.as_mixed().simplify(epsilon), - // GeometryCollection(_, XY) => self.as_geometry_collection().simplify(), - _ => return Err(GeoArrowError::IncorrectType("".into())), + Point(_, _) => Arc::new(self.as_point().simplify(epsilon)), + LineString(_, _) => Arc::new(self.as_line_string().simplify(epsilon)), + Polygon(_, _) => Arc::new(self.as_polygon().simplify(epsilon)), + MultiPoint(_, _) => Arc::new(self.as_multi_point().simplify(epsilon)), + MultiLineString(_, _) => Arc::new(self.as_multi_line_string().simplify(epsilon)), + MultiPolygon(_, _) => Arc::new(self.as_multi_polygon().simplify(epsilon)), + Geometry(_) => Arc::new(self.as_geometry().simplify(epsilon)?), + // Mixed(_,_) => self.as_mixed().simplify(epsilon), + // GeometryCollection(_,_) => self.as_geometry_collection().simplify(), + _ => return 
Err(GeoArrowError::IncorrectType("simplify".into())), }; Ok(result) } @@ -121,7 +191,7 @@ impl Simplify for &dyn NativeArray { impl Simplify for ChunkedGeometryArray { type Output = Self; - fn simplify(&self, epsilon: &f64) -> Self::Output { + fn simplify(&self, epsilon: &BroadcastablePrimitive) -> Self::Output { self.map(|chunk| chunk.simplify(epsilon)) .try_into() .unwrap() @@ -134,7 +204,7 @@ macro_rules! chunked_impl { impl Simplify for $type { type Output = Self; - fn simplify(&self, epsilon: &f64) -> Self { + fn simplify(&self, epsilon: &BroadcastablePrimitive) -> Self { self.map(|chunk| chunk.simplify(epsilon)) .try_into() .unwrap() @@ -152,19 +222,18 @@ chunked_impl!(ChunkedGeometryArray); impl Simplify for &dyn ChunkedNativeArray { type Output = Result>; - fn simplify(&self, epsilon: &f64) -> Self::Output { - use Dimension::*; + fn simplify(&self, epsilon: &BroadcastablePrimitive) -> Self::Output { use NativeType::*; let result: Arc = match self.data_type() { - Point(_, XY) => Arc::new(self.as_point().simplify(epsilon)), - LineString(_, XY) => Arc::new(self.as_line_string().simplify(epsilon)), - Polygon(_, XY) => Arc::new(self.as_polygon().simplify(epsilon)), - MultiPoint(_, XY) => Arc::new(self.as_multi_point().simplify(epsilon)), - MultiLineString(_, XY) => Arc::new(self.as_multi_line_string().simplify(epsilon)), - MultiPolygon(_, XY) => Arc::new(self.as_multi_polygon().simplify(epsilon)), - // Mixed(_, XY) => self.as_mixed().simplify(epsilon), - // GeometryCollection(_, XY) => self.as_geometry_collection().simplify(), + Point(_, _) => Arc::new(self.as_point().simplify(epsilon)), + LineString(_, _) => Arc::new(self.as_line_string().simplify(epsilon)), + Polygon(_, _) => Arc::new(self.as_polygon().simplify(epsilon)), + MultiPoint(_, _) => Arc::new(self.as_multi_point().simplify(epsilon)), + MultiLineString(_, _) => Arc::new(self.as_multi_line_string().simplify(epsilon)), + MultiPolygon(_, _) => Arc::new(self.as_multi_polygon().simplify(epsilon)), + // 
Mixed(_,_) => self.as_mixed().simplify(epsilon), + // GeometryCollection(_,_) => self.as_geometry_collection().simplify(), _ => return Err(GeoArrowError::IncorrectType("".into())), }; Ok(result) @@ -188,7 +257,7 @@ mod tests { (x: 27.8, y: 0.1 ), ]; let input_array: LineStringArray = (vec![input_geom].as_slice(), Dimension::XY).into(); - let result_array = input_array.simplify(&1.0); + let result_array = input_array.simplify(&BroadcastablePrimitive::Scalar(1.0)); let expected = line_string![ ( x: 0.0, y: 0.0 ), @@ -211,7 +280,7 @@ mod tests { (x: 0., y: 0.), ]; let input_array: PolygonArray = (vec![input_geom].as_slice(), Dimension::XY).into(); - let result_array = input_array.simplify(&2.0); + let result_array = input_array.simplify(&BroadcastablePrimitive::Scalar(2.0)); let expected = polygon![ (x: 0., y: 0.), diff --git a/rust/geoarrow/src/algorithm/geo/simplify_vw.rs b/rust/geoarrow/src/algorithm/geo/simplify_vw.rs index a4ee33bf..060f546d 100644 --- a/rust/geoarrow/src/algorithm/geo/simplify_vw.rs +++ b/rust/geoarrow/src/algorithm/geo/simplify_vw.rs @@ -1,11 +1,13 @@ use std::sync::Arc; +use crate::algorithm::broadcasting::BroadcastablePrimitive; use crate::array::*; use crate::chunked_array::{ChunkedGeometryArray, ChunkedNativeArray}; use crate::datatypes::{Dimension, NativeType}; use crate::error::{GeoArrowError, Result}; use crate::trait_::ArrayAccessor; use crate::NativeArray; +use arrow::datatypes::Float64Type; use geo::SimplifyVw as _SimplifyVw; /// Simplifies a geometry. 
@@ -40,7 +42,7 @@ pub trait SimplifyVw { /// ]; /// let line_string_array: LineStringArray = (vec![line_string].as_slice(), Dimension::XY).into(); /// - /// let simplified_array = line_string_array.simplify_vw(&30.0); + /// let simplified_array = line_string_array.simplify_vw(&30.0.into()); /// /// let expected = line_string![ /// (x: 5.0, y: 2.0), @@ -50,7 +52,7 @@ pub trait SimplifyVw { /// /// assert_eq!(expected, simplified_array.value_as_geo(0)) /// ``` - fn simplify_vw(&self, epsilon: &f64) -> Self::Output; + fn simplify_vw(&self, epsilon: &BroadcastablePrimitive) -> Self::Output; } /// Implementation that returns the identity @@ -59,7 +61,7 @@ macro_rules! identity_impl { impl SimplifyVw for $type { type Output = Self; - fn simplify_vw(&self, _epsilon: &f64) -> Self { + fn simplify_vw(&self, _epsilon: &BroadcastablePrimitive) -> Self { self.clone() } } @@ -71,46 +73,116 @@ identity_impl!(MultiPointArray); /// Implementation that iterates over geo objects macro_rules! iter_geo_impl { - ($type:ty, $geo_type:ty) => { + ($type:ty, $builder_type:ty, $method:ident, $geo_type:ty) => { impl SimplifyVw for $type { type Output = Self; - fn simplify_vw(&self, epsilon: &f64) -> Self { + fn simplify_vw(&self, epsilon: &BroadcastablePrimitive) -> Self { let output_geoms: Vec> = self .iter_geo() - .map(|maybe_g| maybe_g.map(|geom| geom.simplify_vw(epsilon))) + .zip(epsilon) + .map(|(maybe_g, epsilon)| { + if let (Some(geom), Some(eps)) = (maybe_g, epsilon) { + Some(geom.simplify_vw(&eps)) + } else { + None + } + }) .collect(); - (output_geoms, Dimension::XY).into() + <$builder_type>::$method( + output_geoms.as_slice(), + Dimension::XY, + self.coord_type(), + self.metadata.clone(), + ) + .finish() } } }; } -iter_geo_impl!(LineStringArray, geo::LineString); -iter_geo_impl!(PolygonArray, geo::Polygon); -iter_geo_impl!(MultiLineStringArray, geo::MultiLineString); -iter_geo_impl!(MultiPolygonArray, geo::MultiPolygon); -// iter_geo_impl!(MixedGeometryArray, geo::Geometry); -// 
iter_geo_impl!(GeometryCollectionArray, geo::GeometryCollection); +iter_geo_impl!( + LineStringArray, + LineStringBuilder, + from_nullable_line_strings, + geo::LineString +); +iter_geo_impl!( + PolygonArray, + PolygonBuilder, + from_nullable_polygons, + geo::Polygon +); +iter_geo_impl!( + MultiLineStringArray, + MultiLineStringBuilder, + from_nullable_multi_line_strings, + geo::MultiLineString +); +iter_geo_impl!( + MultiPolygonArray, + MultiPolygonBuilder, + from_nullable_multi_polygons, + geo::MultiPolygon +); + +impl SimplifyVw for GeometryArray { + type Output = Result; + + fn simplify_vw(&self, epsilon: &BroadcastablePrimitive) -> Self::Output { + let output_geoms: Vec> = self + .iter_geo() + .zip(epsilon) + .map(|(maybe_g, epsilon)| { + if let (Some(geom), Some(eps)) = (maybe_g, epsilon) { + let out = match geom { + geo::Geometry::LineString(g) => { + geo::Geometry::LineString(g.simplify_vw(&eps)) + } + geo::Geometry::Polygon(g) => geo::Geometry::Polygon(g.simplify_vw(&eps)), + geo::Geometry::MultiLineString(g) => { + geo::Geometry::MultiLineString(g.simplify_vw(&eps)) + } + geo::Geometry::MultiPolygon(g) => { + geo::Geometry::MultiPolygon(g.simplify_vw(&eps)) + } + g => g, + }; + Some(out) + } else { + None + } + }) + .collect(); + + let builder = GeometryBuilder::from_nullable_geometries( + output_geoms.as_slice(), + self.coord_type(), + self.metadata().clone(), + false, + )?; + Ok(builder.finish()) + } +} impl SimplifyVw for &dyn NativeArray { type Output = Result>; - fn simplify_vw(&self, epsilon: &f64) -> Self::Output { - use Dimension::*; + fn simplify_vw(&self, epsilon: &BroadcastablePrimitive) -> Self::Output { use NativeType::*; let result: Arc = match self.data_type() { - Point(_, XY) => Arc::new(self.as_point().simplify_vw(epsilon)), - LineString(_, XY) => Arc::new(self.as_line_string().simplify_vw(epsilon)), - Polygon(_, XY) => Arc::new(self.as_polygon().simplify_vw(epsilon)), - MultiPoint(_, XY) => 
Arc::new(self.as_multi_point().simplify_vw(epsilon)), - MultiLineString(_, XY) => Arc::new(self.as_multi_line_string().simplify_vw(epsilon)), - MultiPolygon(_, XY) => Arc::new(self.as_multi_polygon().simplify_vw(epsilon)), - // Mixed(_, XY) => self.as_mixed().simplify_vw(epsilon), - // GeometryCollection(_, XY) => self.as_geometry_collection().simplify_vw(), - _ => return Err(GeoArrowError::IncorrectType("".into())), + Point(_, _) => Arc::new(self.as_point().simplify_vw(epsilon)), + LineString(_, _) => Arc::new(self.as_line_string().simplify_vw(epsilon)), + Polygon(_, _) => Arc::new(self.as_polygon().simplify_vw(epsilon)), + MultiPoint(_, _) => Arc::new(self.as_multi_point().simplify_vw(epsilon)), + MultiLineString(_, _) => Arc::new(self.as_multi_line_string().simplify_vw(epsilon)), + MultiPolygon(_, _) => Arc::new(self.as_multi_polygon().simplify_vw(epsilon)), + Geometry(_) => Arc::new(self.as_geometry().simplify_vw(epsilon)?), + // Mixed(_, _) => self.as_mixed().simplify_vw(epsilon), + // GeometryCollection(_, _) => self.as_geometry_collection().simplify_vw(), + _ => return Err(GeoArrowError::IncorrectType("simplify vw".into())), }; Ok(result) } @@ -119,7 +191,7 @@ impl SimplifyVw for &dyn NativeArray { impl SimplifyVw for ChunkedGeometryArray { type Output = Self; - fn simplify_vw(&self, epsilon: &f64) -> Self::Output { + fn simplify_vw(&self, epsilon: &BroadcastablePrimitive) -> Self::Output { self.map(|chunk| chunk.simplify_vw(epsilon)) .try_into() .unwrap() @@ -132,7 +204,7 @@ macro_rules! 
chunked_impl { impl SimplifyVw for $type { type Output = Self; - fn simplify_vw(&self, epsilon: &f64) -> Self { + fn simplify_vw(&self, epsilon: &BroadcastablePrimitive) -> Self { self.map(|chunk| chunk.simplify_vw(epsilon)) .try_into() .unwrap() @@ -150,19 +222,18 @@ chunked_impl!(ChunkedGeometryArray); impl SimplifyVw for &dyn ChunkedNativeArray { type Output = Result>; - fn simplify_vw(&self, epsilon: &f64) -> Self::Output { - use Dimension::*; + fn simplify_vw(&self, epsilon: &BroadcastablePrimitive) -> Self::Output { use NativeType::*; let result: Arc = match self.data_type() { - Point(_, XY) => Arc::new(self.as_point().simplify_vw(epsilon)), - LineString(_, XY) => Arc::new(self.as_line_string().simplify_vw(epsilon)), - Polygon(_, XY) => Arc::new(self.as_polygon().simplify_vw(epsilon)), - MultiPoint(_, XY) => Arc::new(self.as_multi_point().simplify_vw(epsilon)), - MultiLineString(_, XY) => Arc::new(self.as_multi_line_string().simplify_vw(epsilon)), - MultiPolygon(_, XY) => Arc::new(self.as_multi_polygon().simplify_vw(epsilon)), - // Mixed(_, XY) => self.as_mixed().simplify_vw(epsilon), - // GeometryCollection(_, XY) => self.as_geometry_collection().simplify_vw(), + Point(_, _) => Arc::new(self.as_point().simplify_vw(epsilon)), + LineString(_, _) => Arc::new(self.as_line_string().simplify_vw(epsilon)), + Polygon(_, _) => Arc::new(self.as_polygon().simplify_vw(epsilon)), + MultiPoint(_, _) => Arc::new(self.as_multi_point().simplify_vw(epsilon)), + MultiLineString(_, _) => Arc::new(self.as_multi_line_string().simplify_vw(epsilon)), + MultiPolygon(_, _) => Arc::new(self.as_multi_polygon().simplify_vw(epsilon)), + // Mixed(_, _) => self.as_mixed().simplify_vw(epsilon), + // GeometryCollection(_, _) => self.as_geometry_collection().simplify_vw(), _ => return Err(GeoArrowError::IncorrectType("".into())), }; Ok(result) diff --git a/rust/geoarrow/src/algorithm/geo/simplify_vw_preserve.rs b/rust/geoarrow/src/algorithm/geo/simplify_vw_preserve.rs index 257ebee8..2cf1acad 
100644 --- a/rust/geoarrow/src/algorithm/geo/simplify_vw_preserve.rs +++ b/rust/geoarrow/src/algorithm/geo/simplify_vw_preserve.rs @@ -1,11 +1,13 @@ use std::sync::Arc; +use crate::algorithm::broadcasting::BroadcastablePrimitive; use crate::array::*; use crate::chunked_array::{ChunkedGeometryArray, ChunkedNativeArray}; use crate::datatypes::{Dimension, NativeType}; use crate::error::{GeoArrowError, Result}; use crate::trait_::ArrayAccessor; use crate::NativeArray; +use arrow::datatypes::Float64Type; use geo::SimplifyVwPreserve as _SimplifyVwPreserve; /// Simplifies a geometry, attempting to preserve its topology by removing self-intersections @@ -43,7 +45,7 @@ pub trait SimplifyVwPreserve { /// points to form a valid geometry. /// - The tolerance used to remove a point is `epsilon`, in keeping with GEOS. JTS uses /// `epsilon ^ 2` - fn simplify_vw_preserve(&self, epsilon: &f64) -> Self::Output; + fn simplify_vw_preserve(&self, epsilon: &BroadcastablePrimitive) -> Self::Output; } /// Implementation that returns the identity @@ -52,7 +54,7 @@ macro_rules! identity_impl { impl SimplifyVwPreserve for $type { type Output = Self; - fn simplify_vw_preserve(&self, _epsilon: &f64) -> Self { + fn simplify_vw_preserve(&self, _epsilon: &BroadcastablePrimitive) -> Self { self.clone() } } @@ -64,47 +66,119 @@ identity_impl!(MultiPointArray); /// Implementation that iterates over geo objects macro_rules! 
iter_geo_impl { - ($type:ty, $geo_type:ty) => { + ($type:ty, $builder_type:ty, $method:ident, $geo_type:ty) => { impl SimplifyVwPreserve for $type { type Output = Self; - fn simplify_vw_preserve(&self, epsilon: &f64) -> Self { + fn simplify_vw_preserve(&self, epsilon: &BroadcastablePrimitive) -> Self { let output_geoms: Vec> = self .iter_geo() - .map(|maybe_g| maybe_g.map(|geom| geom.simplify_vw_preserve(epsilon))) + .zip(epsilon) + .map(|(maybe_g, epsilon)| { + if let (Some(geom), Some(eps)) = (maybe_g, epsilon) { + Some(geom.simplify_vw_preserve(&eps)) + } else { + None + } + }) .collect(); - (output_geoms, Dimension::XY).into() + <$builder_type>::$method( + output_geoms.as_slice(), + Dimension::XY, + self.coord_type(), + self.metadata.clone(), + ) + .finish() } } }; } -iter_geo_impl!(LineStringArray, geo::LineString); -iter_geo_impl!(PolygonArray, geo::Polygon); -iter_geo_impl!(MultiLineStringArray, geo::MultiLineString); -iter_geo_impl!(MultiPolygonArray, geo::MultiPolygon); -// iter_geo_impl!(MixedGeometryArray, geo::Geometry); -// iter_geo_impl!(GeometryCollectionArray, geo::GeometryCollection); +iter_geo_impl!( + LineStringArray, + LineStringBuilder, + from_nullable_line_strings, + geo::LineString +); +iter_geo_impl!( + PolygonArray, + PolygonBuilder, + from_nullable_polygons, + geo::Polygon +); +iter_geo_impl!( + MultiLineStringArray, + MultiLineStringBuilder, + from_nullable_multi_line_strings, + geo::MultiLineString +); +iter_geo_impl!( + MultiPolygonArray, + MultiPolygonBuilder, + from_nullable_multi_polygons, + geo::MultiPolygon +); + +impl SimplifyVwPreserve for GeometryArray { + type Output = Result; + + fn simplify_vw_preserve(&self, epsilon: &BroadcastablePrimitive) -> Self::Output { + let output_geoms: Vec> = self + .iter_geo() + .zip(epsilon) + .map(|(maybe_g, epsilon)| { + if let (Some(geom), Some(eps)) = (maybe_g, epsilon) { + let out = match geom { + geo::Geometry::LineString(g) => { + geo::Geometry::LineString(g.simplify_vw_preserve(&eps)) + } 
+ geo::Geometry::Polygon(g) => { + geo::Geometry::Polygon(g.simplify_vw_preserve(&eps)) + } + geo::Geometry::MultiLineString(g) => { + geo::Geometry::MultiLineString(g.simplify_vw_preserve(&eps)) + } + geo::Geometry::MultiPolygon(g) => { + geo::Geometry::MultiPolygon(g.simplify_vw_preserve(&eps)) + } + g => g, + }; + Some(out) + } else { + None + } + }) + .collect(); + + let builder = GeometryBuilder::from_nullable_geometries( + output_geoms.as_slice(), + self.coord_type(), + self.metadata().clone(), + false, + )?; + Ok(builder.finish()) + } +} impl SimplifyVwPreserve for &dyn NativeArray { type Output = Result>; - fn simplify_vw_preserve(&self, epsilon: &f64) -> Self::Output { - use Dimension::*; + fn simplify_vw_preserve(&self, epsilon: &BroadcastablePrimitive) -> Self::Output { use NativeType::*; let result: Arc = match self.data_type() { - Point(_, XY) => Arc::new(self.as_point().simplify_vw_preserve(epsilon)), - LineString(_, XY) => Arc::new(self.as_line_string().simplify_vw_preserve(epsilon)), - Polygon(_, XY) => Arc::new(self.as_polygon().simplify_vw_preserve(epsilon)), - MultiPoint(_, XY) => Arc::new(self.as_multi_point().simplify_vw_preserve(epsilon)), - MultiLineString(_, XY) => { + Point(_, _) => Arc::new(self.as_point().simplify_vw_preserve(epsilon)), + LineString(_, _) => Arc::new(self.as_line_string().simplify_vw_preserve(epsilon)), + Polygon(_, _) => Arc::new(self.as_polygon().simplify_vw_preserve(epsilon)), + MultiPoint(_, _) => Arc::new(self.as_multi_point().simplify_vw_preserve(epsilon)), + MultiLineString(_, _) => { Arc::new(self.as_multi_line_string().simplify_vw_preserve(epsilon)) } - MultiPolygon(_, XY) => Arc::new(self.as_multi_polygon().simplify_vw_preserve(epsilon)), - // Mixed(_, XY) => self.as_mixed().simplify_vw_preserve(epsilon), - // GeometryCollection(_, XY) => self.as_geometry_collection().simplify_vw_preserve(), + MultiPolygon(_, _) => Arc::new(self.as_multi_polygon().simplify_vw_preserve(epsilon)), + Geometry(_) => 
Arc::new(self.as_geometry().simplify_vw_preserve(epsilon)?), + // Mixed(_, _) => self.as_mixed().simplify_vw_preserve(epsilon), + // GeometryCollection(_, _) => self.as_geometry_collection().simplify_vw_preserve(), _ => return Err(GeoArrowError::IncorrectType("".into())), }; Ok(result) @@ -114,7 +188,7 @@ impl SimplifyVwPreserve for &dyn NativeArray { impl SimplifyVwPreserve for ChunkedGeometryArray { type Output = Self; - fn simplify_vw_preserve(&self, epsilon: &f64) -> Self::Output { + fn simplify_vw_preserve(&self, epsilon: &BroadcastablePrimitive) -> Self::Output { self.map(|chunk| chunk.simplify_vw_preserve(epsilon)) .try_into() .unwrap() @@ -127,7 +201,7 @@ macro_rules! chunked_impl { impl SimplifyVwPreserve for $type { type Output = Self; - fn simplify_vw_preserve(&self, epsilon: &f64) -> Self { + fn simplify_vw_preserve(&self, epsilon: &BroadcastablePrimitive) -> Self { self.map(|chunk| chunk.simplify_vw_preserve(epsilon)) .try_into() .unwrap() @@ -145,21 +219,20 @@ chunked_impl!(ChunkedGeometryArray); impl SimplifyVwPreserve for &dyn ChunkedNativeArray { type Output = Result>; - fn simplify_vw_preserve(&self, epsilon: &f64) -> Self::Output { - use Dimension::*; + fn simplify_vw_preserve(&self, epsilon: &BroadcastablePrimitive) -> Self::Output { use NativeType::*; let result: Arc = match self.data_type() { - Point(_, XY) => Arc::new(self.as_point().simplify_vw_preserve(epsilon)), - LineString(_, XY) => Arc::new(self.as_line_string().simplify_vw_preserve(epsilon)), - Polygon(_, XY) => Arc::new(self.as_polygon().simplify_vw_preserve(epsilon)), - MultiPoint(_, XY) => Arc::new(self.as_multi_point().simplify_vw_preserve(epsilon)), - MultiLineString(_, XY) => { + Point(_, _) => Arc::new(self.as_point().simplify_vw_preserve(epsilon)), + LineString(_, _) => Arc::new(self.as_line_string().simplify_vw_preserve(epsilon)), + Polygon(_, _) => Arc::new(self.as_polygon().simplify_vw_preserve(epsilon)), + MultiPoint(_, _) => 
Arc::new(self.as_multi_point().simplify_vw_preserve(epsilon)), + MultiLineString(_, _) => { Arc::new(self.as_multi_line_string().simplify_vw_preserve(epsilon)) } - MultiPolygon(_, XY) => Arc::new(self.as_multi_polygon().simplify_vw_preserve(epsilon)), - // Mixed(_, XY) => self.as_mixed().simplify_vw_preserve(epsilon), - // GeometryCollection(_, XY) => self.as_geometry_collection().simplify_vw_preserve(), + MultiPolygon(_, _) => Arc::new(self.as_multi_polygon().simplify_vw_preserve(epsilon)), + // Mixed(_, _) => self.as_mixed().simplify_vw_preserve(epsilon), + // GeometryCollection(_, _) => self.as_geometry_collection().simplify_vw_preserve(), _ => return Err(GeoArrowError::IncorrectType("".into())), }; Ok(result) diff --git a/rust/geodatafusion/README.md b/rust/geodatafusion/README.md index e6897d77..82bb8067 100644 --- a/rust/geodatafusion/README.md +++ b/rust/geodatafusion/README.md @@ -135,11 +135,8 @@ Spatial extensions for [Apache DataFusion](https://datafusion.apache.org/), an e | -------------------- | ----------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- | | ST_BdPolyFromText | | Construct a Polygon given an arbitrary collection of closed linestrings as a MultiLineString Well-Known text representation. | | ST_BdMPolyFromText | | Construct a MultiPolygon given an arbitrary collection of closed linestrings as a MultiLineString text representation Well-Known text representation. | -| ST_GeogFromText | | Return a specified geography value from Well-Known Text representation or extended (WKT). | -| ST_GeographyFromText | | Return a specified geography value from Well-Known Text representation or extended (WKT). | | ST_GeomCollFromText | | Makes a collection Geometry from collection WKT with the given SRID. If SRID is not given, it defaults to 0. 
| | ST_GeomFromEWKT | | Return a specified ST_Geometry value from Extended Well-Known Text representation (EWKT). | -| ST_GeomFromMARC21 | | Takes MARC21/XML geographic data as input and returns a PostGIS geometry object. | | ST_GeometryFromText | | Return a specified ST_Geometry value from Well-Known Text representation (WKT). This is an alias name for ST_GeomFromText | | ST_GeomFromText | ✅ | Return a specified ST_Geometry value from Well-Known Text representation (WKT). | | ST_LineFromText | | Makes a Geometry from WKT representation with the given SRID. If SRID is not given, it defaults to 0. | @@ -154,7 +151,6 @@ Spatial extensions for [Apache DataFusion](https://datafusion.apache.org/), an e | Name | Implemented | Description | | -------------------- | ----------- | --------------------------------------------------------------------------------------------------------------------------------------------- | -| ST_GeogFromWKB | | Creates a geography instance from a Well-Known Binary geometry representation (WKB) or extended Well Known Binary (EWKB). | | ST_GeomFromEWKB | | Return a specified ST_Geometry value from Extended Well-Known Binary representation (EWKB). | | ST_GeomFromWKB | ✅ | Creates a geometry instance from a Well-Known Binary geometry representation (WKB) and optional SRID. | | ST_LineFromWKB | | Makes a LINESTRING from WKB with the given SRID | @@ -206,12 +202,10 @@ Spatial extensions for [Apache DataFusion](https://datafusion.apache.org/), an e | ST_AsGML | | Return the geometry as a GML version 2 or 3 element. | | ST_AsKML | | Return the geometry as a KML element. | | ST_AsLatLonText | | Return the Degrees, Minutes, Seconds representation of the given point. | -| ST_AsMARC21 | | Returns geometry as a MARC21/XML record with a geographic datafield (034). | | ST_AsMVTGeom | | Transforms a geometry into the coordinate space of a MVT tile. | | ST_AsMVT | | Aggregate function returning a MVT representation of a set of rows. 
| | ST_AsSVG | | Returns SVG path data for a geometry. | | ST_AsTWKB | | Returns the geometry as TWKB, aka "Tiny Well-Known Binary" | -| ST_AsX3D | | Returns a Geometry in X3D xml node element format: ISO-IEC-19776-1.2-X3DEncodings-XML | | ST_GeoHash | ✅ | Return a GeoHash representation of the geometry. | ### Operators @@ -289,10 +283,10 @@ Spatial extensions for [Apache DataFusion](https://datafusion.apache.org/), an e | ST_Polygonize | | Computes a collection of polygons formed from the linework of a set of geometries. | | ST_ReducePrecision | | Returns a valid geometry with points rounded to a grid tolerance. | | ST_SharedPaths | | Returns a collection containing paths shared by the two input linestrings/multilinestrings. | -| ST_Simplify | | Returns a simplified representation of a geometry, using the Douglas-Peucker algorithm. | -| ST_SimplifyPreserveTopology | | Returns a simplified and valid representation of a geometry, using the Douglas-Peucker algorithm. | +| ST_Simplify | ✅ | Returns a simplified representation of a geometry, using the Douglas-Peucker algorithm. | +| ST_SimplifyPreserveTopology | ✅ | Returns a simplified and valid representation of a geometry, using the Douglas-Peucker algorithm. | | ST_SimplifyPolygonHull | | Computes a simplifed topology-preserving outer or inner hull of a polygonal geometry. | -| ST_SimplifyVW | | Returns a simplified representation of a geometry, using the Visvalingam-Whyatt algorithm | +| ST_SimplifyVW | ✅ | Returns a simplified representation of a geometry, using the Visvalingam-Whyatt algorithm | | ST_SetEffectiveArea | | Sets the effective area for each vertex, using the Visvalingam-Whyatt algorithm. | | ST_TriangulatePolygon | | Computes the constrained Delaunay triangulation of polygons | | ST_VoronoiLines | | Returns the boundaries of the Voronoi diagram of the vertices of a geometry. 
| diff --git a/rust/geodatafusion/src/udf/native/processing/mod.rs b/rust/geodatafusion/src/udf/native/processing/mod.rs index 84191069..b6929b4d 100644 --- a/rust/geodatafusion/src/udf/native/processing/mod.rs +++ b/rust/geodatafusion/src/udf/native/processing/mod.rs @@ -1,6 +1,9 @@ mod centroid; mod chaikin_smoothing; mod convex_hull; +mod simplify; +mod simplify_preserve_topology; +mod simplify_vw; use datafusion::prelude::SessionContext; @@ -8,4 +11,7 @@ use datafusion::prelude::SessionContext; pub fn register_udfs(ctx: &SessionContext) { ctx.register_udf(centroid::Centroid::new().into()); ctx.register_udf(convex_hull::ConvexHull::new().into()); + ctx.register_udf(simplify_preserve_topology::SimplifyPreserveTopology::new().into()); + ctx.register_udf(simplify_vw::SimplifyVw::new().into()); + ctx.register_udf(simplify::Simplify::new().into()); } diff --git a/rust/geodatafusion/src/udf/native/processing/simplify.rs b/rust/geodatafusion/src/udf/native/processing/simplify.rs new file mode 100644 index 00000000..e47a181e --- /dev/null +++ b/rust/geodatafusion/src/udf/native/processing/simplify.rs @@ -0,0 +1,124 @@ +use std::any::Any; +use std::sync::OnceLock; + +use arrow::array::AsArray; +use arrow::datatypes::Float64Type; +use arrow_schema::DataType; +use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER; +use datafusion::logical_expr::{ + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, +}; +use geoarrow::algorithm::broadcasting::BroadcastablePrimitive; +use geoarrow::algorithm::geo::Simplify as _Simplify; + +use crate::data_types::{parse_to_native_array, GEOMETRY_TYPE}; +use crate::error::GeoDataFusionResult; + +#[derive(Debug)] +pub(super) struct Simplify { + signature: Signature, +} + +impl Simplify { + pub fn new() -> Self { + Self { + signature: Signature::exact( + vec![GEOMETRY_TYPE.into(), DataType::Float64], + Volatility::Immutable, + ), + } + } +} + +static DOCUMENTATION: OnceLock = OnceLock::new(); + +impl ScalarUDFImpl 
for Simplify { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "st_simplify" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> datafusion::error::Result { + Ok(arg_types[0].clone()) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result { + Ok(simplify_impl(args)?) + } + + fn documentation(&self) -> Option<&Documentation> { + Some(DOCUMENTATION.get_or_init(|| { + Documentation::builder( + DOC_SECTION_OTHER, + "Computes a simplified representation of a geometry using the Douglas-Peucker algorithm. The simplification tolerance is a distance value, in the units of the input SRS. Simplification removes vertices which are within the tolerance distance of the simplified linework. The result may not be valid even if the input is. + +The function can be called with any kind of geometry (including GeometryCollections), but only line and polygon elements are simplified. Endpoints of linear geometry are preserved.", + "ST_Simplify(geometry, epsilon)", + ) + .with_argument("geom", "geometry") + .with_argument("tolerance", "float") + .build() + })) + } +} + +fn simplify_impl(args: &[ColumnarValue]) -> GeoDataFusionResult { + let array = ColumnarValue::values_to_arrays(&args[..1])? + .into_iter() + .next() + .unwrap(); + let native_array = parse_to_native_array(array)?; + let output = match &args[1] { + ColumnarValue::Scalar(epsilon) => { + let epsilon = epsilon.to_scalar()?.into_inner(); + let epsilon = epsilon.as_primitive::().value(0); + native_array.as_ref().simplify(&epsilon.into())? + } + ColumnarValue::Array(epsilon) => { + native_array + .as_ref() + .simplify(&BroadcastablePrimitive::Array( + epsilon.as_primitive().clone(), + ))? 
+ } + }; + Ok(output.to_array_ref().into()) +} + +#[cfg(test)] +mod test { + use datafusion::prelude::*; + use geo::line_string; + use geoarrow::array::GeometryArray; + use geoarrow::trait_::ArrayAccessor; + + use crate::udf::native::register_native; + + #[tokio::test] + async fn test() { + let ctx = SessionContext::new(); + register_native(&ctx); + + let out = ctx.sql( + "SELECT ST_Simplify(ST_GeomFromText('LINESTRING(0.0 0.0, 5.0 4.0, 11.0 5.5, 17.3 3.2, 27.8 0.1)'), 1.0);").await.unwrap(); + let batches = out.collect().await.unwrap(); + let column = batches.first().unwrap().columns().first().unwrap().clone(); + let geom_arr = GeometryArray::try_from(column.as_ref()).unwrap(); + let expected = line_string![ + (x: 0.0, y: 0.0), + (x: 5.0, y: 4.0), + (x: 11.0, y: 5.5), + (x: 27.8, y: 0.1), + ]; + // Not sure why rust-analyzer is complaining + let _expected = geo::Geometry::LineString(expected); + assert_eq!(geom_arr.value_as_geo(0), _expected); + } +} diff --git a/rust/geodatafusion/src/udf/native/processing/simplify_preserve_topology.rs b/rust/geodatafusion/src/udf/native/processing/simplify_preserve_topology.rs new file mode 100644 index 00000000..5fdb0792 --- /dev/null +++ b/rust/geodatafusion/src/udf/native/processing/simplify_preserve_topology.rs @@ -0,0 +1,95 @@ +use std::any::Any; +use std::sync::OnceLock; + +use arrow::array::AsArray; +use arrow::datatypes::Float64Type; +use arrow_schema::DataType; +use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER; +use datafusion::logical_expr::{ + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, +}; +use geoarrow::algorithm::broadcasting::BroadcastablePrimitive; +use geoarrow::algorithm::geo::SimplifyVwPreserve as _; + +use crate::data_types::{parse_to_native_array, GEOMETRY_TYPE}; +use crate::error::GeoDataFusionResult; + +#[derive(Debug)] +pub(super) struct SimplifyPreserveTopology { + signature: Signature, +} + +impl SimplifyPreserveTopology { + pub fn new() -> Self { + Self { + 
signature: Signature::exact( + vec![GEOMETRY_TYPE.into(), DataType::Float64], + Volatility::Immutable, + ), + } + } +} + +static DOCUMENTATION: OnceLock = OnceLock::new(); + +impl ScalarUDFImpl for SimplifyPreserveTopology { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "st_simplifypreservetopology" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> datafusion::error::Result { + Ok(arg_types[0].clone()) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result { + Ok(simplify_impl(args)?) + } + + fn documentation(&self) -> Option<&Documentation> { + Some(DOCUMENTATION.get_or_init(|| { + Documentation::builder( + DOC_SECTION_OTHER, + "Computes a simplified representation of a geometry using a variant of the Visvalingam-Whyatt algorithm which limits simplification to ensure the result has the same topology as the input. The simplification tolerance is a distance value, in the units of the input SRS. Simplification removes vertices which are within the tolerance distance of the simplified linework, as long as topology is preserved. The result will be valid and simple if the input is. + +The function can be called with any kind of geometry (including GeometryCollections), but only line and polygon elements are simplified. For polygonal inputs, the result will have the same number of rings (shells and holes), and the rings will not cross. Ring endpoints may be simplified. For linear inputs, the result will have the same number of lines, and lines will not intersect if they did not do so in the original geometry. Endpoints of linear geometry are preserved.", + "ST_SimplifyPreserveTopology(geometry, epsilon)", + ) + .with_argument("geom", "geometry") + .with_argument("tolerance", "float") + .build() + })) + } +} + +fn simplify_impl(args: &[ColumnarValue]) -> GeoDataFusionResult { + let array = ColumnarValue::values_to_arrays(&args[..1])? 
+ .into_iter() + .next() + .unwrap(); + let native_array = parse_to_native_array(array)?; + let output = match &args[1] { + ColumnarValue::Scalar(epsilon) => { + let epsilon = epsilon.to_scalar()?.into_inner(); + let epsilon = epsilon.as_primitive::().value(0); + native_array + .as_ref() + .simplify_vw_preserve(&epsilon.into())? + } + ColumnarValue::Array(epsilon) => { + native_array + .as_ref() + .simplify_vw_preserve(&BroadcastablePrimitive::Array( + epsilon.as_primitive().clone(), + ))? + } + }; + Ok(output.to_array_ref().into()) +} diff --git a/rust/geodatafusion/src/udf/native/processing/simplify_vw.rs b/rust/geodatafusion/src/udf/native/processing/simplify_vw.rs new file mode 100644 index 00000000..da7c94b4 --- /dev/null +++ b/rust/geodatafusion/src/udf/native/processing/simplify_vw.rs @@ -0,0 +1,122 @@ +use std::any::Any; +use std::sync::OnceLock; + +use arrow::array::AsArray; +use arrow::datatypes::Float64Type; +use arrow_schema::DataType; +use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER; +use datafusion::logical_expr::{ + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, +}; +use geoarrow::algorithm::broadcasting::BroadcastablePrimitive; +use geoarrow::algorithm::geo::SimplifyVw as _; + +use crate::data_types::{parse_to_native_array, GEOMETRY_TYPE}; +use crate::error::GeoDataFusionResult; + +#[derive(Debug)] +pub(super) struct SimplifyVw { + signature: Signature, +} + +impl SimplifyVw { + pub fn new() -> Self { + Self { + signature: Signature::exact( + vec![GEOMETRY_TYPE.into(), DataType::Float64], + Volatility::Immutable, + ), + } + } +} + +static DOCUMENTATION: OnceLock = OnceLock::new(); + +impl ScalarUDFImpl for SimplifyVw { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "st_simplifyvw" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> datafusion::error::Result { + Ok(arg_types[0].clone()) + } + + fn invoke(&self, 
args: &[ColumnarValue]) -> datafusion::error::Result { + Ok(simplify_impl(args)?) + } + + fn documentation(&self) -> Option<&Documentation> { + Some(DOCUMENTATION.get_or_init(|| { + Documentation::builder( + DOC_SECTION_OTHER, + "Returns a simplified representation of a geometry using the Visvalingam-Whyatt algorithm. The simplification tolerance is an area value, in the units of the input SRS. Simplification removes vertices which form \"corners\" with area less than the tolerance. The result may not be valid even if the input is. + +The function can be called with any kind of geometry (including GeometryCollections), but only line and polygon elements are simplified. Endpoints of linear geometry are preserved.", + "ST_SimplifyVW(geometry, epsilon)", + ) + .with_argument("geom", "geometry") + .with_argument("tolerance", "float") + .build() + })) + } +} + +fn simplify_impl(args: &[ColumnarValue]) -> GeoDataFusionResult { + let array = ColumnarValue::values_to_arrays(&args[..1])? + .into_iter() + .next() + .unwrap(); + let native_array = parse_to_native_array(array)?; + let output = match &args[1] { + ColumnarValue::Scalar(epsilon) => { + let epsilon = epsilon.to_scalar()?.into_inner(); + let epsilon = epsilon.as_primitive::().value(0); + native_array.as_ref().simplify_vw(&epsilon.into())? + } + ColumnarValue::Array(epsilon) => { + native_array + .as_ref() + .simplify_vw(&BroadcastablePrimitive::Array( + epsilon.as_primitive().clone(), + ))? 
+ } + }; + Ok(output.to_array_ref().into()) +} + +#[cfg(test)] +mod test { + use datafusion::prelude::*; + use geo::line_string; + use geoarrow::array::GeometryArray; + use geoarrow::trait_::ArrayAccessor; + + use crate::udf::native::register_native; + + #[tokio::test] + async fn test() { + let ctx = SessionContext::new(); + register_native(&ctx); + + let out = ctx.sql( + "SELECT ST_SimplifyVW(ST_GeomFromText('LINESTRING(5 2, 3 8, 6 20, 7 25, 10 10)'), 30);").await.unwrap(); + let batches = out.collect().await.unwrap(); + let column = batches.first().unwrap().columns().first().unwrap().clone(); + let geom_arr = GeometryArray::try_from(column.as_ref()).unwrap(); + let expected = line_string![ + (x: 5.0, y: 2.0), + (x: 7.0, y: 25.0), + (x: 10.0, y: 10.0), + ]; + let expected = geo::Geometry::LineString(expected); + assert_eq!(geom_arr.value_as_geo(0), expected); + } +} From 108f9451578794714933d9d640706c49c785087f Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 16 Dec 2024 21:08:17 -0500 Subject: [PATCH 10/11] ST_PointOnSurface, ST_ConcaveHull (#953) --- .../src/algorithm/geo/concave_hull.rs | 119 ++++++++++++++++++ .../src/algorithm/geo/interior_point.rs | 107 ++++++++++++++++ rust/geoarrow/src/algorithm/geo/mod.rs | 8 ++ rust/geodatafusion/README.md | 4 +- .../src/udf/native/processing/concave_hull.rs | 98 +++++++++++++++ .../src/udf/native/processing/mod.rs | 4 + .../udf/native/processing/point_on_surface.rs | 70 +++++++++++ 7 files changed, 408 insertions(+), 2 deletions(-) create mode 100644 rust/geoarrow/src/algorithm/geo/concave_hull.rs create mode 100644 rust/geoarrow/src/algorithm/geo/interior_point.rs create mode 100644 rust/geodatafusion/src/udf/native/processing/concave_hull.rs create mode 100644 rust/geodatafusion/src/udf/native/processing/point_on_surface.rs diff --git a/rust/geoarrow/src/algorithm/geo/concave_hull.rs b/rust/geoarrow/src/algorithm/geo/concave_hull.rs new file mode 100644 index 00000000..fca255e5 --- /dev/null +++ 
b/rust/geoarrow/src/algorithm/geo/concave_hull.rs @@ -0,0 +1,119 @@ +use crate::algorithm::broadcasting::BroadcastablePrimitive; +use crate::array::*; +use crate::datatypes::{Dimension, NativeType}; +use crate::error::{GeoArrowError, Result}; +use crate::trait_::ArrayAccessor; +use crate::NativeArray; +use arrow::datatypes::Float64Type; +use geo::algorithm::ConcaveHull as _; +use geo::Polygon; + +/// Returns a polygon which covers a geometry. Unlike convex hulls, which also cover +/// their geometry, a concave hull does so while trying to further minimize its area by +/// constructing edges such that the exterior of the polygon incorporates points that would +/// be interior points in a convex hull. +/// +/// This implementation is inspired by <https://github.com/mapbox/concaveman> +/// and also uses ideas from the following paper: +/// www.iis.sinica.edu.tw/page/jise/2012/201205_10.pdf +pub trait ConcaveHull { + type Output; + + fn concave_hull(&self, concavity: &BroadcastablePrimitive) -> Self::Output; +} + +/// Implementation that iterates over geo objects +macro_rules! 
iter_geo_impl { + ($type:ty) => { + impl ConcaveHull for $type { + type Output = PolygonArray; + + fn concave_hull( + &self, + concavity: &BroadcastablePrimitive, + ) -> Self::Output { + let output_geoms: Vec> = self + .iter_geo() + .zip(concavity) + .map(|(maybe_g, concavity)| { + if let (Some(geom), Some(concavity)) = (maybe_g, concavity) { + Some(geom.concave_hull(concavity)) + } else { + None + } + }) + .collect(); + + PolygonBuilder::from_nullable_polygons( + output_geoms.as_slice(), + Dimension::XY, + self.coord_type(), + self.metadata().clone(), + ) + .finish() + } + } + }; +} + +iter_geo_impl!(LineStringArray); +iter_geo_impl!(PolygonArray); +iter_geo_impl!(MultiPointArray); +iter_geo_impl!(MultiLineStringArray); +iter_geo_impl!(MultiPolygonArray); + +impl ConcaveHull for GeometryArray { + type Output = Result; + + fn concave_hull(&self, concavity: &BroadcastablePrimitive) -> Self::Output { + let output_geoms: Vec> = self + .iter_geo() + .zip(concavity) + .map(|(maybe_g, concavity)| { + if let (Some(geom), Some(concavity)) = (maybe_g, concavity) { + let out = match geom { + geo::Geometry::LineString(g) => g.concave_hull(concavity), + geo::Geometry::Polygon(g) => g.concave_hull(concavity), + geo::Geometry::MultiLineString(g) => g.concave_hull(concavity), + geo::Geometry::MultiPolygon(g) => g.concave_hull(concavity), + _ => { + return Err(GeoArrowError::IncorrectType( + "incorrect type in concave_hull".into(), + )) + } + }; + Ok(Some(out)) + } else { + Ok(None) + } + }) + .collect::>()?; + + Ok(PolygonBuilder::from_nullable_polygons( + output_geoms.as_slice(), + Dimension::XY, + self.coord_type(), + self.metadata().clone(), + ) + .finish()) + } +} + +impl ConcaveHull for &dyn NativeArray { + type Output = Result; + + fn concave_hull(&self, concavity: &BroadcastablePrimitive) -> Self::Output { + use NativeType::*; + + let result = match self.data_type() { + LineString(_, _) => self.as_line_string().concave_hull(concavity), + Polygon(_, _) => 
self.as_polygon().concave_hull(concavity), + MultiPoint(_, _) => self.as_multi_point().concave_hull(concavity), + MultiLineString(_, _) => self.as_multi_line_string().concave_hull(concavity), + MultiPolygon(_, _) => self.as_multi_polygon().concave_hull(concavity), + Geometry(_) => self.as_geometry().concave_hull(concavity)?, + _ => return Err(GeoArrowError::IncorrectType("".into())), + }; + Ok(result) + } +} diff --git a/rust/geoarrow/src/algorithm/geo/interior_point.rs b/rust/geoarrow/src/algorithm/geo/interior_point.rs new file mode 100644 index 00000000..d088b0c2 --- /dev/null +++ b/rust/geoarrow/src/algorithm/geo/interior_point.rs @@ -0,0 +1,107 @@ +use crate::array::*; +use crate::datatypes::{Dimension, NativeType}; +use crate::error::Result; +use crate::trait_::ArrayAccessor; +use crate::NativeArray; +use geo::algorithm::interior_point::InteriorPoint as _; + +/// Calculation of interior points. +/// +/// An interior point is a point that's guaranteed to intersect a given geometry, and will be +/// strictly on the interior of the geometry if possible, or on the edge if the geometry has zero +/// area. A best effort will additionally be made to locate the point reasonably centrally. +/// +/// For polygons, this point is located by drawing a line that approximately subdivides the +/// bounding box around the polygon in half, intersecting it with the polygon, then calculating +/// the midpoint of the longest line produced by the intersection. For lines, the non-endpoint +/// vertex closest to the line's centroid is returned if the line has interior points, or an +/// endpoint is returned otherwise. 
+/// +/// For multi-geometries or collections, the interior points of the constituent components are +/// calculated, and one of those is returned (for MultiPolygons, it's the point that's the midpoint +/// of the longest intersection of the intersection lines of any of the constituent polygons, as +/// described above; for all others, the interior point closest to the collection's centroid is +/// used). +/// +pub trait InteriorPoint { + type Output; + + fn interior_point(&self) -> Self::Output; +} + +impl InteriorPoint for PointArray { + type Output = PointArray; + + fn interior_point(&self) -> Self::Output { + self.clone() + } +} + +impl InteriorPoint for RectArray { + type Output = PointArray; + + fn interior_point(&self) -> Self::Output { + let mut output_array = PointBuilder::with_capacity_and_options( + Dimension::XY, + self.len(), + self.coord_type(), + self.metadata().clone(), + ); + self.iter_geo().for_each(|maybe_g| { + output_array.push_point(maybe_g.map(|g| g.interior_point()).as_ref()) + }); + output_array.into() + } +} + +/// Implementation that iterates over geo objects +macro_rules! 
iter_geo_impl { + ($type:ty) => { + impl InteriorPoint for $type { + type Output = PointArray; + + fn interior_point(&self) -> Self::Output { + let mut output_array = PointBuilder::with_capacity_and_options( + Dimension::XY, + self.len(), + self.coord_type(), + self.metadata().clone(), + ); + self.iter_geo().for_each(|maybe_g| { + output_array.push_point(maybe_g.and_then(|g| g.interior_point()).as_ref()) + }); + output_array.into() + } + } + }; +} + +iter_geo_impl!(LineStringArray); +iter_geo_impl!(PolygonArray); +iter_geo_impl!(MultiPointArray); +iter_geo_impl!(MultiLineStringArray); +iter_geo_impl!(MultiPolygonArray); +iter_geo_impl!(MixedGeometryArray); +iter_geo_impl!(GeometryCollectionArray); +iter_geo_impl!(GeometryArray); + +impl InteriorPoint for &dyn NativeArray { + type Output = Result; + + fn interior_point(&self) -> Self::Output { + use NativeType::*; + + let result = match self.data_type() { + Point(_, _) => self.as_point().interior_point(), + LineString(_, _) => self.as_line_string().interior_point(), + Polygon(_, _) => self.as_polygon().interior_point(), + MultiPoint(_, _) => self.as_multi_point().interior_point(), + MultiLineString(_, _) => self.as_multi_line_string().interior_point(), + MultiPolygon(_, _) => self.as_multi_polygon().interior_point(), + GeometryCollection(_, _) => self.as_geometry_collection().interior_point(), + Rect(_) => self.as_rect().interior_point(), + Geometry(_) => self.as_geometry().interior_point(), + }; + Ok(result) + } +} diff --git a/rust/geoarrow/src/algorithm/geo/mod.rs b/rust/geoarrow/src/algorithm/geo/mod.rs index 4d3ffb76..1fc16f89 100644 --- a/rust/geoarrow/src/algorithm/geo/mod.rs +++ b/rust/geoarrow/src/algorithm/geo/mod.rs @@ -34,6 +34,10 @@ pub use chaikin_smoothing::ChaikinSmoothing; mod chamberlain_duquette_area; pub use chamberlain_duquette_area::ChamberlainDuquetteArea; +/// Calculate the concave hull of geometries. 
+mod concave_hull; +pub use concave_hull::ConcaveHull; + /// Determine whether `Geometry` `A` completely encloses `Geometry` `B`. mod contains; pub use contains::Contains; @@ -74,6 +78,10 @@ pub use geodesic_length::GeodesicLength; mod haversine_length; pub use haversine_length::HaversineLength; +/// Calculation of interior points. +mod interior_point; +pub use interior_point::InteriorPoint; + /// Determine whether `Geometry` `A` intersects `Geometry` `B`. mod intersects; pub use intersects::Intersects; diff --git a/rust/geodatafusion/README.md b/rust/geodatafusion/README.md index 82bb8067..24cdc272 100644 --- a/rust/geodatafusion/README.md +++ b/rust/geodatafusion/README.md @@ -266,7 +266,7 @@ Spatial extensions for [Apache DataFusion](https://datafusion.apache.org/), an e | ST_BuildArea | | Creates a polygonal geometry formed by the linework of a geometry. | | ST_Centroid | ✅ | Returns the geometric center of a geometry. | | ST_ChaikinSmoothing | | Returns a smoothed version of a geometry, using the Chaikin algorithm | -| ST_ConcaveHull | | Computes a possibly concave geometry that contains all input geometry vertices | +| ST_ConcaveHull | ✅ | Computes a possibly concave geometry that contains all input geometry vertices | | ST_ConvexHull | ✅ | Computes the convex hull of a geometry. | | ST_DelaunayTriangles | | Returns the Delaunay triangulation of the vertices of a geometry. | | ST_FilterByM | | Removes vertices based on their M value | @@ -279,7 +279,7 @@ Spatial extensions for [Apache DataFusion](https://datafusion.apache.org/), an e | ST_MinimumBoundingRadius | | Returns the center point and radius of the smallest circle that contains a geometry. | | ST_OrientedEnvelope | | Returns a minimum-area rectangle containing a geometry. | | ST_OffsetCurve | | Returns an offset line at a given distance and side from an input line. | -| ST_PointOnSurface | | Computes a point guaranteed to lie in a polygon, or on a geometry. 
| +| ST_PointOnSurface | ✅ | Computes a point guaranteed to lie in a polygon, or on a geometry. | | ST_Polygonize | | Computes a collection of polygons formed from the linework of a set of geometries. | | ST_ReducePrecision | | Returns a valid geometry with points rounded to a grid tolerance. | | ST_SharedPaths | | Returns a collection containing paths shared by the two input linestrings/multilinestrings. | diff --git a/rust/geodatafusion/src/udf/native/processing/concave_hull.rs b/rust/geodatafusion/src/udf/native/processing/concave_hull.rs new file mode 100644 index 00000000..1782cf63 --- /dev/null +++ b/rust/geodatafusion/src/udf/native/processing/concave_hull.rs @@ -0,0 +1,98 @@ +use std::any::Any; +use std::sync::OnceLock; + +use arrow::array::AsArray; +use arrow::datatypes::Float64Type; +use arrow_schema::DataType; +use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER; +use datafusion::logical_expr::{ + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, +}; +use geoarrow::algorithm::broadcasting::BroadcastablePrimitive; +use geoarrow::algorithm::geo::ConcaveHull as _; +use geoarrow::array::GeometryArray; +use geoarrow::ArrayBase; + +use crate::data_types::{parse_to_native_array, GEOMETRY_TYPE, POINT2D_TYPE}; +use crate::error::GeoDataFusionResult; + +#[derive(Debug)] +pub(super) struct ConcaveHull { + signature: Signature, +} + +impl ConcaveHull { + pub fn new() -> Self { + Self { + signature: Signature::exact( + vec![GEOMETRY_TYPE.into(), DataType::Float64], + Volatility::Immutable, + ), + } + } +} + +static DOCUMENTATION: OnceLock = OnceLock::new(); + +impl ScalarUDFImpl for ConcaveHull { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "st_concavehull" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result { + Ok(POINT2D_TYPE.into()) + } + + fn invoke(&self, args: &[ColumnarValue]) -> 
datafusion::error::Result { + Ok(concave_hull_impl(args)?) + } + + fn documentation(&self) -> Option<&Documentation> { + Some(DOCUMENTATION.get_or_init(|| { + Documentation::builder( + DOC_SECTION_OTHER, + "A concave hull is a (usually) concave geometry which contains the input, and whose vertices are a subset of the input vertices. In the general case the concave hull is a Polygon. The concave hull of two or more collinear points is a two-point LineString. The concave hull of one or more identical points is a Point. The polygon will not contain holes unless the optional param_allow_holes argument is specified as true. + +One can think of a concave hull as \"shrink-wrapping\" a set of points. This is different to the convex hull, which is more like wrapping a rubber band around the points. A concave hull generally has a smaller area and represents a more natural boundary for the input points. + +The param_pctconvex controls the concaveness of the computed hull. A value of 1 produces the convex hull. Values between 1 and 0 produce hulls of increasing concaveness. A value of 0 produces a hull with maximum concaveness (but still a single polygon). Choosing a suitable value depends on the nature of the input data, but often values between 0.3 and 0.1 produce reasonable results.", + "ST_ConcaveHull(geometry)", + ) + .with_argument("g1", "geometry") + .with_argument("param_pctconvex", "float") + .build() + })) + } +} + +fn concave_hull_impl(args: &[ColumnarValue]) -> GeoDataFusionResult { + let array = ColumnarValue::values_to_arrays(&args[..1])? + .into_iter() + .next() + .unwrap(); + let native_array = parse_to_native_array(array)?; + let output = match &args[1] { + ColumnarValue::Scalar(concavity) => { + let concavity = concavity.to_scalar()?.into_inner(); + let concavity = concavity.as_primitive::().value(0); + native_array.as_ref().concave_hull(&concavity.into())? 
+ } + ColumnarValue::Array(concavity) => { + native_array + .as_ref() + .concave_hull(&BroadcastablePrimitive::Array( + concavity.as_primitive().clone(), + ))? + } + }; + + Ok(GeometryArray::from(output).to_array_ref().into()) +} diff --git a/rust/geodatafusion/src/udf/native/processing/mod.rs b/rust/geodatafusion/src/udf/native/processing/mod.rs index b6929b4d..6c2275b9 100644 --- a/rust/geodatafusion/src/udf/native/processing/mod.rs +++ b/rust/geodatafusion/src/udf/native/processing/mod.rs @@ -1,6 +1,8 @@ mod centroid; mod chaikin_smoothing; +mod concave_hull; mod convex_hull; +mod point_on_surface; mod simplify; mod simplify_preserve_topology; mod simplify_vw; @@ -10,7 +12,9 @@ use datafusion::prelude::SessionContext; /// Register all provided [geo] functions for processing geometries pub fn register_udfs(ctx: &SessionContext) { ctx.register_udf(centroid::Centroid::new().into()); + ctx.register_udf(concave_hull::ConcaveHull::new().into()); ctx.register_udf(convex_hull::ConvexHull::new().into()); + ctx.register_udf(point_on_surface::PointOnSurface::new().into()); ctx.register_udf(simplify_preserve_topology::SimplifyPreserveTopology::new().into()); ctx.register_udf(simplify_vw::SimplifyVw::new().into()); ctx.register_udf(simplify::Simplify::new().into()); diff --git a/rust/geodatafusion/src/udf/native/processing/point_on_surface.rs b/rust/geodatafusion/src/udf/native/processing/point_on_surface.rs new file mode 100644 index 00000000..4ad793cc --- /dev/null +++ b/rust/geodatafusion/src/udf/native/processing/point_on_surface.rs @@ -0,0 +1,70 @@ +use std::any::Any; +use std::sync::OnceLock; + +use arrow_schema::DataType; +use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER; +use datafusion::logical_expr::{ColumnarValue, Documentation, ScalarUDFImpl, Signature}; +use geoarrow::algorithm::geo::InteriorPoint; +use geoarrow::ArrayBase; + +use crate::data_types::{any_single_geometry_type_input, parse_to_native_array, POINT2D_TYPE}; +use 
crate::error::GeoDataFusionResult; + +#[derive(Debug)] +pub(super) struct PointOnSurface { + signature: Signature, +} + +impl PointOnSurface { + pub fn new() -> Self { + Self { + signature: any_single_geometry_type_input(), + } + } +} + +static DOCUMENTATION: OnceLock = OnceLock::new(); + +impl ScalarUDFImpl for PointOnSurface { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "st_pointonsurface" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result { + Ok(POINT2D_TYPE.into()) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result { + Ok(interior_impl(args)?) + } + + fn documentation(&self) -> Option<&Documentation> { + Some(DOCUMENTATION.get_or_init(|| { + Documentation::builder( + DOC_SECTION_OTHER, + "Returns a POINT which is guaranteed to lie in the interior of a surface (POLYGON, MULTIPOLYGON, and CURVEPOLYGON). In PostGIS this function also works on line and point geometries.", + "ST_PointOnSurface(geometry)", + ) + .with_argument("g1", "geometry") + .build() + })) + } +} + +fn interior_impl(args: &[ColumnarValue]) -> GeoDataFusionResult { + let array = ColumnarValue::values_to_arrays(args)? 
+ .into_iter() + .next() + .unwrap(); + let native_array = parse_to_native_array(array)?; + let output = native_array.as_ref().interior_point()?; + Ok(output.into_array_ref().into()) +} From d61e866e6cef54f6538c3f0841bc94c479e40dfb Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Thu, 19 Dec 2024 14:11:45 -0500 Subject: [PATCH 11/11] Fix building Python wheels for linux aarch64 (#950) Closes https://github.com/geoarrow/geoarrow-rs/issues/832 --- .github/workflows/python-io-wheels.yml | 102 +-- Cargo.lock | 939 ++++++++++++++----------- js/Cargo.lock | 3 +- python/Cargo.lock | 215 +----- python/geoarrow-io/Cargo.toml | 12 +- rust/geoarrow/Cargo.toml | 8 +- 6 files changed, 611 insertions(+), 668 deletions(-) diff --git a/.github/workflows/python-io-wheels.yml b/.github/workflows/python-io-wheels.yml index f0100999..4a2ba9c9 100644 --- a/.github/workflows/python-io-wheels.yml +++ b/.github/workflows/python-io-wheels.yml @@ -12,16 +12,41 @@ concurrency: jobs: linux: - runs-on: ubuntu-latest + runs-on: ${{ matrix.platform.runner }} strategy: matrix: - target: [x86_64] + platform: + - runner: ubuntu-latest + target: x86_64 + manylinux: auto + - runner: ubuntu-latest + target: x86 + manylinux: auto + - runner: ubuntu-latest + target: aarch64 + manylinux: "2_28" + - runner: ubuntu-latest + target: armv7 + manylinux: auto + - runner: ubuntu-latest + target: s390x + manylinux: auto + - runner: ubuntu-latest + target: ppc64le + manylinux: auto + module: + - geoarrow-io steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v3 with: - python-version: 3.x - architecture: x64 + enable-cache: true + version: "0.4.x" + + - name: Install Python versions + run: uv python install 3.9 3.10 3.11 3.12 3.13 pypy3.10 - name: Build wheels uses: PyO3/maturin-action@v1 @@ -29,72 +54,19 @@ jobs: RUSTFLAGS: "-Zinline-mir=no" with: rust-toolchain: nightly - target: ${{ matrix.target }} - manylinux: manylinux_2_28 - args: --release 
--out dist -i 3.9 -i 3.10 -i 3.11 -i 3.12 -i 3.13 -m python/geoarrow-io/Cargo.toml - before-script-linux: | - yum update -y - yum install openssl openssl-devel perl-IPC-Cmd -y - - - name: Install built wheel - if: matrix.target == 'x86_64' - run: | - pip install geoarrow-rust-io --find-links dist --force-reinstall - python -c "import geoarrow.rust.io" + target: ${{ matrix.platform.target }} + # As of Nov 2024, it was necessary to manually specify -i 3.13 to get + # maturin to find the executable. --find-interpreter did not find it. + args: --release --out dist -i 3.9 -i 3.10 -i 3.11 -i 3.12 -i 3.13 -i pypy3.10 --manifest-path python/${{ matrix.module }}/Cargo.toml + sccache: "true" + manylinux: ${{ matrix.platform.manylinux }} - name: Upload wheels uses: actions/upload-artifact@v4 with: - name: wheels-linux-${{ matrix.target }} + name: wheels-linux-${{ matrix.platform.target }}-${{ matrix.module }} path: dist - # linux-cross: - # runs-on: ubuntu-latest - # strategy: - # matrix: - # target: [aarch64, armv7, s390x, ppc64le, ppc64] - # steps: - # - uses: actions/checkout@v4 - # - uses: actions/setup-python@v4 - # with: - # python-version: 3.x - - # - name: Build wheels - # uses: PyO3/maturin-action@v1 - # env: - # RUSTFLAGS: "-Zinline-mir=no" - # with: - # rust-toolchain: nightly - # target: ${{ matrix.target }} - # manylinux: auto - # args: --release --out dist -i 3.9 -i 3.10 -i 3.11 -i 3.12 -i 3.13 -m python/geoarrow-io/Cargo.toml - - # # This is currently failing with - # # python: command not found - - # # - uses: uraimo/run-on-arch-action@v2.5.1 - # # if: matrix.target == 'aarch64' - # # name: Install built wheel - # # with: - # # arch: ${{ matrix.target }} - # # distro: ubuntu20.04 - # # githubToken: ${{ github.token }} - # # install: | - # # apt-get update - # # apt-get install -y --no-install-recommends python3 python3-pip - # # pip3 install -U pip - # # run: | - # # pip install geoarrow-rust-io --find-links dist --force-reinstall - # # python -c "import 
geoarrow.rust.io" - - # # Have to set path from root - # # https://github.com/actions/upload-artifact/issues/232#issuecomment-964235360 - # - name: Upload wheels - # uses: actions/upload-artifact@v3 - # with: - # name: wheels - # path: dist/*.whl - macos: name: Build ${{ matrix.platform.target }} wheels on ${{ matrix.platform.runner }} runs-on: ${{ matrix.platform.runner }} diff --git a/Cargo.lock b/Cargo.lock index aea52987..bae602a7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -57,9 +57,9 @@ dependencies = [ [[package]] name = "allocator-api2" -version = "0.2.18" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" [[package]] name = "android-tzdata" @@ -84,9 +84,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstyle" -version = "1.0.8" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" [[package]] name = "approx" @@ -189,7 +189,7 @@ dependencies = [ "chrono", "comfy-table", "half", - "lexical-core 1.0.2", + "lexical-core 1.0.5", "num", "ryu", ] @@ -209,7 +209,7 @@ dependencies = [ "csv", "csv-core", "lazy_static", - "lexical-core 1.0.2", + "lexical-core 1.0.5", "regex", ] @@ -255,7 +255,7 @@ dependencies = [ "chrono", "half", "indexmap", - "lexical-core 1.0.2", + "lexical-core 1.0.5", "num", "serde", "serde_json", @@ -367,7 +367,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.90", ] [[package]] @@ -378,7 +378,7 @@ checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" dependencies = [ "proc-macro2", "quote", - 
"syn 2.0.79", + "syn 2.0.90", ] [[package]] @@ -454,7 +454,7 @@ dependencies = [ "regex", "rustc-hash 1.1.0", "shlex", - "syn 2.0.79", + "syn 2.0.90", "which", ] @@ -477,7 +477,7 @@ dependencies = [ "regex", "rustc-hash 1.1.0", "shlex", - "syn 2.0.79", + "syn 2.0.90", "which", ] @@ -556,9 +556,9 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "bytemuck" -version = "1.18.0" +version = "1.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94bbb0ad554ad961ddc5da507a12a29b14e4ae5bda06b19f575a3e6079d2e2ae" +checksum = "8b37c88a63ffd85d15b406896cc343916d7cf57838a847b3a6f2ca5d39a5695a" [[package]] name = "byteorder" @@ -568,9 +568,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.7.2" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "428d9aa8fbc0670b7b8d6030a7fadd0f86151cae55e4dbbece15f3780a3dfaf3" +checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b" [[package]] name = "bzip2" @@ -607,9 +607,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.1.28" +version = "1.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e80e3b6a3ab07840e1cae9b0666a63970dc28e8ed5ffbcdacbfc760c281bfc1" +checksum = "c31a0499c1dc64f458ad13872de75c0eb7e3fdb0e67964610c914b034fc5956e" dependencies = [ "jobserver", "libc", @@ -631,11 +631,17 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "chrono" -version = "0.4.38" +version = "0.4.39" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825" dependencies = [ "android-tzdata", "iana-time-zone", @@ -707,18 +713,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.20" +version = "4.5.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b97f376d85a664d5837dbae44bf546e6477a679ff6610010f17276f686d867e8" +checksum = "3135e7ec2ef7b10c6ed8950f0f792ed96ee093fa088608f1c76e569722700c84" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.20" +version = "4.5.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19bc80abd44e4bed93ca373a0704ccbd1b710dc5749406201bb018272808dc54" +checksum = "30582fc632330df2bd26877bde0c1f4470d57c582bbc070376afcd04d8cb4838" dependencies = [ "anstyle", "clap_lex", @@ -726,15 +732,15 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.2" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" +checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" [[package]] name = "cmake" -version = "0.1.51" +version = "0.1.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb1e43aa7fd152b1f968787f7dbcdeb306d1867ff373c69955211876c053f91a" +checksum = "c682c223677e0e5b6b7f63a64b9351844c3f1b1678a68b7ee617e30fb082620e" dependencies = [ "cc", ] @@ -784,9 +790,9 @@ checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" [[package]] name = "core-foundation" -version = "0.9.4" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +checksum = 
"b55271e5c8c478ad3f38ad24ef34923091e0548492a266d19b3c0b4d82574c63" dependencies = [ "core-foundation-sys", "libc", @@ -800,9 +806,9 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "cpufeatures" -version = "0.2.14" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" +checksum = "16b80225097f2e5ae4e7179dd2266824648f3e2f49d9134d584b76389d31c4c3" dependencies = [ "libc", ] @@ -869,9 +875,9 @@ dependencies = [ [[package]] name = "crossbeam-deque" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" dependencies = [ "crossbeam-epoch", "crossbeam-utils", @@ -888,18 +894,18 @@ dependencies = [ [[package]] name = "crossbeam-queue" -version = "0.3.11" +version = "0.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df0346b5d5e76ac2fe4e327c5fd1118d6be7c51dfb18f9b7922923f287471e35" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" dependencies = [ "crossbeam-utils", ] [[package]] name = "crossbeam-utils" -version = "0.8.20" +version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "crunchy" @@ -919,9 +925,9 @@ dependencies = [ [[package]] name = "csv" -version = "1.3.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" +checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" dependencies = [ "csv-core", "itoa", @@ 
-1230,7 +1236,7 @@ version = "43.0.0" source = "git+https://github.com/kylebarron/datafusion?rev=170432e3179ed72f413ffcd4d7edfe0007db296d#170432e3179ed72f413ffcd4d7edfe0007db296d" dependencies = [ "quote", - "syn 2.0.79", + "syn 2.0.90", ] [[package]] @@ -1394,6 +1400,17 @@ dependencies = [ "subtle", ] +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", +] + [[package]] name = "dotenvy" version = "0.15.7" @@ -1419,15 +1436,6 @@ dependencies = [ "serde", ] -[[package]] -name = "encoding_rs" -version = "0.8.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" -dependencies = [ - "cfg-if", -] - [[package]] name = "enum-as-inner" version = "0.6.1" @@ -1437,7 +1445,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.90", ] [[package]] @@ -1448,12 +1456,12 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.9" +version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -1481,9 +1489,9 @@ checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" [[package]] name = "fastrand" -version = "2.1.1" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "filetime" @@ -1515,9 +1523,9 @@ 
dependencies = [ [[package]] name = "flate2" -version = "1.0.34" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1b589b4dc103969ad3cf85c950899926ec64300a1a46d76c03a6072957036f0" +checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" dependencies = [ "crc32fast", "miniz_oxide", @@ -1526,8 +1534,7 @@ dependencies = [ [[package]] name = "flatgeobuf" version = "4.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2d78ca20e47a9f45703d3a811f7f24935ce2d3a3f7b947fd433489ddd8daad6" +source = "git+https://github.com/kylebarron/flatgeobuf?rev=06e987d6d3d73edb95124a14cdaab9ee8e6e57ac#06e987d6d3d73edb95124a14cdaab9ee8e6e57ac" dependencies = [ "byteorder", "bytes", @@ -1549,9 +1556,9 @@ checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8" [[package]] name = "flume" -version = "0.11.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55ac459de2512911e4b674ce33cf20befaba382d05b62b008afc1c8b57cbf181" +checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095" dependencies = [ "futures-core", "futures-sink", @@ -1564,21 +1571,6 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" -[[package]] -name = "foreign-types" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" -dependencies = [ - "foreign-types-shared", -] - -[[package]] -name = "foreign-types-shared" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" - [[package]] name = "form_urlencoded" version = "1.2.1" @@ -1655,7 +1647,7 @@ checksum = 
"162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.90", ] [[package]] @@ -1701,7 +1693,7 @@ dependencies = [ "libc", "once_cell", "semver", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -1770,7 +1762,7 @@ dependencies = [ "bytemuck", "float_next_after", "num-traits", - "thiserror", + "thiserror 1.0.69", "tinyvec", ] @@ -1840,7 +1832,7 @@ dependencies = [ "serde_json", "shapefile", "sqlx", - "thiserror", + "thiserror 1.0.69", "tokio", "wkb", "wkt 0.12.0", @@ -1866,7 +1858,7 @@ dependencies = [ "geo-traits", "geoarrow", "geohash", - "thiserror", + "thiserror 1.0.69", "tokio", ] @@ -1898,7 +1890,7 @@ dependencies = [ "log", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -1936,8 +1928,8 @@ dependencies = [ "log", "scroll", "serde_json", - "thiserror", - "wkt 0.11.0", + "thiserror 1.0.69", + "wkt 0.11.1", ] [[package]] @@ -1947,8 +1939,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi", + "wasm-bindgen", ] [[package]] @@ -1965,9 +1959,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.4.6" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205" +checksum = "ccae279728d634d083c00f6099cb58f01cc99c145b84b8be2f6c74618d79922e" dependencies = [ "atomic-waker", "bytes", @@ -2052,12 +2046,6 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" -[[package]] -name = "hermit-abi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" - [[package]] name = "hermit-abi" version = "0.4.0" @@ -2090,18 +2078,18 @@ dependencies = [ [[package]] name = "home" -version = "0.5.9" +version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "http" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" +checksum = "f16ca2af56261c99fba8bac40a10251ce8188205a4c448fbb745a2e4daa76fea" dependencies = [ "bytes", "fnv", @@ -2134,15 +2122,14 @@ dependencies = [ [[package]] name = "http-range-client" version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bae68ca93886cdaad288a3172f8157ce99ffb621343832b6254b79b4d7e8a34f" +source = "git+https://github.com/pka/http-range-client?rev=5699e32fafc416ce683bfbf1d179f80b0b6549a3#5699e32fafc416ce683bfbf1d179f80b0b6549a3" dependencies = [ "async-trait", "byteorder", "bytes", "read-logger", "reqwest", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -2159,9 +2146,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "1.4.1" +version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50dfd22e0e76d0f662d429a5f80fcaf3855009297eab6a0a9f8543834744ba05" +checksum = "256fb8d4bd6413123cc9d91832d78325c48ff41677595be797d90f42969beae0" dependencies = [ "bytes", "futures-channel", @@ -2179,15 +2166,15 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.27.3" +version = "0.27.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"08afdbb5c31130e3034af566421053ab03787c640246a446327f550d11bcb333" +checksum = "f6884a48c6826ec44f524c7456b163cebe9e55a18d7b5e307cb4f100371cc767" dependencies = [ "futures-util", "http", "hyper", "hyper-util", - "rustls 0.23.14", + "rustls 0.23.20", "rustls-native-certs", "rustls-pki-types", "tokio", @@ -2195,27 +2182,11 @@ dependencies = [ "tower-service", ] -[[package]] -name = "hyper-tls" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" -dependencies = [ - "bytes", - "http-body-util", - "hyper", - "hyper-util", - "native-tls", - "tokio", - "tokio-native-tls", - "tower-service", -] - [[package]] name = "hyper-util" -version = "0.1.9" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41296eb09f183ac68eec06e03cdbea2e759633d4067b2f6552fc2e009bcad08b" +checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" dependencies = [ "bytes", "futures-channel", @@ -2297,21 +2268,150 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] 
+ +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + 
"quote", + "syn 2.0.90", +] + [[package]] name = "idna" -version = "0.5.0" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +dependencies = [ + "icu_normalizer", + "icu_properties", ] [[package]] name = "indexmap" -version = "2.6.0" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" +checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f" dependencies = [ "equivalent", "hashbrown 0.15.2", @@ -2335,7 +2435,7 @@ version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "261f68e344040fbd0edea105bef17c66edf46f984ddb1115b775ce31be948f4b" dependencies = [ - "hermit-abi 0.4.0", + "hermit-abi", "libc", "windows-sys 0.52.0", ] @@ -2378,9 +2478,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.11" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" [[package]] name = "jobserver" @@ -2393,10 +2493,11 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.71" +version = "0.3.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cb94a0ffd3f3ee755c20f7d8752f45cac88605a4dcf808abcff72873296ec7b" +checksum = "6717b6b5b077764fb5966237269cb3c64edddde4b14ce42647430a78ced9e7b7" 
dependencies = [ + "once_cell", "wasm-bindgen", ] @@ -2430,15 +2531,15 @@ dependencies = [ [[package]] name = "lexical-core" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0431c65b318a590c1de6b8fd6e72798c92291d27762d94c9e6c37ed7a73d8458" +checksum = "b765c31809609075565a70b4b71402281283aeda7ecaf4818ac14a7b2ade8958" dependencies = [ - "lexical-parse-float 1.0.2", - "lexical-parse-integer 1.0.2", - "lexical-util 1.0.3", - "lexical-write-float 1.0.2", - "lexical-write-integer 1.0.2", + "lexical-parse-float 1.0.5", + "lexical-parse-integer 1.0.5", + "lexical-util 1.0.6", + "lexical-write-float 1.0.5", + "lexical-write-integer 1.0.5", ] [[package]] @@ -2454,12 +2555,12 @@ dependencies = [ [[package]] name = "lexical-parse-float" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb17a4bdb9b418051aa59d41d65b1c9be5affab314a872e5ad7f06231fb3b4e0" +checksum = "de6f9cb01fb0b08060209a057c048fcbab8717b4c1ecd2eac66ebfe39a65b0f2" dependencies = [ - "lexical-parse-integer 1.0.2", - "lexical-util 1.0.3", + "lexical-parse-integer 1.0.5", + "lexical-util 1.0.6", "static_assertions", ] @@ -2475,11 +2576,11 @@ dependencies = [ [[package]] name = "lexical-parse-integer" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5df98f4a4ab53bf8b175b363a34c7af608fe31f93cc1fb1bf07130622ca4ef61" +checksum = "72207aae22fc0a121ba7b6d479e42cbfea549af1479c3f3a4f12c70dd66df12e" dependencies = [ - "lexical-util 1.0.3", + "lexical-util 1.0.6", "static_assertions", ] @@ -2494,9 +2595,9 @@ dependencies = [ [[package]] name = "lexical-util" -version = "1.0.3" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85314db53332e5c192b6bca611fb10c114a80d1b831ddac0af1e9be1b9232ca0" +checksum = "5a82e24bf537fd24c177ffbbdc6ebcc8d54732c35b50a3f28cc3f4e4c949a0b3" dependencies = 
[ "static_assertions", ] @@ -2514,12 +2615,12 @@ dependencies = [ [[package]] name = "lexical-write-float" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e7c3ad4e37db81c1cbe7cf34610340adc09c322871972f74877a712abc6c809" +checksum = "c5afc668a27f460fb45a81a757b6bf2f43c2d7e30cb5a2dcd3abf294c78d62bd" dependencies = [ - "lexical-util 1.0.3", - "lexical-write-integer 1.0.2", + "lexical-util 1.0.6", + "lexical-write-integer 1.0.5", "static_assertions", ] @@ -2535,25 +2636,25 @@ dependencies = [ [[package]] name = "lexical-write-integer" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb89e9f6958b83258afa3deed90b5de9ef68eef090ad5086c791cd2345610162" +checksum = "629ddff1a914a836fb245616a7888b62903aae58fa771e1d83943035efa0f978" dependencies = [ - "lexical-util 1.0.3", + "lexical-util 1.0.6", "static_assertions", ] [[package]] name = "libc" -version = "0.2.159" +version = "0.2.169" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5" +checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" [[package]] name = "libloading" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" +checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" dependencies = [ "cfg-if", "windows-targets 0.52.6", @@ -2561,9 +2662,9 @@ dependencies = [ [[package]] name = "libm" -version = "0.2.8" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" +checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" [[package]] name = "libredox" @@ -2593,6 +2694,12 @@ version = "0.4.14" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "litemap" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" + [[package]] name = "lock_api" version = "0.4.12" @@ -2659,42 +2766,24 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.8.0" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +checksum = "4ffbe83022cedc1d264172192511ae958937694cd57ce297164951b8b3568394" dependencies = [ "adler2", ] [[package]] name = "mio" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" +checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" dependencies = [ - "hermit-abi 0.3.9", "libc", "wasi", "windows-sys 0.52.0", ] -[[package]] -name = "native-tls" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8614eb2c83d59d1c8cc974dd3f920198647674a0a035e1af1fa58707e317466" -dependencies = [ - "libc", - "log", - "openssl", - "openssl-probe", - "openssl-sys", - "schannel", - "security-framework", - "security-framework-sys", - "tempfile", -] - [[package]] name = "nom" version = "7.1.3" @@ -2820,7 +2909,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.90", ] [[package]] @@ -2834,9 +2923,9 @@ dependencies = [ [[package]] name = "object_store" -version = "0.11.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25a0c4b3a0e31f8b66f71ad8064521efa773910196e2cde791436f13409f3b45" +checksum = 
"6eb4c22c6154a1e759d7099f9ffad7cc5ef8245f9efbab4a41b92623079c82f3" dependencies = [ "async-trait", "base64 0.22.1", @@ -2874,50 +2963,12 @@ version = "11.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" -[[package]] -name = "openssl" -version = "0.10.66" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9529f4786b70a3e8c61e11179af17ab6188ad8d0ded78c5529441ed39d4bd9c1" -dependencies = [ - "bitflags 2.6.0", - "cfg-if", - "foreign-types", - "libc", - "once_cell", - "openssl-macros", - "openssl-sys", -] - -[[package]] -name = "openssl-macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.79", -] - [[package]] name = "openssl-probe" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" -[[package]] -name = "openssl-sys" -version = "0.9.103" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f9e8deee91df40a943c71b917e5874b951d32a802526c85721ce3b776c929d6" -dependencies = [ - "cc", - "libc", - "pkg-config", - "vcpkg", -] - [[package]] name = "ordered-float" version = "2.10.1" @@ -3072,7 +3123,7 @@ dependencies = [ "phf_shared", "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.90", ] [[package]] @@ -3086,9 +3137,9 @@ dependencies = [ [[package]] name = "pin-project-lite" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" +checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" [[package]] name = "pin-utils" @@ -3159,7 +3210,7 @@ checksum = 
"20b682daed9c6adcacc2c546410d7692babe5bf946e71a5e3b8b5c9b20d604b2" dependencies = [ "geo 0.28.0", "num-traits", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -3179,12 +3230,12 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.22" +version = "0.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "479cf940fbbb3426c32c5d5176f62ad57549a0bb84773423ba8be9d089f5faba" +checksum = "64d1ec885c64d0457d564db4ec299b2dae3f9c02808b8ad9c3a089c591b18033" dependencies = [ "proc-macro2", - "syn 2.0.79", + "syn 2.0.90", ] [[package]] @@ -3198,9 +3249,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.87" +version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3e4daa0dcf6feba26f985457cdf104d4b4256fc5a09547140f3631bb076b19a" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" dependencies = [ "unicode-ident", ] @@ -3215,7 +3266,7 @@ dependencies = [ "libc", "num-traits", "proj-sys", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -3252,45 +3303,49 @@ dependencies = [ [[package]] name = "quinn" -version = "0.11.5" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c7c5fdde3cdae7203427dc4f0a68fe0ed09833edc525a03456b153b79828684" +checksum = "62e96808277ec6f97351a2380e6c25114bc9e67037775464979f3037c92d05ef" dependencies = [ "bytes", "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash 2.0.0", - "rustls 0.23.14", + "rustc-hash 2.1.0", + "rustls 0.23.20", "socket2", - "thiserror", + "thiserror 2.0.8", "tokio", "tracing", ] [[package]] name = "quinn-proto" -version = "0.11.8" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fadfaed2cd7f389d0161bb73eeb07b7b78f8691047a6f3e73caaeae55310a4a6" +checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d" dependencies = [ "bytes", + "getrandom", "rand", "ring", - "rustc-hash 
2.0.0", - "rustls 0.23.14", + "rustc-hash 2.1.0", + "rustls 0.23.20", + "rustls-pki-types", "slab", - "thiserror", + "thiserror 2.0.8", "tinyvec", "tracing", + "web-time", ] [[package]] name = "quinn-udp" -version = "0.5.5" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fe68c2e9e1a1234e218683dbdf9f9dfcb094113c5ac2b938dfcb9bab4c4140b" +checksum = "1c40286217b4ba3a71d644d752e6a0b71f13f1b6a2c5311acfcbe0c2418ed904" dependencies = [ + "cfg_aliases", "libc", "once_cell", "socket2", @@ -3383,23 +3438,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.79", + "syn 2.0.90", ] [[package]] name = "redox_syscall" -version = "0.5.7" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" +checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" dependencies = [ "bitflags 2.6.0", ] [[package]] name = "regex" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", @@ -3409,9 +3464,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.8" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", @@ -3426,14 +3481,12 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "reqwest" -version = "0.12.8" +version = "0.12.9" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "f713147fbe92361e52392c73b8c9e48c04c6625bce969ef54dc901e58e042a7b" +checksum = "a77c62af46e79de0a562e1a9849205ffcb7fc1238876e9bd743357570e04046f" dependencies = [ "base64 0.22.1", "bytes", - "encoding_rs", - "futures-channel", "futures-core", "futures-util", "h2", @@ -3442,18 +3495,16 @@ dependencies = [ "http-body-util", "hyper", "hyper-rustls", - "hyper-tls", "hyper-util", "ipnet", "js-sys", "log", "mime", - "native-tls", "once_cell", "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.14", + "rustls 0.23.20", "rustls-native-certs", "rustls-pemfile 2.2.0", "rustls-pki-types", @@ -3461,9 +3512,7 @@ dependencies = [ "serde_json", "serde_urlencoded", "sync_wrapper", - "system-configuration", "tokio", - "tokio-native-tls", "tokio-rustls", "tokio-util", "tower-service", @@ -3498,9 +3547,9 @@ checksum = "cbf4a6aa5f6d6888f39e980649f3ad6b666acdce1d78e95b8a2cb076e687ae30" [[package]] name = "rsa" -version = "0.9.6" +version = "0.9.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d0e5124fcb30e76a7e79bfee683a2746db83784b86289f6251b54b7950a0dfc" +checksum = "47c75d7c5c6b673e58bf54d8544a9f432e3a925b0e80f7cd3602ab5c50c55519" dependencies = [ "const-oid", "digest", @@ -3518,9 +3567,9 @@ dependencies = [ [[package]] name = "rstar" -version = "0.12.0" +version = "0.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "133315eb94c7b1e8d0cb097e5a710d850263372fd028fff18969de708afc7008" +checksum = "421400d13ccfd26dfa5858199c30a5d76f9c54e0dba7575273025b43c5175dbb" dependencies = [ "heapless", "num-traits", @@ -3541,9 +3590,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustc-hash" -version = "2.0.0" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" +checksum = 
"c7fb8039b3032c191086b10f11f319a6e99e1e82889c5cc6046f515c9db1d497" [[package]] name = "rustc_version" @@ -3556,15 +3605,15 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.37" +version = "0.38.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" +checksum = "f93dc38ecbab2eb790ff964bb77fa94faf256fd3e73285fd7ba0903b76bedb85" dependencies = [ "bitflags 2.6.0", "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -3580,9 +3629,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.14" +version = "0.23.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "415d9944693cb90382053259f89fbb077ea730ad7273047ec63b19bc9b160ba8" +checksum = "5065c3f250cbd332cd894be57c40fa52387247659b14a2d6041d121547903b1b" dependencies = [ "once_cell", "ring", @@ -3594,12 +3643,11 @@ dependencies = [ [[package]] name = "rustls-native-certs" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcaf18a4f2be7326cd874a5fa579fae794320a0f388d365dca7e480e55f83f8a" +checksum = "7fcff2dd52b58a8d98a70243663a0d234c4e2b79235637849d15913394a247d3" dependencies = [ "openssl-probe", - "rustls-pemfile 2.2.0", "rustls-pki-types", "schannel", "security-framework", @@ -3625,9 +3673,12 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.9.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e696e35370c65c9c541198af4543ccd580cf17fc25d8e05c5a242b202488c55" +checksum = "d2bf47e6ff922db3825eb750c4e2ff784c6ff8fb9e13046ef6a1d1c5401b0b37" +dependencies = [ + "web-time", +] [[package]] name = "rustls-webpki" @@ -3673,9 +3724,9 @@ dependencies = [ [[package]] name = "schannel" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"01227be5826fa0690321a2ba6c5cd57a19cf3f6a09e76973b58e61de6ab9d1c1" +checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" dependencies = [ "windows-sys 0.59.0", ] @@ -3704,9 +3755,9 @@ dependencies = [ [[package]] name = "security-framework" -version = "2.11.1" +version = "3.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +checksum = "81d3f8c9bfcc3cbb6b0179eb57042d75b1582bdc65c3cb95f3fa999509c03cbc" dependencies = [ "bitflags 2.6.0", "core-foundation", @@ -3717,9 +3768,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.12.0" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea4a292869320c0272d7bc55a5a6aafaff59b4f63404a003887b679a2e05b4b6" +checksum = "1863fd3768cd83c56a7f60faa4dc0d403f1b6df0a38c3c25f44b7894e45370d5" dependencies = [ "core-foundation-sys", "libc", @@ -3727,9 +3778,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.23" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" +checksum = "3cb6eb87a131f756572d7fb904f6e7b68633f09cca868c5df1c4b8d1a694bbba" [[package]] name = "seq-macro" @@ -3739,29 +3790,29 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.210" +version = "1.0.216" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" +checksum = "0b9781016e935a97e8beecf0c933758c97a5520d32930e460142b4cd80c6338e" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.210" +version = "1.0.216" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" +checksum = 
"46f859dbbf73865c6627ed570e78961cd3ac92407a2d117204c49232485da55e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.90", ] [[package]] name = "serde_json" -version = "1.0.128" +version = "1.0.133" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" +checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" dependencies = [ "itoa", "memchr", @@ -3868,7 +3919,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.90", ] [[package]] @@ -3879,9 +3930,9 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "socket2" -version = "0.5.7" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" +checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8" dependencies = [ "libc", "windows-sys 0.52.0", @@ -3946,7 +3997,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.90", ] [[package]] @@ -3997,7 +4048,7 @@ dependencies = [ "sha2", "smallvec", "sqlformat", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-stream", "tracing", @@ -4082,7 +4133,7 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror", + "thiserror 1.0.69", "tracing", "whoami", ] @@ -4121,7 +4172,7 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror", + "thiserror 1.0.69", "tracing", "whoami", ] @@ -4202,7 +4253,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.79", + "syn 2.0.90", ] [[package]] @@ -4224,9 +4275,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.79" +version = "2.0.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590" +checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31" dependencies = [ "proc-macro2", "quote", @@ -4235,39 +4286,29 @@ dependencies = [ [[package]] name = "sync_wrapper" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" dependencies = [ "futures-core", ] [[package]] -name = "system-configuration" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" -dependencies = [ - "bitflags 2.6.0", - "core-foundation", - "system-configuration-sys", -] - -[[package]] -name = "system-configuration-sys" -version = "0.6.0" +name = "synstructure" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ - "core-foundation-sys", - "libc", + "proc-macro2", + "quote", + "syn 2.0.90", ] [[package]] name = "tar" -version = "0.4.42" +version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ff6c40d3aedb5e06b57c6f669ad17ab063dd1e63d977c6a88e7f4dfa4f04020" +checksum = "c65998313f8e17d0d553d28f91a0df93e4dbbbf770279c7bc21ca0f09ea1a1f6" dependencies = [ "filetime", "libc", @@ -4276,9 +4317,9 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.13.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b" +checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" dependencies = [ "cfg-if", "fastrand", @@ -4289,22 
+4330,42 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.64" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08f5383f3e0071702bf93ab5ee99b52d26936be9dedd9413067cbdcddcb6141a" +dependencies = [ + "thiserror-impl 2.0.8", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ - "thiserror-impl", + "proc-macro2", + "quote", + "syn 2.0.90", ] [[package]] name = "thiserror-impl" -version = "1.0.64" +version = "2.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" +checksum = "f2f357fcec90b3caef6623a099691be676d033b40a058ac95d2a6ade6fa0c943" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.90", ] [[package]] @@ -4320,9 +4381,9 @@ dependencies = [ [[package]] name = "time" -version = "0.3.36" +version = "0.3.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" +checksum = "35e7868883861bd0e56d9ac6efcaaca0d6d5d82a2a7ec8209ff492c07cf37b21" dependencies = [ "deranged", "num-conv", @@ -4346,6 +4407,16 @@ dependencies = [ "crunchy", ] +[[package]] +name = "tinystr" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tinytemplate" 
version = "1.2.1" @@ -4373,9 +4444,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.40.0" +version = "1.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2b070231665d27ad9ec9b8df639893f46727666c6767db40317fbe920a5d998" +checksum = "5cec9b21b0450273377fc97bd4c33a8acffc8c996c987a7c5b319a0083707551" dependencies = [ "backtrace", "bytes", @@ -4395,35 +4466,24 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", -] - -[[package]] -name = "tokio-native-tls" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" -dependencies = [ - "native-tls", - "tokio", + "syn 2.0.90", ] [[package]] name = "tokio-rustls" -version = "0.26.0" +version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" +checksum = "5f6d0975eaace0cf0fcadee4e4aaa5da15b5c079146f2cffb67c113be122bf37" dependencies = [ - "rustls 0.23.14", - "rustls-pki-types", + "rustls 0.23.20", "tokio", ] [[package]] name = "tokio-stream" -version = "0.1.16" +version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f4e6ce100d0eb49a2734f8c0812bcd324cf357d21810932c5df6b96ef2b86f1" +checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" dependencies = [ "futures-core", "pin-project-lite", @@ -4432,9 +4492,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.12" +version = "0.7.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" +checksum = "d7fcaa8d55a2bdd6b83ace262b016eca0d79ee02818c5c1bcdf0305114081078" dependencies = [ "bytes", "futures-core", @@ 
-4468,9 +4528,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.40" +version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ "log", "pin-project-lite", @@ -4480,20 +4540,20 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.27" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.90", ] [[package]] name = "tracing-core" -version = "0.1.32" +version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" dependencies = [ "once_cell", ] @@ -4522,15 +4582,15 @@ checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "unicode-bidi" -version = "0.3.17" +version = "0.3.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ab17db44d7388991a428b2ee655ce0c212e862eff1768a455c58f9aad6e7893" +checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" [[package]] name = "unicode-ident" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" +checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" [[package]] name = "unicode-normalization" @@ -4573,9 +4633,9 @@ checksum = 
"8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.2" +version = "2.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" dependencies = [ "form_urlencoded", "idna", @@ -4588,6 +4648,18 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "uuid" version = "1.11.0" @@ -4642,9 +4714,9 @@ checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" [[package]] name = "wasm-bindgen" -version = "0.2.94" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef073ced962d62984fb38a36e5fdc1a2b23c9e0e1fa0689bb97afa4202ef6887" +checksum = "a474f6281d1d70c17ae7aa6a613c87fce69a127e2624002df63dcb39d6cf6396" dependencies = [ "cfg-if", "once_cell", @@ -4653,36 +4725,36 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.94" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4bfab14ef75323f4eb75fa52ee0a3fb59611977fd3240da19b2cf36ff85030e" +checksum = "5f89bb38646b4f81674e8f5c3fb81b562be1fd936d84320f3264486418519c79" dependencies = [ "bumpalo", "log", - "once_cell", "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.90", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = 
"0.4.44" +version = "0.4.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65471f79c1022ffa5291d33520cbbb53b7687b01c2f8e83b57d102eed7ed479d" +checksum = "38176d9b44ea84e9184eff0bc34cc167ed044f816accfe5922e54d84cf48eca2" dependencies = [ "cfg-if", "js-sys", + "once_cell", "wasm-bindgen", "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.94" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7bec9830f60924d9ceb3ef99d55c155be8afa76954edffbb5936ff4509474e7" +checksum = "2cc6181fd9a7492eef6fef1f33961e3695e4579b9872a6f7c83aee556666d4fe" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -4690,28 +4762,28 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.94" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c74f6e152a76a2ad448e223b0fc0b6b5747649c3d769cc6bf45737bf97d0ed6" +checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.90", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.94" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a42f6c679374623f295a8623adfe63d9284091245c3504bde47c17a3ce2777d9" +checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6" [[package]] name = "wasm-streams" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e072d4e72f700fb3443d8fe94a39315df013eef1104903cdb0a2abd322bbecd" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" dependencies = [ "futures-util", "js-sys", @@ -4722,9 +4794,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.71" +version = "0.3.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"44188d185b5bdcae1052d08bcbcf9091a5524038d4572cc4f4f2bb9d5554ddd9" +checksum = "04dd7223427d52553d3702c004d3b2fe07c148165faa56313cb00211e31c12bc" dependencies = [ "js-sys", "wasm-bindgen", @@ -4982,19 +5054,19 @@ dependencies = [ "byteorder", "geo-traits", "num_enum", - "thiserror", + "thiserror 1.0.69", ] [[package]] name = "wkt" -version = "0.11.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "296937617013271141d1145d9c05861f5ed3b1bc4297e81c692aa3cff9270a9c" +checksum = "54f7f1ff4ea4c18936d6cd26a6fd24f0003af37e951a8e0e8b9e9a2d0bd0a46d" dependencies = [ "geo-types", "log", "num-traits", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -5007,9 +5079,21 @@ dependencies = [ "geo-types", "log", "num-traits", - "thiserror", + "thiserror 1.0.69", ] +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + [[package]] name = "xattr" version = "1.3.1" @@ -5030,6 +5114,30 @@ dependencies = [ "lzma-sys", ] +[[package]] +name = "yoke" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", + "synstructure", +] + [[package]] name = "zerocopy" version = "0.7.35" @@ -5048,7 +5156,28 @@ checksum = 
"fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.90", +] + +[[package]] +name = "zerofrom" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", + "synstructure", ] [[package]] @@ -5057,6 +5186,28 @@ version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", +] + [[package]] name = "zstd" version = "0.13.2" diff --git a/js/Cargo.lock b/js/Cargo.lock index 85b40204..aaede781 100644 --- a/js/Cargo.lock +++ b/js/Cargo.lock @@ -895,8 +895,7 @@ dependencies = [ [[package]] name = "flatgeobuf" version = "4.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2d78ca20e47a9f45703d3a811f7f24935ce2d3a3f7b947fd433489ddd8daad6" +source = "git+https://github.com/kylebarron/flatgeobuf?rev=06e987d6d3d73edb95124a14cdaab9ee8e6e57ac#06e987d6d3d73edb95124a14cdaab9ee8e6e57ac" dependencies = [ "byteorder", "fallible-streaming-iterator", diff 
--git a/python/Cargo.lock b/python/Cargo.lock index aed8a9a7..0bef08a0 100644 --- a/python/Cargo.lock +++ b/python/Cargo.lock @@ -552,16 +552,6 @@ dependencies = [ "tiny-keccak", ] -[[package]] -name = "core-foundation" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" -dependencies = [ - "core-foundation-sys", - "libc", -] - [[package]] name = "core-foundation" version = "0.10.0" @@ -760,15 +750,6 @@ dependencies = [ "serde", ] -[[package]] -name = "encoding_rs" -version = "0.8.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" -dependencies = [ - "cfg-if", -] - [[package]] name = "enum-as-inner" version = "0.6.1" @@ -849,8 +830,7 @@ dependencies = [ [[package]] name = "flatgeobuf" version = "4.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2d78ca20e47a9f45703d3a811f7f24935ce2d3a3f7b947fd433489ddd8daad6" +source = "git+https://github.com/kylebarron/flatgeobuf?rev=06e987d6d3d73edb95124a14cdaab9ee8e6e57ac#06e987d6d3d73edb95124a14cdaab9ee8e6e57ac" dependencies = [ "byteorder", "bytes", @@ -893,21 +873,6 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f81ec6369c545a7d40e4589b5597581fa1c441fe1cce96dd1de43159910a36a2" -[[package]] -name = "foreign-types" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" -dependencies = [ - "foreign-types-shared", -] - -[[package]] -name = "foreign-types-shared" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" - [[package]] name = "form_urlencoded" version = "1.2.1" @@ -1192,7 +1157,6 @@ dependencies = [ "geo-traits", 
"geoarrow", "object_store", - "openssl", "parquet", "pyo3", "pyo3-arrow", @@ -1200,7 +1164,6 @@ dependencies = [ "pyo3-geoarrow", "pyo3-object_store", "pythonize", - "reqwest", "serde_json", "sqlx", "thiserror 1.0.69", @@ -1428,8 +1391,7 @@ dependencies = [ [[package]] name = "http-range-client" version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bae68ca93886cdaad288a3172f8157ce99ffb621343832b6254b79b4d7e8a34f" +source = "git+https://github.com/pka/http-range-client?rev=5699e32fafc416ce683bfbf1d179f80b0b6549a3#5699e32fafc416ce683bfbf1d179f80b0b6549a3" dependencies = [ "async-trait", "byteorder", @@ -1473,9 +1435,9 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.27.3" +version = "0.27.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08afdbb5c31130e3034af566421053ab03787c640246a446327f550d11bcb333" +checksum = "f6884a48c6826ec44f524c7456b163cebe9e55a18d7b5e307cb4f100371cc767" dependencies = [ "futures-util", "http", @@ -1489,22 +1451,6 @@ dependencies = [ "tower-service", ] -[[package]] -name = "hyper-tls" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" -dependencies = [ - "bytes", - "http-body-util", - "hyper", - "hyper-util", - "native-tls", - "tokio", - "tokio-native-tls", - "tower-service", -] - [[package]] name = "hyper-util" version = "0.1.10" @@ -1940,9 +1886,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.168" +version = "0.2.169" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aaeb2981e0606ca11d79718f8bb01164f1d6ed75080182d3abf017e6d244b6d" +checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" [[package]] name = "libm" @@ -2047,9 +1993,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.8.0" +version = 
"0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +checksum = "4ffbe83022cedc1d264172192511ae958937694cd57ce297164951b8b3568394" dependencies = [ "adler2", ] @@ -2065,23 +2011,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "native-tls" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8614eb2c83d59d1c8cc974dd3f920198647674a0a035e1af1fa58707e317466" -dependencies = [ - "libc", - "log", - "openssl", - "openssl-probe", - "openssl-sys", - "schannel", - "security-framework 2.11.1", - "security-framework-sys", - "tempfile", -] - [[package]] name = "ndarray" version = "0.16.1" @@ -2287,60 +2216,12 @@ version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" -[[package]] -name = "openssl" -version = "0.10.68" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6174bc48f102d208783c2c84bf931bb75927a617866870de8a4ea85597f871f5" -dependencies = [ - "bitflags 2.6.0", - "cfg-if", - "foreign-types", - "libc", - "once_cell", - "openssl-macros", - "openssl-sys", -] - -[[package]] -name = "openssl-macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.90", -] - [[package]] name = "openssl-probe" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" -[[package]] -name = "openssl-src" -version = "300.4.1+3.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "faa4eac4138c62414b5622d1b31c5c304f34b406b013c079c2bbc652fdd6678c" -dependencies = [ - "cc", -] - -[[package]] -name = 
"openssl-sys" -version = "0.9.104" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45abf306cbf99debc8195b66b7346498d7b10c210de50418b5ccd7ceba08c741" -dependencies = [ - "cc", - "libc", - "openssl-src", - "pkg-config", - "vcpkg", -] - [[package]] name = "ordered-float" version = "2.10.1" @@ -2751,7 +2632,7 @@ dependencies = [ "rustc-hash", "rustls 0.23.20", "socket2", - "thiserror 2.0.7", + "thiserror 2.0.8", "tokio", "tracing", ] @@ -2770,7 +2651,7 @@ dependencies = [ "rustls 0.23.20", "rustls-pki-types", "slab", - "thiserror 2.0.7", + "thiserror 2.0.8", "tinyvec", "tracing", "web-time", @@ -2778,9 +2659,9 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.8" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52cd4b1eff68bf27940dd39811292c49e007f4d0b4c357358dc9b0197be6b527" +checksum = "1c40286217b4ba3a71d644d752e6a0b71f13f1b6a2c5311acfcbe0c2418ed904" dependencies = [ "cfg_aliases", "libc", @@ -2910,8 +2791,6 @@ checksum = "a77c62af46e79de0a562e1a9849205ffcb7fc1238876e9bd743357570e04046f" dependencies = [ "base64 0.22.1", "bytes", - "encoding_rs", - "futures-channel", "futures-core", "futures-util", "h2", @@ -2920,13 +2799,11 @@ dependencies = [ "http-body-util", "hyper", "hyper-rustls", - "hyper-tls", "hyper-util", "ipnet", "js-sys", "log", "mime", - "native-tls", "once_cell", "percent-encoding", "pin-project-lite", @@ -2939,9 +2816,7 @@ dependencies = [ "serde_json", "serde_urlencoded", "sync_wrapper", - "system-configuration", "tokio", - "tokio-native-tls", "tokio-rustls", "tokio-util", "tower-service", @@ -3073,7 +2948,7 @@ dependencies = [ "openssl-probe", "rustls-pki-types", "schannel", - "security-framework 3.0.1", + "security-framework", ] [[package]] @@ -3172,25 +3047,12 @@ dependencies = [ [[package]] name = "security-framework" -version = "2.11.1" +version = "3.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +checksum = "81d3f8c9bfcc3cbb6b0179eb57042d75b1582bdc65c3cb95f3fa999509c03cbc" dependencies = [ "bitflags 2.6.0", - "core-foundation 0.9.4", - "core-foundation-sys", - "libc", - "security-framework-sys", -] - -[[package]] -name = "security-framework" -version = "3.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1415a607e92bec364ea2cf9264646dcce0f91e6d65281bd6f2819cca3bf39c8" -dependencies = [ - "bitflags 2.6.0", - "core-foundation 0.10.0", + "core-foundation", "core-foundation-sys", "libc", "security-framework-sys", @@ -3198,9 +3060,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.12.1" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa39c7303dc58b5543c94d22c1766b0d31f2ee58306363ea622b10bbc075eaa2" +checksum = "1863fd3768cd83c56a7f60faa4dc0d403f1b6df0a38c3c25f44b7894e45370d5" dependencies = [ "core-foundation-sys", "libc", @@ -3681,27 +3543,6 @@ dependencies = [ "syn 2.0.90", ] -[[package]] -name = "system-configuration" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" -dependencies = [ - "bitflags 2.6.0", - "core-foundation 0.9.4", - "system-configuration-sys", -] - -[[package]] -name = "system-configuration-sys" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" -dependencies = [ - "core-foundation-sys", - "libc", -] - [[package]] name = "target-lexicon" version = "0.12.16" @@ -3732,11 +3573,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.7" +version = "2.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93605438cbd668185516ab499d589afb7ee1859ea3d5fc8f6b0755e1c7443767" +checksum = 
"08f5383f3e0071702bf93ab5ee99b52d26936be9dedd9413067cbdcddcb6141a" dependencies = [ - "thiserror-impl 2.0.7", + "thiserror-impl 2.0.8", ] [[package]] @@ -3752,9 +3593,9 @@ dependencies = [ [[package]] name = "thiserror-impl" -version = "2.0.7" +version = "2.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1d8749b4531af2117677a5fcd12b1348a3fe2b81e36e61ffeac5c4aa3273e36" +checksum = "f2f357fcec90b3caef6623a099691be676d033b40a058ac95d2a6ade6fa0c943" dependencies = [ "proc-macro2", "quote", @@ -3852,16 +3693,6 @@ dependencies = [ "syn 2.0.90", ] -[[package]] -name = "tokio-native-tls" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" -dependencies = [ - "native-tls", - "tokio", -] - [[package]] name = "tokio-rustls" version = "0.26.1" diff --git a/python/geoarrow-io/Cargo.toml b/python/geoarrow-io/Cargo.toml index 0e1d93f4..bae96e03 100644 --- a/python/geoarrow-io/Cargo.toml +++ b/python/geoarrow-io/Cargo.toml @@ -41,7 +41,7 @@ object_store = { workspace = true, features = [ "azure", "gcp", "http", -], optional = true } +], optional = true, default-features = false } parquet = { workspace = true } pyo3 = { workspace = true } pyo3-arrow = { workspace = true } @@ -68,16 +68,6 @@ thiserror = { workspace = true } tokio = { version = "1.9", features = ["rt"], optional = true } url = "2.5" -# reqwest is pulled in by object store, but not used by python binding itself -# We statically link the rust-native rustls. For a binary wheel we want a -# static-compiled openssl anyways, and having it be native rust seems to make -# things simpler. 
-# See native roots discussion here: -# https://github.com/rustls/rustls-native-certs/blob/0ebeee01ed61ebeacd5ebb98e7f5694b0ee70a08/README.md#should-i-use-this-or-webpki-roots -reqwest = { version = "*", features = ["rustls-tls-native-roots"] } -# Compile openssl from source -openssl = { version = "0.10", features = ["vendored"] } - # Pin to fix strange pyodide compilation errors. # See https://github.com/geoarrow/geoarrow-rs/issues/868 cc = "1.1, <1.2" diff --git a/rust/geoarrow/Cargo.toml b/rust/geoarrow/Cargo.toml index 6582f52e..47c10f47 100644 --- a/rust/geoarrow/Cargo.toml +++ b/rust/geoarrow/Cargo.toml @@ -59,9 +59,8 @@ bytes = { version = "1.5.0", optional = true } chrono = { version = "0.4" } dbase = "0.5.0" enum-as-inner = "0.6.1" -# Set default-features = false because async not working in wasm right now -# For geo-traits impl -flatgeobuf = { version = "4.5", optional = true, default-features = false } +# TODO: update to 4.6 when released +flatgeobuf = { git = "https://github.com/kylebarron/flatgeobuf", rev = "06e987d6d3d73edb95124a14cdaab9ee8e6e57ac", version = "4.5", optional = true, default-features = false } futures = { version = "0.3", optional = true } gdal = { version = "0.17", optional = true } geo = "0.29.3" @@ -70,7 +69,8 @@ geo-traits = "0.2" geos = { version = "9.1.1", features = ["v3_10_0"], optional = true } geozero = { version = "0.14", features = ["with-wkb"] } half = { version = "2.4.1" } -http-range-client = { version = "0.8", optional = true } +# TODO: update to 0.9 when released +http-range-client = { git = "https://github.com/pka/http-range-client", rev = "5699e32fafc416ce683bfbf1d179f80b0b6549a3", version = "0.8", optional = true, default-features = false } indexmap = { version = "2" } lexical-core = { version = "0.8.5" } num-traits = "0.2.19"