Skip to content

Commit

Permalink
progress refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
kylebarron committed Dec 13, 2024
1 parent 3835461 commit ed7622d
Show file tree
Hide file tree
Showing 13 changed files with 396 additions and 255 deletions.
73 changes: 73 additions & 0 deletions rust/geodatafusion/src/udf/geo/accessors/envelope.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
use std::any::Any;
use std::sync::OnceLock;

use arrow_schema::DataType;
use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER;
use datafusion::logical_expr::{ColumnarValue, Documentation, ScalarUDFImpl, Signature};
use geoarrow::algorithm::native::BoundingRectArray;
use geoarrow::ArrayBase;

use crate::data_types::{any_single_geometry_type_input, parse_to_native_array, GEOMETRY_TYPE};
use crate::error::GeoDataFusionResult;

#[derive(Debug)]
pub(super) struct Envelope {
signature: Signature,
}

impl Envelope {
pub fn new() -> Self {
Self {
signature: any_single_geometry_type_input(),
}
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

impl ScalarUDFImpl for Envelope {
fn as_any(&self) -> &dyn Any {
self
}

fn name(&self) -> &str {
"st_envelope"
}

fn signature(&self) -> &Signature {
&self.signature
}

fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result<DataType> {
Ok(GEOMETRY_TYPE.into())
}

fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result<ColumnarValue> {
Ok(envelope_impl(args)?)
}

fn documentation(&self) -> Option<&Documentation> {
Some(DOCUMENTATION.get_or_init(|| {
Documentation::builder()
.with_doc_section(DOC_SECTION_OTHER)
.with_description(
"Computes a point which is the geometric center of mass of a geometry.",
)
.with_argument("g1", "geometry")
.build()
.unwrap()
}))
}
}

fn envelope_impl(args: &[ColumnarValue]) -> GeoDataFusionResult<ColumnarValue> {
let array = ColumnarValue::values_to_arrays(args)?
.into_iter()
.next()
.unwrap();
let native_array = parse_to_native_array(array)?;
// Since a RectArray is a valid normal geometry type for us, we don't have to cast it to a
// Geometry array. That just has overhead.
let output = native_array.as_ref().bounding_rect()?;
Ok(output.into_array_ref().into())
}
8 changes: 8 additions & 0 deletions rust/geodatafusion/src/udf/geo/accessors/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
mod envelope;

use datafusion::prelude::SessionContext;

/// Register all provided [geo] functions for constructing geometries
pub fn register_constructors(ctx: &SessionContext) {
ctx.register_udf(envelope::Envelope::new().into());
}
70 changes: 70 additions & 0 deletions rust/geodatafusion/src/udf/geo/bounding_box/box.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
use std::any::Any;
use std::sync::OnceLock;

use arrow_schema::DataType;
use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER;
use datafusion::logical_expr::{ColumnarValue, Documentation, ScalarUDFImpl, Signature};
use geoarrow::algorithm::native::BoundingRectArray;
use geoarrow::ArrayBase;

use crate::data_types::{any_single_geometry_type_input, parse_to_native_array, BOX2D_TYPE};
use crate::error::GeoDataFusionResult;

#[derive(Debug)]
pub(super) struct Box2D {
signature: Signature,
}

impl Box2D {
pub fn new() -> Self {
Self {
signature: any_single_geometry_type_input(),
}
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

impl ScalarUDFImpl for Box2D {
fn as_any(&self) -> &dyn Any {
self
}

fn name(&self) -> &str {
"st_box2d"
}

fn signature(&self) -> &Signature {
&self.signature
}

fn return_type(&self, _arg_types: &[DataType]) -> datafusion::error::Result<DataType> {
Ok(BOX2D_TYPE.into())
}

fn invoke(&self, args: &[ColumnarValue]) -> datafusion::error::Result<ColumnarValue> {
Ok(box2d_impl(args)?)
}

fn documentation(&self) -> Option<&Documentation> {
Some(DOCUMENTATION.get_or_init(|| {
Documentation::builder()
.with_doc_section(DOC_SECTION_OTHER)
.with_description("Returns a box2d representing the 2D extent of the geometry.")
.with_argument("geom", "geometry")
.build()
.unwrap()
}))
}
}

// Note: this is exactly the same impl as ST_Envelope. Perhaps we should use an alias instead
fn box2d_impl(args: &[ColumnarValue]) -> GeoDataFusionResult<ColumnarValue> {
let array = ColumnarValue::values_to_arrays(args)?
.into_iter()
.next()
.unwrap();
let native_array = parse_to_native_array(array)?;
let output = native_array.as_ref().bounding_rect()?;
Ok(output.into_array_ref().into())
}
Original file line number Diff line number Diff line change
@@ -1,100 +1,33 @@
//! Box functions
use std::any::Any;
use std::sync::{Arc, OnceLock};

use arrow::array::Float64Builder;
use arrow_array::ArrayRef;
use arrow_schema::DataType;
use datafusion::error::DataFusionError;
use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER;
use datafusion::logical_expr::{
ColumnarValue, Documentation, ScalarUDF, ScalarUDFImpl, Signature, Volatility,
};
use datafusion::prelude::create_udf;
use datafusion::logical_expr::{ColumnarValue, Documentation, ScalarUDFImpl, Signature};
use geo_traits::{CoordTrait, RectTrait};
use geoarrow::algorithm::native::BoundingRectArray;
use geoarrow::array::{GeometryArray, RectArray};
use geoarrow::datatypes::Dimension;
use geoarrow::array::RectArray;
use geoarrow::trait_::ArrayAccessor;
use geoarrow::ArrayBase;

use crate::data_types::{any_single_geometry_type_input, parse_to_native_array};
use crate::error::GeoDataFusionResult;
use crate::udf::geo::util::{
box2d_data_type, box3d_data_type, geometry_data_type, parse_single_arg_to_geometry_array,
};

/// Box2D
///
/// - Returns a BOX2D representing the maximum extents of the geometry.
pub fn box_2d() -> ScalarUDF {
create_udf(
"box2d",
vec![geometry_data_type()],
box2d_data_type(),
Volatility::Immutable,
Arc::new(|args: &[ColumnarValue]| Ok(_box2d(args)?)),
)
}

fn _box2d(args: &[ColumnarValue]) -> GeoDataFusionResult<ColumnarValue> {
let geom_arr = parse_single_arg_to_geometry_array(args)?;
Ok(geom_arr.bounding_rect().into_array_ref().into())
}

/// Returns X minima of a bounding box 2d or 3d or a geometry
pub fn xmin() -> ScalarUDF {
XMin::new().into()
}

/// Returns Y minima of a bounding box 2d or 3d or a geometry
pub fn ymin() -> ScalarUDF {
YMin::new().into()
}

/// Returns X maxima of a bounding box 2d or 3d or a geometry
pub fn xmax() -> ScalarUDF {
XMax::new().into()
}

/// Returns Y maxima of a bounding box 2d or 3d or a geometry
pub fn ymax() -> ScalarUDF {
YMax::new().into()
}

fn rect_array_from_array_ref(array: ArrayRef) -> datafusion::error::Result<RectArray> {
let data_type = array.data_type();
if box2d_data_type().equals_datatype(data_type) {
RectArray::try_from((array.as_ref(), Dimension::XY))
.map_err(|err| DataFusionError::External(Box::new(err)))
} else if box3d_data_type().equals_datatype(data_type) {
RectArray::try_from((array.as_ref(), Dimension::XYZ))
.map_err(|err| DataFusionError::External(Box::new(err)))
} else if geometry_data_type().equals_datatype(data_type) {
let geom_array = GeometryArray::try_from(array.as_ref())
.map_err(|err| DataFusionError::External(Box::new(err)))?;
Ok(geom_array.bounding_rect())
} else {
return Err(DataFusionError::Execution(format!(
"Unsupported input data type: {}",
data_type
)));
}
fn rect_array_from_array_ref(array: ArrayRef) -> GeoDataFusionResult<RectArray> {
let native_arr = parse_to_native_array(array)?;
Ok(native_arr.as_ref().bounding_rect()?)
}

#[derive(Debug)]
struct XMin {
pub(super) struct XMin {
signature: Signature,
}

impl XMin {
fn new() -> Self {
pub(super) fn new() -> Self {
Self {
signature: Signature::uniform(
1,
vec![box2d_data_type(), box3d_data_type(), geometry_data_type()],
Volatility::Immutable,
),
signature: any_single_geometry_type_input(),
}
}
}
Expand Down Expand Up @@ -155,18 +88,14 @@ impl ScalarUDFImpl for XMin {
}

#[derive(Debug)]
struct YMin {
pub(super) struct YMin {
signature: Signature,
}

impl YMin {
fn new() -> Self {
pub(super) fn new() -> Self {
Self {
signature: Signature::uniform(
1,
vec![box2d_data_type(), box3d_data_type(), geometry_data_type()],
Volatility::Immutable,
),
signature: any_single_geometry_type_input(),
}
}
}
Expand Down Expand Up @@ -227,18 +156,14 @@ impl ScalarUDFImpl for YMin {
}

#[derive(Debug)]
struct XMax {
pub(super) struct XMax {
signature: Signature,
}

impl XMax {
fn new() -> Self {
pub(super) fn new() -> Self {
Self {
signature: Signature::uniform(
1,
vec![box2d_data_type(), box3d_data_type(), geometry_data_type()],
Volatility::Immutable,
),
signature: any_single_geometry_type_input(),
}
}
}
Expand Down Expand Up @@ -297,18 +222,14 @@ impl ScalarUDFImpl for XMax {
}

#[derive(Debug)]
struct YMax {
pub(super) struct YMax {
signature: Signature,
}

impl YMax {
fn new() -> Self {
pub(super) fn new() -> Self {
Self {
signature: Signature::uniform(
1,
vec![box2d_data_type(), box3d_data_type(), geometry_data_type()],
Volatility::Immutable,
),
signature: any_single_geometry_type_input(),
}
}
}
Expand Down
13 changes: 13 additions & 0 deletions rust/geodatafusion/src/udf/geo/bounding_box/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
mod r#box;
mod extrema;

use datafusion::prelude::SessionContext;

/// Register all provided bounding box functions
pub fn register_constructors(ctx: &SessionContext) {
ctx.register_udf(extrema::XMin::new().into());
ctx.register_udf(extrema::YMin::new().into());
ctx.register_udf(extrema::XMax::new().into());
ctx.register_udf(extrema::YMax::new().into());
ctx.register_udf(r#box::Box2D::new().into());
}
Loading

0 comments on commit ed7622d

Please sign in to comment.