Skip to content

Commit 95d296c

Browse files
Support n-ary monotonic functions in ordering equivalence (#13841)
* Support n-ary monotonic functions in `discover_new_orderings`
* Add tests for n-ary monotonic functions in `discover_new_orderings`
* Fix tests
* Fix non-monotonic test case
* Fix unintended simplification
* Minor comment changes
* Fix tests
* Add `preserves_lex_ordering` field
* Use `preserves_lex_ordering` on `discover_new_orderings()`
* Add `output_ordering` and `output_preserves_lex_ordering` implementations for `ConcatFunc`
* Update tests
* Move logic to UDF
* Cargo fmt
* Refactor
* Cargo fmt
* Simply use false value on default implementation
* Remove unnecessary import
* Clippy fix
* Update Cargo.lock
* Move dep to dev-dependencies
* Rename output_preserves_lex_ordering to preserves_lex_ordering
* minor

---------

Co-authored-by: berkaysynnada <[email protected]>
1 parent 5d563d9 commit 95d296c

File tree

9 files changed

+269
-38
lines changed

9 files changed

+269
-38
lines changed

datafusion/expr-common/src/sort_properties.rs

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,19 +129,30 @@ impl Neg for SortProperties {
129129
}
130130
}
131131

132-
/// Represents the properties of a `PhysicalExpr`, including its sorting and range attributes.
132+
/// Represents the properties of a `PhysicalExpr`, including its sorting,
133+
/// range, and whether it preserves lexicographical ordering.
133134
#[derive(Debug, Clone)]
134135
pub struct ExprProperties {
136+
/// Properties that describe the sorting behavior of the expression,
137+
/// such as whether it is ordered, unordered, or a singleton value.
135138
pub sort_properties: SortProperties,
139+
/// A closed interval representing the range of possible values for
140+
/// the expression. Used to compute reliable bounds.
136141
pub range: Interval,
142+
/// Indicates whether the expression preserves lexicographical ordering
143+
/// of its inputs. For example, string concatenation preserves ordering,
144+
/// while addition does not.
/// [`ExprProperties::new_unknown`] initializes this flag to `false`.
145+
pub preserves_lex_ordering: bool,
137146
}
138147

139148
impl ExprProperties {
140-
/// Creates a new `ExprProperties` instance with unknown sort properties and unknown range.
149+
/// Creates a new `ExprProperties` instance with unknown sort properties,
150+
/// unknown range, and lexicographical ordering preservation set to `false`.
141151
pub fn new_unknown() -> Self {
142152
Self {
143153
sort_properties: SortProperties::default(),
144154
range: Interval::make_unbounded(&DataType::Null).unwrap(),
155+
// Conservative choice: ordering preservation is not assumed.
preserves_lex_ordering: false,
145156
}
146157
}
147158

@@ -156,4 +167,10 @@ impl ExprProperties {
156167
self.range = range;
157168
self
158169
}
170+
171+
/// Sets whether the expression preserves the lexicographical ordering of
/// its inputs and returns the modified instance.
172+
pub fn with_preserves_lex_ordering(mut self, preserves_lex_ordering: bool) -> Self {
173+
self.preserves_lex_ordering = preserves_lex_ordering;
174+
self
175+
}
159176
}

datafusion/expr/src/udf.rs

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,10 @@ impl ScalarUDF {
303303
self.inner.output_ordering(inputs)
304304
}
305305

306+
/// Forwards `inputs` to the inner implementation's `preserves_lex_ordering`.
///
/// See [`ScalarUDFImpl::preserves_lex_ordering`] for more details.
pub fn preserves_lex_ordering(&self, inputs: &[ExprProperties]) -> Result<bool> {
307+
self.inner.preserves_lex_ordering(inputs)
308+
}
309+
306310
/// See [`ScalarUDFImpl::coerce_types`] for more details.
307311
pub fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
308312
self.inner.coerce_types(arg_types)
@@ -650,10 +654,30 @@ pub trait ScalarUDFImpl: Debug + Send + Sync {
650654
Ok(Some(vec![]))
651655
}
652656

653-
/// Calculates the [`SortProperties`] of this function based on its
654-
/// children's properties.
655-
fn output_ordering(&self, _inputs: &[ExprProperties]) -> Result<SortProperties> {
656-
Ok(SortProperties::Unordered)
657+
/// Calculates the [`SortProperties`] of this function based on its children's properties.
///
/// The default implementation:
/// - returns `SortProperties::Unordered` when `preserves_lex_ordering`
///   reports `false` for `inputs`;
/// - returns `SortProperties::Singleton` when `inputs` is empty;
/// - returns the first input's sort properties when every other input has
///   equal sort properties;
/// - returns `SortProperties::Unordered` otherwise.
///
/// Any error from `preserves_lex_ordering` is propagated to the caller.
658+
fn output_ordering(&self, inputs: &[ExprProperties]) -> Result<SortProperties> {
659+
// Without lexicographical-ordering preservation nothing can be claimed.
if !self.preserves_lex_ordering(inputs)? {
660+
return Ok(SortProperties::Unordered);
661+
}
662+
663+
// No inputs at all: report a singleton.
let Some(first_order) = inputs.first().map(|p| &p.sort_properties) else {
664+
return Ok(SortProperties::Singleton);
665+
};
666+
667+
// The output inherits an ordering only if all inputs agree exactly.
// NOTE(review): a mix of `Singleton` and ordered inputs compares unequal
// here and therefore yields `Unordered` — confirm this is intended.
if inputs
668+
.iter()
669+
.skip(1)
670+
.all(|input| &input.sort_properties == first_order)
671+
{
672+
Ok(*first_order)
673+
} else {
674+
Ok(SortProperties::Unordered)
675+
}
676+
}
677+
678+
/// Returns whether this function preserves the lexicographical ordering of
/// its inputs, given the inputs' properties.
///
/// The default implementation ignores `_inputs` and returns `Ok(false)`.
679+
fn preserves_lex_ordering(&self, _inputs: &[ExprProperties]) -> Result<bool> {
680+
Ok(false)
657681
}
658682

659683
/// Coerce arguments of a function call to types that the function can evaluate.
@@ -809,6 +833,10 @@ impl ScalarUDFImpl for AliasedScalarUDFImpl {
809833
self.inner.output_ordering(inputs)
810834
}
811835

836+
// Forwards to `self.inner`, passing `inputs` through unchanged, so the alias
// reports the same answer as the function it wraps.
fn preserves_lex_ordering(&self, inputs: &[ExprProperties]) -> Result<bool> {
837+
self.inner.preserves_lex_ordering(inputs)
838+
}
839+
812840
fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
813841
self.inner.coerce_types(arg_types)
814842
}

datafusion/functions/src/string/concat.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
use arrow::array::{as_largestring_array, Array};
1919
use arrow::datatypes::DataType;
20+
use datafusion_expr::sort_properties::ExprProperties;
2021
use std::any::Any;
2122
use std::sync::{Arc, OnceLock};
2223

@@ -265,6 +266,10 @@ impl ScalarUDFImpl for ConcatFunc {
265266
// Returns the documentation produced by `get_concat_doc()`, always wrapped in `Some`.
fn documentation(&self) -> Option<&Documentation> {
266267
Some(get_concat_doc())
267268
}
269+
270+
// Always reports `true`, regardless of `_inputs`: this function is declared
// to preserve the lexicographical ordering of its inputs.
fn preserves_lex_ordering(&self, _inputs: &[ExprProperties]) -> Result<bool> {
271+
Ok(true)
272+
}
268273
}
269274

270275
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

datafusion/physical-expr/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ petgraph = "0.6.2"
5757
[dev-dependencies]
5858
arrow = { workspace = true, features = ["test_utils"] }
5959
criterion = "0.5"
60+
datafusion-functions = { workspace = true }
6061
rand = { workspace = true }
6162
rstest = { workspace = true }
6263

0 commit comments

Comments
 (0)