feat: add bounds for unary math scalar functions (#11584)

tshauck · web-flow · commit 5901df58b21b · 2024-07-24T15:21:13.000-04:00
* feat: unary udf function bounds

* feat: add bounds for more types

* feat: remove eprint

* fix: add missing bounds file

* tests: add tests for unary udf bounds

* tests: test f32 and f64

* build: remove unrelated changes

* refactor: better unbounded func name

* tests: fix tests

* refactor: use data_type method

* refactor: add more useful intervals to Interval

* refactor: use typed bounds for (-inf, inf)

* refactor: inf to unbounded

* refactor: add lower/upper pi bounds

* refactor: consts to consts module

* fix: add missing file

* fix: docstring typo

* refactor: remove unused signum bounds
diff --git a/datafusion/common/src/scalar/consts.rs b/datafusion/common/src/scalar/consts.rs
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Constants defined for scalar construction.
+
+// PI ~ 3.1415927 in f32
+#[allow(clippy::approx_constant)]
+pub(super) const PI_UPPER_F32: f32 = 3.141593_f32;
+
+// PI ~ 3.141592653589793 in f64
+pub(super) const PI_UPPER_F64: f64 = 3.141592653589794_f64;
+
+// -PI ~ -3.1415927 in f32
+#[allow(clippy::approx_constant)]
+pub(super) const NEGATIVE_PI_LOWER_F32: f32 = -3.141593_f32;
+
+// -PI ~ -3.141592653589793 in f64
+pub(super) const NEGATIVE_PI_LOWER_F64: f64 = -3.141592653589794_f64;
+
+// PI / 2 ~ 1.5707964 in f32
+pub(super) const FRAC_PI_2_UPPER_F32: f32 = 1.5707965_f32;
+
+// PI / 2 ~ 1.5707963267948966 in f64
+pub(super) const FRAC_PI_2_UPPER_F64: f64 = 1.5707963267948967_f64;
+
+// -PI / 2 ~ -1.5707964 in f32
+pub(super) const NEGATIVE_FRAC_PI_2_LOWER_F32: f32 = -1.5707965_f32;
+
+// -PI / 2 ~ -1.5707963267948966 in f64
+pub(super) const NEGATIVE_FRAC_PI_2_LOWER_F64: f64 = -1.5707963267948967_f64;
diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs
@@ -17,7 +17,9 @@
 
 //! [`ScalarValue`]: stores single  values
 
+mod consts;
 mod struct_builder;
+
 use std::borrow::Borrow;
 use std::cmp::Ordering;
 use std::collections::{HashSet, VecDeque};
@@ -1007,6 +1009,123 @@ impl ScalarValue {
         }
     }
 
+    /// Returns a [`ScalarValue`] representing PI
+    pub fn new_pi(datatype: &DataType) -> Result<ScalarValue> {
+        match datatype {
+            DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::PI)),
+            DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::PI)),
+            _ => _internal_err!("PI is not supported for data type: {:?}", datatype),
+        }
+    }
+
+    /// Returns a [`ScalarValue`] representing PI's upper bound
+    pub fn new_pi_upper(datatype: &DataType) -> Result<ScalarValue> {
+        // TODO: replace the constants with next_up/next_down when
+        // they are stabilized: https://doc.rust-lang.org/std/primitive.f64.html#method.next_up
+        match datatype {
+            DataType::Float32 => Ok(ScalarValue::from(consts::PI_UPPER_F32)),
+            DataType::Float64 => Ok(ScalarValue::from(consts::PI_UPPER_F64)),
+            _ => {
+                _internal_err!("PI_UPPER is not supported for data type: {:?}", datatype)
+            }
+        }
+    }
+
+    /// Returns a [`ScalarValue`] representing -PI's lower bound
+    pub fn new_negative_pi_lower(datatype: &DataType) -> Result<ScalarValue> {
+        match datatype {
+            DataType::Float32 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F32)),
+            DataType::Float64 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F64)),
+            _ => {
+                _internal_err!("-PI_LOWER is not supported for data type: {:?}", datatype)
+            }
+        }
+    }
+
+    /// Returns a [`ScalarValue`] representing FRAC_PI_2's upper bound
+    pub fn new_frac_pi_2_upper(datatype: &DataType) -> Result<ScalarValue> {
+        match datatype {
+            DataType::Float32 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F32)),
+            DataType::Float64 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F64)),
+            _ => {
+                _internal_err!(
+                    "PI_UPPER/2 is not supported for data type: {:?}",
+                    datatype
+                )
+            }
+        }
+    }
+
+    // Returns a [`ScalarValue`] representing FRAC_PI_2's lower bound
+    pub fn new_neg_frac_pi_2_lower(datatype: &DataType) -> Result<ScalarValue> {
+        match datatype {
+            DataType::Float32 => {
+                Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F32))
+            }
+            DataType::Float64 => {
+                Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F64))
+            }
+            _ => {
+                _internal_err!(
+                    "-PI/2_LOWER is not supported for data type: {:?}",
+                    datatype
+                )
+            }
+        }
+    }
+
+    /// Returns a [`ScalarValue`] representing -PI
+    pub fn new_negative_pi(datatype: &DataType) -> Result<ScalarValue> {
+        match datatype {
+            DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::PI)),
+            DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::PI)),
+            _ => _internal_err!("-PI is not supported for data type: {:?}", datatype),
+        }
+    }
+
+    /// Returns a [`ScalarValue`] representing PI/2
+    pub fn new_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> {
+        match datatype {
+            DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::FRAC_PI_2)),
+            DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::FRAC_PI_2)),
+            _ => _internal_err!("PI/2 is not supported for data type: {:?}", datatype),
+        }
+    }
+
+    /// Returns a [`ScalarValue`] representing -PI/2
+    pub fn new_neg_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> {
+        match datatype {
+            DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::FRAC_PI_2)),
+            DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::FRAC_PI_2)),
+            _ => _internal_err!("-PI/2 is not supported for data type: {:?}", datatype),
+        }
+    }
+
+    /// Returns a [`ScalarValue`] representing infinity
+    pub fn new_infinity(datatype: &DataType) -> Result<ScalarValue> {
+        match datatype {
+            DataType::Float32 => Ok(ScalarValue::from(f32::INFINITY)),
+            DataType::Float64 => Ok(ScalarValue::from(f64::INFINITY)),
+            _ => {
+                _internal_err!("Infinity is not supported for data type: {:?}", datatype)
+            }
+        }
+    }
+
+    /// Returns a [`ScalarValue`] representing negative infinity
+    pub fn new_neg_infinity(datatype: &DataType) -> Result<ScalarValue> {
+        match datatype {
+            DataType::Float32 => Ok(ScalarValue::from(f32::NEG_INFINITY)),
+            DataType::Float64 => Ok(ScalarValue::from(f64::NEG_INFINITY)),
+            _ => {
+                _internal_err!(
+                    "Negative Infinity is not supported for data type: {:?}",
+                    datatype
+                )
+            }
+        }
+    }
+
     /// Create a zero value in the given type.
     pub fn new_zero(datatype: &DataType) -> Result<ScalarValue> {
         Ok(match datatype {
diff --git a/datafusion/expr/src/interval_arithmetic.rs b/datafusion/expr/src/interval_arithmetic.rs
@@ -332,6 +332,38 @@ impl Interval {
         Ok(Self::new(unbounded_endpoint.clone(), unbounded_endpoint))
     }
 
+    /// Creates an interval between -1 to 1.
+    pub fn make_symmetric_unit_interval(data_type: &DataType) -> Result<Self> {
+        Self::try_new(
+            ScalarValue::new_negative_one(data_type)?,
+            ScalarValue::new_one(data_type)?,
+        )
+    }
+
+    /// Create an interval from -π to π.
+    pub fn make_symmetric_pi_interval(data_type: &DataType) -> Result<Self> {
+        Self::try_new(
+            ScalarValue::new_negative_pi_lower(data_type)?,
+            ScalarValue::new_pi_upper(data_type)?,
+        )
+    }
+
+    /// Create an interval from -π/2 to π/2.
+    pub fn make_symmetric_half_pi_interval(data_type: &DataType) -> Result<Self> {
+        Self::try_new(
+            ScalarValue::new_neg_frac_pi_2_lower(data_type)?,
+            ScalarValue::new_frac_pi_2_upper(data_type)?,
+        )
+    }
+
+    /// Create an interval from 0 to infinity.
+    pub fn make_non_negative_infinity_interval(data_type: &DataType) -> Result<Self> {
+        Self::try_new(
+            ScalarValue::new_zero(data_type)?,
+            ScalarValue::try_from(data_type)?,
+        )
+    }
+
     /// Returns a reference to the lower bound.
     pub fn lower(&self) -> &ScalarValue {
         &self.lower
diff --git a/datafusion/functions/src/macros.rs b/datafusion/functions/src/macros.rs
@@ -162,7 +162,7 @@ macro_rules! downcast_arg {
 /// $UNARY_FUNC: the unary function to apply to the argument
 /// $OUTPUT_ORDERING: the output ordering calculation method of the function
 macro_rules! make_math_unary_udf {
-    ($UDF:ident, $GNAME:ident, $NAME:ident, $UNARY_FUNC:ident, $OUTPUT_ORDERING:expr) => {
+    ($UDF:ident, $GNAME:ident, $NAME:ident, $UNARY_FUNC:ident, $OUTPUT_ORDERING:expr, $EVALUATE_BOUNDS:expr) => {
         make_udf_function!($NAME::$UDF, $GNAME, $NAME);
 
         mod $NAME {
@@ -172,6 +172,7 @@ macro_rules! make_math_unary_udf {
             use arrow::array::{ArrayRef, Float32Array, Float64Array};
             use arrow::datatypes::DataType;
             use datafusion_common::{exec_err, DataFusionError, Result};
+            use datafusion_expr::interval_arithmetic::Interval;
             use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
             use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
 
@@ -222,6 +223,10 @@ macro_rules! make_math_unary_udf {
                     $OUTPUT_ORDERING(input)
                 }
 
+                fn evaluate_bounds(&self, inputs: &[&Interval]) -> Result<Interval> {
+                    $EVALUATE_BOUNDS(inputs)
+                }
+
                 fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
                     let args = ColumnarValue::values_to_arrays(args)?;
 
diff --git a/datafusion/functions/src/math/bounds.rs b/datafusion/functions/src/math/bounds.rs
@@ -0,0 +1,108 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion_common::ScalarValue;
+use datafusion_expr::interval_arithmetic::Interval;
+
+pub(super) fn unbounded_bounds(input: &[&Interval]) -> crate::Result<Interval> {
+    let data_type = input[0].data_type();
+
+    Interval::make_unbounded(&data_type)
+}
+
+pub(super) fn sin_bounds(input: &[&Interval]) -> crate::Result<Interval> {
+    // sin(x) is bounded by [-1, 1]
+    let data_type = input[0].data_type();
+
+    Interval::make_symmetric_unit_interval(&data_type)
+}
+
+pub(super) fn asin_bounds(input: &[&Interval]) -> crate::Result<Interval> {
+    // asin(x) is bounded by [-π/2, π/2]
+    let data_type = input[0].data_type();
+
+    Interval::make_symmetric_half_pi_interval(&data_type)
+}
+
+pub(super) fn atan_bounds(input: &[&Interval]) -> crate::Result<Interval> {
+    // atan(x) is bounded by [-π/2, π/2]
+    let data_type = input[0].data_type();
+
+    Interval::make_symmetric_half_pi_interval(&data_type)
+}
+
+pub(super) fn acos_bounds(input: &[&Interval]) -> crate::Result<Interval> {
+    // acos(x) is bounded by [0, π]
+    let data_type = input[0].data_type();
+
+    Interval::try_new(
+        ScalarValue::new_zero(&data_type)?,
+        ScalarValue::new_pi_upper(&data_type)?,
+    )
+}
+
+pub(super) fn acosh_bounds(input: &[&Interval]) -> crate::Result<Interval> {
+    // acosh(x) is bounded by [0, ∞)
+    let data_type = input[0].data_type();
+
+    Interval::make_non_negative_infinity_interval(&data_type)
+}
+
+pub(super) fn cos_bounds(input: &[&Interval]) -> crate::Result<Interval> {
+    // cos(x) is bounded by [-1, 1]
+    let data_type = input[0].data_type();
+
+    Interval::make_symmetric_unit_interval(&data_type)
+}
+
+pub(super) fn cosh_bounds(input: &[&Interval]) -> crate::Result<Interval> {
+    // cosh(x) is bounded by [1, ∞)
+    let data_type = input[0].data_type();
+
+    Interval::try_new(
+        ScalarValue::new_one(&data_type)?,
+        ScalarValue::try_from(&data_type)?,
+    )
+}
+
+pub(super) fn exp_bounds(input: &[&Interval]) -> crate::Result<Interval> {
+    // exp(x) is bounded by [0, ∞)
+    let data_type = input[0].data_type();
+
+    Interval::make_non_negative_infinity_interval(&data_type)
+}
+
+pub(super) fn radians_bounds(input: &[&Interval]) -> crate::Result<Interval> {
+    // radians(x) is bounded by (-π, π)
+    let data_type = input[0].data_type();
+
+    Interval::make_symmetric_pi_interval(&data_type)
+}
+
+pub(super) fn sqrt_bounds(input: &[&Interval]) -> crate::Result<Interval> {
+    // sqrt(x) is bounded by [0, ∞)
+    let data_type = input[0].data_type();
+
+    Interval::make_non_negative_infinity_interval(&data_type)
+}
+
+pub(super) fn tanh_bounds(input: &[&Interval]) -> crate::Result<Interval> {
+    // tanh(x) is bounded by (-1, 1)
+    let data_type = input[0].data_type();
+
+    Interval::make_symmetric_unit_interval(&data_type)
+}
diff --git a/datafusion/functions/src/math/mod.rs b/datafusion/functions/src/math/mod.rs
diff --git a/datafusion/functions/src/math/monotonicity.rs b/datafusion/functions/src/math/monotonicity.rs