Skip to content

Commit fe71aa2

Browse files
authored
Move Nanvl and random functions to datafusion-functions (#10017)
* Move Nanvl and random functions to datafusion-functions
* Remove extraneous test udf.
1 parent ff2d202 commit fe71aa2

File tree

19 files changed

+360
-246
lines changed

19 files changed

+360
-246
lines changed

datafusion-cli/Cargo.lock

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/core/tests/simplification.rs

+8-7
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,10 @@ use datafusion_expr::expr::ScalarFunction;
2828
use datafusion_expr::logical_plan::builder::table_scan_with_filters;
2929
use datafusion_expr::simplify::SimplifyInfo;
3030
use datafusion_expr::{
31-
expr, table_scan, BuiltinScalarFunction, Cast, ColumnarValue, Expr, ExprSchemable,
32-
LogicalPlan, LogicalPlanBuilder, ScalarUDF, Volatility,
31+
expr, table_scan, Cast, ColumnarValue, Expr, ExprSchemable, LogicalPlan,
32+
LogicalPlanBuilder, ScalarUDF, Volatility,
3333
};
34+
use datafusion_functions::math;
3435
use datafusion_optimizer::simplify_expressions::{ExprSimplifier, SimplifyExpressions};
3536
use datafusion_optimizer::{OptimizerContext, OptimizerRule};
3637
use std::sync::Arc;
@@ -383,17 +384,17 @@ fn test_const_evaluator_scalar_functions() {
383384

384385
// volatile / stable functions should not be evaluated
385386
// rand() + (1 + 2) --> rand() + 3
386-
let fun = BuiltinScalarFunction::Random;
387-
assert_eq!(fun.volatility(), Volatility::Volatile);
388-
let rand = Expr::ScalarFunction(ScalarFunction::new(fun, vec![]));
387+
let fun = math::random();
388+
assert_eq!(fun.signature().volatility, Volatility::Volatile);
389+
let rand = Expr::ScalarFunction(ScalarFunction::new_udf(fun, vec![]));
389390
let expr = rand.clone() + (lit(1) + lit(2));
390391
let expected = rand + lit(3);
391392
test_evaluate(expr, expected);
392393

393394
// parenthesization matters: can't rewrite
394395
// (rand() + 1) + 2 --> (rand() + 1) + 2
395-
let fun = BuiltinScalarFunction::Random;
396-
let rand = Expr::ScalarFunction(ScalarFunction::new(fun, vec![]));
396+
let fun = math::random();
397+
let rand = Expr::ScalarFunction(ScalarFunction::new_udf(fun, vec![]));
397398
let expr = (rand + lit(1)) + lit(2);
398399
test_evaluate(expr.clone(), expr);
399400
}

datafusion/expr/src/built_in_function.rs

-21
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,6 @@ pub enum BuiltinScalarFunction {
4545
Exp,
4646
/// factorial
4747
Factorial,
48-
/// nanvl
49-
Nanvl,
5048
// string functions
5149
/// concat
5250
Concat,
@@ -56,8 +54,6 @@ pub enum BuiltinScalarFunction {
5654
EndsWith,
5755
/// initcap
5856
InitCap,
59-
/// random
60-
Random,
6157
}
6258

6359
/// Maps the sql function name to `BuiltinScalarFunction`
@@ -114,14 +110,10 @@ impl BuiltinScalarFunction {
114110
BuiltinScalarFunction::Coalesce => Volatility::Immutable,
115111
BuiltinScalarFunction::Exp => Volatility::Immutable,
116112
BuiltinScalarFunction::Factorial => Volatility::Immutable,
117-
BuiltinScalarFunction::Nanvl => Volatility::Immutable,
118113
BuiltinScalarFunction::Concat => Volatility::Immutable,
119114
BuiltinScalarFunction::ConcatWithSeparator => Volatility::Immutable,
120115
BuiltinScalarFunction::EndsWith => Volatility::Immutable,
121116
BuiltinScalarFunction::InitCap => Volatility::Immutable,
122-
123-
// Volatile builtin functions
124-
BuiltinScalarFunction::Random => Volatility::Volatile,
125117
}
126118
}
127119

@@ -152,16 +144,10 @@ impl BuiltinScalarFunction {
152144
BuiltinScalarFunction::InitCap => {
153145
utf8_to_str_type(&input_expr_types[0], "initcap")
154146
}
155-
BuiltinScalarFunction::Random => Ok(Float64),
156147
BuiltinScalarFunction::EndsWith => Ok(Boolean),
157148

158149
BuiltinScalarFunction::Factorial => Ok(Int64),
159150

160-
BuiltinScalarFunction::Nanvl => match &input_expr_types[0] {
161-
Float32 => Ok(Float32),
162-
_ => Ok(Float64),
163-
},
164-
165151
BuiltinScalarFunction::Ceil | BuiltinScalarFunction::Exp => {
166152
match input_expr_types[0] {
167153
Float32 => Ok(Float32),
@@ -199,11 +185,6 @@ impl BuiltinScalarFunction {
199185
],
200186
self.volatility(),
201187
),
202-
BuiltinScalarFunction::Random => Signature::exact(vec![], self.volatility()),
203-
BuiltinScalarFunction::Nanvl => Signature::one_of(
204-
vec![Exact(vec![Float32, Float32]), Exact(vec![Float64, Float64])],
205-
self.volatility(),
206-
),
207188
BuiltinScalarFunction::Factorial => {
208189
Signature::uniform(1, vec![Int64], self.volatility())
209190
}
@@ -240,8 +221,6 @@ impl BuiltinScalarFunction {
240221
BuiltinScalarFunction::Ceil => &["ceil"],
241222
BuiltinScalarFunction::Exp => &["exp"],
242223
BuiltinScalarFunction::Factorial => &["factorial"],
243-
BuiltinScalarFunction::Nanvl => &["nanvl"],
244-
BuiltinScalarFunction::Random => &["random"],
245224

246225
// conditional functions
247226
BuiltinScalarFunction::Coalesce => &["coalesce"],

datafusion/expr/src/expr.rs

+2-9
Original file line numberDiff line numberDiff line change
@@ -1903,8 +1903,8 @@ mod test {
19031903
use crate::expr::Cast;
19041904
use crate::expr_fn::col;
19051905
use crate::{
1906-
case, lit, BuiltinScalarFunction, ColumnarValue, Expr, ScalarFunctionDefinition,
1907-
ScalarUDF, ScalarUDFImpl, Signature, Volatility,
1906+
case, lit, ColumnarValue, Expr, ScalarFunctionDefinition, ScalarUDF,
1907+
ScalarUDFImpl, Signature, Volatility,
19081908
};
19091909
use arrow::datatypes::DataType;
19101910
use datafusion_common::Column;
@@ -2018,13 +2018,6 @@ mod test {
20182018

20192019
#[test]
20202020
fn test_is_volatile_scalar_func_definition() {
2021-
// BuiltIn
2022-
assert!(
2023-
ScalarFunctionDefinition::BuiltIn(BuiltinScalarFunction::Random)
2024-
.is_volatile()
2025-
.unwrap()
2026-
);
2027-
20282021
// UDF
20292022
#[derive(Debug)]
20302023
struct TestScalarUDF {

datafusion/expr/src/expr_fn.rs

-7
Original file line numberDiff line numberDiff line change
@@ -297,11 +297,6 @@ pub fn concat_ws(sep: Expr, values: Vec<Expr>) -> Expr {
297297
))
298298
}
299299

300-
/// Returns a random value in the range 0.0 <= x < 1.0
301-
pub fn random() -> Expr {
302-
Expr::ScalarFunction(ScalarFunction::new(BuiltinScalarFunction::Random, vec![]))
303-
}
304-
305300
/// Returns the approximate number of distinct input values.
306301
/// This function provides an approximation of count(DISTINCT x).
307302
/// Zero is returned if all input values are null.
@@ -550,7 +545,6 @@ nary_scalar_expr!(
550545
"concatenates several strings, placing a seperator between each one"
551546
);
552547
nary_scalar_expr!(Concat, concat_expr, "concatenates several strings");
553-
scalar_expr!(Nanvl, nanvl, x y, "returns x if x is not NaN otherwise returns y");
554548

555549
/// Create a CASE WHEN statement with literal WHEN expressions for comparison to the base expression.
556550
pub fn case(expr: Expr) -> CaseBuilder {
@@ -922,7 +916,6 @@ mod test {
922916
test_unary_scalar_expr!(Factorial, factorial);
923917
test_unary_scalar_expr!(Ceil, ceil);
924918
test_unary_scalar_expr!(Exp, exp);
925-
test_scalar_expr!(Nanvl, nanvl, x, y);
926919

927920
test_scalar_expr!(InitCap, initcap, string);
928921
test_scalar_expr!(EndsWith, ends_with, string, characters);

datafusion/expr/src/signature.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ pub enum Volatility {
5151
Stable,
5252
/// A volatile function may change the return value from evaluation to evaluation.
5353
/// Multiple invocations of a volatile function may return different results when used in the
54-
/// same query. An example of this is [super::BuiltinScalarFunction::Random]. DataFusion
54+
/// same query. An example of this is the random() function. DataFusion
5555
/// cannot evaluate such functions during planning.
5656
/// In the query `select col1, random() from t1`, `random()` function will be evaluated
5757
/// for each output row, resulting in a unique random value for each row.

datafusion/functions/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ hex = { version = "0.4", optional = true }
7777
itertools = { workspace = true }
7878
log = { workspace = true }
7979
md-5 = { version = "^0.10.0", optional = true }
80+
rand = { workspace = true }
8081
regex = { version = "1.8", optional = true }
8182
sha2 = { version = "^0.10.1", optional = true }
8283
unicode-segmentation = { version = "^1.7.1", optional = true }

datafusion/functions/src/math/mod.rs

+16
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,10 @@ pub mod iszero;
2727
pub mod lcm;
2828
pub mod log;
2929
pub mod nans;
30+
pub mod nanvl;
3031
pub mod pi;
3132
pub mod power;
33+
pub mod random;
3234
pub mod round;
3335
pub mod trunc;
3436

@@ -55,9 +57,11 @@ make_udf_function!(lcm::LcmFunc, LCM, lcm);
5557
make_math_unary_udf!(LnFunc, LN, ln, ln, Some(vec![Some(true)]));
5658
make_math_unary_udf!(Log2Func, LOG2, log2, log2, Some(vec![Some(true)]));
5759
make_math_unary_udf!(Log10Func, LOG10, log10, log10, Some(vec![Some(true)]));
60+
make_udf_function!(nanvl::NanvlFunc, NANVL, nanvl);
5861
make_udf_function!(pi::PiFunc, PI, pi);
5962
make_udf_function!(power::PowerFunc, POWER, power);
6063
make_math_unary_udf!(RadiansFunc, RADIANS, radians, to_radians, None);
64+
make_udf_function!(random::RandomFunc, RANDOM, random);
6165
make_udf_function!(round::RoundFunc, ROUND, round);
6266
make_math_unary_udf!(SignumFunc, SIGNUM, signum, signum, None);
6367
make_math_unary_udf!(SinFunc, SIN, sin, sin, None);
@@ -180,6 +184,11 @@ pub mod expr_fn {
180184
super::log10().call(vec![num])
181185
}
182186

187+
#[doc = "returns x if x is not NaN otherwise returns y"]
188+
pub fn nanvl(x: Expr, y: Expr) -> Expr {
189+
super::nanvl().call(vec![x, y])
190+
}
191+
183192
#[doc = "Returns an approximate value of π"]
184193
pub fn pi() -> Expr {
185194
super::pi().call(vec![])
@@ -195,6 +204,11 @@ pub mod expr_fn {
195204
super::radians().call(vec![num])
196205
}
197206

207+
#[doc = "Returns a random value in the range 0.0 <= x < 1.0"]
208+
pub fn random() -> Expr {
209+
super::random().call(vec![])
210+
}
211+
198212
#[doc = "round to nearest integer"]
199213
pub fn round(args: Vec<Expr>) -> Expr {
200214
super::round().call(args)
@@ -261,9 +275,11 @@ pub fn functions() -> Vec<Arc<ScalarUDF>> {
261275
log(),
262276
log2(),
263277
log10(),
278+
nanvl(),
264279
pi(),
265280
power(),
266281
radians(),
282+
random(),
267283
round(),
268284
signum(),
269285
sin(),

0 commit comments

Comments
 (0)