Skip to content

Commit 05bb569

Browse files
feat: Add array_max function support (#14470)
* Issue-14469 - feat: Add array_max function * Address review comments * Address review comments II * Address review comments III
1 parent 34efd1f commit 05bb569

File tree

5 files changed

+260
-1
lines changed

5 files changed

+260
-1
lines changed

datafusion/functions-aggregate/src/min_max.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -573,7 +573,7 @@ fn min_batch(values: &ArrayRef) -> Result<ScalarValue> {
573573
}
574574

575575
/// dynamically-typed max(array) -> ScalarValue
576-
fn max_batch(values: &ArrayRef) -> Result<ScalarValue> {
576+
pub fn max_batch(values: &ArrayRef) -> Result<ScalarValue> {
577577
Ok(match values.data_type() {
578578
DataType::Utf8 => {
579579
typed_min_max_batch_string!(values, StringArray, Utf8, max_string)

datafusion/functions-nested/src/lib.rs

+2
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ pub mod map;
5252
pub mod map_extract;
5353
pub mod map_keys;
5454
pub mod map_values;
55+
pub mod max;
5556
pub mod planner;
5657
pub mod position;
5758
pub mod range;
@@ -144,6 +145,7 @@ pub fn all_default_nested_functions() -> Vec<Arc<ScalarUDF>> {
144145
length::array_length_udf(),
145146
distance::array_distance_udf(),
146147
flatten::flatten_udf(),
148+
max::array_max_udf(),
147149
sort::array_sort_udf(),
148150
repeat::array_repeat_udf(),
149151
resize::array_resize_udf(),
+137
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! [`ScalarUDFImpl`] definitions for array_max function.
19+
use crate::utils::make_scalar_function;
20+
use arrow::array::ArrayRef;
21+
use arrow::datatypes::DataType;
22+
use arrow::datatypes::DataType::List;
23+
use datafusion_common::cast::as_list_array;
24+
use datafusion_common::utils::take_function_args;
25+
use datafusion_common::{exec_err, ScalarValue};
26+
use datafusion_doc::Documentation;
27+
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
28+
use datafusion_functions_aggregate::min_max;
29+
use datafusion_macros::user_doc;
30+
use itertools::Itertools;
31+
use std::any::Any;
32+
33+
// Invokes the crate's `make_udf_expr_and_func!` macro for the `ArrayMax` UDF.
// The last argument names the `array_max_udf` accessor, which lib.rs calls to
// add this function to `all_default_nested_functions()`.
// NOTE(review): the macro definition is not visible here; presumably it also
// emits an `array_max(array)` expression helper documented by the string
// literal below -- confirm against the macro source.
make_udf_expr_and_func!(
34+
ArrayMax,
35+
array_max,
36+
array,
37+
"returns the maximum value in the array.",
38+
array_max_udf
39+
);
40+
41+
// Scalar UDF type behind the `array_max` SQL function (alias `list_max`).
// The `user_doc` attribute below carries the user-facing documentation:
// section, description, syntax, an SQL example and the single `array` argument.
#[user_doc(
42+
doc_section(label = "Array Functions"),
43+
description = "Returns the maximum value in the array.",
44+
syntax_example = "array_max(array)",
45+
sql_example = r#"```sql
46+
> select array_max([3,1,4,2]);
47+
+-----------------------------------------+
48+
| array_max(List([3,1,4,2])) |
49+
+-----------------------------------------+
50+
| 4 |
51+
+-----------------------------------------+
52+
```"#,
53+
argument(
54+
name = "array",
55+
description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
56+
)
57+
)]
58+
#[derive(Debug)]
59+
pub struct ArrayMax {
60+
// Argument signature handed out by `ScalarUDFImpl::signature`.
signature: Signature,
61+
// Alternative SQL names handed out by `ScalarUDFImpl::aliases`.
aliases: Vec<String>,
62+
}
63+
64+
impl Default for ArrayMax {
65+
fn default() -> Self {
66+
Self::new()
67+
}
68+
}
69+
70+
impl ArrayMax {
71+
pub fn new() -> Self {
72+
Self {
73+
signature: Signature::array(Volatility::Immutable),
74+
aliases: vec!["list_max".to_string()],
75+
}
76+
}
77+
}
78+
79+
impl ScalarUDFImpl for ArrayMax {
80+
fn as_any(&self) -> &dyn Any {
81+
self
82+
}
83+
84+
fn name(&self) -> &str {
85+
"array_max"
86+
}
87+
88+
fn signature(&self) -> &Signature {
89+
&self.signature
90+
}
91+
92+
fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result<DataType> {
93+
match &arg_types[0] {
94+
List(field) => Ok(field.data_type().clone()),
95+
_ => exec_err!("Not reachable, data_type should be List"),
96+
}
97+
}
98+
99+
fn invoke_batch(
100+
&self,
101+
args: &[ColumnarValue],
102+
_number_rows: usize,
103+
) -> datafusion_common::Result<ColumnarValue> {
104+
make_scalar_function(array_max_inner)(args)
105+
}
106+
107+
fn aliases(&self) -> &[String] {
108+
&self.aliases
109+
}
110+
111+
fn documentation(&self) -> Option<&Documentation> {
112+
self.doc()
113+
}
114+
}
115+
116+
/// array_max SQL function
117+
///
118+
/// There is one argument for array_max as the array.
119+
/// `array_max(array)`
120+
///
121+
/// For example:
122+
/// > array_max(\[1, 3, 2]) -> 3
123+
pub fn array_max_inner(args: &[ArrayRef]) -> datafusion_common::Result<ArrayRef> {
124+
let [arg1] = take_function_args("array_max", args)?;
125+
126+
match arg1.data_type() {
127+
List(_) => {
128+
let input_list_array = as_list_array(&arg1)?;
129+
let result_vec = input_list_array
130+
.iter()
131+
.flat_map(|arr| min_max::max_batch(&arr.unwrap()))
132+
.collect_vec();
133+
ScalarValue::iter_to_array(result_vec)
134+
}
135+
_ => exec_err!("array_max does not support type: {:?}", arg1.data_type()),
136+
}
137+
}

datafusion/sqllogictest/test_files/array.slt

+87
Original file line numberDiff line numberDiff line change
@@ -1435,6 +1435,93 @@ NULL 23
14351435
NULL 43
14361436
5 NULL
14371437

1438+
1439+
## array_max (aliases: `list_max`)
1440+
# array_max scalar function with literal arrays
1441+
query I
1442+
select array_max(make_array(5, 3, 6, 4));
1443+
----
1444+
6
1445+
1446+
query I
1447+
select array_max(make_array(5, 3, 4, NULL, 6, NULL));
1448+
----
1449+
6
1450+
1451+
query I
1452+
select array_max(make_array(NULL, NULL));
1453+
----
1454+
NULL
1455+
1456+
query T
1457+
select array_max(make_array('h', 'e', 'o', 'l', 'l'));
1458+
----
1459+
o
1460+
1461+
query T
1462+
select array_max(make_array('h', 'e', 'l', NULL, 'l', 'o', NULL));
1463+
----
1464+
o
1465+
1466+
query B
1467+
select array_max(make_array(false, true, false, true));
1468+
----
1469+
true
1470+
1471+
query B
1472+
select array_max(make_array(false, true, NULL, false, true));
1473+
----
1474+
true
1475+
1476+
query D
1477+
select array_max(make_array(DATE '1992-09-01', DATE '1993-03-01', DATE '1999-05-01', DATE '1985-11-01'));
1478+
----
1479+
1999-05-01
1480+
1481+
query D
1482+
select array_max(make_array(DATE '1995-09-01', DATE '1999-05-01', DATE '1993-03-01', NULL));
1483+
----
1484+
1999-05-01
1485+
1486+
query P
1487+
select array_max(make_array(TIMESTAMP '1992-09-01', TIMESTAMP '1995-06-01', TIMESTAMP '1984-10-01'));
1488+
----
1489+
1995-06-01T00:00:00
1490+
1491+
query P
1492+
select array_max(make_array(NULL, TIMESTAMP '1996-10-01', TIMESTAMP '1995-06-01'));
1493+
----
1494+
1996-10-01T00:00:00
1495+
1496+
query R
1497+
select array_max(make_array(5.1, -3.2, 6.3, 4.9));
1498+
----
1499+
6.3
1500+
1501+
query ?I
1502+
select input, array_max(input) from (select make_array(d - 1, d, d + 1) input from (values (0), (10), (20), (30), (NULL)) t(d))
1503+
----
1504+
[-1, 0, 1] 1
1505+
[9, 10, 11] 11
1506+
[19, 20, 21] 21
1507+
[29, 30, 31] 31
1508+
[NULL, NULL, NULL] NULL
1509+
1510+
query II
1511+
select array_max(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_max(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'));
1512+
----
1513+
3 1
1514+
1515+
query I
1516+
select array_max(make_array());
1517+
----
1518+
NULL
1519+
1520+
# Testing with empty arguments should result in an error
1521+
query error DataFusion error: Error during planning: 'array_max' does not support zero arguments
1522+
select array_max();
1523+
1524+
14381525
## array_pop_back (aliases: `list_pop_back`)
14391526

14401527
# array_pop_back scalar function with null

docs/source/user-guide/sql/scalar_functions.md

+33
Original file line numberDiff line numberDiff line change
@@ -2524,6 +2524,7 @@ _Alias of [current_date](#current_date)._
25242524
- [array_intersect](#array_intersect)
25252525
- [array_join](#array_join)
25262526
- [array_length](#array_length)
2527+
- [array_max](#array_max)
25272528
- [array_ndims](#array_ndims)
25282529
- [array_pop_back](#array_pop_back)
25292530
- [array_pop_front](#array_pop_front)
@@ -2569,6 +2570,7 @@ _Alias of [current_date](#current_date)._
25692570
- [list_intersect](#list_intersect)
25702571
- [list_join](#list_join)
25712572
- [list_length](#list_length)
2573+
- [list_max](#list_max)
25722574
- [list_ndims](#list_ndims)
25732575
- [list_pop_back](#list_pop_back)
25742576
- [list_pop_front](#list_pop_front)
@@ -3002,6 +3004,33 @@ array_length(array, dimension)
30023004

30033005
- list_length
30043006

3007+
### `array_max`
3008+
3009+
Returns the maximum value in the array.
3010+
3011+
```sql
3012+
array_max(array)
3013+
```
3014+
3015+
#### Arguments
3016+
3017+
- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators.
3018+
3019+
#### Example
3020+
3021+
```sql
3022+
> select array_max([3,1,4,2]);
3023+
+-----------------------------------------+
3024+
| array_max(List([3,1,4,2])) |
3025+
+-----------------------------------------+
3026+
| 4 |
3027+
+-----------------------------------------+
3028+
```
3029+
3030+
#### Aliases
3031+
3032+
- list_max
3033+
30053034
### `array_ndims`
30063035

30073036
Returns the number of dimensions of the array.
@@ -3759,6 +3788,10 @@ _Alias of [array_to_string](#array_to_string)._
37593788

37603789
_Alias of [array_length](#array_length)._
37613790

3791+
### `list_max`
3792+
3793+
_Alias of [array_max](#array_max)._
3794+
37623795
### `list_ndims`
37633796

37643797
_Alias of [array_ndims](#array_ndims)._

0 commit comments

Comments
 (0)