Skip to content

Commit 7d76719

Browse files
Issue-14469 - feat: Add array_max function
1 parent ea788c7 commit 7d76719

File tree

4 files changed

+265
-0
lines changed

4 files changed

+265
-0
lines changed

datafusion/functions-nested/src/lib.rs

+2
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ pub mod map;
4747
pub mod map_extract;
4848
pub mod map_keys;
4949
pub mod map_values;
50+
pub mod max;
5051
pub mod planner;
5152
pub mod position;
5253
pub mod range;
@@ -139,6 +140,7 @@ pub fn all_default_nested_functions() -> Vec<Arc<ScalarUDF>> {
139140
length::array_length_udf(),
140141
distance::array_distance_udf(),
141142
flatten::flatten_udf(),
143+
max::array_max_udf(),
142144
sort::array_sort_udf(),
143145
repeat::array_repeat_udf(),
144146
resize::array_resize_udf(),
+173
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! [`ScalarUDFImpl`] definitions for array_max function.
19+
use crate::sort::array_sort_inner;
20+
use crate::utils::make_scalar_function;
21+
use arrow_array::{Array, ArrayRef, StringArray};
22+
use arrow_schema::DataType;
23+
use arrow_schema::DataType::{FixedSizeList, LargeList, List};
24+
use datafusion_common::cast::as_list_array;
25+
use datafusion_common::exec_err;
26+
use datafusion_doc::Documentation;
27+
use datafusion_expr::{ColumnarValue, Expr, ScalarUDFImpl, Signature, Volatility};
28+
use datafusion_macros::user_doc;
29+
use std::any::Any;
30+
use std::sync::Arc;
31+
32+
// Declares the `array_max` UDF through the crate's `make_udf_expr_and_func!`
// macro. The arguments are, in order: the implementing type, the expression
// function name, its single argument, the doc string, and the name of the UDF
// accessor (`array_max_udf`, which `lib.rs` adds to the default function list).
make_udf_expr_and_func!(
    ArrayMax,
    array_max,
    array,
    "returns the maximum value in the array.",
    array_max_udf
);
39+
40+
#[user_doc(
41+
doc_section(label = "Array Functions"),
42+
description = "Returns the maximum value in the array.",
43+
syntax_example = "array_max(array)",
44+
sql_example = r#"```sql
45+
> select array_max([3,1,4,2]);
46+
+-----------------------------------------+
47+
| array_max(List([3,1,4,2])) |
48+
+-----------------------------------------+
49+
| 4 |
50+
+-----------------------------------------+
51+
```"#,
52+
argument(
53+
name = "array",
54+
description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
55+
)
56+
)]
57+
#[derive(Debug)]
58+
pub struct ArrayMax {
59+
signature: Signature,
60+
aliases: Vec<String>,
61+
}
62+
63+
impl Default for ArrayMax {
64+
fn default() -> Self {
65+
Self::new()
66+
}
67+
}
68+
69+
impl ArrayMax {
70+
pub fn new() -> Self {
71+
Self {
72+
signature: Signature::array(Volatility::Immutable),
73+
aliases: vec!["list_max".to_string()],
74+
}
75+
}
76+
}
77+
78+
impl ScalarUDFImpl for ArrayMax {
79+
fn as_any(&self) -> &dyn Any {
80+
self
81+
}
82+
83+
fn name(&self) -> &str {
84+
"array_max"
85+
}
86+
87+
fn display_name(&self, args: &[Expr]) -> datafusion_common::Result<String> {
88+
let args_name = args.iter().map(ToString::to_string).collect::<Vec<_>>();
89+
if args_name.len() != 1 {
90+
return exec_err!("expects 1 arg, got {}", args_name.len());
91+
}
92+
93+
Ok(format!("{}", args_name[0]))
94+
}
95+
96+
fn schema_name(&self, args: &[Expr]) -> datafusion_common::Result<String> {
97+
let args_name = args
98+
.iter()
99+
.map(|e| e.schema_name().to_string())
100+
.collect::<Vec<_>>();
101+
if args_name.len() != 1 {
102+
return exec_err!("expects 1 arg, got {}", args_name.len());
103+
}
104+
105+
Ok(format!("{}", args_name[0]))
106+
}
107+
108+
fn signature(&self) -> &Signature {
109+
&self.signature
110+
}
111+
112+
fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result<DataType> {
113+
match &arg_types[0] {
114+
List(field) | LargeList(field) | FixedSizeList(field, _) => {
115+
Ok(field.data_type().clone())
116+
}
117+
_ => exec_err!(
118+
"Not reachable, data_type should be List, LargeList or FixedSizeList"
119+
),
120+
}
121+
}
122+
123+
fn invoke_batch(
124+
&self,
125+
args: &[ColumnarValue],
126+
_number_rows: usize,
127+
) -> datafusion_common::Result<ColumnarValue> {
128+
make_scalar_function(array_max_inner)(args)
129+
}
130+
131+
fn aliases(&self) -> &[String] {
132+
&self.aliases
133+
}
134+
135+
fn documentation(&self) -> Option<&Documentation> {
136+
self.doc()
137+
}
138+
}
139+
140+
/// array_max SQL function
141+
///
142+
/// There is one argument for array_max as the array.
143+
/// `array_max(array)`
144+
///
145+
/// For example:
146+
/// > array_max(\[1, 3, 2]) -> 3
147+
pub fn array_max_inner(args: &[ArrayRef]) -> datafusion_common::Result<ArrayRef> {
148+
if args.len() != 1 {
149+
return exec_err!("array_max needs one argument");
150+
}
151+
152+
match &args[0].data_type() {
153+
List(_) | LargeList(_) | FixedSizeList(_, _) => {
154+
let new_args = vec![
155+
args[0].clone(),
156+
Arc::new(StringArray::from_iter(vec![Some("DESC")])),
157+
Arc::new(StringArray::from_iter(vec![Some("NULLS LAST")])),
158+
];
159+
array_max_internal(&new_args)
160+
}
161+
_ => exec_err!("array_max does not support type: {:?}", args[0].data_type()),
162+
}
163+
}
164+
165+
fn array_max_internal(args: &[ArrayRef]) -> datafusion_common::Result<ArrayRef> {
166+
let sorted_array = array_sort_inner(args)?;
167+
let result_array = as_list_array(&sorted_array)?.value(0);
168+
if result_array.is_empty() {
169+
return exec_err!("array_max needs one argument as non-empty array");
170+
}
171+
let max_result = result_array.slice(0, 1);
172+
Ok(max_result)
173+
}

datafusion/sqllogictest/test_files/array.slt

+61
Original file line numberDiff line numberDiff line change
@@ -1433,6 +1433,67 @@ NULL 23
14331433
NULL 43
14341434
5 NULL
14351435

1436+
1437+
## array_max
1438+
# array_max scalar function #1
1439+
query I
1440+
select array_max(make_array(5, 3, 6, 4));
1441+
----
1442+
6
1443+
1444+
query I
1445+
select array_max(make_array(5, 3, 4, NULL, 6, NULL));
1446+
----
1447+
6
1448+
1449+
query I
1450+
select array_max(make_array(NULL, NULL));
1451+
----
1452+
NULL
1453+
1454+
query T
1455+
select array_max(make_array('h', 'e', 'o', 'l', 'l'));
1456+
----
1457+
o
1458+
1459+
query T
1460+
select array_max(make_array('h', 'e', 'l', NULL, 'l', 'o', NULL));
1461+
----
1462+
o
1463+
1464+
query B
1465+
select array_max(make_array(false, true, false, true));
1466+
----
1467+
true
1468+
1469+
query B
1470+
select array_max(make_array(false, true, NULL, false, true));
1471+
----
1472+
true
1473+
1474+
query D
1475+
select array_max(make_array(DATE '1992-09-01', DATE '1993-03-01', DATE '1999-05-01', DATE '1985-11-01'));
1476+
----
1477+
1999-05-01
1478+
1479+
query D
1480+
select array_max(make_array(DATE '1995-09-01', DATE '1999-05-01', DATE '1993-03-01', NULL));
1481+
----
1482+
1999-05-01
1483+
1484+
query P
1485+
select array_max(make_array(TIMESTAMP '1992-09-01', TIMESTAMP '1995-06-01', TIMESTAMP '1984-10-01'));
1486+
----
1487+
1995-06-01T00:00:00
1488+
1489+
query P
1490+
select array_max(make_array(NULL, TIMESTAMP '1996-10-01', TIMESTAMP '1995-06-01'));
1491+
----
1492+
1996-10-01T00:00:00
1493+
1494+
query error Execution error: array_max needs one argument as non-empty array
1495+
select array_max(make_array());
1496+
14361497
## array_pop_back (aliases: `list_pop_back`)
14371498

14381499
# array_pop_back scalar function with null

docs/source/user-guide/sql/scalar_functions.md

+29
Original file line numberDiff line numberDiff line change
@@ -2524,6 +2524,7 @@ _Alias of [current_date](#current_date)._
25242524
- [array_intersect](#array_intersect)
25252525
- [array_join](#array_join)
25262526
- [array_length](#array_length)
2527+
- [array_max](#array_max)
25272528
- [array_ndims](#array_ndims)
25282529
- [array_pop_back](#array_pop_back)
25292530
- [array_pop_front](#array_pop_front)
@@ -2569,6 +2570,7 @@ _Alias of [current_date](#current_date)._
25692570
- [list_intersect](#list_intersect)
25702571
- [list_join](#list_join)
25712572
- [list_length](#list_length)
2573+
- [list_max](#list_max)
25722574
- [list_ndims](#list_ndims)
25732575
- [list_pop_back](#list_pop_back)
25742576
- [list_pop_front](#list_pop_front)
@@ -3002,6 +3004,33 @@ array_length(array, dimension)
30023004

30033005
- list_length
30043006

3007+
### `array_max`
3008+
3009+
Returns the maximum value in the array.
3010+
3011+
```
3012+
array_max(array)
3013+
```
3014+
3015+
#### Arguments
3016+
3017+
- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators.
3018+
3019+
#### Example
3020+
3021+
```sql
3022+
> select array_max([3,1,4,2]);
3023+
+-------------------------------------------+
3024+
| array_max(List([3,1,4,2])) |
3025+
+-------------------------------------------+
3026+
| 4 |
3027+
+-------------------------------------------+
3028+
```
3029+
3030+
#### Aliases
3031+
3032+
- list_max
3033+
30053034
### `array_ndims`
30063035

30073036
Returns the number of dimensions of the array.

0 commit comments

Comments
 (0)