17
17
//! [`Max`] and [`MaxAccumulator`] accumulator for the `max` function
18
18
//! [`Min`] and [`MinAccumulator`] accumulator for the `min` function
19
19
20
+ mod min_max_bytes;
21
+
20
22
use arrow:: array:: {
21
23
ArrayRef , BinaryArray , BinaryViewArray , BooleanArray , Date32Array , Date64Array ,
22
24
Decimal128Array , Decimal256Array , Float16Array , Float32Array , Float64Array ,
@@ -50,6 +52,7 @@ use arrow::datatypes::{
50
52
TimestampMillisecondType , TimestampNanosecondType , TimestampSecondType ,
51
53
} ;
52
54
55
+ use crate :: min_max:: min_max_bytes:: MinMaxBytesAccumulator ;
53
56
use datafusion_common:: ScalarValue ;
54
57
use datafusion_expr:: {
55
58
function:: AccumulatorArgs , Accumulator , AggregateUDFImpl , Documentation , Signature ,
@@ -104,7 +107,7 @@ impl Default for Max {
104
107
/// the specified [`ArrowPrimitiveType`].
105
108
///
106
109
/// [`ArrowPrimitiveType`]: arrow::datatypes::ArrowPrimitiveType
107
- macro_rules! instantiate_max_accumulator {
110
+ macro_rules! primitive_max_accumulator {
108
111
( $DATA_TYPE: ident, $NATIVE: ident, $PRIMTYPE: ident) => { {
109
112
Ok ( Box :: new(
110
113
PrimitiveGroupsAccumulator :: <$PRIMTYPE, _>:: new( $DATA_TYPE, |cur, new| {
@@ -123,7 +126,7 @@ macro_rules! instantiate_max_accumulator {
123
126
///
124
127
///
125
128
/// [`ArrowPrimitiveType`]: arrow::datatypes::ArrowPrimitiveType
126
- macro_rules! instantiate_min_accumulator {
129
+ macro_rules! primitive_min_accumulator {
127
130
( $DATA_TYPE: ident, $NATIVE: ident, $PRIMTYPE: ident) => { {
128
131
Ok ( Box :: new(
129
132
PrimitiveGroupsAccumulator :: <$PRIMTYPE, _>:: new( & $DATA_TYPE, |cur, new| {
@@ -231,6 +234,12 @@ impl AggregateUDFImpl for Max {
231
234
| Time32 ( _)
232
235
| Time64 ( _)
233
236
| Timestamp ( _, _)
237
+ | Utf8
238
+ | LargeUtf8
239
+ | Utf8View
240
+ | Binary
241
+ | LargeBinary
242
+ | BinaryView
234
243
)
235
244
}
236
245
@@ -242,58 +251,58 @@ impl AggregateUDFImpl for Max {
242
251
use TimeUnit :: * ;
243
252
let data_type = args. return_type ;
244
253
match data_type {
245
- Int8 => instantiate_max_accumulator ! ( data_type, i8 , Int8Type ) ,
246
- Int16 => instantiate_max_accumulator ! ( data_type, i16 , Int16Type ) ,
247
- Int32 => instantiate_max_accumulator ! ( data_type, i32 , Int32Type ) ,
248
- Int64 => instantiate_max_accumulator ! ( data_type, i64 , Int64Type ) ,
249
- UInt8 => instantiate_max_accumulator ! ( data_type, u8 , UInt8Type ) ,
250
- UInt16 => instantiate_max_accumulator ! ( data_type, u16 , UInt16Type ) ,
251
- UInt32 => instantiate_max_accumulator ! ( data_type, u32 , UInt32Type ) ,
252
- UInt64 => instantiate_max_accumulator ! ( data_type, u64 , UInt64Type ) ,
254
+ Int8 => primitive_max_accumulator ! ( data_type, i8 , Int8Type ) ,
255
+ Int16 => primitive_max_accumulator ! ( data_type, i16 , Int16Type ) ,
256
+ Int32 => primitive_max_accumulator ! ( data_type, i32 , Int32Type ) ,
257
+ Int64 => primitive_max_accumulator ! ( data_type, i64 , Int64Type ) ,
258
+ UInt8 => primitive_max_accumulator ! ( data_type, u8 , UInt8Type ) ,
259
+ UInt16 => primitive_max_accumulator ! ( data_type, u16 , UInt16Type ) ,
260
+ UInt32 => primitive_max_accumulator ! ( data_type, u32 , UInt32Type ) ,
261
+ UInt64 => primitive_max_accumulator ! ( data_type, u64 , UInt64Type ) ,
253
262
Float16 => {
254
- instantiate_max_accumulator ! ( data_type, f16, Float16Type )
263
+ primitive_max_accumulator ! ( data_type, f16, Float16Type )
255
264
}
256
265
Float32 => {
257
- instantiate_max_accumulator ! ( data_type, f32 , Float32Type )
266
+ primitive_max_accumulator ! ( data_type, f32 , Float32Type )
258
267
}
259
268
Float64 => {
260
- instantiate_max_accumulator ! ( data_type, f64 , Float64Type )
269
+ primitive_max_accumulator ! ( data_type, f64 , Float64Type )
261
270
}
262
- Date32 => instantiate_max_accumulator ! ( data_type, i32 , Date32Type ) ,
263
- Date64 => instantiate_max_accumulator ! ( data_type, i64 , Date64Type ) ,
271
+ Date32 => primitive_max_accumulator ! ( data_type, i32 , Date32Type ) ,
272
+ Date64 => primitive_max_accumulator ! ( data_type, i64 , Date64Type ) ,
264
273
Time32 ( Second ) => {
265
- instantiate_max_accumulator ! ( data_type, i32 , Time32SecondType )
274
+ primitive_max_accumulator ! ( data_type, i32 , Time32SecondType )
266
275
}
267
276
Time32 ( Millisecond ) => {
268
- instantiate_max_accumulator ! ( data_type, i32 , Time32MillisecondType )
277
+ primitive_max_accumulator ! ( data_type, i32 , Time32MillisecondType )
269
278
}
270
279
Time64 ( Microsecond ) => {
271
- instantiate_max_accumulator ! ( data_type, i64 , Time64MicrosecondType )
280
+ primitive_max_accumulator ! ( data_type, i64 , Time64MicrosecondType )
272
281
}
273
282
Time64 ( Nanosecond ) => {
274
- instantiate_max_accumulator ! ( data_type, i64 , Time64NanosecondType )
283
+ primitive_max_accumulator ! ( data_type, i64 , Time64NanosecondType )
275
284
}
276
285
Timestamp ( Second , _) => {
277
- instantiate_max_accumulator ! ( data_type, i64 , TimestampSecondType )
286
+ primitive_max_accumulator ! ( data_type, i64 , TimestampSecondType )
278
287
}
279
288
Timestamp ( Millisecond , _) => {
280
- instantiate_max_accumulator ! ( data_type, i64 , TimestampMillisecondType )
289
+ primitive_max_accumulator ! ( data_type, i64 , TimestampMillisecondType )
281
290
}
282
291
Timestamp ( Microsecond , _) => {
283
- instantiate_max_accumulator ! ( data_type, i64 , TimestampMicrosecondType )
292
+ primitive_max_accumulator ! ( data_type, i64 , TimestampMicrosecondType )
284
293
}
285
294
Timestamp ( Nanosecond , _) => {
286
- instantiate_max_accumulator ! ( data_type, i64 , TimestampNanosecondType )
295
+ primitive_max_accumulator ! ( data_type, i64 , TimestampNanosecondType )
287
296
}
288
297
Decimal128 ( _, _) => {
289
- instantiate_max_accumulator ! ( data_type, i128 , Decimal128Type )
298
+ primitive_max_accumulator ! ( data_type, i128 , Decimal128Type )
290
299
}
291
300
Decimal256 ( _, _) => {
292
- instantiate_max_accumulator ! ( data_type, i256, Decimal256Type )
301
+ primitive_max_accumulator ! ( data_type, i256, Decimal256Type )
302
+ }
303
+ Utf8 | LargeUtf8 | Utf8View | Binary | LargeBinary | BinaryView => {
304
+ Ok ( Box :: new ( MinMaxBytesAccumulator :: new_max ( data_type. clone ( ) ) ) )
293
305
}
294
-
295
- // It would be nice to have a fast implementation for Strings as well
296
- // https://github.com/apache/datafusion/issues/6906
297
306
298
307
// This is only reached if groups_accumulator_supported is out of sync
299
308
_ => internal_err ! ( "GroupsAccumulator not supported for max({})" , data_type) ,
@@ -1057,6 +1066,12 @@ impl AggregateUDFImpl for Min {
1057
1066
| Time32 ( _)
1058
1067
| Time64 ( _)
1059
1068
| Timestamp ( _, _)
1069
+ | Utf8
1070
+ | LargeUtf8
1071
+ | Utf8View
1072
+ | Binary
1073
+ | LargeBinary
1074
+ | BinaryView
1060
1075
)
1061
1076
}
1062
1077
@@ -1068,58 +1083,58 @@ impl AggregateUDFImpl for Min {
1068
1083
use TimeUnit :: * ;
1069
1084
let data_type = args. return_type ;
1070
1085
match data_type {
1071
- Int8 => instantiate_min_accumulator ! ( data_type, i8 , Int8Type ) ,
1072
- Int16 => instantiate_min_accumulator ! ( data_type, i16 , Int16Type ) ,
1073
- Int32 => instantiate_min_accumulator ! ( data_type, i32 , Int32Type ) ,
1074
- Int64 => instantiate_min_accumulator ! ( data_type, i64 , Int64Type ) ,
1075
- UInt8 => instantiate_min_accumulator ! ( data_type, u8 , UInt8Type ) ,
1076
- UInt16 => instantiate_min_accumulator ! ( data_type, u16 , UInt16Type ) ,
1077
- UInt32 => instantiate_min_accumulator ! ( data_type, u32 , UInt32Type ) ,
1078
- UInt64 => instantiate_min_accumulator ! ( data_type, u64 , UInt64Type ) ,
1086
+ Int8 => primitive_min_accumulator ! ( data_type, i8 , Int8Type ) ,
1087
+ Int16 => primitive_min_accumulator ! ( data_type, i16 , Int16Type ) ,
1088
+ Int32 => primitive_min_accumulator ! ( data_type, i32 , Int32Type ) ,
1089
+ Int64 => primitive_min_accumulator ! ( data_type, i64 , Int64Type ) ,
1090
+ UInt8 => primitive_min_accumulator ! ( data_type, u8 , UInt8Type ) ,
1091
+ UInt16 => primitive_min_accumulator ! ( data_type, u16 , UInt16Type ) ,
1092
+ UInt32 => primitive_min_accumulator ! ( data_type, u32 , UInt32Type ) ,
1093
+ UInt64 => primitive_min_accumulator ! ( data_type, u64 , UInt64Type ) ,
1079
1094
Float16 => {
1080
- instantiate_min_accumulator ! ( data_type, f16, Float16Type )
1095
+ primitive_min_accumulator ! ( data_type, f16, Float16Type )
1081
1096
}
1082
1097
Float32 => {
1083
- instantiate_min_accumulator ! ( data_type, f32 , Float32Type )
1098
+ primitive_min_accumulator ! ( data_type, f32 , Float32Type )
1084
1099
}
1085
1100
Float64 => {
1086
- instantiate_min_accumulator ! ( data_type, f64 , Float64Type )
1101
+ primitive_min_accumulator ! ( data_type, f64 , Float64Type )
1087
1102
}
1088
- Date32 => instantiate_min_accumulator ! ( data_type, i32 , Date32Type ) ,
1089
- Date64 => instantiate_min_accumulator ! ( data_type, i64 , Date64Type ) ,
1103
+ Date32 => primitive_min_accumulator ! ( data_type, i32 , Date32Type ) ,
1104
+ Date64 => primitive_min_accumulator ! ( data_type, i64 , Date64Type ) ,
1090
1105
Time32 ( Second ) => {
1091
- instantiate_min_accumulator ! ( data_type, i32 , Time32SecondType )
1106
+ primitive_min_accumulator ! ( data_type, i32 , Time32SecondType )
1092
1107
}
1093
1108
Time32 ( Millisecond ) => {
1094
- instantiate_min_accumulator ! ( data_type, i32 , Time32MillisecondType )
1109
+ primitive_min_accumulator ! ( data_type, i32 , Time32MillisecondType )
1095
1110
}
1096
1111
Time64 ( Microsecond ) => {
1097
- instantiate_min_accumulator ! ( data_type, i64 , Time64MicrosecondType )
1112
+ primitive_min_accumulator ! ( data_type, i64 , Time64MicrosecondType )
1098
1113
}
1099
1114
Time64 ( Nanosecond ) => {
1100
- instantiate_min_accumulator ! ( data_type, i64 , Time64NanosecondType )
1115
+ primitive_min_accumulator ! ( data_type, i64 , Time64NanosecondType )
1101
1116
}
1102
1117
Timestamp ( Second , _) => {
1103
- instantiate_min_accumulator ! ( data_type, i64 , TimestampSecondType )
1118
+ primitive_min_accumulator ! ( data_type, i64 , TimestampSecondType )
1104
1119
}
1105
1120
Timestamp ( Millisecond , _) => {
1106
- instantiate_min_accumulator ! ( data_type, i64 , TimestampMillisecondType )
1121
+ primitive_min_accumulator ! ( data_type, i64 , TimestampMillisecondType )
1107
1122
}
1108
1123
Timestamp ( Microsecond , _) => {
1109
- instantiate_min_accumulator ! ( data_type, i64 , TimestampMicrosecondType )
1124
+ primitive_min_accumulator ! ( data_type, i64 , TimestampMicrosecondType )
1110
1125
}
1111
1126
Timestamp ( Nanosecond , _) => {
1112
- instantiate_min_accumulator ! ( data_type, i64 , TimestampNanosecondType )
1127
+ primitive_min_accumulator ! ( data_type, i64 , TimestampNanosecondType )
1113
1128
}
1114
1129
Decimal128 ( _, _) => {
1115
- instantiate_min_accumulator ! ( data_type, i128 , Decimal128Type )
1130
+ primitive_min_accumulator ! ( data_type, i128 , Decimal128Type )
1116
1131
}
1117
1132
Decimal256 ( _, _) => {
1118
- instantiate_min_accumulator ! ( data_type, i256, Decimal256Type )
1133
+ primitive_min_accumulator ! ( data_type, i256, Decimal256Type )
1134
+ }
1135
+ Utf8 | LargeUtf8 | Utf8View | Binary | LargeBinary | BinaryView => {
1136
+ Ok ( Box :: new ( MinMaxBytesAccumulator :: new_min ( data_type. clone ( ) ) ) )
1119
1137
}
1120
-
1121
- // It would be nice to have a fast implementation for Strings as well
1122
- // https://github.com/apache/datafusion/issues/6906
1123
1138
1124
1139
// This is only reached if groups_accumulator_supported is out of sync
1125
1140
_ => internal_err ! ( "GroupsAccumulator not supported for min({})" , data_type) ,
0 commit comments