Skip to content

Commit 96b88e0

Browse files
jayzhan211wiedld
authored andcommitted
Remove element's nullability of array_agg function (apache#11447)
* rm null Signed-off-by: jayzhan211 <[email protected]> * fmt Signed-off-by: jayzhan211 <[email protected]> * fix test Signed-off-by: jayzhan211 <[email protected]> --------- Signed-off-by: jayzhan211 <[email protected]>
1 parent 530bb0b commit 96b88e0

File tree

6 files changed

+23
-75
lines changed

6 files changed

+23
-75
lines changed

datafusion/core/tests/sql/aggregates.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ async fn csv_query_array_agg_distinct() -> Result<()> {
3636
*actual[0].schema(),
3737
Schema::new(vec![Field::new_list(
3838
"ARRAY_AGG(DISTINCT aggregate_test_100.c2)",
39-
Field::new("item", DataType::UInt32, false),
39+
Field::new("item", DataType::UInt32, true),
4040
true
4141
),])
4242
);

datafusion/physical-expr/src/aggregate/array_agg.rs

+6-17
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ use arrow::array::ArrayRef;
2424
use arrow::datatypes::{DataType, Field};
2525
use arrow_array::Array;
2626
use datafusion_common::cast::as_list_array;
27-
use datafusion_common::utils::array_into_list_array;
27+
use datafusion_common::utils::array_into_list_array_nullable;
2828
use datafusion_common::Result;
2929
use datafusion_common::ScalarValue;
3030
use datafusion_expr::Accumulator;
@@ -40,8 +40,6 @@ pub struct ArrayAgg {
4040
input_data_type: DataType,
4141
/// The input expression
4242
expr: Arc<dyn PhysicalExpr>,
43-
/// If the input expression can have NULLs
44-
nullable: bool,
4543
}
4644

4745
impl ArrayAgg {
@@ -50,13 +48,11 @@ impl ArrayAgg {
5048
expr: Arc<dyn PhysicalExpr>,
5149
name: impl Into<String>,
5250
data_type: DataType,
53-
nullable: bool,
5451
) -> Self {
5552
Self {
5653
name: name.into(),
5754
input_data_type: data_type,
5855
expr,
59-
nullable,
6056
}
6157
}
6258
}
@@ -70,22 +66,21 @@ impl AggregateExpr for ArrayAgg {
7066
Ok(Field::new_list(
7167
&self.name,
7268
// This should be the same as return type of AggregateFunction::ArrayAgg
73-
Field::new("item", self.input_data_type.clone(), self.nullable),
69+
Field::new("item", self.input_data_type.clone(), true),
7470
true,
7571
))
7672
}
7773

7874
fn create_accumulator(&self) -> Result<Box<dyn Accumulator>> {
7975
Ok(Box::new(ArrayAggAccumulator::try_new(
8076
&self.input_data_type,
81-
self.nullable,
8277
)?))
8378
}
8479

8580
fn state_fields(&self) -> Result<Vec<Field>> {
8681
Ok(vec![Field::new_list(
8782
format_state_name(&self.name, "array_agg"),
88-
Field::new("item", self.input_data_type.clone(), self.nullable),
83+
Field::new("item", self.input_data_type.clone(), true),
8984
true,
9085
)])
9186
}
@@ -116,16 +111,14 @@ impl PartialEq<dyn Any> for ArrayAgg {
116111
pub(crate) struct ArrayAggAccumulator {
117112
values: Vec<ArrayRef>,
118113
datatype: DataType,
119-
nullable: bool,
120114
}
121115

122116
impl ArrayAggAccumulator {
123117
/// new array_agg accumulator based on given item data type
124-
pub fn try_new(datatype: &DataType, nullable: bool) -> Result<Self> {
118+
pub fn try_new(datatype: &DataType) -> Result<Self> {
125119
Ok(Self {
126120
values: vec![],
127121
datatype: datatype.clone(),
128-
nullable,
129122
})
130123
}
131124
}
@@ -169,15 +162,11 @@ impl Accumulator for ArrayAggAccumulator {
169162
self.values.iter().map(|a| a.as_ref()).collect();
170163

171164
if element_arrays.is_empty() {
172-
return Ok(ScalarValue::new_null_list(
173-
self.datatype.clone(),
174-
self.nullable,
175-
1,
176-
));
165+
return Ok(ScalarValue::new_null_list(self.datatype.clone(), true, 1));
177166
}
178167

179168
let concated_array = arrow::compute::concat(&element_arrays)?;
180-
let list_array = array_into_list_array(concated_array, self.nullable);
169+
let list_array = array_into_list_array_nullable(concated_array);
181170

182171
Ok(ScalarValue::List(Arc::new(list_array)))
183172
}

datafusion/physical-expr/src/aggregate/array_agg_distinct.rs

+5-18
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,6 @@ pub struct DistinctArrayAgg {
4242
input_data_type: DataType,
4343
/// The input expression
4444
expr: Arc<dyn PhysicalExpr>,
45-
/// If the input expression can have NULLs
46-
nullable: bool,
4745
}
4846

4947
impl DistinctArrayAgg {
@@ -52,14 +50,12 @@ impl DistinctArrayAgg {
5250
expr: Arc<dyn PhysicalExpr>,
5351
name: impl Into<String>,
5452
input_data_type: DataType,
55-
nullable: bool,
5653
) -> Self {
5754
let name = name.into();
5855
Self {
5956
name,
6057
input_data_type,
6158
expr,
62-
nullable,
6359
}
6460
}
6561
}
@@ -74,22 +70,21 @@ impl AggregateExpr for DistinctArrayAgg {
7470
Ok(Field::new_list(
7571
&self.name,
7672
// This should be the same as return type of AggregateFunction::ArrayAgg
77-
Field::new("item", self.input_data_type.clone(), self.nullable),
73+
Field::new("item", self.input_data_type.clone(), true),
7874
true,
7975
))
8076
}
8177

8278
fn create_accumulator(&self) -> Result<Box<dyn Accumulator>> {
8379
Ok(Box::new(DistinctArrayAggAccumulator::try_new(
8480
&self.input_data_type,
85-
self.nullable,
8681
)?))
8782
}
8883

8984
fn state_fields(&self) -> Result<Vec<Field>> {
9085
Ok(vec![Field::new_list(
9186
format_state_name(&self.name, "distinct_array_agg"),
92-
Field::new("item", self.input_data_type.clone(), self.nullable),
87+
Field::new("item", self.input_data_type.clone(), true),
9388
true,
9489
)])
9590
}
@@ -120,15 +115,13 @@ impl PartialEq<dyn Any> for DistinctArrayAgg {
120115
struct DistinctArrayAggAccumulator {
121116
values: HashSet<ScalarValue>,
122117
datatype: DataType,
123-
nullable: bool,
124118
}
125119

126120
impl DistinctArrayAggAccumulator {
127-
pub fn try_new(datatype: &DataType, nullable: bool) -> Result<Self> {
121+
pub fn try_new(datatype: &DataType) -> Result<Self> {
128122
Ok(Self {
129123
values: HashSet::new(),
130124
datatype: datatype.clone(),
131-
nullable,
132125
})
133126
}
134127
}
@@ -166,13 +159,9 @@ impl Accumulator for DistinctArrayAggAccumulator {
166159
fn evaluate(&mut self) -> Result<ScalarValue> {
167160
let values: Vec<ScalarValue> = self.values.iter().cloned().collect();
168161
if values.is_empty() {
169-
return Ok(ScalarValue::new_null_list(
170-
self.datatype.clone(),
171-
self.nullable,
172-
1,
173-
));
162+
return Ok(ScalarValue::new_null_list(self.datatype.clone(), true, 1));
174163
}
175-
let arr = ScalarValue::new_list(&values, &self.datatype, self.nullable);
164+
let arr = ScalarValue::new_list(&values, &self.datatype, true);
176165
Ok(ScalarValue::List(arr))
177166
}
178167

@@ -255,7 +244,6 @@ mod tests {
255244
col("a", &schema)?,
256245
"bla".to_string(),
257246
datatype,
258-
true,
259247
));
260248
let actual = aggregate(&batch, agg)?;
261249
compare_list_contents(expected, actual)
@@ -272,7 +260,6 @@ mod tests {
272260
col("a", &schema)?,
273261
"bla".to_string(),
274262
datatype,
275-
true,
276263
));
277264

278265
let mut accum1 = agg.create_accumulator()?;

datafusion/physical-expr/src/aggregate/array_agg_ordered.rs

+9-28
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ use arrow::datatypes::{DataType, Field};
3333
use arrow_array::cast::AsArray;
3434
use arrow_array::{new_empty_array, Array, ArrayRef, StructArray};
3535
use arrow_schema::Fields;
36-
use datafusion_common::utils::{array_into_list_array, get_row_at_idx};
36+
use datafusion_common::utils::{array_into_list_array_nullable, get_row_at_idx};
3737
use datafusion_common::{exec_err, Result, ScalarValue};
3838
use datafusion_expr::utils::AggregateOrderSensitivity;
3939
use datafusion_expr::Accumulator;
@@ -50,8 +50,6 @@ pub struct OrderSensitiveArrayAgg {
5050
input_data_type: DataType,
5151
/// The input expression
5252
expr: Arc<dyn PhysicalExpr>,
53-
/// If the input expression can have `NULL`s
54-
nullable: bool,
5553
/// Ordering data types
5654
order_by_data_types: Vec<DataType>,
5755
/// Ordering requirement
@@ -66,15 +64,13 @@ impl OrderSensitiveArrayAgg {
6664
expr: Arc<dyn PhysicalExpr>,
6765
name: impl Into<String>,
6866
input_data_type: DataType,
69-
nullable: bool,
7067
order_by_data_types: Vec<DataType>,
7168
ordering_req: LexOrdering,
7269
) -> Self {
7370
Self {
7471
name: name.into(),
7572
input_data_type,
7673
expr,
77-
nullable,
7874
order_by_data_types,
7975
ordering_req,
8076
reverse: false,
@@ -90,8 +86,8 @@ impl AggregateExpr for OrderSensitiveArrayAgg {
9086
fn field(&self) -> Result<Field> {
9187
Ok(Field::new_list(
9288
&self.name,
93-
// This should be the same as return type of AggregateFunction::ArrayAgg
94-
Field::new("item", self.input_data_type.clone(), self.nullable),
89+
// This should be the same as return type of AggregateFunction::OrderSensitiveArrayAgg
90+
Field::new("item", self.input_data_type.clone(), true),
9591
true,
9692
))
9793
}
@@ -102,25 +98,20 @@ impl AggregateExpr for OrderSensitiveArrayAgg {
10298
&self.order_by_data_types,
10399
self.ordering_req.clone(),
104100
self.reverse,
105-
self.nullable,
106101
)
107102
.map(|acc| Box::new(acc) as _)
108103
}
109104

110105
fn state_fields(&self) -> Result<Vec<Field>> {
111106
let mut fields = vec![Field::new_list(
112107
format_state_name(&self.name, "array_agg"),
113-
Field::new("item", self.input_data_type.clone(), self.nullable),
108+
Field::new("item", self.input_data_type.clone(), true),
114109
true, // This should be the same as field()
115110
)];
116111
let orderings = ordering_fields(&self.ordering_req, &self.order_by_data_types);
117112
fields.push(Field::new_list(
118113
format_state_name(&self.name, "array_agg_orderings"),
119-
Field::new(
120-
"item",
121-
DataType::Struct(Fields::from(orderings)),
122-
self.nullable,
123-
),
114+
Field::new("item", DataType::Struct(Fields::from(orderings)), true),
124115
false,
125116
));
126117
Ok(fields)
@@ -147,7 +138,6 @@ impl AggregateExpr for OrderSensitiveArrayAgg {
147138
name: self.name.to_string(),
148139
input_data_type: self.input_data_type.clone(),
149140
expr: Arc::clone(&self.expr),
150-
nullable: self.nullable,
151141
order_by_data_types: self.order_by_data_types.clone(),
152142
// Reverse requirement:
153143
ordering_req: reverse_order_bys(&self.ordering_req),
@@ -186,8 +176,6 @@ pub(crate) struct OrderSensitiveArrayAggAccumulator {
186176
ordering_req: LexOrdering,
187177
/// Whether the aggregation is running in reverse.
188178
reverse: bool,
189-
/// Whether the input expr is nullable
190-
nullable: bool,
191179
}
192180

193181
impl OrderSensitiveArrayAggAccumulator {
@@ -198,7 +186,6 @@ impl OrderSensitiveArrayAggAccumulator {
198186
ordering_dtypes: &[DataType],
199187
ordering_req: LexOrdering,
200188
reverse: bool,
201-
nullable: bool,
202189
) -> Result<Self> {
203190
let mut datatypes = vec![datatype.clone()];
204191
datatypes.extend(ordering_dtypes.iter().cloned());
@@ -208,7 +195,6 @@ impl OrderSensitiveArrayAggAccumulator {
208195
datatypes,
209196
ordering_req,
210197
reverse,
211-
nullable,
212198
})
213199
}
214200
}
@@ -312,7 +298,7 @@ impl Accumulator for OrderSensitiveArrayAggAccumulator {
312298
if self.values.is_empty() {
313299
return Ok(ScalarValue::new_null_list(
314300
self.datatypes[0].clone(),
315-
self.nullable,
301+
true,
316302
1,
317303
));
318304
}
@@ -322,14 +308,10 @@ impl Accumulator for OrderSensitiveArrayAggAccumulator {
322308
ScalarValue::new_list_from_iter(
323309
values.into_iter().rev(),
324310
&self.datatypes[0],
325-
self.nullable,
311+
true,
326312
)
327313
} else {
328-
ScalarValue::new_list_from_iter(
329-
values.into_iter(),
330-
&self.datatypes[0],
331-
self.nullable,
332-
)
314+
ScalarValue::new_list_from_iter(values.into_iter(), &self.datatypes[0], true)
333315
};
334316
Ok(ScalarValue::List(array))
335317
}
@@ -385,9 +367,8 @@ impl OrderSensitiveArrayAggAccumulator {
385367
column_wise_ordering_values,
386368
None,
387369
)?;
388-
Ok(ScalarValue::List(Arc::new(array_into_list_array(
370+
Ok(ScalarValue::List(Arc::new(array_into_list_array_nullable(
389371
Arc::new(ordering_array),
390-
self.nullable,
391372
))))
392373
}
393374
}

datafusion/physical-expr/src/aggregate/build_in.rs

+2-10
Original file line numberDiff line numberDiff line change
@@ -62,16 +62,14 @@ pub fn create_aggregate_expr(
6262
Ok(match (fun, distinct) {
6363
(AggregateFunction::ArrayAgg, false) => {
6464
let expr = Arc::clone(&input_phy_exprs[0]);
65-
let nullable = expr.nullable(input_schema)?;
6665

6766
if ordering_req.is_empty() {
68-
Arc::new(expressions::ArrayAgg::new(expr, name, data_type, nullable))
67+
Arc::new(expressions::ArrayAgg::new(expr, name, data_type))
6968
} else {
7069
Arc::new(expressions::OrderSensitiveArrayAgg::new(
7170
expr,
7271
name,
7372
data_type,
74-
nullable,
7573
ordering_types,
7674
ordering_req.to_vec(),
7775
))
@@ -84,13 +82,7 @@ pub fn create_aggregate_expr(
8482
);
8583
}
8684
let expr = Arc::clone(&input_phy_exprs[0]);
87-
let is_expr_nullable = expr.nullable(input_schema)?;
88-
Arc::new(expressions::DistinctArrayAgg::new(
89-
expr,
90-
name,
91-
data_type,
92-
is_expr_nullable,
93-
))
85+
Arc::new(expressions::DistinctArrayAgg::new(expr, name, data_type))
9486
}
9587
(AggregateFunction::Min, _) => Arc::new(expressions::Min::new(
9688
Arc::clone(&input_phy_exprs[0]),

datafusion/physical-plan/src/aggregates/mod.rs

-1
Original file line numberDiff line numberDiff line change
@@ -2231,7 +2231,6 @@ mod tests {
22312231
Arc::clone(col_a),
22322232
"array_agg",
22332233
DataType::Int32,
2234-
false,
22352234
vec![],
22362235
order_by_expr.unwrap_or_default(),
22372236
)) as _

0 commit comments

Comments
 (0)