Skip to content

Commit 6b70214

Browse files
authored
Remove Built-in sum and Rename to lowercase sum (#10831)
* rm sum Signed-off-by: jayzhan211 <[email protected]> * mv stub to df:expr Signed-off-by: jayzhan211 <[email protected]> * fix sql example Signed-off-by: jayzhan211 <[email protected]> * lowercase in slt Signed-off-by: jayzhan211 <[email protected]> * rename to lowercase Signed-off-by: jayzhan211 <[email protected]> * rename stl in tpch Signed-off-by: jayzhan211 <[email protected]> --------- Signed-off-by: jayzhan211 <[email protected]>
1 parent 90f89e0 commit 6b70214

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

71 files changed

+814
-1313
lines changed

datafusion/core/src/dataframe/mod.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1820,7 +1820,7 @@ mod tests {
18201820

18211821
assert_batches_sorted_eq!(
18221822
["+----+-----------------------------+-----------------------------+-----------------------------+-----------------------------+-------------------------------+----------------------------------------+",
1823-
"| c1 | MIN(aggregate_test_100.c12) | MAX(aggregate_test_100.c12) | AVG(aggregate_test_100.c12) | SUM(aggregate_test_100.c12) | COUNT(aggregate_test_100.c12) | COUNT(DISTINCT aggregate_test_100.c12) |",
1823+
"| c1 | MIN(aggregate_test_100.c12) | MAX(aggregate_test_100.c12) | AVG(aggregate_test_100.c12) | sum(aggregate_test_100.c12) | COUNT(aggregate_test_100.c12) | COUNT(DISTINCT aggregate_test_100.c12) |",
18241824
"+----+-----------------------------+-----------------------------+-----------------------------+-----------------------------+-------------------------------+----------------------------------------+",
18251825
"| a | 0.02182578039211991 | 0.9800193410444061 | 0.48754517466109415 | 10.238448667882977 | 21 | 21 |",
18261826
"| b | 0.04893135681998029 | 0.9185813970744787 | 0.41040709263815384 | 7.797734760124923 | 19 | 19 |",
@@ -2395,7 +2395,7 @@ mod tests {
23952395
assert_batches_sorted_eq!(
23962396
[
23972397
"+----+-----------------------------+",
2398-
"| c1 | SUM(aggregate_test_100.c12) |",
2398+
"| c1 | sum(aggregate_test_100.c12) |",
23992399
"+----+-----------------------------+",
24002400
"| a | 10.238448667882977 |",
24012401
"| b | 7.797734760124923 |",
@@ -2411,7 +2411,7 @@ mod tests {
24112411
assert_batches_sorted_eq!(
24122412
[
24132413
"+----+---------------------+",
2414-
"| c1 | SUM(test_table.c12) |",
2414+
"| c1 | sum(test_table.c12) |",
24152415
"+----+---------------------+",
24162416
"| a | 10.238448667882977 |",
24172417
"| b | 7.797734760124923 |",

datafusion/core/src/datasource/file_format/csv.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -919,7 +919,7 @@ mod tests {
919919

920920
#[rustfmt::skip]
921921
let expected = ["+--------------+",
922-
"| SUM(aggr.c2) |",
922+
"| sum(aggr.c2) |",
923923
"+--------------+",
924924
"| 285 |",
925925
"+--------------+"];
@@ -956,7 +956,7 @@ mod tests {
956956

957957
#[rustfmt::skip]
958958
let expected = ["+--------------+",
959-
"| SUM(aggr.c3) |",
959+
"| sum(aggr.c3) |",
960960
"+--------------+",
961961
"| 781 |",
962962
"+--------------+"];
@@ -1122,7 +1122,7 @@ mod tests {
11221122

11231123
#[rustfmt::skip]
11241124
let expected = ["+---------------------+",
1125-
"| SUM(empty.column_1) |",
1125+
"| sum(empty.column_1) |",
11261126
"+---------------------+",
11271127
"| 10 |",
11281128
"+---------------------+"];
@@ -1161,7 +1161,7 @@ mod tests {
11611161

11621162
#[rustfmt::skip]
11631163
let expected = ["+-----------------------+",
1164-
"| SUM(one_col.column_1) |",
1164+
"| sum(one_col.column_1) |",
11651165
"+-----------------------+",
11661166
"| 50 |",
11671167
"+-----------------------+"];

datafusion/core/src/datasource/file_format/json.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -470,15 +470,15 @@ mod tests {
470470
ctx.register_json("json_parallel", table_path, options)
471471
.await?;
472472

473-
let query = "SELECT SUM(a) FROM json_parallel;";
473+
let query = "SELECT sum(a) FROM json_parallel;";
474474

475475
let result = ctx.sql(query).await?.collect().await?;
476476
let actual_partitions = count_num_partitions(&ctx, query).await?;
477477

478478
#[rustfmt::skip]
479479
let expected = [
480480
"+----------------------+",
481-
"| SUM(json_parallel.a) |",
481+
"| sum(json_parallel.a) |",
482482
"+----------------------+",
483483
"| -7 |",
484484
"+----------------------+"

datafusion/core/src/execution/context/csv.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,12 +110,12 @@ mod tests {
110110
)
111111
.await?;
112112
let results =
113-
plan_and_collect(&ctx, "SELECT SUM(c1), SUM(c2), COUNT(*) FROM test").await?;
113+
plan_and_collect(&ctx, "SELECT sum(c1), sum(c2), COUNT(*) FROM test").await?;
114114

115115
assert_eq!(results.len(), 1);
116116
let expected = [
117117
"+--------------+--------------+----------+",
118-
"| SUM(test.c1) | SUM(test.c2) | COUNT(*) |",
118+
"| sum(test.c1) | sum(test.c2) | COUNT(*) |",
119119
"+--------------+--------------+----------+",
120120
"| 10 | 110 | 20 |",
121121
"+--------------+--------------+----------+",

datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,9 @@ mod tests {
206206
use crate::physical_plan::{displayable, Partitioning};
207207

208208
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
209-
use datafusion_physical_expr::expressions::{col, Count, Sum};
209+
use datafusion_functions_aggregate::sum::sum_udaf;
210+
use datafusion_physical_expr::expressions::{col, Count};
211+
use datafusion_physical_plan::udaf::create_aggregate_expr;
210212

211213
/// Runs the CombinePartialFinalAggregate optimizer and asserts the plan against the expected
212214
macro_rules! assert_optimized {
@@ -391,12 +393,17 @@ mod tests {
391393
#[test]
392394
fn aggregations_with_group_combined() -> Result<()> {
393395
let schema = schema();
394-
let aggr_expr = vec![Arc::new(Sum::new(
395-
col("b", &schema)?,
396-
"Sum(b)".to_string(),
397-
DataType::Int64,
398-
)) as _];
399396

397+
let aggr_expr = vec![create_aggregate_expr(
398+
&sum_udaf(),
399+
&[col("b", &schema)?],
400+
&[],
401+
&[],
402+
&schema,
403+
"Sum(b)",
404+
false,
405+
false,
406+
)?];
400407
let groups: Vec<(Arc<dyn PhysicalExpr>, String)> =
401408
vec![(col("c", &schema)?, "c".to_string())];
402409

datafusion/core/src/physical_planner.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2586,7 +2586,7 @@ mod tests {
25862586
.downcast_ref::<AggregateExec>()
25872587
.expect("hash aggregate");
25882588
assert_eq!(
2589-
"SUM(aggregate_test_100.c2)",
2589+
"sum(aggregate_test_100.c2)",
25902590
final_hash_agg.schema().field(1).name()
25912591
);
25922592
// we need access to the input to the partial aggregate so that other projects can
@@ -2614,7 +2614,7 @@ mod tests {
26142614
.downcast_ref::<AggregateExec>()
26152615
.expect("hash aggregate");
26162616
assert_eq!(
2617-
"SUM(aggregate_test_100.c3)",
2617+
"sum(aggregate_test_100.c3)",
26182618
final_hash_agg.schema().field(2).name()
26192619
);
26202620
// we need access to the input to the partial aggregate so that other projects can

datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,10 @@ use datafusion::physical_plan::memory::MemoryExec;
3232
use datafusion::physical_plan::{collect, displayable, ExecutionPlan};
3333
use datafusion::prelude::{DataFrame, SessionConfig, SessionContext};
3434
use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor};
35-
use datafusion_physical_expr::expressions::{col, Sum};
36-
use datafusion_physical_expr::{AggregateExpr, PhysicalSortExpr};
35+
use datafusion_functions_aggregate::sum::sum_udaf;
36+
use datafusion_physical_expr::expressions::col;
37+
use datafusion_physical_expr::PhysicalSortExpr;
38+
use datafusion_physical_plan::udaf::create_aggregate_expr;
3739
use datafusion_physical_plan::InputOrderMode;
3840
use test_utils::{add_empty_batches, StringBatchGenerator};
3941

@@ -101,11 +103,17 @@ async fn run_aggregate_test(input1: Vec<RecordBatch>, group_by_columns: Vec<&str
101103
.with_sort_information(vec![sort_keys]),
102104
);
103105

104-
let aggregate_expr = vec![Arc::new(Sum::new(
105-
col("d", &schema).unwrap(),
106+
let aggregate_expr = vec![create_aggregate_expr(
107+
&sum_udaf(),
108+
&[col("d", &schema).unwrap()],
109+
&[],
110+
&[],
111+
&schema,
106112
"sum1",
107-
DataType::Int64,
108-
)) as Arc<dyn AggregateExpr>];
113+
false,
114+
false,
115+
)
116+
.unwrap()];
109117
let expr = group_by_columns
110118
.iter()
111119
.map(|elem| (col(elem, &schema).unwrap(), elem.to_string()))

datafusion/core/tests/user_defined/user_defined_aggregates.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ async fn test_udaf_shadows_builtin_fn() {
186186
// compute with builtin `sum` aggregator
187187
let expected = [
188188
"+---------------------------------------+",
189-
"| SUM(arrow_cast(t.time,Utf8(\"Int64\"))) |",
189+
"| sum(arrow_cast(t.time,Utf8(\"Int64\"))) |",
190190
"+---------------------------------------+",
191191
"| 19000 |",
192192
"+---------------------------------------+",

datafusion/expr/src/aggregate_function.rs

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,6 @@ use strum_macros::EnumIter;
3535
pub enum AggregateFunction {
3636
/// Count
3737
Count,
38-
/// Sum
39-
Sum,
4038
/// Minimum
4139
Min,
4240
/// Maximum
@@ -102,7 +100,6 @@ impl AggregateFunction {
102100
use AggregateFunction::*;
103101
match self {
104102
Count => "COUNT",
105-
Sum => "SUM",
106103
Min => "MIN",
107104
Max => "MAX",
108105
Avg => "AVG",
@@ -157,7 +154,6 @@ impl FromStr for AggregateFunction {
157154
"max" => AggregateFunction::Max,
158155
"mean" => AggregateFunction::Avg,
159156
"min" => AggregateFunction::Min,
160-
"sum" => AggregateFunction::Sum,
161157
"array_agg" => AggregateFunction::ArrayAgg,
162158
"nth_value" => AggregateFunction::NthValue,
163159
"string_agg" => AggregateFunction::StringAgg,
@@ -223,7 +219,6 @@ impl AggregateFunction {
223219
// The coerced_data_types is same with input_types.
224220
Ok(coerced_data_types[0].clone())
225221
}
226-
AggregateFunction::Sum => sum_return_type(&coerced_data_types[0]),
227222
AggregateFunction::BitAnd
228223
| AggregateFunction::BitOr
229224
| AggregateFunction::BitXor => Ok(coerced_data_types[0].clone()),
@@ -308,7 +303,6 @@ impl AggregateFunction {
308303
Signature::uniform(1, vec![DataType::Boolean], Volatility::Immutable)
309304
}
310305
AggregateFunction::Avg
311-
| AggregateFunction::Sum
312306
| AggregateFunction::VariancePop
313307
| AggregateFunction::Stddev
314308
| AggregateFunction::StddevPop

datafusion/expr/src/expr.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2239,7 +2239,6 @@ mod test {
22392239
"max",
22402240
"count",
22412241
"avg",
2242-
"sum",
22432242
];
22442243
for name in names {
22452244
let fun = find_df_window_func(name).unwrap();

datafusion/expr/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ pub mod logical_plan;
5151
pub mod registry;
5252
pub mod simplify;
5353
pub mod sort_properties;
54+
pub mod test;
5455
pub mod tree_node;
5556
pub mod type_coercion;
5657
pub mod utils;

datafusion/optimizer/src/test/function_stub.rs renamed to datafusion/expr/src/test/function_stub.rs

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,40 +15,40 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! Aggregate function stubs to test SQL optimizers.
18+
//! Aggregate function stubs for test in expr / optimizer.
1919
//!
2020
//! These are used to avoid a dependence on `datafusion-functions-aggregate` which live in a different crate
2121
2222
use std::any::Any;
2323

24-
use arrow::datatypes::{
25-
DataType, Field, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION,
26-
};
27-
use datafusion_common::{exec_err, Result};
28-
use datafusion_expr::{
24+
use crate::{
2925
expr::AggregateFunction,
3026
function::{AccumulatorArgs, StateFieldsArgs},
3127
utils::AggregateOrderSensitivity,
3228
Accumulator, AggregateUDFImpl, Expr, GroupsAccumulator, ReversedUDAF, Signature,
3329
Volatility,
3430
};
31+
use arrow::datatypes::{
32+
DataType, Field, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION,
33+
};
34+
use datafusion_common::{exec_err, Result};
3535

3636
macro_rules! create_func {
3737
($UDAF:ty, $AGGREGATE_UDF_FN:ident) => {
3838
paste::paste! {
3939
/// Singleton instance of [$UDAF], ensures the UDAF is only created once
4040
/// named STATIC_$(UDAF). For example `STATIC_FirstValue`
4141
#[allow(non_upper_case_globals)]
42-
static [< STATIC_ $UDAF >]: std::sync::OnceLock<std::sync::Arc<datafusion_expr::AggregateUDF>> =
42+
static [< STATIC_ $UDAF >]: std::sync::OnceLock<std::sync::Arc<crate::AggregateUDF>> =
4343
std::sync::OnceLock::new();
4444

4545
/// AggregateFunction that returns a [AggregateUDF] for [$UDAF]
4646
///
47-
/// [AggregateUDF]: datafusion_expr::AggregateUDF
48-
pub fn $AGGREGATE_UDF_FN() -> std::sync::Arc<datafusion_expr::AggregateUDF> {
47+
/// [AggregateUDF]: crate::AggregateUDF
48+
pub fn $AGGREGATE_UDF_FN() -> std::sync::Arc<crate::AggregateUDF> {
4949
[< STATIC_ $UDAF >]
5050
.get_or_init(|| {
51-
std::sync::Arc::new(datafusion_expr::AggregateUDF::from(<$UDAF>::default()))
51+
std::sync::Arc::new(crate::AggregateUDF::from(<$UDAF>::default()))
5252
})
5353
.clone()
5454
}
@@ -58,7 +58,7 @@ macro_rules! create_func {
5858

5959
create_func!(Sum, sum_udaf);
6060

61-
pub(crate) fn sum(expr: Expr) -> Expr {
61+
pub fn sum(expr: Expr) -> Expr {
6262
Expr::AggregateFunction(AggregateFunction::new_udf(
6363
sum_udaf(),
6464
vec![expr],
@@ -73,14 +73,12 @@ pub(crate) fn sum(expr: Expr) -> Expr {
7373
#[derive(Debug)]
7474
pub struct Sum {
7575
signature: Signature,
76-
aliases: Vec<String>,
7776
}
7877

7978
impl Sum {
8079
pub fn new() -> Self {
8180
Self {
8281
signature: Signature::user_defined(Volatility::Immutable),
83-
aliases: vec!["sum".to_string()],
8482
}
8583
}
8684
}
@@ -97,7 +95,7 @@ impl AggregateUDFImpl for Sum {
9795
}
9896

9997
fn name(&self) -> &str {
100-
"SUM"
98+
"sum"
10199
}
102100

103101
fn signature(&self) -> &Signature {
@@ -162,7 +160,7 @@ impl AggregateUDFImpl for Sum {
162160
}
163161

164162
fn aliases(&self) -> &[String] {
165-
&self.aliases
163+
&[]
166164
}
167165

168166
fn groups_accumulator_supported(&self, _args: AccumulatorArgs) -> bool {

datafusion/expr/src/test/mod.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
pub mod function_stub;

0 commit comments

Comments
 (0)