Skip to content

Commit 11e143c

Browse files
authored
Convert approx_distinct to UDAF (#10851)
* Convert approx_distinct to UDAF proto style fix prost fix clippy fix doc fix clippy
* refactor code
1 parent f37f1a5 commit 11e143c

File tree

18 files changed

+169
-247
lines changed

18 files changed

+169
-247
lines changed

datafusion/core/tests/user_defined/user_defined_aggregates.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ use datafusion_expr::{
4949
SimpleAggregateUDF,
5050
};
5151
use datafusion_physical_expr::expressions::AvgAccumulator;
52-
5352
/// Test to show the contents of the setup
5453
#[tokio::test]
5554
async fn test_setup() {

datafusion/expr/src/aggregate_function.rs

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,6 @@ pub enum AggregateFunction {
4141
Max,
4242
/// Average
4343
Avg,
44-
/// Approximate distinct function
45-
ApproxDistinct,
4644
/// Aggregation into an array
4745
ArrayAgg,
4846
/// N'th value in a group according to some ordering
@@ -95,7 +93,6 @@ impl AggregateFunction {
9593
Min => "MIN",
9694
Max => "MAX",
9795
Avg => "AVG",
98-
ApproxDistinct => "APPROX_DISTINCT",
9996
ArrayAgg => "ARRAY_AGG",
10097
NthValue => "NTH_VALUE",
10198
Correlation => "CORR",
@@ -157,7 +154,6 @@ impl FromStr for AggregateFunction {
157154
"regr_syy" => AggregateFunction::RegrSYY,
158155
"regr_sxy" => AggregateFunction::RegrSXY,
159156
// approximate
160-
"approx_distinct" => AggregateFunction::ApproxDistinct,
161157
"approx_percentile_cont" => AggregateFunction::ApproxPercentileCont,
162158
"approx_percentile_cont_with_weight" => {
163159
AggregateFunction::ApproxPercentileContWithWeight
@@ -194,9 +190,7 @@ impl AggregateFunction {
194190
})?;
195191

196192
match self {
197-
AggregateFunction::Count | AggregateFunction::ApproxDistinct => {
198-
Ok(DataType::Int64)
199-
}
193+
AggregateFunction::Count => Ok(DataType::Int64),
200194
AggregateFunction::Max | AggregateFunction::Min => {
201195
// For min and max agg function, the returned type is same as input type.
202196
// The coerced_data_types is same with input_types.
@@ -256,9 +250,9 @@ impl AggregateFunction {
256250
// note: the physical expression must accept the type returned by this function or the execution panics.
257251
match self {
258252
AggregateFunction::Count => Signature::variadic_any(Volatility::Immutable),
259-
AggregateFunction::ApproxDistinct
260-
| AggregateFunction::Grouping
261-
| AggregateFunction::ArrayAgg => Signature::any(1, Volatility::Immutable),
253+
AggregateFunction::Grouping | AggregateFunction::ArrayAgg => {
254+
Signature::any(1, Volatility::Immutable)
255+
}
262256
AggregateFunction::Min | AggregateFunction::Max => {
263257
let valid = STRINGS
264258
.iter()

datafusion/expr/src/expr_fn.rs

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -266,24 +266,6 @@ pub fn in_list(expr: Expr, list: Vec<Expr>, negated: bool) -> Expr {
266266
Expr::InList(InList::new(Box::new(expr), list, negated))
267267
}
268268

269-
/// Returns the approximate number of distinct input values.
270-
/// This function provides an approximation of count(DISTINCT x).
271-
/// Zero is returned if all input values are null.
272-
/// This function should produce a standard error of 0.81%,
273-
/// which is the standard deviation of the (approximately normal)
274-
/// error distribution over all possible sets.
275-
/// It does not guarantee an upper bound on the error for any specific input set.
276-
pub fn approx_distinct(expr: Expr) -> Expr {
277-
Expr::AggregateFunction(AggregateFunction::new(
278-
aggregate_function::AggregateFunction::ApproxDistinct,
279-
vec![expr],
280-
false,
281-
None,
282-
None,
283-
None,
284-
))
285-
}
286-
287269
/// Calculate an approximation of the specified `percentile` for `expr`.
288270
pub fn approx_percentile_cont(expr: Expr, percentile: Expr) -> Expr {
289271
Expr::AggregateFunction(AggregateFunction::new(

datafusion/expr/src/type_coercion/aggregates.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -96,9 +96,7 @@ pub fn coerce_types(
9696
check_arg_count(agg_fun.name(), input_types, &signature.type_signature)?;
9797

9898
match agg_fun {
99-
AggregateFunction::Count | AggregateFunction::ApproxDistinct => {
100-
Ok(input_types.to_vec())
101-
}
99+
AggregateFunction::Count => Ok(input_types.to_vec()),
102100
AggregateFunction::ArrayAgg => Ok(input_types.to_vec()),
103101
AggregateFunction::Min | AggregateFunction::Max => {
104102
// min and max support the dictionary data type
@@ -529,7 +527,6 @@ mod tests {
529527
let funs = vec![
530528
AggregateFunction::Count,
531529
AggregateFunction::ArrayAgg,
532-
AggregateFunction::ApproxDistinct,
533530
AggregateFunction::Min,
534531
AggregateFunction::Max,
535532
];

0 commit comments

Comments
 (0)