Skip to content

Commit c7afa7c

Browse files
committed
Convert approx_distinct to UDAF
1 parent 9503456 commit c7afa7c

File tree

14 files changed, with 577 additions (+577) and 262 deletions (-262).

datafusion/expr/src/aggregate_function.rs

Lines changed: 4 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -41,8 +41,6 @@ pub enum AggregateFunction {
4141
Max,
4242
/// Average
4343
Avg,
44-
/// Approximate distinct function
45-
ApproxDistinct,
4644
/// Aggregation into an array
4745
ArrayAgg,
4846
/// N'th value in a group according to some ordering
@@ -99,7 +97,6 @@ impl AggregateFunction {
9997
Min => "MIN",
10098
Max => "MAX",
10199
Avg => "AVG",
102-
ApproxDistinct => "APPROX_DISTINCT",
103100
ArrayAgg => "ARRAY_AGG",
104101
NthValue => "NTH_VALUE",
105102
VariancePop => "VAR_POP",
@@ -164,7 +161,6 @@ impl FromStr for AggregateFunction {
164161
"regr_syy" => AggregateFunction::RegrSYY,
165162
"regr_sxy" => AggregateFunction::RegrSXY,
166163
// approximate
167-
"approx_distinct" => AggregateFunction::ApproxDistinct,
168164
"approx_median" => AggregateFunction::ApproxMedian,
169165
"approx_percentile_cont" => AggregateFunction::ApproxPercentileCont,
170166
"approx_percentile_cont_with_weight" => {
@@ -202,9 +198,7 @@ impl AggregateFunction {
202198
})?;
203199

204200
match self {
205-
AggregateFunction::Count | AggregateFunction::ApproxDistinct => {
206-
Ok(DataType::Int64)
207-
}
201+
AggregateFunction::Count => Ok(DataType::Int64),
208202
AggregateFunction::Max | AggregateFunction::Min => {
209203
// For min and max agg function, the returned type is same as input type.
210204
// The coerced_data_types is same with input_types.
@@ -268,9 +262,9 @@ impl AggregateFunction {
268262
// note: the physical expression must accept the type returned by this function or the execution panics.
269263
match self {
270264
AggregateFunction::Count => Signature::variadic_any(Volatility::Immutable),
271-
AggregateFunction::ApproxDistinct
272-
| AggregateFunction::Grouping
273-
| AggregateFunction::ArrayAgg => Signature::any(1, Volatility::Immutable),
265+
AggregateFunction::Grouping | AggregateFunction::ArrayAgg => {
266+
Signature::any(1, Volatility::Immutable)
267+
}
274268
AggregateFunction::Min | AggregateFunction::Max => {
275269
let valid = STRINGS
276270
.iter()

datafusion/expr/src/expr_fn.rs

Lines changed: 0 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -266,24 +266,6 @@ pub fn in_list(expr: Expr, list: Vec<Expr>, negated: bool) -> Expr {
266266
Expr::InList(InList::new(Box::new(expr), list, negated))
267267
}
268268

269-
/// Returns the approximate number of distinct input values.
270-
/// This function provides an approximation of count(DISTINCT x).
271-
/// Zero is returned if all input values are null.
272-
/// This function should produce a standard error of 0.81%,
273-
/// which is the standard deviation of the (approximately normal)
274-
/// error distribution over all possible sets.
275-
/// It does not guarantee an upper bound on the error for any specific input set.
276-
pub fn approx_distinct(expr: Expr) -> Expr {
277-
Expr::AggregateFunction(AggregateFunction::new(
278-
aggregate_function::AggregateFunction::ApproxDistinct,
279-
vec![expr],
280-
false,
281-
None,
282-
None,
283-
None,
284-
))
285-
}
286-
287269
/// Calculate an approximation of the median for `expr`.
288270
pub fn approx_median(expr: Expr) -> Expr {
289271
Expr::AggregateFunction(AggregateFunction::new(

datafusion/expr/src/type_coercion/aggregates.rs

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -96,9 +96,7 @@ pub fn coerce_types(
9696
check_arg_count(agg_fun.name(), input_types, &signature.type_signature)?;
9797

9898
match agg_fun {
99-
AggregateFunction::Count | AggregateFunction::ApproxDistinct => {
100-
Ok(input_types.to_vec())
101-
}
99+
AggregateFunction::Count => Ok(input_types.to_vec()),
102100
AggregateFunction::ArrayAgg => Ok(input_types.to_vec()),
103101
AggregateFunction::Min | AggregateFunction::Max => {
104102
// min and max support the dictionary data type
@@ -549,7 +547,6 @@ mod tests {
549547
let funs = vec![
550548
AggregateFunction::Count,
551549
AggregateFunction::ArrayAgg,
552-
AggregateFunction::ApproxDistinct,
553550
AggregateFunction::Min,
554551
AggregateFunction::Max,
555552
];

0 commit comments

Comments
 (0)