Skip to content

Commit a8d3fae

Browse files
authored
Migrate documentation for Aggregate Functions to code (#12861)
* aggregate function migration * fmt fix
1 parent eddade7 commit a8d3fae

20 files changed

+1363
-361
lines changed

datafusion/functions-aggregate/src/approx_distinct.rs

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,17 @@ use datafusion_common::ScalarValue;
3131
use datafusion_common::{
3232
downcast_value, internal_err, not_impl_err, DataFusionError, Result,
3333
};
34+
use datafusion_expr::aggregate_doc_sections::DOC_SECTION_APPROXIMATE;
3435
use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
3536
use datafusion_expr::utils::format_state_name;
36-
use datafusion_expr::{Accumulator, AggregateUDFImpl, Signature, Volatility};
37+
use datafusion_expr::{
38+
Accumulator, AggregateUDFImpl, Documentation, Signature, Volatility,
39+
};
3740
use std::any::Any;
3841
use std::fmt::{Debug, Formatter};
3942
use std::hash::Hash;
4043
use std::marker::PhantomData;
44+
use std::sync::OnceLock;
4145
make_udaf_expr_and_func!(
4246
ApproxDistinct,
4347
approx_distinct,
@@ -303,4 +307,33 @@ impl AggregateUDFImpl for ApproxDistinct {
303307
};
304308
Ok(accumulator)
305309
}
310+
311+
fn documentation(&self) -> Option<&Documentation> {
312+
Some(get_approx_distinct_doc())
313+
}
314+
}
315+
316+
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
317+
318+
fn get_approx_distinct_doc() -> &'static Documentation {
319+
DOCUMENTATION.get_or_init(|| {
320+
Documentation::builder()
321+
.with_doc_section(DOC_SECTION_APPROXIMATE)
322+
.with_description(
323+
"Returns the approximate number of distinct input values calculated using the HyperLogLog algorithm.",
324+
)
325+
.with_syntax_example("approx_distinct(expression)")
326+
.with_sql_example(r#"```sql
327+
> SELECT approx_distinct(column_name) FROM table_name;
328+
+-----------------------------------+
329+
| approx_distinct(column_name) |
330+
+-----------------------------------+
331+
| 42 |
332+
+-----------------------------------+
333+
```"#,
334+
)
335+
.with_argument("expression", "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
336+
.build()
337+
.unwrap()
338+
})
306339
}

datafusion/functions-aggregate/src/approx_median.rs

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,19 @@
1919
2020
use std::any::Any;
2121
use std::fmt::Debug;
22+
use std::sync::OnceLock;
2223

2324
use arrow::{datatypes::DataType, datatypes::Field};
2425
use arrow_schema::DataType::{Float64, UInt64};
2526

2627
use datafusion_common::{not_impl_err, plan_err, Result};
28+
use datafusion_expr::aggregate_doc_sections::DOC_SECTION_APPROXIMATE;
2729
use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
2830
use datafusion_expr::type_coercion::aggregates::NUMERICS;
2931
use datafusion_expr::utils::format_state_name;
30-
use datafusion_expr::{Accumulator, AggregateUDFImpl, Signature, Volatility};
32+
use datafusion_expr::{
33+
Accumulator, AggregateUDFImpl, Documentation, Signature, Volatility,
34+
};
3135

3236
use crate::approx_percentile_cont::ApproxPercentileAccumulator;
3337

@@ -116,4 +120,33 @@ impl AggregateUDFImpl for ApproxMedian {
116120
acc_args.exprs[0].data_type(acc_args.schema)?,
117121
)))
118122
}
123+
124+
fn documentation(&self) -> Option<&Documentation> {
125+
Some(get_approx_median_doc())
126+
}
127+
}
128+
129+
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
130+
131+
fn get_approx_median_doc() -> &'static Documentation {
132+
DOCUMENTATION.get_or_init(|| {
133+
Documentation::builder()
134+
.with_doc_section(DOC_SECTION_APPROXIMATE)
135+
.with_description(
136+
"Returns the approximate median (50th percentile) of input values. It is an alias of `approx_percentile_cont(x, 0.5)`.",
137+
)
138+
.with_syntax_example("approx_median(expression)")
139+
.with_sql_example(r#"```sql
140+
> SELECT approx_median(column_name) FROM table_name;
141+
+-----------------------------------+
142+
| approx_median(column_name) |
143+
+-----------------------------------+
144+
| 23.5 |
145+
+-----------------------------------+
146+
```"#,
147+
)
148+
.with_argument("expression", "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
149+
.build()
150+
.unwrap()
151+
})
119152
}

datafusion/functions-aggregate/src/approx_percentile_cont.rs

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
use std::any::Any;
1919
use std::fmt::{Debug, Formatter};
20-
use std::sync::Arc;
20+
use std::sync::{Arc, OnceLock};
2121

2222
use arrow::array::{Array, RecordBatch};
2323
use arrow::compute::{filter, is_not_null};
@@ -34,12 +34,13 @@ use datafusion_common::{
3434
downcast_value, internal_err, not_impl_datafusion_err, not_impl_err, plan_err,
3535
DataFusionError, Result, ScalarValue,
3636
};
37+
use datafusion_expr::aggregate_doc_sections::DOC_SECTION_APPROXIMATE;
3738
use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
3839
use datafusion_expr::type_coercion::aggregates::{INTEGERS, NUMERICS};
3940
use datafusion_expr::utils::format_state_name;
4041
use datafusion_expr::{
41-
Accumulator, AggregateUDFImpl, ColumnarValue, Expr, Signature, TypeSignature,
42-
Volatility,
42+
Accumulator, AggregateUDFImpl, ColumnarValue, Documentation, Expr, Signature,
43+
TypeSignature, Volatility,
4344
};
4445
use datafusion_functions_aggregate_common::tdigest::{
4546
TDigest, TryIntoF64, DEFAULT_MAX_SIZE,
@@ -268,6 +269,36 @@ impl AggregateUDFImpl for ApproxPercentileCont {
268269
}
269270
Ok(arg_types[0].clone())
270271
}
272+
273+
fn documentation(&self) -> Option<&Documentation> {
274+
Some(get_approx_percentile_cont_doc())
275+
}
276+
}
277+
278+
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
279+
280+
fn get_approx_percentile_cont_doc() -> &'static Documentation {
281+
DOCUMENTATION.get_or_init(|| {
282+
Documentation::builder()
283+
.with_doc_section(DOC_SECTION_APPROXIMATE)
284+
.with_description(
285+
"Returns the approximate percentile of input values using the t-digest algorithm.",
286+
)
287+
.with_syntax_example("approx_percentile_cont(expression, percentile, centroids)")
288+
.with_sql_example(r#"```sql
289+
> SELECT approx_percentile_cont(column_name, 0.75, 100) FROM table_name;
290+
+-------------------------------------------------+
291+
| approx_percentile_cont(column_name, 0.75, 100) |
292+
+-------------------------------------------------+
293+
| 65.0 |
294+
+-------------------------------------------------+
295+
```"#)
296+
.with_argument("expression", "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
297+
.with_argument("percentile", "Percentile to compute. Must be a float value between 0 and 1 (inclusive).")
298+
.with_argument("centroids", "Number of centroids to use in the t-digest algorithm. _Default is 100_. A higher number results in more accurate approximation but requires more memory.")
299+
.build()
300+
.unwrap()
301+
})
271302
}
272303

273304
#[derive(Debug)]

datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
use std::any::Any;
1919
use std::fmt::{Debug, Formatter};
20-
use std::sync::Arc;
20+
use std::sync::{Arc, OnceLock};
2121

2222
use arrow::{
2323
array::ArrayRef,
@@ -26,10 +26,13 @@ use arrow::{
2626

2727
use datafusion_common::ScalarValue;
2828
use datafusion_common::{not_impl_err, plan_err, Result};
29+
use datafusion_expr::aggregate_doc_sections::DOC_SECTION_APPROXIMATE;
2930
use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
3031
use datafusion_expr::type_coercion::aggregates::NUMERICS;
3132
use datafusion_expr::Volatility::Immutable;
32-
use datafusion_expr::{Accumulator, AggregateUDFImpl, Signature, TypeSignature};
33+
use datafusion_expr::{
34+
Accumulator, AggregateUDFImpl, Documentation, Signature, TypeSignature,
35+
};
3336
use datafusion_functions_aggregate_common::tdigest::{
3437
Centroid, TDigest, DEFAULT_MAX_SIZE,
3538
};
@@ -151,6 +154,37 @@ impl AggregateUDFImpl for ApproxPercentileContWithWeight {
151154
fn state_fields(&self, args: StateFieldsArgs) -> Result<Vec<Field>> {
152155
self.approx_percentile_cont.state_fields(args)
153156
}
157+
158+
fn documentation(&self) -> Option<&Documentation> {
159+
Some(get_approx_percentile_cont_with_weight_doc())
160+
}
161+
}
162+
163+
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
164+
165+
fn get_approx_percentile_cont_with_weight_doc() -> &'static Documentation {
166+
DOCUMENTATION.get_or_init(|| {
167+
Documentation::builder()
168+
.with_doc_section(DOC_SECTION_APPROXIMATE)
169+
.with_description(
170+
"Returns the weighted approximate percentile of input values using the t-digest algorithm.",
171+
)
172+
.with_syntax_example("approx_percentile_cont_with_weight(expression, weight, percentile)")
173+
.with_sql_example(r#"```sql
174+
> SELECT approx_percentile_cont_with_weight(column_name, weight_column, 0.90) FROM table_name;
175+
+----------------------------------------------------------------------+
176+
| approx_percentile_cont_with_weight(column_name, weight_column, 0.90) |
177+
+----------------------------------------------------------------------+
178+
| 78.5 |
179+
+----------------------------------------------------------------------+
180+
```"#,
181+
)
182+
.with_argument("expression", "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
183+
.with_argument("weight", "Expression to use as weight. Can be a constant, column, or function, and any combination of arithmetic operators.")
184+
.with_argument("percentile", "Percentile to compute. Must be a float value between 0 and 1 (inclusive).")
185+
.build()
186+
.unwrap()
187+
})
154188
}
155189

156190
#[derive(Debug)]

datafusion/functions-aggregate/src/array_agg.rs

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,15 +25,16 @@ use datafusion_common::cast::as_list_array;
2525
use datafusion_common::utils::{array_into_list_array_nullable, get_row_at_idx};
2626
use datafusion_common::{exec_err, ScalarValue};
2727
use datafusion_common::{internal_err, Result};
28+
use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
2829
use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
2930
use datafusion_expr::utils::format_state_name;
30-
use datafusion_expr::AggregateUDFImpl;
3131
use datafusion_expr::{Accumulator, Signature, Volatility};
32+
use datafusion_expr::{AggregateUDFImpl, Documentation};
3233
use datafusion_functions_aggregate_common::merge_arrays::merge_ordered_arrays;
3334
use datafusion_functions_aggregate_common::utils::ordering_fields;
3435
use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr};
3536
use std::collections::{HashSet, VecDeque};
36-
use std::sync::Arc;
37+
use std::sync::{Arc, OnceLock};
3738

3839
make_udaf_expr_and_func!(
3940
ArrayAgg,
@@ -142,6 +143,35 @@ impl AggregateUDFImpl for ArrayAgg {
142143
fn reverse_expr(&self) -> datafusion_expr::ReversedUDAF {
143144
datafusion_expr::ReversedUDAF::Reversed(array_agg_udaf())
144145
}
146+
147+
fn documentation(&self) -> Option<&Documentation> {
148+
Some(get_array_agg_doc())
149+
}
150+
}
151+
152+
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
153+
154+
fn get_array_agg_doc() -> &'static Documentation {
155+
DOCUMENTATION.get_or_init(|| {
156+
Documentation::builder()
157+
.with_doc_section(DOC_SECTION_GENERAL)
158+
.with_description(
159+
"Returns an array created from the expression elements. If ordering is required, elements are inserted in the specified order.",
160+
)
161+
.with_syntax_example("array_agg(expression [ORDER BY expression])")
162+
.with_sql_example(r#"```sql
163+
> SELECT array_agg(column_name ORDER BY other_column) FROM table_name;
164+
+-----------------------------------------------+
165+
| array_agg(column_name ORDER BY other_column) |
166+
+-----------------------------------------------+
167+
| [element1, element2, element3] |
168+
+-----------------------------------------------+
169+
```"#,
170+
)
171+
.with_argument("expression", "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
172+
.build()
173+
.unwrap()
174+
})
145175
}
146176

147177
#[derive(Debug)]

datafusion/functions-aggregate/src/average.rs

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,14 @@ use arrow::datatypes::{
2828
Float64Type, UInt64Type,
2929
};
3030
use datafusion_common::{exec_err, not_impl_err, Result, ScalarValue};
31+
use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
3132
use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
3233
use datafusion_expr::type_coercion::aggregates::{avg_return_type, coerce_avg_type};
3334
use datafusion_expr::utils::format_state_name;
3435
use datafusion_expr::Volatility::Immutable;
3536
use datafusion_expr::{
36-
Accumulator, AggregateUDFImpl, EmitTo, GroupsAccumulator, ReversedUDAF, Signature,
37+
Accumulator, AggregateUDFImpl, Documentation, EmitTo, GroupsAccumulator,
38+
ReversedUDAF, Signature,
3739
};
3840

3941
use datafusion_functions_aggregate_common::aggregate::groups_accumulator::accumulate::NullState;
@@ -45,7 +47,7 @@ use datafusion_functions_aggregate_common::utils::DecimalAverager;
4547
use log::debug;
4648
use std::any::Any;
4749
use std::fmt::Debug;
48-
use std::sync::Arc;
50+
use std::sync::{Arc, OnceLock};
4951

5052
make_udaf_expr_and_func!(
5153
Avg,
@@ -235,6 +237,36 @@ impl AggregateUDFImpl for Avg {
235237
}
236238
coerce_avg_type(self.name(), arg_types)
237239
}
240+
241+
fn documentation(&self) -> Option<&Documentation> {
242+
Some(get_avg_doc())
243+
}
244+
}
245+
246+
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
247+
248+
fn get_avg_doc() -> &'static Documentation {
249+
DOCUMENTATION.get_or_init(|| {
250+
Documentation::builder()
251+
.with_doc_section(DOC_SECTION_GENERAL)
252+
.with_description(
253+
"Returns the average of numeric values in the specified column.",
254+
)
255+
.with_syntax_example("avg(expression)")
256+
.with_sql_example(r#"```sql
257+
> SELECT avg(column_name) FROM table_name;
258+
+---------------------------+
259+
| avg(column_name) |
260+
+---------------------------+
261+
| 42.75 |
262+
+---------------------------+
263+
```"#,
264+
)
265+
.with_argument("expression", "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
266+
.with_argument("Aliases: ", "`mean`")
267+
.build()
268+
.unwrap()
269+
})
238270
}
239271

240272
/// An accumulator to compute the average

0 commit comments

Comments
 (0)