Skip to content

Commit b644cca

Browse files
committed
Port / Add Documentation for VarianceSample and VariancePopulation
1 parent 1340869 commit b644cca

File tree

3 files changed

+57
-45
lines changed

3 files changed

+57
-45
lines changed

datafusion/expr/src/udf_docs.rs

+13
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ impl DocumentationBuilder {
131131
self
132132
}
133133

134+
/// Adds an argument to the documentation.
134135
pub fn with_argument(
135136
mut self,
136137
arg_name: impl Into<String>,
@@ -142,6 +143,18 @@ impl DocumentationBuilder {
142143
self
143144
}
144145

146+
/// Add a standard "expression" argument to the documentation.
147+
///
148+
/// This is a common argument for functions that operate on an expression and is rendered like:
149+
///
150+
/// ```text
151+
/// expression:
152+
/// Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
153+
/// ```
154+
pub fn with_expression_argument(self) -> Self {
155+
// Delegates to `with_argument` with a fixed name and description.
self.with_argument("expression", "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
156+
}
157+
145158
pub fn with_related_udf(mut self, related_udf: impl Into<String>) -> Self {
146159
let mut related = self.related_udfs.unwrap_or_default();
147160
related.push(related_udf.into());

datafusion/functions-aggregate/src/variance.rs

+44-3
Original file line numberDiff line numberDiff line change
@@ -18,22 +18,24 @@
1818
//! [`VarianceSample`]: variance sample aggregations.
1919
//! [`VariancePopulation`]: variance population aggregations.
2020
21-
use std::{fmt::Debug, sync::Arc};
22-
2321
use arrow::{
2422
array::{Array, ArrayRef, BooleanArray, Float64Array, UInt64Array},
2523
buffer::NullBuffer,
2624
compute::kernels::cast,
2725
datatypes::{DataType, Field},
2826
};
27+
use std::sync::OnceLock;
28+
use std::{fmt::Debug, sync::Arc};
2929

3030
use datafusion_common::{
3131
downcast_value, not_impl_err, plan_err, DataFusionError, Result, ScalarValue,
3232
};
33+
use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
3334
use datafusion_expr::{
3435
function::{AccumulatorArgs, StateFieldsArgs},
3536
utils::format_state_name,
36-
Accumulator, AggregateUDFImpl, GroupsAccumulator, Signature, Volatility,
37+
Accumulator, AggregateUDFImpl, Documentation, GroupsAccumulator, Signature,
38+
Volatility,
3739
};
3840
use datafusion_functions_aggregate_common::{
3941
aggregate::groups_accumulator::accumulate::accumulate, stats::StatsType,
@@ -135,6 +137,26 @@ impl AggregateUDFImpl for VarianceSample {
135137
) -> Result<Box<dyn GroupsAccumulator>> {
136138
Ok(Box::new(VarianceGroupsAccumulator::new(StatsType::Sample)))
137139
}
140+
141+
/// Returns the documentation for this aggregate; always `Some`, backed by
/// the value produced by `get_variance_sample_doc`.
fn documentation(&self) -> Option<&Documentation> {
142+
Some(get_variance_sample_doc())
143+
}
144+
}
145+
146+
/// Storage for the sample-variance documentation; starts empty and is filled
/// by `get_variance_sample_doc` via `OnceLock::get_or_init`.
static VARIANCE_SAMPLE_DOC: OnceLock<Documentation> = OnceLock::new();
147+
148+
/// Returns the `'static` documentation for the sample-variance aggregate,
/// building it through `Documentation::builder()` the first time it is requested.
fn get_variance_sample_doc() -> &'static Documentation {
149+
VARIANCE_SAMPLE_DOC.get_or_init(|| {
150+
Documentation::builder()
151+
.with_doc_section(DOC_SECTION_GENERAL)
152+
.with_description(
153+
"Returns the statistical sample variance of a set of numbers.",
154+
)
155+
.with_syntax_example("var(expression)")
156+
.with_expression_argument()
157+
.build()
158+
// NOTE(review): `unwrap` panics if `build()` does not succeed; presumably
// the section/description/syntax set above are sufficient — confirm.
.unwrap()
159+
})
138160
}
139161

140162
pub struct VariancePopulation {
@@ -222,6 +244,25 @@ impl AggregateUDFImpl for VariancePopulation {
222244
StatsType::Population,
223245
)))
224246
}
247+
/// Returns the documentation for this aggregate; always `Some`, backed by
/// the value produced by `get_variance_population_doc`.
fn documentation(&self) -> Option<&Documentation> {
248+
Some(get_variance_population_doc())
249+
}
250+
}
251+
252+
/// Storage for the population-variance documentation; starts empty and is
/// filled by `get_variance_population_doc` via `OnceLock::get_or_init`.
static VARIANCE_POPULATION_DOC: OnceLock<Documentation> = OnceLock::new();
253+
254+
/// Returns the `'static` documentation for the population-variance aggregate,
/// building it through `Documentation::builder()` the first time it is requested.
fn get_variance_population_doc() -> &'static Documentation {
255+
VARIANCE_POPULATION_DOC.get_or_init(|| {
256+
Documentation::builder()
257+
.with_doc_section(DOC_SECTION_GENERAL)
258+
.with_description(
259+
"Returns the statistical population variance of a set of numbers.",
260+
)
261+
.with_syntax_example("var_pop(expression)")
262+
.with_expression_argument()
263+
.build()
264+
// NOTE(review): `unwrap` panics if `build()` does not succeed; presumably
// the section/description/syntax set above are sufficient — confirm.
.unwrap()
265+
})
225266
}
226267

227268
/// An accumulator to compute variance

docs/source/user-guide/sql/aggregate_functions.md

-42
Original file line numberDiff line numberDiff line change
@@ -240,9 +240,6 @@ last_value(expression [ORDER BY expression])
240240
- [stddev](#stddev)
241241
- [stddev_pop](#stddev_pop)
242242
- [stddev_samp](#stddev_samp)
243-
- [var](#var)
244-
- [var_pop](#var_pop)
245-
- [var_samp](#var_samp)
246243
- [regr_avgx](#regr_avgx)
247244
- [regr_avgy](#regr_avgy)
248245
- [regr_count](#regr_count)
@@ -349,45 +346,6 @@ stddev_samp(expression)
349346

350347
#### Arguments
351348

352-
- **expression**: Expression to operate on.
353-
Can be a constant, column, or function, and any combination of arithmetic operators.
354-
355-
### `var`
356-
357-
Returns the statistical variance of a set of numbers.
358-
359-
```
360-
var(expression)
361-
```
362-
363-
#### Arguments
364-
365-
- **expression**: Expression to operate on.
366-
Can be a constant, column, or function, and any combination of arithmetic operators.
367-
368-
### `var_pop`
369-
370-
Returns the statistical population variance of a set of numbers.
371-
372-
```
373-
var_pop(expression)
374-
```
375-
376-
#### Arguments
377-
378-
- **expression**: Expression to operate on.
379-
Can be a constant, column, or function, and any combination of arithmetic operators.
380-
381-
### `var_samp`
382-
383-
Returns the statistical sample variance of a set of numbers.
384-
385-
```
386-
var_samp(expression)
387-
```
388-
389-
#### Arguments
390-
391349
- **expression**: Expression to operate on.
392350
Can be a constant, column, or function, and any combination of arithmetic operators.
393351

0 commit comments

Comments
 (0)