Skip to content

Commit b3bf3af

Browse files
authored
Port / Add Documentation for VarianceSample and VariancePopulation (#12742)
1 parent 84c9409 commit b3bf3af

File tree

3 files changed

+125
-45
lines changed

3 files changed

+125
-45
lines changed

datafusion/functions-aggregate/src/variance.rs

+44-3
Original file line numberDiff line numberDiff line change
@@ -18,22 +18,24 @@
1818
//! [`VarianceSample`]: variance sample aggregations.
1919
//! [`VariancePopulation`]: variance population aggregations.
2020
21-
use std::{fmt::Debug, sync::Arc};
22-
2321
use arrow::{
2422
array::{Array, ArrayRef, BooleanArray, Float64Array, UInt64Array},
2523
buffer::NullBuffer,
2624
compute::kernels::cast,
2725
datatypes::{DataType, Field},
2826
};
27+
use std::sync::OnceLock;
28+
use std::{fmt::Debug, sync::Arc};
2929

3030
use datafusion_common::{
3131
downcast_value, not_impl_err, plan_err, DataFusionError, Result, ScalarValue,
3232
};
33+
use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
3334
use datafusion_expr::{
3435
function::{AccumulatorArgs, StateFieldsArgs},
3536
utils::format_state_name,
36-
Accumulator, AggregateUDFImpl, GroupsAccumulator, Signature, Volatility,
37+
Accumulator, AggregateUDFImpl, Documentation, GroupsAccumulator, Signature,
38+
Volatility,
3739
};
3840
use datafusion_functions_aggregate_common::{
3941
aggregate::groups_accumulator::accumulate::accumulate, stats::StatsType,
@@ -135,6 +137,26 @@ impl AggregateUDFImpl for VarianceSample {
135137
) -> Result<Box<dyn GroupsAccumulator>> {
136138
Ok(Box::new(VarianceGroupsAccumulator::new(StatsType::Sample)))
137139
}
140+
141+
/// Returns the user-facing documentation for this aggregate function.
///
/// Always yields `Some`, handing back the shared `Documentation` obtained
/// from `get_variance_sample_doc()`.
fn documentation(&self) -> Option<&Documentation> {
142+
Some(get_variance_sample_doc())
143+
}
144+
}
145+
146+
/// Cell holding the sample-variance `Documentation`; filled on first use by
/// `get_variance_sample_doc()`.
static VARIANCE_SAMPLE_DOC: OnceLock<Documentation> = OnceLock::new();
147+
148+
/// Returns the shared `Documentation` for the sample variance aggregate.
///
/// The value is built through `Documentation::builder()` inside
/// `OnceLock::get_or_init`, so the builder chain runs only when the cell is
/// still empty; later calls return the stored reference.
///
/// # Panics
///
/// Panics if `build()` does not succeed, because its result is unwrapped.
fn get_variance_sample_doc() -> &'static Documentation {
149+
VARIANCE_SAMPLE_DOC.get_or_init(|| {
150+
Documentation::builder()
151+
.with_doc_section(DOC_SECTION_GENERAL)
152+
.with_description(
153+
"Returns the statistical sample variance of a set of numbers.",
154+
)
155+
.with_syntax_example("var(expression)")
156+
.with_standard_argument("expression", "Numeric")
157+
.build()
158+
// All builder inputs above are fixed literals/constants; a failure here is treated as a bug.
.unwrap()
159+
})
138160
}
139161

140162
pub struct VariancePopulation {
@@ -222,6 +244,25 @@ impl AggregateUDFImpl for VariancePopulation {
222244
StatsType::Population,
223245
)))
224246
}
247+
/// Returns the user-facing documentation for this aggregate function.
///
/// Always yields `Some`, handing back the shared `Documentation` obtained
/// from `get_variance_population_doc()`.
fn documentation(&self) -> Option<&Documentation> {
248+
Some(get_variance_population_doc())
249+
}
250+
}
251+
252+
/// Cell holding the population-variance `Documentation`; filled on first use
/// by `get_variance_population_doc()`.
static VARIANCE_POPULATION_DOC: OnceLock<Documentation> = OnceLock::new();
253+
254+
/// Returns the shared `Documentation` for the population variance aggregate.
///
/// The value is built through `Documentation::builder()` inside
/// `OnceLock::get_or_init`, so the builder chain runs only when the cell is
/// still empty; later calls return the stored reference.
///
/// # Panics
///
/// Panics if `build()` does not succeed, because its result is unwrapped.
fn get_variance_population_doc() -> &'static Documentation {
255+
VARIANCE_POPULATION_DOC.get_or_init(|| {
256+
Documentation::builder()
257+
.with_doc_section(DOC_SECTION_GENERAL)
258+
.with_description(
259+
"Returns the statistical population variance of a set of numbers.",
260+
)
261+
.with_syntax_example("var_pop(expression)")
262+
.with_standard_argument("expression", "Numeric")
263+
.build()
264+
// All builder inputs above are fixed literals/constants; a failure here is treated as a bug.
.unwrap()
265+
})
225266
}
226267

227268
/// An accumulator to compute variance

docs/source/user-guide/sql/aggregate_functions.md

-42
Original file line numberDiff line numberDiff line change
@@ -240,9 +240,6 @@ last_value(expression [ORDER BY expression])
240240
- [stddev](#stddev)
241241
- [stddev_pop](#stddev_pop)
242242
- [stddev_samp](#stddev_samp)
243-
- [var](#var)
244-
- [var_pop](#var_pop)
245-
- [var_samp](#var_samp)
246243
- [regr_avgx](#regr_avgx)
247244
- [regr_avgy](#regr_avgy)
248245
- [regr_count](#regr_count)
@@ -349,45 +346,6 @@ stddev_samp(expression)
349346

350347
#### Arguments
351348

352-
- **expression**: Expression to operate on.
353-
Can be a constant, column, or function, and any combination of arithmetic operators.
354-
355-
### `var`
356-
357-
Returns the statistical variance of a set of numbers.
358-
359-
```
360-
var(expression)
361-
```
362-
363-
#### Arguments
364-
365-
- **expression**: Expression to operate on.
366-
Can be a constant, column, or function, and any combination of arithmetic operators.
367-
368-
### `var_pop`
369-
370-
Returns the statistical population variance of a set of numbers.
371-
372-
```
373-
var_pop(expression)
374-
```
375-
376-
#### Arguments
377-
378-
- **expression**: Expression to operate on.
379-
Can be a constant, column, or function, and any combination of arithmetic operators.
380-
381-
### `var_samp`
382-
383-
Returns the statistical sample variance of a set of numbers.
384-
385-
```
386-
var_samp(expression)
387-
```
388-
389-
#### Arguments
390-
391349
- **expression**: Expression to operate on.
392350
Can be a constant, column, or function, and any combination of arithmetic operators.
393351

docs/source/user-guide/sql/aggregate_functions_new.md

+81
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ Aggregate functions operate on a set of values to compute a single result.
3636
- [bit_and](#bit_and)
3737
- [bit_or](#bit_or)
3838
- [bit_xor](#bit_xor)
39+
- [var](#var)
40+
- [var_pop](#var_pop)
41+
- [var_population](#var_population)
42+
- [var_samp](#var_samp)
43+
- [var_sample](#var_sample)
3944

4045
### `bit_and`
4146

@@ -72,3 +77,79 @@ bit_xor(expression)
7277
#### Arguments
7378

7479
- **expression**: Integer expression to operate on. Can be a constant, column, or function, and any combination of operators.
80+
81+
### `var`
82+
83+
Returns the statistical sample variance of a set of numbers.
84+
85+
```
86+
var(expression)
87+
```
88+
89+
#### Arguments
90+
91+
- **expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators.
92+
93+
#### Aliases

- var_sample
94+
95+
- var_samp
96+
97+
### `var_pop`
98+
99+
Returns the statistical population variance of a set of numbers.
100+
101+
```
102+
var_pop(expression)
103+
```
104+
105+
#### Arguments
106+
107+
- **expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators.
108+
109+
#### Aliases

- var_population
110+
111+
### `var_pop`
112+
113+
Returns the statistical population variance of a set of numbers.
114+
115+
```
116+
var_pop(expression)
117+
```
118+
119+
#### Arguments
120+
121+
- **expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators.
122+
123+
#### Aliases

- var_population
124+
125+
### `var`
126+
127+
Returns the statistical sample variance of a set of numbers.
128+
129+
```
130+
var(expression)
131+
```
132+
133+
#### Arguments
134+
135+
- **expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators.
136+
137+
#### Aliases

- var_sample
138+
139+
- var_samp
140+
141+
### `var`
142+
143+
Returns the statistical sample variance of a set of numbers.
144+
145+
```
146+
var(expression)
147+
```
148+
149+
#### Arguments
150+
151+
- **expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators.
152+
153+
#### Aliases

- var_sample
154+
155+
- var_samp

0 commit comments

Comments
 (0)