|
15 | 15 | // specific language governing permissions and limitations
|
16 | 16 | // under the License.
|
17 | 17 |
|
18 |
| -//! [`VarianceSample`]: covariance sample aggregations. |
| 18 | +//! [`VarianceSample`]: variance sample aggregations. |
| 19 | +//! [`VariancePopulation`]: variance population aggregations. |
19 | 20 |
|
20 | 21 | use std::fmt::Debug;
|
21 | 22 |
|
@@ -43,6 +44,14 @@ make_udaf_expr_and_func!(
|
43 | 44 | var_samp_udaf
|
44 | 45 | );
|
45 | 46 |
|
// Invokes `make_udaf_expr_and_func!` for `VariancePopulation`, passing the
// `var_pop` and `var_pop_udaf` identifiers, a single `expression` argument name,
// and the doc string below.
// NOTE(review): presumably this generates a `var_pop(expression)` expression
// builder and a `var_pop_udaf()` accessor — confirm against the macro
// definition, which is not visible in this section.
make_udaf_expr_and_func!(
    VariancePopulation,
    var_pop,
    expression,
    "Computes the population variance.",
    var_pop_udaf
);
| 54 | + |
46 | 55 | pub struct VarianceSample {
|
47 | 56 | signature: Signature,
|
48 | 57 | aliases: Vec<String>,
|
@@ -115,6 +124,80 @@ impl AggregateUDFImpl for VarianceSample {
|
115 | 124 | }
|
116 | 125 | }
|
117 | 126 |
|
| 127 | +pub struct VariancePopulation { |
| 128 | + signature: Signature, |
| 129 | + aliases: Vec<String>, |
| 130 | +} |
| 131 | + |
| 132 | +impl Debug for VariancePopulation { |
| 133 | + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { |
| 134 | + f.debug_struct("VariancePopulation") |
| 135 | + .field("name", &self.name()) |
| 136 | + .field("signature", &self.signature) |
| 137 | + .finish() |
| 138 | + } |
| 139 | +} |
| 140 | + |
| 141 | +impl Default for VariancePopulation { |
| 142 | + fn default() -> Self { |
| 143 | + Self::new() |
| 144 | + } |
| 145 | +} |
| 146 | + |
| 147 | +impl VariancePopulation { |
| 148 | + pub fn new() -> Self { |
| 149 | + Self { |
| 150 | + aliases: vec![String::from("var_population")], |
| 151 | + signature: Signature::numeric(1, Volatility::Immutable), |
| 152 | + } |
| 153 | + } |
| 154 | +} |
| 155 | + |
| 156 | +impl AggregateUDFImpl for VariancePopulation { |
| 157 | + fn as_any(&self) -> &dyn std::any::Any { |
| 158 | + self |
| 159 | + } |
| 160 | + |
| 161 | + fn name(&self) -> &str { |
| 162 | + "var_pop" |
| 163 | + } |
| 164 | + |
| 165 | + fn signature(&self) -> &Signature { |
| 166 | + &self.signature |
| 167 | + } |
| 168 | + |
| 169 | + fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> { |
| 170 | + if !arg_types[0].is_numeric() { |
| 171 | + return plan_err!("Variance requires numeric input types"); |
| 172 | + } |
| 173 | + |
| 174 | + Ok(DataType::Float64) |
| 175 | + } |
| 176 | + |
| 177 | + fn state_fields(&self, args: StateFieldsArgs) -> Result<Vec<Field>> { |
| 178 | + let name = args.name; |
| 179 | + Ok(vec![ |
| 180 | + Field::new(format_state_name(name, "count"), DataType::UInt64, true), |
| 181 | + Field::new(format_state_name(name, "mean"), DataType::Float64, true), |
| 182 | + Field::new(format_state_name(name, "m2"), DataType::Float64, true), |
| 183 | + ]) |
| 184 | + } |
| 185 | + |
| 186 | + fn accumulator(&self, acc_args: AccumulatorArgs) -> Result<Box<dyn Accumulator>> { |
| 187 | + if acc_args.is_distinct { |
| 188 | + return not_impl_err!("VAR_POP(DISTINCT) aggregations are not available"); |
| 189 | + } |
| 190 | + |
| 191 | + Ok(Box::new(VarianceAccumulator::try_new( |
| 192 | + StatsType::Population, |
| 193 | + )?)) |
| 194 | + } |
| 195 | + |
| 196 | + fn aliases(&self) -> &[String] { |
| 197 | + &self.aliases |
| 198 | + } |
| 199 | +} |
| 200 | + |
118 | 201 | /// An accumulator to compute variance
|
119 | 202 | /// The algorithm used is an online implementation and numerically stable. It is based on this paper:
|
120 | 203 | /// Welford, B. P. (1962). "Note on a method for calculating corrected sums of squares and products".
|
|
0 commit comments