Skip to content

Commit ed70c67

Browse files
Add a correction keyword to the std methods (#183)
* add correction to std * Update spec/API_specification/dataframe_api/column_object.py * Clarify the `correction` parameter and its allowed values in more detail --------- Co-authored-by: Ralf Gommers <[email protected]>
1 parent 16dea0b commit ed70c67

File tree

3 files changed

+52
-7
lines changed

3 files changed

+52
-7
lines changed

spec/API_specification/dataframe_api/column_object.py

+30-3
Original file line numberDiff line numberDiff line change
@@ -482,20 +482,47 @@ def mean(self, *, skip_nulls: bool = True) -> Scalar:
482482
dtypes.
483483
"""
484484

485-
def std(self, *, skip_nulls: bool = True) -> Scalar:
485+
def std(self, *, correction: int | float = 1, skip_nulls: bool = True) -> Scalar:
486486
"""
487487
Reduction returns a scalar. Must be supported for numerical and
488488
datetime data types. Returns a float for numerical data types, and
489489
datetime (with the appropriate timedelta format string) for datetime
490490
dtypes.
491-
"""
492491
493-
def var(self, *, skip_nulls: bool = True) -> Scalar:
492+
Parameters
493+
----------
494+
correction
495+
Degrees of freedom adjustment. Setting this parameter to a value other
496+
than ``0`` has the effect of adjusting the divisor during the
497+
calculation of the standard deviation according to ``N-correction``,
498+
where ``N`` corresponds to the total number of elements over which
499+
the standard deviation is computed. When computing the standard
500+
deviation of a population, setting this parameter to ``0`` is the
501+
standard choice (i.e., the provided column contains data
502+
constituting an entire population). When computing the corrected
503+
sample standard deviation, setting this parameter to ``1`` is the
504+
standard choice (i.e., the provided column contains data sampled
505+
from a larger population; this is commonly referred to as Bessel's
506+
correction). Fractional (float) values are allowed. Default: ``1``.
507+
skip_nulls
508+
Whether to skip null values.
509+
"""
510+
511+
def var(self, *, correction: int | float = 1, skip_nulls: bool = True) -> Scalar:
494512
"""
495513
Reduction returns a scalar. Must be supported for numerical and
496514
datetime data types. Returns a float for numerical data types, and
497515
datetime (with the appropriate timedelta format string) for datetime
498516
dtypes.
517+
518+
Parameters
519+
----------
520+
correction
521+
Correction to apply to the result. For example, ``0`` for population
522+
variance and ``1`` for sample variance.
523+
See `Column.std` for a more detailed description.
524+
skip_nulls
525+
Whether to skip null values.
499526
"""
500527

501528
def is_null(self) -> Column:

spec/API_specification/dataframe_api/dataframe_object.py

+20-2
Original file line numberDiff line numberDiff line change
@@ -688,15 +688,33 @@ def mean(self, *, skip_nulls: bool = True) -> DataFrame:
688688
"""
689689
...
690690

691-
def std(self, *, skip_nulls: bool = True) -> DataFrame:
691+
def std(self, *, correction: int | float = 1, skip_nulls: bool = True) -> DataFrame:
692692
"""
693693
Reduction returns a 1-row DataFrame.
694+
695+
Parameters
696+
----------
697+
correction
698+
Correction to apply to the result. For example, ``0`` for population
699+
standard deviation and ``1`` for sample standard deviation.
700+
See `Column.std` for a more detailed description.
701+
skip_nulls
702+
Whether to skip null values.
694703
"""
695704
...
696705

697-
def var(self, *, skip_nulls: bool = True) -> DataFrame:
706+
def var(self, *, correction: int | float = 1, skip_nulls: bool = True) -> DataFrame:
698707
"""
699708
Reduction returns a 1-row DataFrame.
709+
710+
Parameters
711+
----------
712+
correction
713+
Correction to apply to the result. For example, ``0`` for population
714+
variance and ``1`` for sample variance.
715+
See `Column.std` for a more detailed description.
716+
skip_nulls
717+
Whether to skip null values.
700718
"""
701719
...
702720

spec/API_specification/dataframe_api/groupby_object.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,10 @@ def median(self, *, skip_nulls: bool = True) -> "DataFrame":
4141
def mean(self, *, skip_nulls: bool = True) -> "DataFrame":
4242
...
4343

44-
def std(self, *, skip_nulls: bool = True) -> "DataFrame":
44+
def std(self, *, correction: int | float = 1, skip_nulls: bool = True) -> "DataFrame":
4545
...
4646

47-
def var(self, *, skip_nulls: bool = True) -> "DataFrame":
47+
def var(self, *, correction: int | float = 1, skip_nulls: bool = True) -> "DataFrame":
4848
...
4949

5050
def size(self) -> "DataFrame":

0 commit comments

Comments
 (0)