From ff2b1c9dd5103d62729568cc46edd3bfe85d83ed Mon Sep 17 00:00:00 2001 From: Gleb Levitski <36483986+glevv@users.noreply.github.com> Date: Sat, 30 Nov 2024 11:06:31 +0200 Subject: [PATCH 1/2] added gastwirth location estimator --- pyproject.toml | 2 +- src/obscure_stats/association/association.py | 2 +- .../central_tendency/__init__.py | 2 + .../central_tendency/central_tendency.py | 50 ++++++++++++++++--- src/obscure_stats/skewness/skewness.py | 8 +-- src/obscure_stats/variation/variation.py | 2 +- tests/test_central_tendency.py | 2 + 7 files changed, 53 insertions(+), 15 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4e326b6..7abf0fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ scipy = "^1.9.1" mypy = "^1.6.1" pytest = "^8.0.0" pytest-cov = "^5.0.0" -ruff = "^0.7.0" +ruff = "^0.8.0" hypothesis = "^6.103.1" hypothesis-pytest = "^0.19.0" diff --git a/src/obscure_stats/association/association.py b/src/obscure_stats/association/association.py index 40621ef..7226018 100644 --- a/src/obscure_stats/association/association.py +++ b/src/obscure_stats/association/association.py @@ -468,7 +468,7 @@ def winsorized_correlation(x: np.ndarray, y: np.ndarray, k: float = 0.1) -> floa Input array. y : array_like Input array. - k : float + k : float, default = 0.1 The percentages of values to winsorize on each side of the arrays. 
Returns diff --git a/src/obscure_stats/central_tendency/__init__.py b/src/obscure_stats/central_tendency/__init__.py index 574b06e..3a477be 100644 --- a/src/obscure_stats/central_tendency/__init__.py +++ b/src/obscure_stats/central_tendency/__init__.py @@ -2,6 +2,7 @@ from .central_tendency import ( contraharmonic_mean, + gastwirth_location, grenanders_m, half_sample_mode, hodges_lehmann_sen_location, @@ -15,6 +16,7 @@ __all__ = [ "contraharmonic_mean", + "gastwirth_location", "grenanders_m", "half_sample_mode", "hodges_lehmann_sen_location", diff --git a/src/obscure_stats/central_tendency/central_tendency.py b/src/obscure_stats/central_tendency/central_tendency.py index bcc80d6..e947e7c 100644 --- a/src/obscure_stats/central_tendency/central_tendency.py +++ b/src/obscure_stats/central_tendency/central_tendency.py @@ -182,7 +182,7 @@ def standard_trimmed_harrell_davis_quantile(x: np.ndarray, q: float = 0.5) -> fl ---------- x : array_like Input array. - q : float + q : float, default = 0.5 Quantile value in range (0, 1). Returns @@ -286,7 +286,7 @@ def tau_location(x: np.ndarray, c: float = 4.5) -> float: ---------- x : array_like Input array. - c : float + c : float, default = 4.5 Constant that filter outliers. Returns @@ -310,7 +310,7 @@ def tau_location(x: np.ndarray, c: float = 4.5) -> float: return np.nansum(x * w) / np.nansum(w) -def grenanders_m(x: np.ndarray, p: float = 1.5, k: int = 3) -> float: +def grenanders_m(x: np.ndarray, p: float = 1.001, k: int = 2) -> float: """Calculate Grenander's Mode. This measure is a direct nonparametric estimation of the mode. @@ -322,10 +322,10 @@ def grenanders_m(x: np.ndarray, p: float = 1.5, k: int = 3) -> float: ---------- x : array_like Input array. - p : float + p : float, default = 1.001 Smoothing constant. - k : int - The number of samples to exclude from the calculation. + k : int, default = 2 + The number of samples to filter. 
Returns ------- @@ -339,6 +339,7 @@ def grenanders_m(x: np.ndarray, p: float = 1.5, k: int = 3) -> float: Annals of Mathematical Statistics, 36, 131-138. """ x_sort = np.sort(x) + x_sort = x_sort.astype("float") x_sort = x_sort[np.isfinite(x_sort)] if p <= 1: @@ -351,8 +352,41 @@ def grenanders_m(x: np.ndarray, p: float = 1.5, k: int = 3) -> float: if len(x_sort) <= k: return np.nan + # pre calculate diffs + diff = x_sort[k:] - x_sort[:-k] + # if all diffs are zero (all values are equal) - return the value + if diff.sum() == 0.0: + return x_sort[0] + # to avoid division by zero + diff[diff == 0.0] = np.nan + return ( 0.5 - * np.sum((x_sort[k:] + x_sort[:-k]) / np.power(x_sort[k:] - x_sort[:-k], p)) - / np.sum(np.power(x_sort[k:] - x_sort[:-k], -p)) + * np.nansum((x_sort[k:] + x_sort[:-k]) / np.power(diff, p)) + / np.nansum(np.power(diff, -p)) ) + + +def gastwirth_location(x: np.ndarray) -> float: + """Calculate Gastwirth's location estimator. + + This measure is more robust than average. + + Parameters + ---------- + x : array_like + Input array. + + Returns + ------- + gle : float + The value of the Gastwirth's location. + + References + ---------- + Gastwirth, J. L. (1966). + On Robust Procedures. + J. Amer. Statist. Assn., Vol. 61, pp. 929-948. + """ + p33, p50, p66 = np.nanquantile(x, [1 / 3, 0.5, 2 / 3]) + return 0.3 * p33 + 0.4 * p50 + 0.3 * p66 diff --git a/src/obscure_stats/skewness/skewness.py b/src/obscure_stats/skewness/skewness.py index 8244fb5..cfe6671 100644 --- a/src/obscure_stats/skewness/skewness.py +++ b/src/obscure_stats/skewness/skewness.py @@ -412,8 +412,8 @@ def left_quantile_weight(x: np.ndarray, q: float = 0.25) -> float: ---------- x : array_like Input array. - q : float - Quantile to use for the anchor. + q : float, default = 0.25 + Quantile to use for the calculation, (0.0, 0.5). Returns ------- @@ -448,8 +448,8 @@ def right_quantile_weight(x: np.ndarray, q: float = 0.75) -> float: ---------- x : array_like Input array. 
- q : float - Quantile to use for the anchor. + q : float, default = 0.75 + Quantile to use for the calculation, (0.5, 1.0). Returns ------- diff --git a/src/obscure_stats/variation/variation.py b/src/obscure_stats/variation/variation.py index 7a4c45d..b31ca9c 100644 --- a/src/obscure_stats/variation/variation.py +++ b/src/obscure_stats/variation/variation.py @@ -209,7 +209,7 @@ def renyi_entropy(x: np.ndarray, alpha: float = 2) -> float: ---------- x : array_like Input array. - alpha : float + alpha : float, default = 2 Order of the Rényi entropy Returns diff --git a/tests/test_central_tendency.py b/tests/test_central_tendency.py index d53723b..4d70e21 100644 --- a/tests/test_central_tendency.py +++ b/tests/test_central_tendency.py @@ -13,6 +13,7 @@ from obscure_stats.central_tendency import ( contraharmonic_mean, + gastwirth_location, grenanders_m, half_sample_mode, hodges_lehmann_sen_location, @@ -26,6 +27,7 @@ all_functions = [ contraharmonic_mean, + gastwirth_location, grenanders_m, half_sample_mode, hodges_lehmann_sen_location, From 3f6cf44ce8ebcb8adff7951c3f1b3daf2c5f3d61 Mon Sep 17 00:00:00 2001 From: Gleb Levitski <36483986+glevv@users.noreply.github.com> Date: Sat, 30 Nov 2024 11:08:49 +0200 Subject: [PATCH 2/2] fix readme --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 0f6c548..aed9743 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ - Collection of measures of central tendency - `obscure_stats/central_tendency`: * Contraharmonic Mean; + * Gastwirth's Location; * Grenander's Mode; * Half-Sample Mode; * Hodges-Lehmann-Sen Location;