From 44169f6ead40f082f3d1671ab5e14030b6b55fa7 Mon Sep 17 00:00:00 2001 From: daikitag <48062118+daikitag@users.noreply.github.com> Date: Fri, 9 Feb 2024 14:39:37 +0000 Subject: [PATCH] CODE: ddof Add delta degrees of freedom input in `normalise_phenotypes` function. --- tests/test_simulate_phenotype.py | 21 +++++++++++++++++++++ tstrait/simulate_phenotype.py | 9 +++++++-- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/tests/test_simulate_phenotype.py b/tests/test_simulate_phenotype.py index 7d6deb7..c08fd91 100644 --- a/tests/test_simulate_phenotype.py +++ b/tests/test_simulate_phenotype.py @@ -312,6 +312,27 @@ def test_negative_var(self, sample_ts, var): with pytest.raises(ValueError, match="Variance must be greater than 0."): tstrait.normalise_phenotypes(phenotype_df, var=var) + @pytest.mark.parametrize("ddof", [0, 1]) + def test_ddof(self, sample_ts, ddof): + model = tstrait.trait_model(distribution="normal", mean=2, var=6) + sim_result = tstrait.sim_phenotype( + ts=sample_ts, num_causal=100, model=model, h2=0.3, random_seed=1 + ) + phenotype_df = sim_result.phenotype + normalised_df = tstrait.normalise_phenotypes( + phenotype_df, mean=0, var=1, ddof=ddof + ) + normalised_phenotype_array = normalised_df["phenotype"].values + + phenotype_array = phenotype_df["phenotype"].values + phenotype_array = (phenotype_array - np.mean(phenotype_array)) / np.std( + phenotype_array, ddof=ddof + ) + + np.testing.assert_array_almost_equal( + normalised_phenotype_array, phenotype_array + ) + def test_pleiotropy(self, sample_ts): mean = 0 var = 1 diff --git a/tstrait/simulate_phenotype.py b/tstrait/simulate_phenotype.py index 1f56918..877a704 100644 --- a/tstrait/simulate_phenotype.py +++ b/tstrait/simulate_phenotype.py @@ -138,7 +138,7 @@ def sim_phenotype( return result -def normalise_phenotypes(phenotype_df, mean=0, var=1): +def normalise_phenotypes(phenotype_df, mean=0, var=1, ddof=1): """Normalise phenotype dataframe. 
Parameters @@ -149,6 +149,9 @@ def normalise_phenotypes(phenotype_df, mean=0, var=1): Mean of the resulting phenotype. var : float, default 1 Variance of the resulting phenotype. + ddof : int, default 1 + Delta degrees of freedom. The divisor used in computing the variance + is N - ddof, where N represents the number of elements. Returns ------- @@ -184,7 +187,9 @@ def normalise_phenotypes(phenotype_df, mean=0, var=1): phenotype_df, ["individual_id", "trait_id", "phenotype"], "phenotype_df" ) grouped = phenotype_df.groupby("trait_id")[["phenotype"]] - transformed_phenotype = grouped.transform(lambda x: (x - x.mean()) / x.std()) + transformed_phenotype = grouped.transform( + lambda x: (x - x.mean()) / x.std(ddof=ddof) + ) transformed_phenotype = transformed_phenotype * np.sqrt(var) + mean phenotype_df.loc[:, "phenotype"] = transformed_phenotype