Add a `ddof` argument to `tstrait.normalise_phenotypes`.

The new argument is forwarded to the per-trait `std()` call that scales the
phenotypes, so the divisor of the variance estimate becomes N - ddof. The
default is 1, which is also the pandas default for `std()`, so callers that
do not pass `ddof` keep the previous result.

`test_ddof` compares the normalised phenotypes against a NumPy computation
that uses the same `ddof`, for ddof = 0 and ddof = 1.

diff --git a/tests/test_simulate_phenotype.py b/tests/test_simulate_phenotype.py
index 7d6deb7..c08fd91 100644
--- a/tests/test_simulate_phenotype.py
+++ b/tests/test_simulate_phenotype.py
@@ -312,6 +312,27 @@ def test_negative_var(self, sample_ts, var):
         with pytest.raises(ValueError, match="Variance must be greater than 0."):
             tstrait.normalise_phenotypes(phenotype_df, var=var)
 
+    @pytest.mark.parametrize("ddof", [0, 1])
+    def test_ddof(self, sample_ts, ddof):
+        model = tstrait.trait_model(distribution="normal", mean=2, var=6)
+        sim_result = tstrait.sim_phenotype(
+            ts=sample_ts, num_causal=100, model=model, h2=0.3, random_seed=1
+        )
+        phenotype_df = sim_result.phenotype
+        normalised_df = tstrait.normalise_phenotypes(
+            phenotype_df, mean=0, var=1, ddof=ddof
+        )
+        normalised_phenotype_array = normalised_df["phenotype"].values
+
+        phenotype_array = phenotype_df["phenotype"].values
+        phenotype_array = (phenotype_array - np.mean(phenotype_array)) / np.std(
+            phenotype_array, ddof=ddof
+        )
+
+        np.testing.assert_array_almost_equal(
+            normalised_phenotype_array, phenotype_array
+        )
+
     def test_pleiotropy(self, sample_ts):
         mean = 0
         var = 1
diff --git a/tstrait/simulate_phenotype.py b/tstrait/simulate_phenotype.py
index 1f56918..877a704 100644
--- a/tstrait/simulate_phenotype.py
+++ b/tstrait/simulate_phenotype.py
@@ -138,7 +138,7 @@ def sim_phenotype(
     return result
 
 
-def normalise_phenotypes(phenotype_df, mean=0, var=1):
+def normalise_phenotypes(phenotype_df, mean=0, var=1, ddof=1):
     """Normalise phenotype dataframe.
 
     Parameters
@@ -149,6 +149,9 @@ def normalise_phenotypes(phenotype_df, mean=0, var=1):
         Mean of the resulting phenotype.
     var : float, default 1
         Variance of the resulting phenotype.
+    ddof : int, default 1
+        Delta degrees of freedom. The divisor used in computing the variance
+        is N - ddof, where N represents the number of elements.
 
     Returns
     -------
@@ -184,7 +187,9 @@ def normalise_phenotypes(phenotype_df, mean=0, var=1):
         phenotype_df, ["individual_id", "trait_id", "phenotype"], "phenotype_df"
     )
     grouped = phenotype_df.groupby("trait_id")[["phenotype"]]
-    transformed_phenotype = grouped.transform(lambda x: (x - x.mean()) / x.std())
+    transformed_phenotype = grouped.transform(
+        lambda x: (x - x.mean()) / x.std(ddof=ddof)
+    )
     transformed_phenotype = transformed_phenotype * np.sqrt(var) + mean
     phenotype_df.loc[:, "phenotype"] = transformed_phenotype
 