Add numeric_only=True (#396)

raphaelvallat · Jan 13, 2024 · 267ef5e · 267ef5e
1 parent fdb3c60
commit 267ef5e
Show file tree

Hide file tree

Showing 2 changed files with 14 additions and 10 deletions.
diff --git a/pingouin/pairwise.py b/pingouin/pairwise.py
@@ -1049,7 +1049,7 @@ def pairwise_gameshowell(data=None, dv=None, between=None, effsize="hedges"):
     labels = np.array(list(grp.groups.keys()))
     n = grp.count().to_numpy()
     gmeans = grp.mean(numeric_only=True).to_numpy()
-    gvars = grp.var().to_numpy()  # numeric_only=True added in pandas 1.5, set to False in 2.0
+    gvars = grp.var(numeric_only=True).to_numpy()  # numeric_only=True added in pandas 1.5
 
     # Pairwise combinations
     g1, g2 = np.array(list(combinations(np.arange(ng), 2))).T

diff --git a/pingouin/parametric.py b/pingouin/parametric.py
@@ -549,7 +549,7 @@ def rm_anova(
     rm = list(data[within].unique())
     n_rm = len(rm)
     n_obs = int(grp_with.count().max())
-    grandmean = data[dv].mean()
+    grandmean = data[dv].mean(numeric_only=True)
 
     # Calculate sums of squares
     ss_with = ((grp_with.mean(numeric_only=True) - grandmean) ** 2 * grp_with.count()).sum()
@@ -583,7 +583,7 @@ def rm_anova(
     # Compute sphericity using Mauchly test, on the wide-format dataframe
     # Sphericity assumption only applies if there are more than 2 levels
     if correction == "auto" or (correction is True and n_rm >= 3):
-        spher, W_spher, chi_sq_spher, ddof_spher, p_spher = sphericity(data_piv, alpha=0.05)
+        spher, W_spher, _, _, p_spher = sphericity(data_piv, alpha=0.05)
         if correction == "auto":
             correction = True if not spher else False
     else:
@@ -697,7 +697,7 @@ def rm_anova2(data=None, dv=None, within=None, subject=None, effsize="ng2"):
     n_a = data[a].nunique()
     n_b = data[b].nunique()
     n_s = data[subject].nunique()
-    mu = data[dv].mean()
+    mu = data[dv].mean(numeric_only=True)
 
     # Groupby means
     # I think that observed=True is actually not needed here since we have already used
@@ -991,7 +991,9 @@ def anova(data=None, dv=None, between=None, ss_type=2, detailed=False, effsize="
     # Calculate sums of squares
     grp = data.groupby(between, observed=True, group_keys=False)[dv]
     # Between effect
-    ssbetween = ((grp.mean(numeric_only=True) - data[dv].mean()) ** 2 * grp.count()).sum()
+    ssbetween = (
+        (grp.mean(numeric_only=True) - data[dv].mean(numeric_only=True)) ** 2 * grp.count()
+    ).sum()
     # Within effect (= error between)
     #  = (grp.var(ddof=0) * grp.count()).sum()
     sserror = grp.transform(lambda x: (x - x.mean()) ** 2).sum()
@@ -1072,7 +1074,7 @@ def anova2(data=None, dv=None, between=None, ss_type=2, effsize="np2"):
         # Sums of squares
         ss_fac1 = aov_fac1.at[0, "SS"]
         ss_fac2 = aov_fac2.at[0, "SS"]
-        ss_tot = ((data[dv] - data[dv].mean()) ** 2).sum()
+        ss_tot = ((data[dv] - data[dv].mean(numeric_only=True)) ** 2).sum()
         ss_resid = np.sum(grp_both.apply(lambda x: (x - x.mean()) ** 2))
         ss_inter = ss_tot - (ss_resid + ss_fac1 + ss_fac2)
         # Degrees of freedom
@@ -1340,12 +1342,14 @@ def welch_anova(data=None, dv=None, between=None):
 
     # Compute weights and adjusted means
     grp = data.groupby(between, observed=True, group_keys=False)[dv]
-    weights = grp.count() / grp.var()
-    adj_grandmean = (weights * grp.mean()).sum() / weights.sum()
+    weights = grp.count() / grp.var(numeric_only=True)
+    adj_grandmean = (weights * grp.mean(numeric_only=True)).sum() / weights.sum()
 
     # Sums of squares (regular and adjusted)
     ss_res = grp.apply(lambda x: (x - x.mean()) ** 2).sum()
-    ss_bet = ((grp.mean(numeric_only=True) - data[dv].mean()) ** 2 * grp.count()).sum()
+    ss_bet = (
+        (grp.mean(numeric_only=True) - data[dv].mean(numeric_only=True)) ** 2 * grp.count()
+    ).sum()
     ss_betadj = np.sum(weights * np.square(grp.mean(numeric_only=True) - adj_grandmean))
     ms_betadj = ss_betadj / ddof1
 
@@ -1504,7 +1508,7 @@ def mixed_anova(
         )
 
     # SUMS OF SQUARES
-    grandmean = data[dv].mean()
+    grandmean = data[dv].mean(numeric_only=True)
     ss_total = ((data[dv] - grandmean) ** 2).sum()
     # Extract main effects of within and between factors
     aov_with = rm_anova(