Merge branch 'main' into strengejacke/issue697

easystats · Mar 19, 2024 · 3b72ffa · 3b72ffa
2 parents 8137ac3 + 01eff88
commit 3b72ffa
Show file tree

Hide file tree

Showing 8 changed files with 161 additions and 80 deletions.
diff --git a/R/check_model.R b/R/check_model.R
@@ -218,9 +218,9 @@ check_model.default <- function(x,
     if (minfo$is_bayesian) {
       suppressWarnings(.check_assumptions_stan(x, ...))
     } else if (minfo$is_linear) {
-      suppressWarnings(.check_assumptions_linear(x, minfo, residual_type, verbose, ...))
+      suppressWarnings(.check_assumptions_linear(x, minfo, check, residual_type, verbose, ...))
     } else {
-      suppressWarnings(.check_assumptions_glm(x, minfo, residual_type, verbose, ...))
+      suppressWarnings(.check_assumptions_glm(x, minfo, check, residual_type, verbose, ...))
     },
     error = function(e) {
       e
@@ -237,6 +237,15 @@ check_model.default <- function(x,
     )
   }
 
+  # did Q-Q plot work with simulated residuals?
+  if (verbose && is.null(assumptions_data$QQ) && residual_type == "simulated") {
+    insight::format_warning(paste0(
+      "Cannot simulate residuals for models of class `",
+      class(x)[1],
+      "`. Please try `check_model(..., residual_type = \"normal\")` instead."
+    ))
+  }
+
   # try to find sensible default for "type" argument
   suggest_dots <- (minfo$is_bernoulli || minfo$is_count || minfo$is_ordinal || minfo$is_categorical || minfo$is_multinomial) # nolint
   if (missing(type) && suggest_dots) {
@@ -412,26 +421,57 @@ check_model.DHARMa <- check_model.performance_simres
 
 # compile plots for checks of linear models  ------------------------
 
-.check_assumptions_linear <- function(model, model_info, residual_type = "normal", verbose = TRUE, ...) {
+.check_assumptions_linear <- function(model, model_info, check = "all", residual_type = "normal", verbose = TRUE, ...) {
   dat <- list()
 
-  dat$VIF <- .diag_vif(model, verbose = verbose)
-  dat$QQ <- switch(residual_type,
-    simulated = simulate_residuals(model, ...),
-    .diag_qq(model, model_info = model_info, verbose = verbose)
-  )
-  dat$REQQ <- .diag_reqq(model, level = 0.95, model_info = model_info, verbose = verbose)
-  dat$NORM <- .diag_norm(model, verbose = verbose)
-  dat$NCV <- .diag_ncv(model, verbose = verbose)
-  dat$HOMOGENEITY <- .diag_homogeneity(model, verbose = verbose)
-  dat$OUTLIERS <- .safe(check_outliers(model, method = "cook"))
-  if (is.null(dat$OUTLIERS)) {
-    threshold <- NULL
-  } else {
-    threshold <- attributes(dat$OUTLIERS)$threshold$cook
+  # multicollinearity --------------
+  if (any(c("all", "vif") %in% check)) {
+    dat$VIF <- .diag_vif(model, verbose = verbose)
+  }
+
+  # Q-Q plot (normality/uniformity of residuals) --------------
+  if (any(c("all", "qq") %in% check)) {
+    dat$QQ <- switch(residual_type,
+      simulated = .safe(simulate_residuals(model, ...)),
+      .diag_qq(model, model_info = model_info, verbose = verbose)
+    )
+  }
+
+  # Random Effects Q-Q plot (normality of BLUPs) --------------
+  if (any(c("all", "reqq") %in% check)) {
+    dat$REQQ <- .diag_reqq(model, level = 0.95, model_info = model_info, verbose = verbose)
+  }
+
+  # normal-curve plot (normality of residuals) --------------
+  if (any(c("all", "normality") %in% check)) {
+    dat$NORM <- .diag_norm(model, verbose = verbose)
+  }
+
+  # non-constant variance (heteroskedasticity, linearity) --------------
+  if (any(c("all", "ncv", "linearity") %in% check)) {
+    dat$NCV <- .diag_ncv(model, verbose = verbose)
+  }
+
+  # homogeneity of variance --------------
+  if (any(c("all", "homogeneity") %in% check)) {
+    dat$HOMOGENEITY <- .diag_homogeneity(model, verbose = verbose)
+  }
+
+  # outliers --------------
+  if (any(c("all", "outliers") %in% check)) {
+    dat$OUTLIERS <- .safe(check_outliers(model, method = "cook"))
+    if (is.null(dat$OUTLIERS)) {
+      threshold <- NULL
+    } else {
+      threshold <- attributes(dat$OUTLIERS)$threshold$cook
+    }
+    dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold)
+  }
+
+  # posterior predictive checks --------------
+  if (any(c("all", "pp_check") %in% check)) {
+    dat$PP_CHECK <- .safe(check_predictions(model, ...))
   }
-  dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold)
-  dat$PP_CHECK <- .safe(check_predictions(model, ...))
 
   dat <- insight::compact_list(dat)
   class(dat) <- c("check_model", "see_check_model")
@@ -442,28 +482,55 @@ check_model.DHARMa <- check_model.performance_simres
 
 # compile plots for checks of generalized linear models  ------------------------
 
-.check_assumptions_glm <- function(model, model_info, residual_type = "simulated", verbose = TRUE, ...) {
+.check_assumptions_glm <- function(model, model_info, check = "all", residual_type = "simulated", verbose = TRUE, ...) {
   dat <- list()
 
-  dat$VIF <- .diag_vif(model, verbose = verbose)
-  dat$QQ <- switch(residual_type,
-    simulated = simulate_residuals(model, ...),
-    .diag_qq(model, model_info = model_info, verbose = verbose)
-  )
-  dat$HOMOGENEITY <- .diag_homogeneity(model, verbose = verbose)
-  dat$REQQ <- .diag_reqq(model, level = 0.95, model_info = model_info, verbose = verbose)
-  dat$OUTLIERS <- .safe(check_outliers(model, method = "cook"))
-  if (is.null(dat$OUTLIERS)) {
-    threshold <- NULL
-  } else {
-    threshold <- attributes(dat$OUTLIERS)$threshold$cook
+  # multicollinearity --------------
+  if (any(c("all", "vif") %in% check)) {
+    dat$VIF <- .diag_vif(model, verbose = verbose)
+  }
+
+  # Q-Q plot (normality/uniformity of residuals) --------------
+  if (any(c("all", "qq") %in% check)) {
+    dat$QQ <- switch(residual_type,
+      simulated = .safe(simulate_residuals(model, ...)),
+      .diag_qq(model, model_info = model_info, verbose = verbose)
+    )
+  }
+
+  # homogeneity of variance --------------
+  if (any(c("all", "homogeneity") %in% check)) {
+    dat$HOMOGENEITY <- .diag_homogeneity(model, verbose = verbose)
+  }
+
+  # Random Effects Q-Q plot (normality of BLUPs) --------------
+  if (any(c("all", "reqq") %in% check)) {
+    dat$REQQ <- .diag_reqq(model, level = 0.95, model_info = model_info, verbose = verbose)
+  }
+
+  # outliers --------------
+  if (any(c("all", "outliers") %in% check)) {
+    dat$OUTLIERS <- .safe(check_outliers(model, method = "cook"))
+    if (is.null(dat$OUTLIERS)) {
+      threshold <- NULL
+    } else {
+      threshold <- attributes(dat$OUTLIERS)$threshold$cook
+    }
+    dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold)
   }
-  dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold)
-  dat$PP_CHECK <- .safe(check_predictions(model, ...))
-  if (isTRUE(model_info$is_binomial)) {
+
+  # posterior predictive checks --------------
+  if (any(c("all", "pp_check") %in% check)) {
+    dat$PP_CHECK <- .safe(check_predictions(model, ...))
+  }
+
+  # binned residuals for bernoulli/binomial --------------
+  if (isTRUE(model_info$is_binomial) && any(c("all", "binned_residuals") %in% check)) {
     dat$BINNED_RESID <- .safe(binned_residuals(model, verbose = verbose, ...))
   }
-  if (isTRUE(model_info$is_count)) {
+
+  # misspecified dispersion and zero-inflation --------------
+  if (isTRUE(model_info$is_count) && any(c("all", "overdispersion") %in% check)) {
     dat$OVERDISPERSION <- .diag_overdispersion(model)
   }
 

diff --git a/R/check_zeroinflation.R b/R/check_zeroinflation.R
@@ -30,18 +30,19 @@
 #'
 #' @section Tests based on simulated residuals:
 #' For certain models, resp. model from certain families, tests are based on
-#' [`simulated_residuals()`]. These are usually more accurate for tests than the
-#' traditionally used Pearson residuals. However, when simulating from more
-#' complex model, such as mixed models or models with zero-inflation, there are
-#' several important considerations. Arguments specified in `...` are passed to
-#' [`simulate_residuals()`], which relies on [`DHARMa::simulateResiduals()`] (and
-#' therefore, arguments in `...` are passed further down to _DHARMa_). The
-#' defaults in DHARMa are set on the most conservative option that works for
-#' all models. However, in many cases, the help advises to use different settings
-#' in particular situations or for particular models. It is recommended to read
-#' the 'Details' in `?DHARMa::simulateResiduals` closely to understand the
-#' implications of the simulation process and which arguments should be modified
-#' to get the most accurate results.
+#' simulated residuals (see [`simulate_residuals()`]). These are usually more
+#' accurate for testing such models than the traditionally used Pearson residuals.
+#' However, when simulating from more complex models, such as mixed models or
+#' models with zero-inflation, there are several important considerations.
+#' Arguments specified in `...` are passed to [`simulate_residuals()`], which
+#' relies on [`DHARMa::simulateResiduals()`] (and therefore, arguments in `...`
+#' are passed further down to _DHARMa_). The defaults in DHARMa are set on the
+#' most conservative option that works for all models. However, in many cases,
+#' the help advises to use different settings in particular situations or for
+#' particular models. It is recommended to read the 'Details' in
+#' `?DHARMa::simulateResiduals` closely to understand the implications of the
+#' simulation process and which arguments should be modified to get the most
+#' accurate results.
 #'
 #' @family functions to check model assumptions and and assess model quality
 #'
@@ -87,7 +88,7 @@ check_zeroinflation.default <- function(x, tolerance = 0.05, ...) {
   not_supported <- c("fixest", "glmx")
 
   # for models with zero-inflation component or negative binomial families,
-  # we use simulated_residuals()
+  # we use simulate_residuals()
   if (!inherits(x, not_supported) && (model_info$is_zero_inflated || model_info$is_negbin || model_info$family == "genpois")) { # nolint
     if (missing(tolerance)) {
       tolerance <- 0.1

diff --git a/R/simulate_residuals.R b/R/simulate_residuals.R
@@ -24,9 +24,9 @@
 #' @section Tests based on simulated residuals:
 #' For certain models, resp. model from certain families, tests like
 #' [`check_zeroinflation()`] or [`check_overdispersion()`] are based on
-#' `simulated_residuals()`. These are usually more accurate for such tests than
+#' simulated residuals. These are usually more accurate for such tests than
 #' the traditionally used Pearson residuals. However, when simulating from more
-#' complex model, such as mixed models or models with zero-inflation, there are
+#' complex models, such as mixed models or models with zero-inflation, there are
 #' several important considerations. `simulate_residuals()` relies on
 #' [`DHARMa::simulateResiduals()`], and additional arguments specified in `...`
 #' are passed further down to that function. The defaults in DHARMa are set on
@@ -79,7 +79,7 @@ print.performance_simres <- function(x, ...) {
   msg <- paste0(
     "Simulated residuals from a model of class `", class(x$fittedModel)[1],
     "` based on ", x$nSim, " simulations. Use `check_residuals()` to check ",
-    "uniformity of residuals. It is recommended to refer to `?DHARMa::simulateReisudals`",
+    "uniformity of residuals. It is recommended to refer to `?DHARMa::simulateResiudals`",
     " and `vignette(\"DHARMa\")` for more information about different settings",
     " in particular situations or for particular models.\n"
   )

diff --git a/man/check_overdispersion.Rd b/man/check_overdispersion.Rd
diff --git a/man/check_residuals.Rd b/man/check_residuals.Rd
diff --git a/man/check_zeroinflation.Rd b/man/check_zeroinflation.Rd
diff --git a/man/simulate_residuals.Rd b/man/simulate_residuals.Rd
diff --git a/tests/testthat/test-check_model.R b/tests/testthat/test-check_model.R
@@ -69,3 +69,14 @@ test_that("`check_model()` warnings for tweedie", {
     )
   )
 })
+
+
+test_that("`check_model()` warnings for zero-infl", {
+  skip_if_not_installed("pscl")
+  data(bioChemists, package = "pscl")
+  model <- pscl::zeroinfl(
+    art ~ fem + mar + kid5 + ment | kid5 + phd,
+    data = bioChemists
+  )
+  expect_message(expect_warning(check_model(model, verbose = TRUE), regex = "Cannot simulate"), regex = "Homogeneity")
+})