Skip to content

Commit

Permalink
Merge branch 'main' into strengejacke/issue697
Browse files Browse the repository at this point in the history
  • Loading branch information
strengejacke committed Mar 19, 2024
2 parents 8137ac3 + 01eff88 commit 3b72ffa
Show file tree
Hide file tree
Showing 8 changed files with 161 additions and 80 deletions.
139 changes: 103 additions & 36 deletions R/check_model.R
Original file line number Diff line number Diff line change
Expand Up @@ -218,9 +218,9 @@ check_model.default <- function(x,
if (minfo$is_bayesian) {
suppressWarnings(.check_assumptions_stan(x, ...))
} else if (minfo$is_linear) {
suppressWarnings(.check_assumptions_linear(x, minfo, residual_type, verbose, ...))
suppressWarnings(.check_assumptions_linear(x, minfo, check, residual_type, verbose, ...))
} else {
suppressWarnings(.check_assumptions_glm(x, minfo, residual_type, verbose, ...))
suppressWarnings(.check_assumptions_glm(x, minfo, check, residual_type, verbose, ...))
},
error = function(e) {
e
Expand All @@ -237,6 +237,15 @@ check_model.default <- function(x,
)
}

# did Q-Q plot work with simulated residuals?
if (verbose && is.null(assumptions_data$QQ) && residual_type == "simulated") {
insight::format_warning(paste0(
"Cannot simulate residuals for models of class `",
class(x)[1],
"`. Please try `check_model(..., residual_type = \"normal\")` instead."
))
}

# try to find sensible default for "type" argument
suggest_dots <- (minfo$is_bernoulli || minfo$is_count || minfo$is_ordinal || minfo$is_categorical || minfo$is_multinomial) # nolint
if (missing(type) && suggest_dots) {
Expand Down Expand Up @@ -412,26 +421,57 @@ check_model.DHARMa <- check_model.performance_simres

# compile plots for checks of linear models ------------------------

.check_assumptions_linear <- function(model, model_info, residual_type = "normal", verbose = TRUE, ...) {
.check_assumptions_linear <- function(model, model_info, check = "all", residual_type = "normal", verbose = TRUE, ...) {
dat <- list()

dat$VIF <- .diag_vif(model, verbose = verbose)
dat$QQ <- switch(residual_type,
simulated = simulate_residuals(model, ...),
.diag_qq(model, model_info = model_info, verbose = verbose)
)
dat$REQQ <- .diag_reqq(model, level = 0.95, model_info = model_info, verbose = verbose)
dat$NORM <- .diag_norm(model, verbose = verbose)
dat$NCV <- .diag_ncv(model, verbose = verbose)
dat$HOMOGENEITY <- .diag_homogeneity(model, verbose = verbose)
dat$OUTLIERS <- .safe(check_outliers(model, method = "cook"))
if (is.null(dat$OUTLIERS)) {
threshold <- NULL
} else {
threshold <- attributes(dat$OUTLIERS)$threshold$cook
# multicollinearity --------------
if (any(c("all", "vif") %in% check)) {
dat$VIF <- .diag_vif(model, verbose = verbose)
}

# Q-Q plot (normality/uniformity of residuals) --------------
if (any(c("all", "qq") %in% check)) {
dat$QQ <- switch(residual_type,
simulated = .safe(simulate_residuals(model, ...)),
.diag_qq(model, model_info = model_info, verbose = verbose)
)
}

# Random Effects Q-Q plot (normality of BLUPs) --------------
if (any(c("all", "reqq") %in% check)) {
dat$REQQ <- .diag_reqq(model, level = 0.95, model_info = model_info, verbose = verbose)
}

# normal-curve plot (normality of residuals) --------------
if (any(c("all", "normality") %in% check)) {
dat$NORM <- .diag_norm(model, verbose = verbose)
}

# non-constant variance (heteroskedasticity, liniearity) --------------
if (any(c("all", "ncv", "linearity") %in% check)) {
dat$NCV <- .diag_ncv(model, verbose = verbose)
}

# homogeneity of variance --------------
if (any(c("all", "homogeneity") %in% check)) {
dat$HOMOGENEITY <- .diag_homogeneity(model, verbose = verbose)
}

# outliers --------------
if (any(c("all", "outliers") %in% check)) {
dat$OUTLIERS <- .safe(check_outliers(model, method = "cook"))
if (is.null(dat$OUTLIERS)) {
threshold <- NULL
} else {
threshold <- attributes(dat$OUTLIERS)$threshold$cook
}
dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold)
}

# posterior predictive checks --------------
if (any(c("all", "pp_check") %in% check)) {
dat$PP_CHECK <- .safe(check_predictions(model, ...))
}
dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold)
dat$PP_CHECK <- .safe(check_predictions(model, ...))

dat <- insight::compact_list(dat)
class(dat) <- c("check_model", "see_check_model")
Expand All @@ -442,28 +482,55 @@ check_model.DHARMa <- check_model.performance_simres

# compile plots for checks of generalized linear models ------------------------

.check_assumptions_glm <- function(model, model_info, residual_type = "simulated", verbose = TRUE, ...) {
.check_assumptions_glm <- function(model, model_info, check = "all", residual_type = "simulated", verbose = TRUE, ...) {
dat <- list()

dat$VIF <- .diag_vif(model, verbose = verbose)
dat$QQ <- switch(residual_type,
simulated = simulate_residuals(model, ...),
.diag_qq(model, model_info = model_info, verbose = verbose)
)
dat$HOMOGENEITY <- .diag_homogeneity(model, verbose = verbose)
dat$REQQ <- .diag_reqq(model, level = 0.95, model_info = model_info, verbose = verbose)
dat$OUTLIERS <- .safe(check_outliers(model, method = "cook"))
if (is.null(dat$OUTLIERS)) {
threshold <- NULL
} else {
threshold <- attributes(dat$OUTLIERS)$threshold$cook
# multicollinearity --------------
if (any(c("all", "vif") %in% check)) {
dat$VIF <- .diag_vif(model, verbose = verbose)
}

# Q-Q plot (normality/uniformity of residuals) --------------
if (any(c("all", "qq") %in% check)) {
dat$QQ <- switch(residual_type,
simulated = .safe(simulate_residuals(model, ...)),
.diag_qq(model, model_info = model_info, verbose = verbose)
)
}

# homogeneity of variance --------------
if (any(c("all", "homogeneity") %in% check)) {
dat$HOMOGENEITY <- .diag_homogeneity(model, verbose = verbose)
}

# Random Effects Q-Q plot (normality of BLUPs) --------------
if (any(c("all", "reqq") %in% check)) {
dat$REQQ <- .diag_reqq(model, level = 0.95, model_info = model_info, verbose = verbose)
}

# outliers --------------
if (any(c("all", "outliers") %in% check)) {
dat$OUTLIERS <- .safe(check_outliers(model, method = "cook"))
if (is.null(dat$OUTLIERS)) {
threshold <- NULL
} else {
threshold <- attributes(dat$OUTLIERS)$threshold$cook
}
dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold)
}
dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold)
dat$PP_CHECK <- .safe(check_predictions(model, ...))
if (isTRUE(model_info$is_binomial)) {

# posterior predictive checks --------------
if (any(c("all", "pp_check") %in% check)) {
dat$PP_CHECK <- .safe(check_predictions(model, ...))
}

# binned residuals for bernoulli/binomial --------------
if (isTRUE(model_info$is_binomial) && any(c("all", "binned_residuals") %in% check)) {
dat$BINNED_RESID <- .safe(binned_residuals(model, verbose = verbose, ...))
}
if (isTRUE(model_info$is_count)) {

# misspecified dispersion and zero-inflation --------------
if (isTRUE(model_info$is_count) && any(c("all", "overdispersion") %in% check)) {
dat$OVERDISPERSION <- .diag_overdispersion(model)
}

Expand Down
27 changes: 14 additions & 13 deletions R/check_zeroinflation.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,19 @@
#'
#' @section Tests based on simulated residuals:
#' For certain models, resp. model from certain families, tests are based on
#' [`simulated_residuals()`]. These are usually more accurate for tests than the
#' traditionally used Pearson residuals. However, when simulating from more
#' complex model, such as mixed models or models with zero-inflation, there are
#' several important considerations. Arguments specified in `...` are passed to
#' [`simulate_residuals()`], which relies on [`DHARMa::simulateResiduals()`] (and
#' therefore, arguments in `...` are passed further down to _DHARMa_). The
#' defaults in DHARMa are set on the most conservative option that works for
#' all models. However, in many cases, the help advises to use different settings
#' in particular situations or for particular models. It is recommended to read
#' the 'Details' in `?DHARMa::simulateResiduals` closely to understand the
#' implications of the simulation process and which arguments should be modified
#' to get the most accurate results.
#' simulated residuals (see [`simulated_residual()`]). These are usually more
#' accurate for testing such models than the traditionally used Pearson residuals.
#' However, when simulating from more complex models, such as mixed models or
#' models with zero-inflation, there are several important considerations.
#' Arguments specified in `...` are passed to [`simulate_residuals()`], which
#' relies on [`DHARMa::simulateResiduals()`] (and therefore, arguments in `...`
#' are passed further down to _DHARMa_). The defaults in DHARMa are set on the
#' most conservative option that works for all models. However, in many cases,
#' the help advises to use different settings in particular situations or for
#' particular models. It is recommended to read the 'Details' in
#' `?DHARMa::simulateResiduals` closely to understand the implications of the
#' simulation process and which arguments should be modified to get the most
#' accurate results.
#'
#' @family functions to check model assumptions and and assess model quality
#'
Expand Down Expand Up @@ -87,7 +88,7 @@ check_zeroinflation.default <- function(x, tolerance = 0.05, ...) {
not_supported <- c("fixest", "glmx")

# for models with zero-inflation component or negative binomial families,
# we use simulated_residuals()
# we use simulate_residuals()
if (!inherits(x, not_supported) && (model_info$is_zero_inflated || model_info$is_negbin || model_info$family == "genpois")) { # nolint
if (missing(tolerance)) {
tolerance <- 0.1
Expand Down
6 changes: 3 additions & 3 deletions R/simulate_residuals.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@
#' @section Tests based on simulated residuals:
#' For certain models, resp. model from certain families, tests like
#' [`check_zeroinflation()`] or [`check_overdispersion()`] are based on
#' `simulated_residuals()`. These are usually more accurate for such tests than
#' simulated residuals. These are usually more accurate for such tests than
#' the traditionally used Pearson residuals. However, when simulating from more
#' complex model, such as mixed models or models with zero-inflation, there are
#' complex models, such as mixed models or models with zero-inflation, there are
#' several important considerations. `simulate_residuals()` relies on
#' [`DHARMa::simulateResiduals()`], and additional arguments specified in `...`
#' are passed further down to that function. The defaults in DHARMa are set on
Expand Down Expand Up @@ -79,7 +79,7 @@ print.performance_simres <- function(x, ...) {
msg <- paste0(
"Simulated residuals from a model of class `", class(x$fittedModel)[1],
"` based on ", x$nSim, " simulations. Use `check_residuals()` to check ",
"uniformity of residuals. It is recommended to refer to `?DHARMa::simulateReisudals`",
"uniformity of residuals. It is recommended to refer to `?DHARMa::simulateResiudals`",
" and `vignette(\"DHARMa\")` for more information about different settings",
" in particular situations or for particular models.\n"
)
Expand Down
25 changes: 13 additions & 12 deletions man/check_overdispersion.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/check_residuals.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 13 additions & 12 deletions man/check_zeroinflation.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/simulate_residuals.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions tests/testthat/test-check_model.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,14 @@ test_that("`check_model()` warnings for tweedie", {
)
)
})


test_that("`check_model()` warnings for zero-infl", {
skip_if_not_installed("pscl")
data(bioChemists, package = "pscl")
model <- pscl::zeroinfl(
art ~ fem + mar + kid5 + ment | kid5 + phd,
data = bioChemists
)
expect_message(expect_warning(check_model(model, verbose = TRUE), regex = "Cannot simulate"), regex = "Homogeneity")
})

0 comments on commit 3b72ffa

Please sign in to comment.