From a7802be16e1e84c9f5ecf1eae4bf17000065dc4d Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 25 Feb 2022 14:49:49 +0100 Subject: [PATCH] #376 --- NAMESPACE | 1 + R/binned_residuals.R | 18 ++++-------------- R/check_model.R | 7 +++++-- R/print-methods.R | 18 +++++++++++++++++- man/binned_residuals.Rd | 8 +++----- man/check_distribution.Rd | 2 +- 6 files changed, 31 insertions(+), 23 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 678ef5939..1d7edac43 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -198,6 +198,7 @@ S3method(performance_mse,negbinmfx) S3method(performance_mse,poissonirr) S3method(performance_mse,poissonmfx) S3method(performance_mse,probitmfx) +S3method(plot,binned_residuals) S3method(plot,check_collinearity) S3method(plot,check_distribution) S3method(plot,check_distribution_numeric) diff --git a/R/binned_residuals.R b/R/binned_residuals.R index 9a7049c52..2ebdbf52e 100644 --- a/R/binned_residuals.R +++ b/R/binned_residuals.R @@ -33,12 +33,9 @@ #' of residuals along the x-axis is a signal to consider taking the logarithm #' of the predictor (cf. Gelman and Hill 2007, pp. 97-98). #' -#' @note Since `binned_residuals()` returns a data frame, the default -#' action for the result is *printing*. However, the `print()`-method for -#' `binned_residuals()` actually creates a plot. For further -#' modifications of the plot, use `print()` and add ggplot-layers to the -#' return values, e.g. `print(binned_residuals(model)) + -#' see::scale_color_pizza()`. +#' @note `binned_residuals()` returns a data frame; however, the `print()` +#' method only prints a short summary of the result. The data frame itself +#' is used for plotting. The `plot()` method, in turn, creates a ggplot object. #' #' @references #' Gelman, A., & Hill, J. (2007). Data analysis using regression and @@ -103,19 +100,12 @@ binned_residuals <- function(model, term = NULL, n_bins = NULL, ...) 
{ resid_ok <- sum(d$group == "yes") / length(d$group) - if (resid_ok < .8) { - insight::print_color(sprintf("Warning: Probably bad model fit. Only about %g%% of the residuals are inside the error bounds.\n", round(100 * resid_ok)), "red") - } else if (resid_ok < .95) { - insight::print_color(sprintf("Warning: About %g%% of the residuals are inside the error bounds (~95%% or higher would be good).\n", round(100 * resid_ok)), "yellow") - } else { - insight::print_color(sprintf("Ok: About %g%% of the residuals are inside the error bounds.\n", round(100 * resid_ok)), "green") - } - add.args <- lapply(match.call(expand.dots = FALSE)$`...`, function(x) x) size <- if ("size" %in% names(add.args)) add.args[["size"]] else 2 color <- if ("color" %in% names(add.args)) add.args[["color"]] else c("#d11141", "#00aedb") class(d) <- c("binned_residuals", "see_binned_residuals", class(d)) + attr(d, "resid_ok") <- resid_ok attr(d, "resp_var") <- insight::find_response(model) attr(d, "term") <- term attr(d, "geom_size") <- size diff --git a/R/check_model.R b/R/check_model.R index 17af1b5ec..ac7caf27d 100644 --- a/R/check_model.R +++ b/R/check_model.R @@ -52,8 +52,7 @@ #' plots are helpful to check model assumptions, they do not necessarily #' indicate so-called "lack of fit", e.g. missed non-linear relationships or #' interactions. Thus, it is always recommended to also look at -#' [effect -#' plots, including partial residuals](https://strengejacke.github.io/ggeffects/articles/introduction_partial_residuals.html). +#' [effect plots, including partial residuals](https://strengejacke.github.io/ggeffects/articles/introduction_partial_residuals.html). 
#' #' @section Residuals for (Generalized) Linear Models: #' Plots that check the normality of residuals (QQ-plot) or the homogeneity of @@ -126,6 +125,7 @@ check_model.default <- function(x, attr(ca, "detrend") <- detrend attr(ca, "colors") <- colors attr(ca, "theme") <- theme + attr(ca, "model_info") <- minfo ca } @@ -243,6 +243,9 @@ check_model.model_fit <- function(x, threshold <- NULL } dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold) + if (isTRUE(model_info$is_binomial)) { + dat$BINNED_RESID <- binned_residuals(model) + } dat <- datawizard::compact_list(dat) class(dat) <- c("check_model", "see_check_model") diff --git a/R/print-methods.R b/R/print-methods.R index 7494a06d5..7f51ccbde 100644 --- a/R/print-methods.R +++ b/R/print-methods.R @@ -503,13 +503,29 @@ print.icc_decomposed <- function(x, digits = 2, ...) { #' @export print.binned_residuals <- function(x, ...) { - insight::check_if_installed("see", "to plot binned residuals") + resid_ok <- attributes(x)$resid_ok + + if (!is.null(resid_ok)) { + if (resid_ok < .8) { + insight::print_color(sprintf("Warning: Probably bad model fit. Only about %g%% of the residuals are inside the error bounds.\n", round(100 * resid_ok)), "red") + } else if (resid_ok < .95) { + insight::print_color(sprintf("Warning: About %g%% of the residuals are inside the error bounds (~95%% or higher would be good).\n", round(100 * resid_ok)), "yellow") + } else { + insight::print_color(sprintf("Ok: About %g%% of the residuals are inside the error bounds.\n", round(100 * resid_ok)), "green") + } + } +} + +#' @export +plot.binned_residuals <- function(x, ...) { + insight::check_if_installed("see", "to plot binned residuals") NextMethod() } + #' @export print.performance_hosmer <- function(x, ...) 
{ insight::print_color("# Hosmer-Lemeshow Goodness-of-Fit Test\n\n", "blue") diff --git a/man/binned_residuals.Rd b/man/binned_residuals.Rd index bfb4f0835..612dc2396 100644 --- a/man/binned_residuals.Rd +++ b/man/binned_residuals.Rd @@ -46,11 +46,9 @@ of residuals along the x-axis is a signal to consider taking the logarithm of the predictor (cf. Gelman and Hill 2007, pp. 97-98). } \note{ -Since \code{binned_residuals()} returns a data frame, the default -action for the result is \emph{printing}. However, the \code{print()}-method for -\code{binned_residuals()} actually creates a plot. For further -modifications of the plot, use \code{print()} and add ggplot-layers to the -return values, e.g. \code{print(binned_residuals(model)) + see::scale_color_pizza()}. +\code{binned_residuals()} returns a data frame; however, the \code{print()} +method only prints a short summary of the result. The data frame itself +is used for plotting. The \code{plot()} method, in turn, creates a ggplot object. } \examples{ if (require("see")) { diff --git a/man/check_distribution.Rd b/man/check_distribution.Rd index 101439865..83ece551a 100644 --- a/man/check_distribution.Rd +++ b/man/check_distribution.Rd @@ -46,7 +46,7 @@ implemented in the } \examples{ if (require("lme4") && require("parameters") && - require("see") && require("patchwork") && require("randomForest")) { + require("see") && require("patchwork") && require("randomForest")) { data(sleepstudy) model <<- lmer(Reaction ~ Days + (Days | Subject), sleepstudy)