From eb4fafa1b8046ec7775b816a9caad70eb057569c Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 26 Oct 2023 10:13:28 +0200 Subject: [PATCH] Improve `binned_residuals()` (#641) --- NEWS.md | 10 +++ R/binned_residuals.R | 83 +++++++++++++++--- R/check_model.R | 35 ++++---- R/check_outliers.R | 116 ++++++++++++------------- man/binned_residuals.Rd | 29 ++++++- man/check_model.Rd | 3 +- man/check_outliers.Rd | 2 +- tests/testthat/test-binned_residuals.R | 102 ++++++++++++++++++++-- 8 files changed, 285 insertions(+), 95 deletions(-) diff --git a/NEWS.md b/NEWS.md index e3eedf313..2bb37d162 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,15 @@ # performance 0.10.7 +## Breaking changes + +* `binned_residuals()` gains a few new arguments to control the residuals used + for the test, as well as different options to calculate confidence intervals + (namely, `ci_type`, `residuals`, `ci` and `iterations`). The default values + to compute binned residuals have changed. Default residuals are now "deviance" + residuals (and no longer "response" residuals). Default confidence intervals + are now "exact" intervals (and no longer based on Gaussian approximation). + Use `ci_type = "gaussian"` and `residuals = "response"` to get the old defaults. + ## Changes to functions * `binned_residuals()` - like `check_model()` - gains a `show_dots` argument to diff --git a/R/binned_residuals.R b/R/binned_residuals.R index 286323968..8b5533bb0 100644 --- a/R/binned_residuals.R +++ b/R/binned_residuals.R @@ -11,6 +11,19 @@ #' @param n_bins Numeric, the number of bins to divide the data. If #' `n_bins = NULL`, the square root of the number of observations is #' taken. +#' @param ci Numeric, the confidence level for the error bounds. +#' @param ci_type Character, the type of error bounds to calculate. Can be +#' `"exact"` (default), `"gaussian"` or `"boot"`. `"exact"` calculates the +#' error bounds based on the exact binomial distribution, using [`binom.test()`]. +#' `"gaussian"` uses the Gaussian approximation, while `"boot"` uses a simple +#' bootstrap method, where confidence intervals are calculated based on the +#' quantiles of the bootstrap distribution. +#' @param residuals Character, the type of residuals to calculate. Can be +#' `"deviance"` (default), `"pearson"` or `"response"`. It is recommended to +#' use `"response"` only for those models where other residuals are not +#' available. +#' @param iterations Integer, the number of iterations to use for the +#' bootstrap method. Only used if `ci_type = "boot"`. #' @param show_dots Logical, if `TRUE`, will show data points in the plot. Set #' to `FALSE` for models with many observations, if generating the plot is too #' time-consuming. By default, `show_dots = NULL`. In this case `binned_residuals()` @@ -62,12 +75,24 @@ #' } #' #' @export -binned_residuals <- function(model, term = NULL, n_bins = NULL, show_dots = NULL, ...) { - fv <- stats::fitted(model) +binned_residuals <- function(model, + term = NULL, + n_bins = NULL, + show_dots = NULL, + ci = 0.95, + ci_type = c("exact", "gaussian", "boot"), + residuals = c("deviance", "pearson", "response"), + iterations = 1000, + ...) { + # match arguments + ci_type <- match.arg(ci_type) + residuals <- match.arg(residuals) + + fitted_values <- stats::fitted(model) mf <- insight::get_data(model, verbose = FALSE) if (is.null(term)) { - pred <- fv + pred <- fitted_values } else { pred <- mf[[term]] } @@ -78,7 +103,20 @@ binned_residuals <- function(model, term = NULL, n_bins = NULL, show_dots = NULL show_dots <- is.null(n) || n <= 1e5 } - y <- .recode_to_zero(insight::get_response(model, verbose = FALSE)) - fv + # make sure response is 0/1 (and numeric) + y0 <- .recode_to_zero(insight::get_response(model, verbose = FALSE)) + + # calculate residuals + y <- switch(residuals, + response = y0 - fitted_values, + pearson = .safe((y0 - fitted_values) / sqrt(fitted_values * (1 - fitted_values))), + deviance = .safe(stats::residuals(model, type = "deviance")) + ) + + # make sure we really have residuals + if (is.null(y)) { + insight::format_error("Could not calculate residuals. Try using `residuals = \"response\"`.") + } if (is.null(n_bins)) n_bins <- round(sqrt(length(pred))) @@ -95,24 +133,32 @@ binned_residuals <- function(model, term = NULL, n_bins = NULL, show_dots = NULL n <- length(items) sdev <- stats::sd(y[items], na.rm = TRUE) - data.frame( + conf_int <- switch(ci_type, + gaussian = stats::qnorm(c((1 - ci) / 2, (1 + ci) / 2), mean = ybar, sd = sdev / sqrt(n)), + exact = { + out <- stats::binom.test(sum(y0[items]), n)$conf.int + # center CIs around point estimate + out <- out - (min(out) - ybar) - (diff(out) / 2) + out + }, + boot = .boot_binned_ci(y[items], ci, iterations) + ) + names(conf_int) <- c("CI_low", "CI_high") + + d0 <- data.frame( xbar = xbar, ybar = ybar, n = n, x.lo = model.range[1], x.hi = model.range[2], - se = stats::qnorm(0.975) * sdev / sqrt(n), - ci_range = sdev / sqrt(n) + se = stats::qnorm((1 + ci) / 2) * sdev / sqrt(n) ) + cbind(d0, rbind(conf_int)) })) d <- do.call(rbind, d) d <- d[stats::complete.cases(d), ] - # CIs - d$CI_low <- d$ybar - stats::qnorm(0.975) * d$ci_range - d$CI_high <- d$ybar + stats::qnorm(0.975) * d$ci_range - gr <- abs(d$ybar) > abs(d$se) d$group <- "yes" d$group[gr] <- "no" @@ -129,6 +175,21 @@ binned_residuals <- function(model, term = NULL, n_bins = NULL, show_dots = NULL } +# utilities --------------------------- + +.boot_binned_ci <- function(x, ci = 0.95, iterations = 1000) { + x <- x[!is.na(x)] + n <- length(x) + out <- vector("numeric", iterations) + for (i in seq_len(iterations)) { + out[i] <- sum(x[sample.int(n, n, replace = TRUE)]) + } + out <- out / n + + quant <- stats::quantile(out, c((1 - ci) / 2, (1 + ci) / 2)) + c(CI_low = quant[1L], CI_high = quant[2L]) +} + # methods ----------------------------- diff --git a/R/check_model.R b/R/check_model.R index bbf6c6a84..58757468f 100644 --- a/R/check_model.R +++ b/R/check_model.R @@ -35,7 +35,8 @@ #' tries to guess whether performance will be poor due to a very large model #' and thus automatically shows or hides dots. #' @param verbose If `FALSE` (default), suppress most warning messages. -#' @param ... Currently not used. +#' @param ... Arguments passed down to the individual check functions, especially +#' to `check_predictions()` and `binned_residuals()`. #' @inheritParams check_predictions #' #' @return The data frame that is used for plotting. @@ -185,11 +186,11 @@ check_model.default <- function(x, ca <- tryCatch( { if (minfo$is_bayesian) { - suppressWarnings(.check_assumptions_stan(x)) + suppressWarnings(.check_assumptions_stan(x, ...)) } else if (minfo$is_linear) { - suppressWarnings(.check_assumptions_linear(x, minfo, verbose)) + suppressWarnings(.check_assumptions_linear(x, minfo, verbose, ...)) } else { - suppressWarnings(.check_assumptions_glm(x, minfo, verbose)) + suppressWarnings(.check_assumptions_glm(x, minfo, verbose, ...)) } }, error = function(e) { @@ -202,7 +203,7 @@ check_model.default <- function(x, } # try to find sensible default for "type" argument - suggest_dots <- (minfo$is_bernoulli || minfo$is_count || minfo$is_ordinal || minfo$is_categorical || minfo$is_multinomial) + suggest_dots <- (minfo$is_bernoulli || minfo$is_count || minfo$is_ordinal || minfo$is_categorical || minfo$is_multinomial) # nolint if (missing(type) && suggest_dots) { type <- "discrete_interval" } @@ -330,7 +331,7 @@ check_model.model_fit <- function(x, # compile plots for checks of linear models ------------------------ -.check_assumptions_linear <- function(model, model_info, verbose = TRUE) { +.check_assumptions_linear <- function(model, model_info, verbose = TRUE, ...) { dat <- list() dat$VIF <- .diag_vif(model, verbose = verbose) @@ -340,13 +341,13 @@ check_model.model_fit <- function(x, dat$NCV <- .diag_ncv(model, verbose = verbose) dat$HOMOGENEITY <- .diag_homogeneity(model, verbose = verbose) dat$OUTLIERS <- check_outliers(model, method = "cook") - if (!is.null(dat$OUTLIERS)) { - threshold <- attributes(dat$OUTLIERS)$threshold$cook - } else { + if (is.null(dat$OUTLIERS)) { threshold <- NULL + } else { + threshold <- attributes(dat$OUTLIERS)$threshold$cook } dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold) - dat$PP_CHECK <- .safe(check_predictions(model)) + dat$PP_CHECK <- .safe(check_predictions(model, ...)) dat <- insight::compact_list(dat) class(dat) <- c("check_model", "see_check_model") @@ -357,7 +358,7 @@ check_model.model_fit <- function(x, # compile plots for checks of generalized linear models ------------------------ -.check_assumptions_glm <- function(model, model_info, verbose = TRUE) { +.check_assumptions_glm <- function(model, model_info, verbose = TRUE, ...) { dat <- list() dat$VIF <- .diag_vif(model, verbose = verbose) @@ -365,15 +366,15 @@ check_model.model_fit <- function(x, dat$HOMOGENEITY <- .diag_homogeneity(model, verbose = verbose) dat$REQQ <- .diag_reqq(model, level = 0.95, model_info = model_info, verbose = verbose) dat$OUTLIERS <- check_outliers(model, method = "cook") - if (!is.null(dat$OUTLIERS)) { - threshold <- attributes(dat$OUTLIERS)$threshold$cook - } else { + if (is.null(dat$OUTLIERS)) { threshold <- NULL + } else { + threshold <- attributes(dat$OUTLIERS)$threshold$cook } dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold) - dat$PP_CHECK <- .safe(check_predictions(model)) + dat$PP_CHECK <- .safe(check_predictions(model, ...)) if (isTRUE(model_info$is_binomial)) { - dat$BINNED_RESID <- binned_residuals(model) + dat$BINNED_RESID <- binned_residuals(model, ...) } if (isTRUE(model_info$is_count)) { dat$OVERDISPERSION <- .diag_overdispersion(model) @@ -388,7 +389,7 @@ check_model.model_fit <- function(x, # compile plots for checks of Bayesian models ------------------------ -.check_assumptions_stan <- function(model) { +.check_assumptions_stan <- function(model, ...) { if (inherits(model, "brmsfit")) { # check if brms can be loaded diff --git a/R/check_outliers.R b/R/check_outliers.R index f7b304c5e..2a1d185d3 100644 --- a/R/check_outliers.R +++ b/R/check_outliers.R @@ -263,7 +263,7 @@ #' #' - Thériault, R., Ben-Shachar, M. S., Patil, I., Lüdecke, D., Wiernik, B. M., #' and Makowski, D. (2023). Check your outliers! An introduction to identifying -#' statistical outliers in R with easystats. https://doi.org/10.31234/osf.io/bu6nt +#' statistical outliers in R with easystats. \doi{10.31234/osf.io/bu6nt} #' #' - Rousseeuw, P. J., and Van Zomeren, B. C. (1990). Unmasking multivariate #' outliers and leverage points. Journal of the American Statistical @@ -410,16 +410,16 @@ check_outliers.default <- function(x, } # Others - if (!all(method %in% c("cook", "pareto"))) { + if (all(method %in% c("cook", "pareto"))) { + df <- data.frame(Row = seq_len(nrow(as.data.frame(data)))) + outlier_count <- list() + outlier_var <- list() + } else { out <- check_outliers(data, method, threshold) outlier_var <- attributes(out)$outlier_var outlier_count <- attributes(out)$outlier_count df <- attributes(out)$data - df <- df[!names(df) %in% "Outlier"] - } else { - df <- data.frame(Row = seq_len(nrow(as.data.frame(data)))) - outlier_count <- list() - outlier_var <- list() + df <- df[!names(df) == "Outlier"] } # Cook @@ -449,17 +449,17 @@ check_outliers.default <- function(x, outlier_count$cook <- count.table - if (!all(method %in% c("cook", "pareto"))) { + if (all(method %in% c("cook", "pareto"))) { + outlier_count$all <- count.table + } else { outlier_count$all <- datawizard::data_merge( list(outlier_count$all, count.table), join = "full", by = "Row" ) - } else { - outlier_count$all <- count.table } } else { - method <- method[!(method %in% "cook")] + method <- method[!(method == "cook")] } # Pareto @@ -489,17 +489,17 @@ check_outliers.default <- function(x, outlier_count$pareto <- count.table - if (!all(method %in% c("cook", "pareto"))) { + if (all(method %in% c("cook", "pareto"))) { + outlier_count$all <- count.table + } else { outlier_count$all <- datawizard::data_merge( list(outlier_count$all, count.table), join = "full", by = "Row" ) - } else { - outlier_count$all <- count.table } } else { - method <- method[!(method %in% "pareto")] + method <- method[!(method == "pareto")] } outlier_count$all <- datawizard::convert_na_to(outlier_count$all, @@ -1442,21 +1442,21 @@ check_outliers.metabin <- check_outliers.metagen lof <- 0.001 list( - "zscore" = zscore, - "zscore_robust" = zscore_robust, - "iqr" = iqr, - "ci" = ci, - "hdi" = hdi, - "eti" = eti, - "bci" = bci, - "cook" = cook, - "pareto" = pareto, - "mahalanobis" = mahalanobis, - "mahalanobis_robust" = mahalanobis_robust, - "mcd" = mcd, - "ics" = ics, - "optics" = optics, - "lof" = lof + zscore = zscore, + zscore_robust = zscore_robust, + iqr = iqr, + ci = ci, + hdi = hdi, + eti = eti, + bci = bci, + cook = cook, + pareto = pareto, + mahalanobis = mahalanobis, + mahalanobis_robust = mahalanobis_robust, + mcd = mcd, + ics = ics, + optics = optics, + lof = lof ) } @@ -1478,15 +1478,15 @@ check_outliers.metabin <- check_outliers.metagen x <- as.data.frame(x) # Standardize - if (!robust) { + if (robust) { d <- abs(as.data.frame(lapply( x, - function(x) (x - mean(x, na.rm = TRUE)) / stats::sd(x, na.rm = TRUE) + function(x) (x - stats::median(x, na.rm = TRUE)) / stats::mad(x, na.rm = TRUE) ))) } else { d <- abs(as.data.frame(lapply( x, - function(x) (x - stats::median(x, na.rm = TRUE)) / stats::mad(x, na.rm = TRUE) + function(x) (x - mean(x, na.rm = TRUE)) / stats::sd(x, na.rm = TRUE) ))) } @@ -1504,8 +1504,8 @@ check_outliers.metabin <- check_outliers.metagen out$Outlier_Zscore <- as.numeric(out$Distance_Zscore > threshold) output <- list( - "data_zscore" = out, - "threshold_zscore" = threshold + data_zscore = out, + threshold_zscore = threshold ) if (isTRUE(robust)) { @@ -1566,8 +1566,8 @@ check_outliers.metabin <- check_outliers.metagen }, numeric(1)) list( - "data_iqr" = out, - "threshold_iqr" = threshold + data_iqr = out, + threshold_iqr = threshold ) } @@ -1615,8 +1615,8 @@ check_outliers.metabin <- check_outliers.metagen out <- cbind(out.0, out) output <- list( - "data_" = out, - "threshold_" = threshold + data_ = out, + threshold_ = threshold ) names(output) <- paste0(names(output), method) output @@ -1636,8 +1636,8 @@ check_outliers.metabin <- check_outliers.metagen out$Outlier_Cook <- as.numeric(out$Distance_Cook > threshold) list( - "data_cook" = out, - "threshold_cook" = threshold + data_cook = out, + threshold_cook = threshold ) } @@ -1656,8 +1656,8 @@ check_outliers.metabin <- check_outliers.metagen out$Outlier_Pareto <- as.numeric(out$Distance_Pareto > threshold) list( - "data_pareto" = out, - "threshold_pareto" = threshold + data_pareto = out, + threshold_pareto = threshold ) } @@ -1686,8 +1686,8 @@ check_outliers.metabin <- check_outliers.metagen out$Outlier_Mahalanobis <- as.numeric(out$Distance_Mahalanobis > threshold) list( - "data_mahalanobis" = out, - "threshold_mahalanobis" = threshold + data_mahalanobis = out, + threshold_mahalanobis = threshold ) } @@ -1717,8 +1717,8 @@ check_outliers.metabin <- check_outliers.metagen ) list( - "data_mahalanobis_robust" = out, - "threshold_mahalanobis_robust" = threshold + data_mahalanobis_robust = out, + threshold_mahalanobis_robust = threshold ) } @@ -1744,8 +1744,8 @@ check_outliers.metabin <- check_outliers.metagen out$Outlier_MCD <- as.numeric(out$Distance_MCD > threshold) list( - "data_mcd" = out, - "threshold_mcd" = threshold + data_mcd = out, + threshold_mcd = threshold ) } @@ -1765,10 +1765,10 @@ check_outliers.metabin <- check_outliers.metagen insight::check_if_installed("ICSOutlier") # Get n cores - n_cores <- if (!requireNamespace("parallel", quietly = TRUE)) { - NULL - } else { + n_cores <- if (requireNamespace("parallel", quietly = TRUE)) { getOption("mc.cores", 1L) + } else { + NULL } # tell user about n-cores option @@ -1822,8 +1822,8 @@ check_outliers.metabin <- check_outliers.metagen # Out list( - "data_ics" = out, - "threshold_ics" = threshold + data_ics = out, + threshold_ics = threshold ) } @@ -1854,8 +1854,8 @@ check_outliers.metabin <- check_outliers.metagen } list( - "data_optics" = out, - "threshold_optics" = threshold + data_optics = out, + threshold_optics = threshold ) } @@ -1921,8 +1921,8 @@ check_outliers.metabin <- check_outliers.metagen out$Outlier_LOF <- as.numeric(out$Distance_LOF > cutoff) list( - "data_lof" = out, - "threshold_lof" = threshold + data_lof = out, + threshold_lof = threshold ) } diff --git a/man/binned_residuals.Rd b/man/binned_residuals.Rd index b8aee666e..33e710f11 100644 --- a/man/binned_residuals.Rd +++ b/man/binned_residuals.Rd @@ -4,7 +4,17 @@ \alias{binned_residuals} \title{Binned residuals for binomial logistic regression} \usage{ -binned_residuals(model, term = NULL, n_bins = NULL, show_dots = NULL, ...) +binned_residuals( + model, + term = NULL, + n_bins = NULL, + show_dots = NULL, + ci = 0.95, + ci_type = c("exact", "gaussian", "boot"), + residuals = c("deviance", "pearson", "response"), + iterations = 1000, + ... +) } \arguments{ \item{model}{A \code{glm}-object with \emph{binomial}-family.} @@ -24,6 +34,23 @@ time-consuming. By default, \code{show_dots = NULL}. In this case \code{binned_r tries to guess whether performance will be poor due to a very large model and thus automatically shows or hides dots.} +\item{ci}{Numeric, the confidence level for the error bounds.} + +\item{ci_type}{Character, the type of error bounds to calculate. Can be +\code{"exact"} (default), \code{"gaussian"} or \code{"boot"}. \code{"exact"} calculates the +error bounds based on the exact binomial distribution, using \code{\link[=binom.test]{binom.test()}}. +\code{"gaussian"} uses the Gaussian approximation, while \code{"boot"} uses a simple +bootstrap method, where confidence intervals are calculated based on the +quantiles of the bootstrap distribution.} + +\item{residuals}{Character, the type of residuals to calculate. Can be +\code{"deviance"} (default), \code{"pearson"} or \code{"response"}. It is recommended to +use \code{"response"} only for those models where other residuals are not +available.} + +\item{iterations}{Integer, the number of iterations to use for the +bootstrap method. Only used if \code{ci_type = "boot"}.} + \item{...}{Currently not used.} } \value{ diff --git a/man/check_model.Rd b/man/check_model.Rd index 2bf82af92..d5ead9420 100644 --- a/man/check_model.Rd +++ b/man/check_model.Rd @@ -28,7 +28,8 @@ check_model(x, ...) \arguments{ \item{x}{A model object.} -\item{...}{Currently not used.} +\item{...}{Arguments passed down to the individual check functions, especially +to \code{check_predictions()} and \code{binned_residuals()}.} \item{dot_size, line_size}{Size of line and dot-geoms.} diff --git a/man/check_outliers.Rd b/man/check_outliers.Rd index f22a51f6a..74c992b6f 100644 --- a/man/check_outliers.Rd +++ b/man/check_outliers.Rd @@ -345,7 +345,7 @@ statistical models. Journal of Open Source Software, 6(60), 3139. \doi{10.21105/joss.03139} \item Thériault, R., Ben-Shachar, M. S., Patil, I., Lüdecke, D., Wiernik, B. M., and Makowski, D. (2023). Check your outliers! An introduction to identifying -statistical outliers in R with easystats. https://doi.org/10.31234/osf.io/bu6nt +statistical outliers in R with easystats. \doi{10.31234/osf.io/bu6nt} \item Rousseeuw, P. J., and Van Zomeren, B. C. (1990). Unmasking multivariate outliers and leverage points. Journal of the American Statistical association, 85(411), 633-639. diff --git a/tests/testthat/test-binned_residuals.R b/tests/testthat/test-binned_residuals.R index 4aa69e0ec..6f0155092 100644 --- a/tests/testthat/test-binned_residuals.R +++ b/tests/testthat/test-binned_residuals.R @@ -1,10 +1,10 @@ test_that("binned_residuals", { data(mtcars) model <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial") - result <- binned_residuals(model) + result <- binned_residuals(model, ci_type = "gaussian", residuals = "response") expect_named( result, - c("xbar", "ybar", "n", "x.lo", "x.hi", "se", "ci_range", "CI_low", "CI_high", "group") + c("xbar", "ybar", "n", "x.lo", "x.hi", "se", "CI_low", "CI_high", "group") ) expect_equal( result$xbar, @@ -16,16 +16,21 @@ test_that("binned_residuals", { c(-0.03786, -0.09514, 0.07423, -0.07955, 0.28891, -0.13786), tolerance = 1e-4 ) + expect_equal( + result$CI_low, + c(-0.05686, -0.12331, -0.35077, -0.57683, 0.17916, -0.44147), + tolerance = 1e-4 + ) }) test_that("binned_residuals, n_bins", { data(mtcars) model <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial") - result <- binned_residuals(model, n_bins = 10) + result <- binned_residuals(model, ci_type = "gaussian", residuals = "response", n_bins = 10) expect_named( result, - c("xbar", "ybar", "n", "x.lo", "x.hi", "se", "ci_range", "CI_low", "CI_high", "group") + c("xbar", "ybar", "n", "x.lo", "x.hi", "se", "CI_low", "CI_high", "group") ) expect_equal( result$xbar, @@ -49,10 +54,10 @@ test_that("binned_residuals, n_bins", { test_that("binned_residuals, terms", { data(mtcars) model <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial") - result <- binned_residuals(model, term = "mpg") + result <- binned_residuals(model, ci_type = "gaussian", residuals = "response", term = "mpg") expect_named( result, - c("xbar", "ybar", "n", "x.lo", "x.hi", "se", "ci_range", "CI_low", "CI_high", "group") + c("xbar", "ybar", "n", "x.lo", "x.hi", "se", "CI_low", "CI_high", "group") ) expect_equal( result$xbar, @@ -65,3 +70,88 @@ test_that("binned_residuals, terms", { tolerance = 1e-4 ) }) + + +test_that("binned_residuals, deviance residuals, gaussian CI", { + data(mtcars) + model <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial") + result <- binned_residuals(model, residuals = "deviance", ci_type = "gaussian") + expect_named( + result, + c("xbar", "ybar", "n", "x.lo", "x.hi", "se", "CI_low", "CI_high", "group") + ) + expect_equal( + result$xbar, + c(0.03786, 0.09514, 0.25911, 0.47955, 0.71109, 0.97119), + tolerance = 1e-4 + ) + expect_equal( + result$ybar, + c(-0.26905, -0.44334, 0.03763, -0.19917, 0.81563, -0.23399), + tolerance = 1e-4 + ) + expect_equal( + result$ybar, + c(-0.26905, -0.44334, 0.03763, -0.19917, 0.81563, -0.23399), + tolerance = 1e-4 + ) + expect_equal( + result$CI_low, + c(-0.33985, -0.50865, -0.98255, -1.36025, 0.61749, -1.00913), + tolerance = 1e-4 + ) +}) + + +test_that("binned_residuals, default", { + data(mtcars) + model <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial") + result <- binned_residuals(model) + expect_named( + result, + c("xbar", "ybar", "n", "x.lo", "x.hi", "se", "CI_low", "CI_high", "group") + ) + expect_equal( + result$xbar, + c(0.03786, 0.09514, 0.25911, 0.47955, 0.71109, 0.97119), + tolerance = 1e-4 + ) + expect_equal( + result$ybar, + c(-0.26905, -0.44334, 0.03763, -0.19917, 0.81563, -0.23399), + tolerance = 1e-4 + ) + expect_equal( + result$CI_low, + c(-0.52997, -0.70426, -0.32935, -0.59948, 0.55472, -0.55251), + tolerance = 1e-4 + ) +}) + + +test_that("binned_residuals, bootstrapped CI", { + skip_on_cran() + data(mtcars) + model <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial") + set.seed(123) + result <- binned_residuals(model, ci_type = "boot", iterations = 100) + expect_named( + result, + c("xbar", "ybar", "n", "x.lo", "x.hi", "se", "CI_low", "CI_high", "group") + ) + expect_equal( + result$xbar, + c(0.03786, 0.09514, 0.25911, 0.47955, 0.71109, 0.97119), + tolerance = 1e-4 + ) + expect_equal( + result$ybar, + c(-0.26905, -0.44334, 0.03763, -0.19917, 0.81563, -0.23399), + tolerance = 1e-4 + ) + expect_equal( + result$CI_low, + c(-0.32623, -0.50543, -0.80879, -1.15154, 0.67569, -0.65748), + tolerance = 1e-4 + ) +})