From eb4fafa1b8046ec7775b816a9caad70eb057569c Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Thu, 26 Oct 2023 10:13:28 +0200
Subject: [PATCH] Improve `binned_residuals()` (#641)

---
 NEWS.md                                |  10 +++
 R/binned_residuals.R                   |  83 +++++++++++++++---
 R/check_model.R                        |  35 ++++----
 R/check_outliers.R                     | 116 ++++++++++++-------------
 man/binned_residuals.Rd                |  29 ++++++-
 man/check_model.Rd                     |   3 +-
 man/check_outliers.Rd                  |   2 +-
 tests/testthat/test-binned_residuals.R | 102 ++++++++++++++++++++--
 8 files changed, 285 insertions(+), 95 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index e3eedf313..2bb37d162 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,15 @@
 # performance 0.10.7
 
+## Breaking changes
+
+* `binned_residuals()` gains new arguments (`residuals`, `ci_type`, `ci` and
+  `iterations`) to control which residuals are binned and how confidence
+  intervals are calculated. The default values to compute binned residuals
+  have changed: default residuals are now "deviance" residuals (and no longer
+  "response" residuals), and default confidence intervals are now "exact"
+  intervals (and no longer based on Gaussian approximation). Use
+  `ci_type = "gaussian"` and `residuals = "response"` to get the old defaults.
+
 ## Changes to functions
 
 * `binned_residuals()` - like `check_model()` - gains a `show_dots` argument to
diff --git a/R/binned_residuals.R b/R/binned_residuals.R
index 286323968..8b5533bb0 100644
--- a/R/binned_residuals.R
+++ b/R/binned_residuals.R
@@ -11,6 +11,19 @@
 #' @param n_bins Numeric, the number of bins to divide the data. If
 #'   `n_bins = NULL`, the square root of the number of observations is
 #'   taken.
+#' @param ci Numeric, the confidence level for the error bounds.
+#' @param ci_type Character, the type of error bounds to calculate. Can be
+#'   `"exact"` (default), `"gaussian"` or `"boot"`. `"exact"` calculates the
+#'   error bounds based on the exact binomial distribution, using [`binom.test()`].
+#'   `"gaussian"` uses the Gaussian approximation, while `"boot"` uses a simple
+#'   bootstrap method, where confidence intervals are calculated based on the
+#'   quantiles of the bootstrap distribution.
+#' @param residuals Character, the type of residuals to calculate. Can be
+#'   `"deviance"` (default), `"pearson"` or `"response"`. It is recommended to
+#'   use `"response"` only for those models where other residuals are not
+#'   available.
+#' @param iterations Integer, the number of iterations to use for the
+#'   bootstrap method. Only used if `ci_type = "boot"`.
 #' @param show_dots Logical, if `TRUE`, will show data points in the plot. Set
 #'   to `FALSE` for models with many observations, if generating the plot is too
 #'   time-consuming. By default, `show_dots = NULL`. In this case `binned_residuals()`
@@ -62,12 +75,24 @@
 #' }
 #'
 #' @export
-binned_residuals <- function(model, term = NULL, n_bins = NULL, show_dots = NULL, ...) {
-  fv <- stats::fitted(model)
+binned_residuals <- function(model,
+                             term = NULL,
+                             n_bins = NULL,
+                             show_dots = NULL,
+                             ci = 0.95,
+                             ci_type = c("exact", "gaussian", "boot"),
+                             residuals = c("deviance", "pearson", "response"),
+                             iterations = 1000,
+                             ...) {
+  # match arguments
+  ci_type <- match.arg(ci_type)
+  residuals <- match.arg(residuals)
+
+  fitted_values <- stats::fitted(model)
   mf <- insight::get_data(model, verbose = FALSE)
 
   if (is.null(term)) {
-    pred <- fv
+    pred <- fitted_values
   } else {
     pred <- mf[[term]]
   }
@@ -78,7 +103,20 @@ binned_residuals <- function(model, term = NULL, n_bins = NULL, show_dots = NULL
     show_dots <- is.null(n) || n <= 1e5
   }
 
-  y <- .recode_to_zero(insight::get_response(model, verbose = FALSE)) - fv
+  # make sure response is 0/1 (and numeric)
+  y0 <- .recode_to_zero(insight::get_response(model, verbose = FALSE))
+
+  # calculate residuals
+  y <- switch(residuals,
+    response = y0 - fitted_values,
+    pearson = .safe((y0 - fitted_values) / sqrt(fitted_values * (1 - fitted_values))),
+    deviance = .safe(stats::residuals(model, type = "deviance"))
+  )
+
+  # make sure we really have residuals
+  if (is.null(y)) {
+    insight::format_error("Could not calculate residuals. Try using `residuals = \"response\"`.")
+  }
 
   if (is.null(n_bins)) n_bins <- round(sqrt(length(pred)))
 
@@ -95,24 +133,32 @@ binned_residuals <- function(model, term = NULL, n_bins = NULL, show_dots = NULL
     n <- length(items)
     sdev <- stats::sd(y[items], na.rm = TRUE)
 
-    data.frame(
+    conf_int <- switch(ci_type,
+      gaussian = stats::qnorm(c((1 - ci) / 2, (1 + ci) / 2), mean = ybar, sd = sdev / sqrt(n)),
+      exact = {
+        out <- stats::binom.test(sum(y0[items]), n)$conf.int
+        # center CIs around point estimate
+        out <- out - (min(out) - ybar) - (diff(out) / 2)
+        out
+      },
+      boot = .boot_binned_ci(y[items], ci, iterations)
+    )
+    names(conf_int) <- c("CI_low", "CI_high")
+
+    d0 <- data.frame(
       xbar = xbar,
       ybar = ybar,
       n = n,
       x.lo = model.range[1],
       x.hi = model.range[2],
-      se = stats::qnorm(0.975) * sdev / sqrt(n),
-      ci_range = sdev / sqrt(n)
+      se = stats::qnorm((1 + ci) / 2) * sdev / sqrt(n)
     )
+    cbind(d0, rbind(conf_int))
   }))
 
   d <- do.call(rbind, d)
   d <- d[stats::complete.cases(d), ]
 
-  # CIs
-  d$CI_low <- d$ybar - stats::qnorm(0.975) * d$ci_range
-  d$CI_high <- d$ybar + stats::qnorm(0.975) * d$ci_range
-
   gr <- abs(d$ybar) > abs(d$se)
   d$group <- "yes"
   d$group[gr] <- "no"
@@ -129,6 +175,21 @@ binned_residuals <- function(model, term = NULL, n_bins = NULL, show_dots = NULL
 }
 
 
+# utilities ---------------------------
+
+# Percentile bootstrap CI for the mean of `x` (NAs dropped before resampling).
+.boot_binned_ci <- function(x, ci = 0.95, iterations = 1000) {
+  x <- x[!is.na(x)]
+  n <- length(x)
+  out <- vector("numeric", iterations)
+  for (i in seq_len(iterations)) {
+    out[i] <- sum(x[sample.int(n, n, replace = TRUE)])
+  }
+  # `names = FALSE`: otherwise results are named "CI_low.2.5%"/"CI_high.97.5%"
+  quant <- stats::quantile(out / n, c((1 - ci) / 2, (1 + ci) / 2), names = FALSE)
+  c(CI_low = quant[1L], CI_high = quant[2L])
+}
+
 
 # methods -----------------------------
 
diff --git a/R/check_model.R b/R/check_model.R
index bbf6c6a84..58757468f 100644
--- a/R/check_model.R
+++ b/R/check_model.R
@@ -35,7 +35,8 @@
 #'   tries to guess whether performance will be poor due to a very large model
 #'   and thus automatically shows or hides dots.
 #' @param verbose If `FALSE` (default), suppress most warning messages.
-#' @param ... Currently not used.
+#' @param ... Arguments passed down to the individual check functions, especially
+#'   to `check_predictions()` and `binned_residuals()`.
 #' @inheritParams check_predictions
 #'
 #' @return The data frame that is used for plotting.
@@ -185,11 +186,11 @@ check_model.default <- function(x,
   ca <- tryCatch(
     {
       if (minfo$is_bayesian) {
-        suppressWarnings(.check_assumptions_stan(x))
+        suppressWarnings(.check_assumptions_stan(x, ...))
       } else if (minfo$is_linear) {
-        suppressWarnings(.check_assumptions_linear(x, minfo, verbose))
+        suppressWarnings(.check_assumptions_linear(x, minfo, verbose, ...))
       } else {
-        suppressWarnings(.check_assumptions_glm(x, minfo, verbose))
+        suppressWarnings(.check_assumptions_glm(x, minfo, verbose, ...))
       }
     },
     error = function(e) {
@@ -202,7 +203,7 @@ check_model.default <- function(x,
   }
 
   # try to find sensible default for "type" argument
-  suggest_dots <- (minfo$is_bernoulli || minfo$is_count || minfo$is_ordinal || minfo$is_categorical || minfo$is_multinomial)
+  suggest_dots <- (minfo$is_bernoulli || minfo$is_count || minfo$is_ordinal || minfo$is_categorical || minfo$is_multinomial) # nolint
   if (missing(type) && suggest_dots) {
     type <- "discrete_interval"
   }
@@ -330,7 +331,7 @@ check_model.model_fit <- function(x,
 
 # compile plots for checks of linear models  ------------------------
 
-.check_assumptions_linear <- function(model, model_info, verbose = TRUE) {
+.check_assumptions_linear <- function(model, model_info, verbose = TRUE, ...) {
   dat <- list()
 
   dat$VIF <- .diag_vif(model, verbose = verbose)
@@ -340,13 +341,13 @@ check_model.model_fit <- function(x,
   dat$NCV <- .diag_ncv(model, verbose = verbose)
   dat$HOMOGENEITY <- .diag_homogeneity(model, verbose = verbose)
   dat$OUTLIERS <- check_outliers(model, method = "cook")
-  if (!is.null(dat$OUTLIERS)) {
-    threshold <- attributes(dat$OUTLIERS)$threshold$cook
-  } else {
+  if (is.null(dat$OUTLIERS)) {
     threshold <- NULL
+  } else {
+    threshold <- attributes(dat$OUTLIERS)$threshold$cook
   }
   dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold)
-  dat$PP_CHECK <- .safe(check_predictions(model))
+  dat$PP_CHECK <- .safe(check_predictions(model, ...))
 
   dat <- insight::compact_list(dat)
   class(dat) <- c("check_model", "see_check_model")
@@ -357,7 +358,7 @@ check_model.model_fit <- function(x,
 
 # compile plots for checks of generalized linear models  ------------------------
 
-.check_assumptions_glm <- function(model, model_info, verbose = TRUE) {
+.check_assumptions_glm <- function(model, model_info, verbose = TRUE, ...) {
   dat <- list()
 
   dat$VIF <- .diag_vif(model, verbose = verbose)
@@ -365,15 +366,15 @@ check_model.model_fit <- function(x,
   dat$HOMOGENEITY <- .diag_homogeneity(model, verbose = verbose)
   dat$REQQ <- .diag_reqq(model, level = 0.95, model_info = model_info, verbose = verbose)
   dat$OUTLIERS <- check_outliers(model, method = "cook")
-  if (!is.null(dat$OUTLIERS)) {
-    threshold <- attributes(dat$OUTLIERS)$threshold$cook
-  } else {
+  if (is.null(dat$OUTLIERS)) {
     threshold <- NULL
+  } else {
+    threshold <- attributes(dat$OUTLIERS)$threshold$cook
   }
   dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold)
-  dat$PP_CHECK <- .safe(check_predictions(model))
+  dat$PP_CHECK <- .safe(check_predictions(model, ...))
   if (isTRUE(model_info$is_binomial)) {
-    dat$BINNED_RESID <- binned_residuals(model)
+    dat$BINNED_RESID <- binned_residuals(model, ...)
   }
   if (isTRUE(model_info$is_count)) {
     dat$OVERDISPERSION <- .diag_overdispersion(model)
@@ -388,7 +389,7 @@ check_model.model_fit <- function(x,
 
 # compile plots for checks of Bayesian models  ------------------------
 
-.check_assumptions_stan <- function(model) {
+.check_assumptions_stan <- function(model, ...) {
   if (inherits(model, "brmsfit")) {
     # check if brms can be loaded
 
diff --git a/R/check_outliers.R b/R/check_outliers.R
index f7b304c5e..2a1d185d3 100644
--- a/R/check_outliers.R
+++ b/R/check_outliers.R
@@ -263,7 +263,7 @@
 #'
 #' - Thériault, R., Ben-Shachar, M. S., Patil, I., Lüdecke, D., Wiernik, B. M.,
 #' and Makowski, D. (2023). Check your outliers! An introduction to identifying
-#' statistical outliers in R with easystats. https://doi.org/10.31234/osf.io/bu6nt
+#' statistical outliers in R with easystats. \doi{10.31234/osf.io/bu6nt}
 #'
 #' - Rousseeuw, P. J., and Van Zomeren, B. C. (1990). Unmasking multivariate
 #' outliers and leverage points. Journal of the American Statistical
@@ -410,16 +410,16 @@ check_outliers.default <- function(x,
   }
 
   # Others
-  if (!all(method %in% c("cook", "pareto"))) {
+  if (all(method %in% c("cook", "pareto"))) {
+    df <- data.frame(Row = seq_len(nrow(as.data.frame(data))))
+    outlier_count <- list()
+    outlier_var <- list()
+  } else {
     out <- check_outliers(data, method, threshold)
     outlier_var <- attributes(out)$outlier_var
     outlier_count <- attributes(out)$outlier_count
     df <- attributes(out)$data
-    df <- df[!names(df) %in% "Outlier"]
-  } else {
-    df <- data.frame(Row = seq_len(nrow(as.data.frame(data))))
-    outlier_count <- list()
-    outlier_var <- list()
+    df <- df[!names(df) == "Outlier"]
   }
 
   # Cook
@@ -449,17 +449,17 @@ check_outliers.default <- function(x,
 
     outlier_count$cook <- count.table
 
-    if (!all(method %in% c("cook", "pareto"))) {
+    if (all(method %in% c("cook", "pareto"))) {
+      outlier_count$all <- count.table
+    } else {
       outlier_count$all <- datawizard::data_merge(
         list(outlier_count$all, count.table),
         join = "full",
         by = "Row"
       )
-    } else {
-      outlier_count$all <- count.table
     }
   } else {
-    method <- method[!(method %in% "cook")]
+    method <- method[!(method == "cook")]
   }
 
   # Pareto
@@ -489,17 +489,17 @@ check_outliers.default <- function(x,
 
     outlier_count$pareto <- count.table
 
-    if (!all(method %in% c("cook", "pareto"))) {
+    if (all(method %in% c("cook", "pareto"))) {
+      outlier_count$all <- count.table
+    } else {
       outlier_count$all <- datawizard::data_merge(
         list(outlier_count$all, count.table),
         join = "full",
         by = "Row"
       )
-    } else {
-      outlier_count$all <- count.table
     }
   } else {
-    method <- method[!(method %in% "pareto")]
+    method <- method[!(method == "pareto")]
   }
 
   outlier_count$all <- datawizard::convert_na_to(outlier_count$all,
@@ -1442,21 +1442,21 @@ check_outliers.metabin <- check_outliers.metagen
   lof <- 0.001
 
   list(
-    "zscore" = zscore,
-    "zscore_robust" = zscore_robust,
-    "iqr" = iqr,
-    "ci" = ci,
-    "hdi" = hdi,
-    "eti" = eti,
-    "bci" = bci,
-    "cook" = cook,
-    "pareto" = pareto,
-    "mahalanobis" = mahalanobis,
-    "mahalanobis_robust" = mahalanobis_robust,
-    "mcd" = mcd,
-    "ics" = ics,
-    "optics" = optics,
-    "lof" = lof
+    zscore = zscore,
+    zscore_robust = zscore_robust,
+    iqr = iqr,
+    ci = ci,
+    hdi = hdi,
+    eti = eti,
+    bci = bci,
+    cook = cook,
+    pareto = pareto,
+    mahalanobis = mahalanobis,
+    mahalanobis_robust = mahalanobis_robust,
+    mcd = mcd,
+    ics = ics,
+    optics = optics,
+    lof = lof
   )
 }
 
@@ -1478,15 +1478,15 @@ check_outliers.metabin <- check_outliers.metagen
   x <- as.data.frame(x)
 
   # Standardize
-  if (!robust) {
+  if (robust) {
     d <- abs(as.data.frame(lapply(
       x,
-      function(x) (x - mean(x, na.rm = TRUE)) / stats::sd(x, na.rm = TRUE)
+      function(x) (x - stats::median(x, na.rm = TRUE)) / stats::mad(x, na.rm = TRUE)
     )))
   } else {
     d <- abs(as.data.frame(lapply(
       x,
-      function(x) (x - stats::median(x, na.rm = TRUE)) / stats::mad(x, na.rm = TRUE)
+      function(x) (x - mean(x, na.rm = TRUE)) / stats::sd(x, na.rm = TRUE)
     )))
   }
 
@@ -1504,8 +1504,8 @@ check_outliers.metabin <- check_outliers.metagen
   out$Outlier_Zscore <- as.numeric(out$Distance_Zscore > threshold)
 
   output <- list(
-    "data_zscore" = out,
-    "threshold_zscore" = threshold
+    data_zscore = out,
+    threshold_zscore = threshold
   )
 
   if (isTRUE(robust)) {
@@ -1566,8 +1566,8 @@ check_outliers.metabin <- check_outliers.metagen
   }, numeric(1))
 
   list(
-    "data_iqr" = out,
-    "threshold_iqr" = threshold
+    data_iqr = out,
+    threshold_iqr = threshold
   )
 }
 
@@ -1615,8 +1615,8 @@ check_outliers.metabin <- check_outliers.metagen
   out <- cbind(out.0, out)
 
   output <- list(
-    "data_" = out,
-    "threshold_" = threshold
+    data_ = out,
+    threshold_ = threshold
   )
   names(output) <- paste0(names(output), method)
   output
@@ -1636,8 +1636,8 @@ check_outliers.metabin <- check_outliers.metagen
   out$Outlier_Cook <- as.numeric(out$Distance_Cook > threshold)
 
   list(
-    "data_cook" = out,
-    "threshold_cook" = threshold
+    data_cook = out,
+    threshold_cook = threshold
   )
 }
 
@@ -1656,8 +1656,8 @@ check_outliers.metabin <- check_outliers.metagen
   out$Outlier_Pareto <- as.numeric(out$Distance_Pareto > threshold)
 
   list(
-    "data_pareto" = out,
-    "threshold_pareto" = threshold
+    data_pareto = out,
+    threshold_pareto = threshold
   )
 }
 
@@ -1686,8 +1686,8 @@ check_outliers.metabin <- check_outliers.metagen
   out$Outlier_Mahalanobis <- as.numeric(out$Distance_Mahalanobis > threshold)
 
   list(
-    "data_mahalanobis" = out,
-    "threshold_mahalanobis" = threshold
+    data_mahalanobis = out,
+    threshold_mahalanobis = threshold
   )
 }
 
@@ -1717,8 +1717,8 @@ check_outliers.metabin <- check_outliers.metagen
   )
 
   list(
-    "data_mahalanobis_robust" = out,
-    "threshold_mahalanobis_robust" = threshold
+    data_mahalanobis_robust = out,
+    threshold_mahalanobis_robust = threshold
   )
 }
 
@@ -1744,8 +1744,8 @@ check_outliers.metabin <- check_outliers.metagen
   out$Outlier_MCD <- as.numeric(out$Distance_MCD > threshold)
 
   list(
-    "data_mcd" = out,
-    "threshold_mcd" = threshold
+    data_mcd = out,
+    threshold_mcd = threshold
   )
 }
 
@@ -1765,10 +1765,10 @@ check_outliers.metabin <- check_outliers.metagen
   insight::check_if_installed("ICSOutlier")
 
   # Get n cores
-  n_cores <- if (!requireNamespace("parallel", quietly = TRUE)) {
-    NULL
-  } else {
+  n_cores <- if (requireNamespace("parallel", quietly = TRUE)) {
     getOption("mc.cores", 1L)
+  } else {
+    NULL
   }
 
   # tell user about n-cores option
@@ -1822,8 +1822,8 @@ check_outliers.metabin <- check_outliers.metagen
 
   # Out
   list(
-    "data_ics" = out,
-    "threshold_ics" = threshold
+    data_ics = out,
+    threshold_ics = threshold
   )
 }
 
@@ -1854,8 +1854,8 @@ check_outliers.metabin <- check_outliers.metagen
   }
 
   list(
-    "data_optics" = out,
-    "threshold_optics" = threshold
+    data_optics = out,
+    threshold_optics = threshold
   )
 }
 
@@ -1921,8 +1921,8 @@ check_outliers.metabin <- check_outliers.metagen
   out$Outlier_LOF <- as.numeric(out$Distance_LOF > cutoff)
 
   list(
-    "data_lof" = out,
-    "threshold_lof" = threshold
+    data_lof = out,
+    threshold_lof = threshold
   )
 }
 
diff --git a/man/binned_residuals.Rd b/man/binned_residuals.Rd
index b8aee666e..33e710f11 100644
--- a/man/binned_residuals.Rd
+++ b/man/binned_residuals.Rd
@@ -4,7 +4,17 @@
 \alias{binned_residuals}
 \title{Binned residuals for binomial logistic regression}
 \usage{
-binned_residuals(model, term = NULL, n_bins = NULL, show_dots = NULL, ...)
+binned_residuals(
+  model,
+  term = NULL,
+  n_bins = NULL,
+  show_dots = NULL,
+  ci = 0.95,
+  ci_type = c("exact", "gaussian", "boot"),
+  residuals = c("deviance", "pearson", "response"),
+  iterations = 1000,
+  ...
+)
 }
 \arguments{
 \item{model}{A \code{glm}-object with \emph{binomial}-family.}
@@ -24,6 +34,23 @@ time-consuming. By default, \code{show_dots = NULL}. In this case \code{binned_r
 tries to guess whether performance will be poor due to a very large model
 and thus automatically shows or hides dots.}
 
+\item{ci}{Numeric, the confidence level for the error bounds.}
+
+\item{ci_type}{Character, the type of error bounds to calculate. Can be
+\code{"exact"} (default), \code{"gaussian"} or \code{"boot"}. \code{"exact"} calculates the
+error bounds based on the exact binomial distribution, using \code{\link[=binom.test]{binom.test()}}.
+\code{"gaussian"} uses the Gaussian approximation, while \code{"boot"} uses a simple
+bootstrap method, where confidence intervals are calculated based on the
+quantiles of the bootstrap distribution.}
+
+\item{residuals}{Character, the type of residuals to calculate. Can be
+\code{"deviance"} (default), \code{"pearson"} or \code{"response"}. It is recommended to
+use \code{"response"} only for those models where other residuals are not
+available.}
+
+\item{iterations}{Integer, the number of iterations to use for the
+bootstrap method. Only used if \code{ci_type = "boot"}.}
+
 \item{...}{Currently not used.}
 }
 \value{
diff --git a/man/check_model.Rd b/man/check_model.Rd
index 2bf82af92..d5ead9420 100644
--- a/man/check_model.Rd
+++ b/man/check_model.Rd
@@ -28,7 +28,8 @@ check_model(x, ...)
 \arguments{
 \item{x}{A model object.}
 
-\item{...}{Currently not used.}
+\item{...}{Arguments passed down to the individual check functions, especially
+to \code{check_predictions()} and \code{binned_residuals()}.}
 
 \item{dot_size, line_size}{Size of line and dot-geoms.}
 
diff --git a/man/check_outliers.Rd b/man/check_outliers.Rd
index f22a51f6a..74c992b6f 100644
--- a/man/check_outliers.Rd
+++ b/man/check_outliers.Rd
@@ -345,7 +345,7 @@ statistical models. Journal of Open Source Software, 6(60), 3139.
 \doi{10.21105/joss.03139}
 \item Thériault, R., Ben-Shachar, M. S., Patil, I., Lüdecke, D., Wiernik, B. M.,
 and Makowski, D. (2023). Check your outliers! An introduction to identifying
-statistical outliers in R with easystats. https://doi.org/10.31234/osf.io/bu6nt
+statistical outliers in R with easystats. \doi{10.31234/osf.io/bu6nt}
 \item Rousseeuw, P. J., and Van Zomeren, B. C. (1990). Unmasking multivariate
 outliers and leverage points. Journal of the American Statistical
 association, 85(411), 633-639.
diff --git a/tests/testthat/test-binned_residuals.R b/tests/testthat/test-binned_residuals.R
index 4aa69e0ec..6f0155092 100644
--- a/tests/testthat/test-binned_residuals.R
+++ b/tests/testthat/test-binned_residuals.R
@@ -1,10 +1,10 @@
 test_that("binned_residuals", {
   data(mtcars)
   model <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial")
-  result <- binned_residuals(model)
+  result <- binned_residuals(model, ci_type = "gaussian", residuals = "response")
   expect_named(
     result,
-    c("xbar", "ybar", "n", "x.lo", "x.hi", "se", "ci_range", "CI_low", "CI_high", "group")
+    c("xbar", "ybar", "n", "x.lo", "x.hi", "se", "CI_low", "CI_high", "group")
   )
   expect_equal(
     result$xbar,
@@ -16,16 +16,21 @@ test_that("binned_residuals", {
     c(-0.03786, -0.09514, 0.07423, -0.07955, 0.28891, -0.13786),
     tolerance = 1e-4
   )
+  expect_equal(
+    result$CI_low,
+    c(-0.05686, -0.12331, -0.35077, -0.57683, 0.17916, -0.44147),
+    tolerance = 1e-4
+  )
 })
 
 
 test_that("binned_residuals, n_bins", {
   data(mtcars)
   model <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial")
-  result <- binned_residuals(model, n_bins = 10)
+  result <- binned_residuals(model, ci_type = "gaussian", residuals = "response", n_bins = 10)
   expect_named(
     result,
-    c("xbar", "ybar", "n", "x.lo", "x.hi", "se", "ci_range", "CI_low", "CI_high", "group")
+    c("xbar", "ybar", "n", "x.lo", "x.hi", "se", "CI_low", "CI_high", "group")
   )
   expect_equal(
     result$xbar,
@@ -49,10 +54,10 @@ test_that("binned_residuals, n_bins", {
 test_that("binned_residuals, terms", {
   data(mtcars)
   model <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial")
-  result <- binned_residuals(model, term = "mpg")
+  result <- binned_residuals(model, ci_type = "gaussian", residuals = "response", term = "mpg")
   expect_named(
     result,
-    c("xbar", "ybar", "n", "x.lo", "x.hi", "se", "ci_range", "CI_low", "CI_high", "group")
+    c("xbar", "ybar", "n", "x.lo", "x.hi", "se", "CI_low", "CI_high", "group")
   )
   expect_equal(
     result$xbar,
@@ -65,3 +70,88 @@ test_that("binned_residuals, terms", {
     tolerance = 1e-4
   )
 })
+
+
+test_that("binned_residuals, deviance residuals, gaussian CI", {
+  # deviance residuals combined with the Gaussian approximation for the bounds
+  data(mtcars)
+  model <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial")
+  result <- binned_residuals(model, residuals = "deviance", ci_type = "gaussian")
+  cols <- c("xbar", "ybar", "n", "x.lo", "x.hi", "se", "CI_low", "CI_high", "group")
+  expect_named(result, cols)
+  expect_equal(
+    result$xbar,
+    c(0.03786, 0.09514, 0.25911, 0.47955, 0.71109, 0.97119),
+    tolerance = 1e-4
+  )
+  expect_equal(
+    result$ybar,
+    c(-0.26905, -0.44334, 0.03763, -0.19917, 0.81563, -0.23399),
+    tolerance = 1e-4
+  )
+  # qnorm() bounds are symmetric, so both half-widths must agree
+  expect_equal(
+    result$CI_high - result$ybar,
+    result$ybar - result$CI_low,
+    tolerance = 1e-4
+  )
+  expect_equal(
+    result$CI_low,
+    c(-0.33985, -0.50865, -0.98255, -1.36025, 0.61749, -1.00913),
+    tolerance = 1e-4
+  )
+})
+
+
+test_that("binned_residuals, default", {
+  # defaults only: deviance residuals with exact binomial intervals
+  data(mtcars)
+  fit <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial")
+  binned <- binned_residuals(fit)
+  expected_names <- c("xbar", "ybar", "n", "x.lo", "x.hi", "se", "CI_low", "CI_high", "group")
+  expect_named(binned, expected_names)
+
+  # reference values, one entry per bin
+  expected_xbar <- c(
+    0.03786, 0.09514, 0.25911, 0.47955, 0.71109, 0.97119
+  )
+  expected_ybar <- c(
+    -0.26905, -0.44334, 0.03763, -0.19917, 0.81563, -0.23399
+  )
+  expected_ci_low <- c(
+    -0.52997, -0.70426, -0.32935, -0.59948, 0.55472, -0.55251
+  )
+
+  # compare the binned output against the reference values
+  expect_equal(binned$xbar, expected_xbar, tolerance = 1e-4)
+  expect_equal(binned$ybar, expected_ybar, tolerance = 1e-4)
+  expect_equal(binned$CI_low, expected_ci_low, tolerance = 1e-4)
+})
+
+
+test_that("binned_residuals, bootstrapped CI", {
+  skip_on_cran()
+  data(mtcars)
+  model <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial")
+  set.seed(123) # fix the RNG state: the bootstrap bounds below depend on it
+  result <- binned_residuals(model, ci_type = "boot", iterations = 100) # 100 resamples per bin
+  expect_named(
+    result,
+    c("xbar", "ybar", "n", "x.lo", "x.hi", "se", "CI_low", "CI_high", "group")
+  )
+  expect_equal(
+    result$xbar,
+    c(0.03786, 0.09514, 0.25911, 0.47955, 0.71109, 0.97119),
+    tolerance = 1e-4
+  )
+  expect_equal(
+    result$ybar, # default residuals, hence the same bin means as the other deviance tests
+    c(-0.26905, -0.44334, 0.03763, -0.19917, 0.81563, -0.23399),
+    tolerance = 1e-4
+  )
+  expect_equal(
+    result$CI_low, # snapshot of the lower bootstrap bounds under seed 123
+    c(-0.32623, -0.50543, -0.80879, -1.15154, 0.67569, -0.65748),
+    tolerance = 1e-4
+  )
+})