Skip to content

Commit

Permalink
DHARMa implementation for new check_residuals() function (#643)
Browse files Browse the repository at this point in the history
Co-authored-by: Michael McCarthy <[email protected]>
  • Loading branch information
strengejacke and mccarthy-m-g committed Mar 18, 2024
1 parent afddb29 commit 0f1125c
Show file tree
Hide file tree
Showing 36 changed files with 1,623 additions and 166 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: performance
Title: Assessment of Regression Models Performance
Version: 0.10.9.6
Version: 0.10.9.8
Authors@R:
c(person(given = "Daniel",
family = "Lüdecke",
Expand Down Expand Up @@ -70,9 +70,8 @@ Depends:
R (>= 3.6)
Imports:
bayestestR (>= 0.13.2),
insight (>= 0.19.8),
insight (>= 0.19.9),
datawizard (>= 0.9.1),
methods,
stats,
utils
Suggests:
Expand All @@ -91,6 +90,7 @@ Suggests:
correlation,
cplm,
dbscan,
DHARMa,
estimatr,
fixest,
flextable,
Expand Down
19 changes: 19 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,11 @@ S3method(check_heteroscedasticity,default)
S3method(check_homogeneity,afex_aov)
S3method(check_homogeneity,default)
S3method(check_homogeneity,htest)
S3method(check_model,DHARMa)
S3method(check_model,brmsfit)
S3method(check_model,default)
S3method(check_model,model_fit)
S3method(check_model,performance_simres)
S3method(check_model,stanreg)
S3method(check_multimodal,data.frame)
S3method(check_multimodal,numeric)
Expand All @@ -71,6 +73,7 @@ S3method(check_normality,lmerModLmerTest)
S3method(check_normality,merMod)
S3method(check_normality,numeric)
S3method(check_outliers,BFBayesFactor)
S3method(check_outliers,DHARMa)
S3method(check_outliers,character)
S3method(check_outliers,data.frame)
S3method(check_outliers,default)
Expand All @@ -87,11 +90,13 @@ S3method(check_outliers,meta)
S3method(check_outliers,metabin)
S3method(check_outliers,metagen)
S3method(check_outliers,numeric)
S3method(check_outliers,performance_simres)
S3method(check_outliers,rma)
S3method(check_outliers,rma.uni)
S3method(check_outliers,rq)
S3method(check_outliers,rqs)
S3method(check_outliers,rqss)
S3method(check_overdispersion,DHARMa)
S3method(check_overdispersion,default)
S3method(check_overdispersion,fixest)
S3method(check_overdispersion,fixest_multi)
Expand All @@ -103,11 +108,15 @@ S3method(check_overdispersion,model_fit)
S3method(check_overdispersion,negbin)
S3method(check_overdispersion,negbinirr)
S3method(check_overdispersion,negbinmfx)
S3method(check_overdispersion,performance_simres)
S3method(check_overdispersion,poissonirr)
S3method(check_overdispersion,poissonmfx)
S3method(check_predictions,BFBayesFactor)
S3method(check_predictions,default)
S3method(check_predictions,lme)
S3method(check_residuals,DHARMa)
S3method(check_residuals,default)
S3method(check_residuals,performance_simres)
S3method(check_singularity,MixMod)
S3method(check_singularity,clmm)
S3method(check_singularity,cpglmm)
Expand All @@ -123,6 +132,9 @@ S3method(check_sphericity,default)
S3method(check_sphericity,mlm)
S3method(check_symmetry,htest)
S3method(check_symmetry,numeric)
S3method(check_zeroinflation,DHARMa)
S3method(check_zeroinflation,default)
S3method(check_zeroinflation,performance_simres)
S3method(cronbachs_alpha,data.frame)
S3method(cronbachs_alpha,matrix)
S3method(cronbachs_alpha,parameters_pca)
Expand Down Expand Up @@ -261,10 +273,12 @@ S3method(plot,check_model)
S3method(plot,check_normality)
S3method(plot,check_outliers)
S3method(plot,check_overdisp)
S3method(plot,check_residuals)
S3method(plot,check_sphericity)
S3method(plot,compare_performance)
S3method(plot,performance_pp_check)
S3method(plot,performance_roc)
S3method(plot,performance_simres)
S3method(plot,test_likelihoodratio)
S3method(plot,test_performance)
S3method(print,binned_residuals)
Expand All @@ -283,7 +297,9 @@ S3method(print,check_normality_binom)
S3method(print,check_outliers)
S3method(print,check_outliers_metafor)
S3method(print,check_outliers_metagen)
S3method(print,check_outliers_simres)
S3method(print,check_overdisp)
S3method(print,check_residuals)
S3method(print,check_sphericity)
S3method(print,check_symmetry)
S3method(print,check_zi)
Expand All @@ -302,6 +318,7 @@ S3method(print,performance_pcp)
S3method(print,performance_pp_check)
S3method(print,performance_roc)
S3method(print,performance_score)
S3method(print,performance_simres)
S3method(print,r2_bayes)
S3method(print,r2_generic)
S3method(print,r2_loo)
Expand Down Expand Up @@ -535,6 +552,7 @@ export(check_outliers)
export(check_overdispersion)
export(check_posterior_predictions)
export(check_predictions)
export(check_residuals)
export(check_singularity)
export(check_sphericity)
export(check_sphericity_bartlett)
Expand Down Expand Up @@ -588,6 +606,7 @@ export(r2_tjur)
export(r2_xu)
export(r2_zeroinflated)
export(rmse)
export(simulate_residuals)
export(test_bf)
export(test_likelihoodratio)
export(test_lrt)
Expand Down
25 changes: 25 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,35 @@

* Rudimentary support for models of class `serp` from package *serp*.

## New functions

* `simulate_residuals()` and `check_residuals()`, to simulate and check residuals
from generalized linear (mixed) models. Simulating residuals is based on the
*DHARMa* package. Objects returned by `simulate_residuals()` inherit from
the `DHARMa` class and can thus be used with any function from the *DHARMa*
package. In addition, the *performance* package provides its own methods for
these objects, such as `check_overdispersion()`, `check_zeroinflation()`,
`check_outliers()` or `check_model()`.

* Plots for `check_model()` have been improved. For non-Gaussian models, the
Q-Q plots are now based on simulated residuals from the *DHARMa* package, thus
providing more accurate and informative plots. The half-normal Q-Q plot for
generalized linear models can still be obtained by setting the new argument
`residual_type = "normal"`.

* The following functions now support simulated residuals (from `simulate_residuals()`)
as well as objects returned from `DHARMa::simulateResiduals()`:
- `check_overdispersion()`
- `check_zeroinflation()`
- `check_outliers()`
- `check_model()`

## General

* Improved error messages for `check_model()` when QQ-plots cannot be created.

* `check_distribution()` is more stable for possibly sparse data.

## Bug fixes

* Fixed issue in `check_normality()` for t-tests.
Expand Down
20 changes: 16 additions & 4 deletions R/check_distribution.R
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ check_distribution.default <- function(model) {
} else {
x <- stats::residuals(model)
}
dat <- .extract_features(x)
dat <- .extract_features(x, "residuals")

dist_residuals <- as.data.frame(t(stats::predict(classify_distribution, dat, type = "prob")))

Expand All @@ -88,7 +88,7 @@ check_distribution.default <- function(model) {
dummy_factors = FALSE,
preserve_levels = TRUE
)
dat <- .extract_features(x)
dat <- .extract_features(x, "response")

dist_response <- as.data.frame(t(stats::predict(classify_distribution, dat, type = "prob")))

Expand Down Expand Up @@ -189,15 +189,27 @@ check_distribution.numeric <- function(model) {

# utilities -----------------------------

.extract_features <- function(x) {
.extract_features <- function(x, type = NULL) {
# validation check, remove missings
x <- x[!is.na(x)]

# this might fail, so we wrap in ".safe()"
map_est <- .safe(mean(x) - as.numeric(bayestestR::map_estimate(x, bw = "nrd0")))

if (is.null(map_est)) {
map_est <- mean(x) - datawizard::distribution_mode(x)
msg <- "Could not accurately estimate the mode."
if (!is.null(type)) {
msg <- paste(msg, "Predicted distribution of the", type, "may be less accurate.")
}
insight::format_alert(msg)
}

data.frame(
SD = stats::sd(x),
MAD = stats::mad(x, constant = 1),
Mean_Median_Distance = mean(x) - stats::median(x),
Mean_Mode_Distance = mean(x) - as.numeric(bayestestR::map_estimate(x, bw = "nrd0")),
Mean_Mode_Distance = map_est,
SD_MAD_Distance = stats::sd(x) - stats::mad(x, constant = 1),
Var_Mean_Distance = stats::var(x) - mean(x),
Range_SD = diff(range(x)) / stats::sd(x),
Expand Down
Loading

0 comments on commit 0f1125c

Please sign in to comment.