diff --git a/.gitignore b/.gitignore index 410bc37ca..1e3341f4a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,54 +1,41 @@ # History files .Rhistory .Rapp.history - # Session Data files .RData - # Example code in package build process *-Ex.R - # Output files from R CMD build /*.tar.gz - # Output files from R CMD check /*.Rcheck/ /revdep/ revdep - # RStudio files .Rproj.user/ *.Rproj - # produced vignettes vignettes/*.html vignettes/*.pdf - # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 .httr-oauth - # knitr and R markdown default cache directories /*_cache/ /cache/ - # Temporary files created by R markdown *.utf8.md *.knit.md - # Shiny token, see https://shiny.rstudio.com/articles/shinyapps.html rsconnect/ inst/doc - ========================= # Operating System Files # OSX .DS_Store .AppleDouble .LSOverride - # Thumbnails ._* - # Files that might appear in the root of a volume .DocumentRevisions-V100 .fseventsd @@ -56,10 +43,10 @@ inst/doc .TemporaryItems .Trashes .VolumeIcon.icns - # Directories potentially created on remote AFP share .AppleDB .AppleDesktop Network Trash Folder Temporary Items -.apdisk \ No newline at end of file +.apdisk +.Rprofile diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION index 22db08405..167146dfe 100644 --- a/CRAN-SUBMISSION +++ b/CRAN-SUBMISSION @@ -1,3 +1,3 @@ -Version: 0.11.0 -Date: 2024-03-22 21:30:58 UTC -SHA: 051b9bb2b7721c632ce145f85c55aa55c8eebf90 +Version: 0.12.0 +Date: 2024-06-07 17:11:44 UTC +SHA: cb1c46609c8f943a736f3c76b5cadd4272e7bdf2 diff --git a/DESCRIPTION b/DESCRIPTION index 43d03cf76..8ea3d81df 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.11.0.5 -Authors@R: +Version: 0.12.0.9 +Authors@R: c(person(given = "Daniel", family = "Lüdecke", role = c("aut", "cre"), @@ -39,8 +39,8 @@ Authors@R: email = "remi.theriault@mail.mcgill.ca", comment = c(ORCID = "0000-0003-4315-6788", Twitter = 
"@rempsyc")), person(given = "Vincent", - family = "Arel-Bundock", - email = "vincent.arel-bundock@umontreal.ca", + family = "Arel-Bundock", + email = "vincent.arel-bundock@umontreal.ca", role = "ctb", comment = c(ORCID = "0000-0003-2042-7063")), person(given = "Martin", @@ -48,9 +48,9 @@ Authors@R: role = "rev"), person(given = "gjo11", role = "rev"), - person("Etienne", - "Bacher", , - "etienne.bacher@protonmail.com", + person("Etienne", + "Bacher", , + "etienne.bacher@protonmail.com", role = "ctb", comment = c(ORCID = "0000-0002-9271-5075"))) Maintainer: Daniel Lüdecke @@ -70,7 +70,7 @@ Depends: R (>= 3.6) Imports: bayestestR (>= 0.13.2), - insight (>= 0.19.10), + insight (>= 0.20.2), datawizard (>= 0.10.0), stats, utils @@ -93,7 +93,7 @@ Suggests: DHARMa, estimatr, fixest, - flextable, + flextable, forecast, ftExtra, gamm4, @@ -101,7 +101,7 @@ Suggests: glmmTMB, graphics, Hmisc, - httr, + httr2, ICS, ICSOutlier, ISLR, @@ -124,13 +124,14 @@ Suggests: nonnest2, ordinal, parallel, - parameters (>= 0.21.4), + parameters (>= 0.21.6), patchwork, pscl, psych, quantreg, qqplotr (>= 0.0.6), randomForest, + RcppEigen, rempsyc, rmarkdown, rstanarm, @@ -145,7 +146,7 @@ Suggests: withr (>= 3.0.0) Encoding: UTF-8 Language: en-US -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Roxygen: list(markdown = TRUE) Config/testthat/edition: 3 Config/testthat/parallel: true @@ -154,4 +155,3 @@ Config/Needs/website: r-lib/pkgdown, easystats/easystatstemplate Config/rcmdcheck/ignore-inconsequential-notes: true -Remotes: easystats/see diff --git a/NAMESPACE b/NAMESPACE index 4c8e43f9f..4d7bbd024 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -148,6 +148,7 @@ S3method(display,test_performance) S3method(fitted,BFBayesFactor) S3method(format,compare_performance) S3method(format,performance_model) +S3method(format,performance_rmse) S3method(format,test_performance) S3method(logLik,cpglm) S3method(logLik,iv_robust) @@ -319,6 +320,7 @@ S3method(print,performance_hosmer) S3method(print,performance_model) 
S3method(print,performance_pcp) S3method(print,performance_pp_check) +S3method(print,performance_rmse) S3method(print,performance_roc) S3method(print,performance_score) S3method(print,performance_simres) @@ -451,6 +453,7 @@ S3method(r2_coxsnell,survreg) S3method(r2_coxsnell,svycoxph) S3method(r2_coxsnell,truncreg) S3method(r2_efron,default) +S3method(r2_ferrari,default) S3method(r2_kullback,default) S3method(r2_kullback,glm) S3method(r2_loo_posterior,BFBayesFactor) @@ -598,6 +601,7 @@ export(r2) export(r2_bayes) export(r2_coxsnell) export(r2_efron) +export(r2_ferrari) export(r2_kullback) export(r2_loo) export(r2_loo_posterior) diff --git a/NEWS.md b/NEWS.md index c94980743..14f4c4e3c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,10 +1,37 @@ -# performance 0.11.1 +# performance 0.12.1 + +## General + +* `icc()` and `r2_nakagawa()` get a `null_model` argument. This can be useful + when computing R2 or ICC for mixed models, where the internal computation of + the null model fails, or when you already have fit the null model and want + to save time. + +* `icc()` and `r2_nakagawa()` get an `approximation` argument indicating the + approximation method for the distribution-specific (residual) variance. See + Nakagawa et al. 2017 for details. + +* `icc()` and `r2_nakagawa()` get a `model_component` argument indicating the + component for zero-inflation or hurdle models. + +* `performance_rmse()` (resp. `rmse()`) can now compute analytical and + bootstrapped confidence intervals. The function gains the following new arguments: + `ci`, `ci_method` and `iterations`. + +* New function `r2_ferrari()` to compute Ferrari & Cribari-Neto's R2 for + generalized linear models, in particular beta-regression. + +# performance 0.12.0 ## Breaking * Aliases `posterior_predictive_check()` and `check_posterior_predictions()` for `check_predictions()` are deprecated. +* Arguments named `group` or `group_by` will be deprecated in a future release. + Please use `by` instead. 
This affects `check_heterogeneity_bias()` in + *performance*. + ## General * Improved documentation and new vignettes added. @@ -15,6 +42,9 @@ the usual style as for other models and no longer returns plots from `bayesplot::pp_check()`. +* Updated the trained model that is used to predict distributions in + `check_distribution()`. + ## Bug fixes * `check_model()` now falls back on normal Q-Q plots when a model is not supported diff --git a/R/binned_residuals.R b/R/binned_residuals.R index 8c6608ebf..5fb8132e4 100644 --- a/R/binned_residuals.R +++ b/R/binned_residuals.R @@ -86,7 +86,6 @@ binned_residuals <- function(model, iterations = 1000, verbose = TRUE, ...) { - # match arguments ci_type <- match.arg(ci_type) residuals <- match.arg(residuals) diff --git a/R/check_autocorrelation.R b/R/check_autocorrelation.R index f082a3c52..159f53843 100644 --- a/R/check_autocorrelation.R +++ b/R/check_autocorrelation.R @@ -29,7 +29,6 @@ check_autocorrelation <- function(x, ...) { #' @rdname check_autocorrelation #' @export check_autocorrelation.default <- function(x, nsim = 1000, ...) { - # check for valid input .is_model_valid(x) .residuals <- stats::residuals(x) diff --git a/R/check_clusterstructure.R b/R/check_clusterstructure.R index 8a80c7b8f..9d4bab2d6 100644 --- a/R/check_clusterstructure.R +++ b/R/check_clusterstructure.R @@ -11,7 +11,7 @@ #' number of square shaped blocks along the diagonal. #' #' @param x A data frame. -#' @param standardize Standardize the dataframe before clustering (default). +#' @param standardize Standardize the data frame before clustering (default). #' @param distance Distance method used. Other methods than "euclidean" #' (default) are exploratory in the context of clustering tendency. See #' [stats::dist()] for list of available methods. 
diff --git a/R/check_collinearity.R b/R/check_collinearity.R index 14dd2fcce..b6d26f938 100644 --- a/R/check_collinearity.R +++ b/R/check_collinearity.R @@ -145,7 +145,6 @@ multicollinearity <- check_collinearity #' @rdname check_collinearity #' @export check_collinearity.default <- function(x, ci = 0.95, verbose = TRUE, ...) { - # check for valid input .is_model_valid(x) .check_collinearity(x, component = "conditional", ci = ci, verbose = verbose) } diff --git a/R/check_convergence.R b/R/check_convergence.R index 1c2ab5f25..6f7628a12 100644 --- a/R/check_convergence.R +++ b/R/check_convergence.R @@ -76,7 +76,6 @@ check_convergence <- function(x, tolerance = 0.001, ...) { #' @export check_convergence.default <- function(x, tolerance = 0.001, ...) { - # check for valid input .is_model_valid(x) message(sprintf("`check_convergence()` does not work for models of class '%s'.", class(x)[1])) } diff --git a/R/check_distribution.R b/R/check_distribution.R index 89f48263a..d743b3ac1 100644 --- a/R/check_distribution.R +++ b/R/check_distribution.R @@ -34,12 +34,11 @@ NULL #' This function uses an internal random forest model to classify the #' distribution from a model-family. Currently, following distributions are #' trained (i.e. results of `check_distribution()` may be one of the -#' following): `"bernoulli"`, `"beta"`, `"beta-binomial"`, -#' `"binomial"`, `"chi"`, `"exponential"`, `"F"`, -#' `"gamma"`, `"lognormal"`, `"normal"`, `"negative -#' binomial"`, `"negative binomial (zero-inflated)"`, `"pareto"`, -#' `"poisson"`, `"poisson (zero-inflated)"`, `"uniform"` and -#' `"weibull"`. +#' following): `"bernoulli"`, `"beta"`, `"beta-binomial"`, `"binomial"`, +#' `"cauchy"`, `"chi"`, `"exponential"`, `"F"`, `"gamma"`, `"half-cauchy"`, +#' `"inverse-gamma"`, `"lognormal"`, `"normal"`, `"negative binomial"`, +#' `"negative binomial (zero-inflated)"`, `"pareto"`, `"poisson"`, +#' `"poisson (zero-inflated)"`, `"tweedie"`, `"uniform"` and `"weibull"`. 
#' \cr \cr #' Note the similarity between certain distributions according to shape, skewness, #' etc. Thus, the predicted distribution may not be perfectly representing the @@ -67,7 +66,6 @@ check_distribution <- function(model) { #' @export check_distribution.default <- function(model) { - # check for valid input .is_model_valid(model) insight::check_if_installed("randomForest") @@ -193,23 +191,40 @@ check_distribution.numeric <- function(model) { # validation check, remove missings x <- x[!is.na(x)] - # this might fail, so we wrap in ".safe()" - map_est <- .safe(mean(x) - as.numeric(bayestestR::map_estimate(x, bw = "nrd0"))) + mode_value <- NULL + # find mode for integer, or MAP for distributions + if (all(.is_integer(x))) { + mode_value <- datawizard::distribution_mode(x) + } else { + # this might fail, so we wrap in ".safe()" + mode_value <- tryCatch( + as.numeric(bayestestR::map_estimate(x, bw = "nrd0")), + error = function(e) NULL + ) + if (is.null(mode_value)) { + mode_value <- tryCatch( + as.numeric(bayestestR::map_estimate(x, bw = "kernel")), + error = function(e) NULL + ) + } + } - if (is.null(map_est)) { - map_est <- mean(x) - datawizard::distribution_mode(x) + if (is.null(mode_value)) { + mean_mode_diff <- mean(x) - datawizard::distribution_mode(x) msg <- "Could not accurately estimate the mode." 
if (!is.null(type)) { msg <- paste(msg, "Predicted distribution of the", type, "may be less accurate.") } insight::format_alert(msg) + } else { + mean_mode_diff <- .safe(mean(x) - mode_value) } data.frame( SD = stats::sd(x), MAD = stats::mad(x, constant = 1), Mean_Median_Distance = mean(x) - stats::median(x), - Mean_Mode_Distance = map_est, + Mean_Mode_Distance = mean_mode_diff, SD_MAD_Distance = stats::sd(x) - stats::mad(x, constant = 1), Var_Mean_Distance = stats::var(x) - mean(x), Range_SD = diff(range(x)) / stats::sd(x), @@ -222,6 +237,7 @@ check_distribution.numeric <- function(model) { Min = min(x), Max = max(x), Proportion_Positive = sum(x >= 0) / length(x), + Proportion_Zero = sum(x == 0) / length(x), Integer = all(.is_integer(x)) ) } diff --git a/R/check_factorstructure.R b/R/check_factorstructure.R index d4b1c5c54..21f4584c1 100644 --- a/R/check_factorstructure.R +++ b/R/check_factorstructure.R @@ -50,7 +50,7 @@ #' exclusion them from the analysis (note that you would need to re-compute the #' KMO indices as they are dependent on the whole dataset). #' -#' @param x A dataframe or a correlation matrix. If the latter is passed, `n` +#' @param x A data frame or a correlation matrix. If the latter is passed, `n` #' must be provided. #' @param n If a correlation matrix was passed, the number of observations must #' be specified. diff --git a/R/check_heterogeneity_bias.R b/R/check_heterogeneity_bias.R index d9bb337f9..7b7b22fcf 100644 --- a/R/check_heterogeneity_bias.R +++ b/R/check_heterogeneity_bias.R @@ -8,9 +8,10 @@ #' @param select Character vector (or formula) with names of variables to select #' that should be checked. If `x` is a mixed model object, this argument #' will be ignored. -#' @param group Character vector (or formula) with the name of the variable that +#' @param by Character vector (or formula) with the name of the variable that #' indicates the group- or cluster-ID. If `x` is a model object, this #' argument will be ignored. 
+#' @param group Deprecated. Use `by` instead. #' #' @seealso #' For further details, read the vignette @@ -25,13 +26,18 @@ #' @examples #' data(iris) #' iris$ID <- sample(1:4, nrow(iris), replace = TRUE) # fake-ID -#' check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), group = "ID") +#' check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), by = "ID") #' @export -check_heterogeneity_bias <- function(x, select = NULL, group = NULL) { +check_heterogeneity_bias <- function(x, select = NULL, by = NULL, group = NULL) { + ## TODO: deprecate later + if (!is.null(group)) { + insight::format_warning("Argument `group` is deprecated and will be removed in a future release. Please use `by` instead.") # nolint + by <- group + } if (insight::is_model(x)) { - group <- insight::find_random(x, split_nested = TRUE, flatten = TRUE) - if (is.null(group)) { - insight::format_error("Model is no mixed model. Please provide a mixed model, or a data frame and arguments `select` and `group`.") # nolint + by <- insight::find_random(x, split_nested = TRUE, flatten = TRUE) + if (is.null(by)) { + insight::format_error("Model is no mixed model. 
Please provide a mixed model, or a data frame and arguments `select` and `by`.") # nolint } my_data <- insight::get_data(x, source = "mf", verbose = FALSE) select <- insight::find_predictors(x, effects = "fixed", component = "conditional", flatten = TRUE) @@ -39,18 +45,19 @@ check_heterogeneity_bias <- function(x, select = NULL, group = NULL) { if (inherits(select, "formula")) { select <- all.vars(select) } - if (inherits(group, "formula")) { - group <- all.vars(group) + if (inherits(by, "formula")) { + by <- all.vars(by) } my_data <- x } - unique_groups <- .n_unique(my_data[[group]]) - combinations <- expand.grid(select, group) + unique_groups <- .n_unique(my_data[[by]]) + combinations <- expand.grid(select, by) result <- Map(function(predictor, id) { # demean predictor - d <- datawizard::demean(my_data, select = predictor, group = id, verbose = FALSE) + + d <- datawizard::demean(my_data, select = predictor, by = id, verbose = FALSE) # get new names within_name <- paste0(predictor, "_within") diff --git a/R/check_heteroscedasticity.R b/R/check_heteroscedasticity.R index cf8e4cb09..3fbefec9b 100644 --- a/R/check_heteroscedasticity.R +++ b/R/check_heteroscedasticity.R @@ -46,7 +46,6 @@ check_heteroskedasticity <- check_heteroscedasticity #' @export check_heteroscedasticity.default <- function(x, ...) { - # check for valid input .is_model_valid(x) # only for linear models diff --git a/R/check_homogeneity.R b/R/check_homogeneity.R index db79fa106..d6b486810 100644 --- a/R/check_homogeneity.R +++ b/R/check_homogeneity.R @@ -70,14 +70,14 @@ check_homogeneity.default <- function(x, method = c("bartlett", "fligner", "leve ) if (is.null(check)) { - insight::print_color("'check_homogeneity()' cannot perform check for normality. Please specify the 'method'-argument for the test of equal variances.\n", "red") + insight::print_color("'check_homogeneity()' cannot perform check for normality. 
Please specify the 'method'-argument for the test of equal variances.\n", "red") # nolint return(NULL) } method <- ifelse(check < 0.05, "fligner", "bartlett") } - if (method == "fligner") { + if (method == "fligner") { # nolint r <- stats::fligner.test(f, data = insight::get_data(x, verbose = FALSE)) p.val <- r$p.value } else if (method == "bartlett") { @@ -157,7 +157,7 @@ check_homogeneity.afex_aov <- function(x, method = "levene", ...) { if (any(is_covar)) { insight::format_alert( - "Levene's test is not appropriate with quantitative explanatory variables. Testing assumption of homogeneity among factor groups only." + "Levene's test is not appropriate with quantitative explanatory variables. Testing assumption of homogeneity among factor groups only." # nolint ) # ## TODO maybe add as option? # warning("Testing assumption of homogeneity on residualzied data among factor groups only.", call. = FALSE) @@ -165,7 +165,7 @@ check_homogeneity.afex_aov <- function(x, method = "levene", ...) { between <- between[!is_covar] } - form <- stats::formula(paste0(dv, "~", paste0(between, collapse = "*"))) + form <- stats::formula(paste0(dv, "~", paste(between, collapse = "*"))) test <- car::leveneTest(form, ag_data, center = mean, ...) p.val <- test[1, "Pr(>F)"] diff --git a/R/check_itemscale.R b/R/check_itemscale.R index 2dff6c5da..465504687 100644 --- a/R/check_itemscale.R +++ b/R/check_itemscale.R @@ -66,7 +66,6 @@ #' ) #' @export check_itemscale <- function(x, factor_index = NULL) { - # check for valid input if (!inherits(x, c("parameters_pca", "data.frame"))) { insight::format_error( "`x` must be an object of class `parameters_pca`, as returned by `parameters::principal_components()`, or a data frame." # nolint diff --git a/R/check_multimodal.R b/R/check_multimodal.R index 55e70f78f..b48a5831e 100644 --- a/R/check_multimodal.R +++ b/R/check_multimodal.R @@ -58,10 +58,10 @@ check_multimodal.data.frame <- function(x, ...) 
{ rez$p <- 1 - stats::pchisq(rez$Chisq, df = rez$df) # Text - text <- "The parametric mixture modelling test suggests that " + msg <- "The parametric mixture modelling test suggests that " if (rez$p < 0.05) { - text <- paste0( - text, + msg <- paste0( + msg, "the multivariate distribution is significantly multimodal (Chi2(", insight::format_value(rez$df, protect_integers = TRUE), ") = ", @@ -70,8 +70,8 @@ check_multimodal.data.frame <- function(x, ...) { ) color <- "green" } else { - text <- paste0( - text, + msg <- paste0( + msg, "the hypothesis of a multimodal multivariate distribution cannot be rejected (Chi2(", insight::format_value(rez$df, protect_integers = TRUE), ") = ", @@ -82,7 +82,7 @@ check_multimodal.data.frame <- function(x, ...) { } - attr(rez, "text") <- insight::format_message(text) + attr(rez, "text") <- insight::format_message(msg) attr(rez, "color") <- color attr(rez, "title") <- "Is the data multimodal?" class(rez) <- c("easystats_check", class(rez)) @@ -99,19 +99,19 @@ check_multimodal.numeric <- function(x, ...) { rez <- multimode::modetest(x, mod0 = 1, method = "ACR") rez <- list(p = rez$p.value, excess_mass = rez$statistic) - text <- "The Ameijeiras-Alonso et al. (2018) excess mass test suggests that " + msg <- "The Ameijeiras-Alonso et al. (2018) excess mass test suggests that " if (rez$p < 0.05) { - text <- paste0( - text, + msg <- paste0( + msg, "the distribution is significantly multimodal (excess mass = ", insight::format_value(rez$excess_mass), ", ", insight::format_p(rez$p), ").\n" ) color <- "green" } else { - text <- paste0( - text, + msg <- paste0( + msg, "the hypothesis of a multimodal distribution cannot be rejected (excess mass = ", insight::format_value(rez$excess_mass), ", ", insight::format_p(rez$p), ").\n" @@ -119,7 +119,7 @@ check_multimodal.numeric <- function(x, ...) 
{ color <- "yellow" } - attr(rez, "text") <- insight::format_message(text) + attr(rez, "text") <- insight::format_message(msg) attr(rez, "color") <- color attr(rez, "title") <- "Is the variable multimodal?" class(rez) <- c("easystats_check", class(rez)) diff --git a/R/check_normality.R b/R/check_normality.R index 297c50179..7ea6970ec 100644 --- a/R/check_normality.R +++ b/R/check_normality.R @@ -55,7 +55,6 @@ check_normality <- function(x, ...) { #' @export check_normality.default <- function(x, ...) { - # check for valid input .is_model_valid(x) if (!insight::model_info(x)$is_linear) { diff --git a/R/check_outliers.R b/R/check_outliers.R index adb4165dc..071bd39a3 100644 --- a/R/check_outliers.R +++ b/R/check_outliers.R @@ -109,7 +109,7 @@ #' default threshold to classify outliers is 1.959 (`threshold = list("zscore" = 1.959)`), #' corresponding to the 2.5% (`qnorm(0.975)`) most extreme observations #' (assuming the data is normally distributed). Importantly, the Z-score -#' method is univariate: it is computed column by column. If a dataframe is +#' method is univariate: it is computed column by column. If a data frame is #' passed, the Z-score is calculated for each variable separately, and the #' maximum (absolute) Z-score is kept for each observations. Thus, all #' observations that are extreme on at least one variable might be detected @@ -255,29 +255,29 @@ #' @references #' - Archimbaud, A., Nordhausen, K., and Ruiz-Gazen, A. (2018). ICS for #' multivariate outlier detection with application to quality control. -#' Computational Statistics and Data Analysis, 128, 184-199. +#' *Computational Statistics and Data Analysis*, *128*, 184-199. #' \doi{10.1016/j.csda.2018.06.011} #' #' - Gnanadesikan, R., and Kettenring, J. R. (1972). Robust estimates, residuals, -#' and outlier detection with multiresponse data. Biometrics, 81-124. +#' and outlier detection with multiresponse data. *Biometrics*, 81-124. #' #' - Bollen, K. A., and Jackman, R. W. (1985). 
Regression diagnostics: An -#' expository treatment of outliers and influential cases. Sociological Methods -#' and Research, 13(4), 510-542. +#' expository treatment of outliers and influential cases. *Sociological Methods +#' and Research*, *13*(4), 510-542. #' #' - Cabana, E., Lillo, R. E., and Laniado, H. (2019). Multivariate outlier #' detection based on a robust Mahalanobis distance with shrinkage estimators. #' arXiv preprint arXiv:1904.02596. #' #' - Cook, R. D. (1977). Detection of influential observation in linear -#' regression. Technometrics, 19(1), 15-18. +#' regression. *Technometrics*, *19*(1), 15-18. #' #' - Iglewicz, B., and Hoaglin, D. C. (1993). How to detect and handle outliers #' (Vol. 16). Asq Press. #' #' - Leys, C., Klein, O., Dominicy, Y., and Ley, C. (2018). Detecting -#' multivariate outliers: Use a robust variant of Mahalanobis distance. Journal -#' of Experimental Social Psychology, 74, 150-156. +#' multivariate outliers: Use a robust variant of Mahalanobis distance. *Journal +#' of Experimental Social Psychology*, 74, 150-156. #' #' - Liu, F. T., Ting, K. M., and Zhou, Z. H. (2008, December). Isolation forest. #' In 2008 Eighth IEEE International Conference on Data Mining (pp. 413-422). @@ -285,16 +285,17 @@ #' #' - Lüdecke, D., Ben-Shachar, M. S., Patil, I., Waggoner, P., and Makowski, D. #' (2021). performance: An R package for assessment, comparison and testing of -#' statistical models. Journal of Open Source Software, 6(60), 3139. +#' statistical models. *Journal of Open Source Software*, *6*(60), 3139. #' \doi{10.21105/joss.03139} #' #' - Thériault, R., Ben-Shachar, M. S., Patil, I., Lüdecke, D., Wiernik, B. M., #' and Makowski, D. (2023). Check your outliers! An introduction to identifying -#' statistical outliers in R with easystats. \doi{10.31234/osf.io/bu6nt} +#' statistical outliers in R with easystats. *Behavior Research Methods*, 1-11. +#' \doi{10.3758/s13428-024-02356-w} #' #' - Rousseeuw, P. J., and Van Zomeren, B. C. 
(1990). Unmasking multivariate -#' outliers and leverage points. Journal of the American Statistical -#' association, 85(411), 633-639. +#' outliers and leverage points. *Journal of the American Statistical +#' association*, *85*(411), 633-639. #' #' @examples #' data <- mtcars # Size nrow(data) = 32 @@ -303,14 +304,14 @@ #' outliers_list <- check_outliers(data$mpg) # Find outliers #' outliers_list # Show the row index of the outliers #' as.numeric(outliers_list) # The object is a binary vector... -#' filtered_data <- data[!outliers_list, ] # And can be used to filter a dataframe +#' filtered_data <- data[!outliers_list, ] # And can be used to filter a data frame #' nrow(filtered_data) # New size, 28 (4 outliers removed) #' #' # Find all observations beyond +/- 2 SD #' check_outliers(data$mpg, method = "zscore", threshold = 2) #' #' # For dataframes ------------------------------------------------------ -#' check_outliers(data) # It works the same way on dataframes +#' check_outliers(data) # It works the same way on data frames #' #' # You can also use multiple methods at once #' outliers_list <- check_outliers(data, method = c( @@ -331,8 +332,9 @@ #' # We can run the function stratified by groups using `{datawizard}` package: #' group_iris <- datawizard::data_group(iris, "Species") #' check_outliers(group_iris) -#' +#' # nolint start #' @examplesIf require("see") && require("bigutilsr") && require("loo") && require("MASS") && require("ICSOutlier") && require("ICS") && require("dbscan") +#' # nolint end #' \donttest{ #' # You can also run all the methods #' check_outliers(data, method = "all", verbose = FALSE) @@ -586,7 +588,7 @@ check_outliers.default <- function(x, attr(outlier, "threshold") <- thresholds attr(outlier, "method") <- method attr(outlier, "text_size") <- 3 - attr(outlier, "influential_obs") <- .influential_obs(x) + attr(outlier, "influential_obs") <- .influential_obs(x, threshold = unlist(thresholds)) attr(outlier, "variables") <- "(Whole model)" 
attr(outlier, "raw_data") <- my_data attr(outlier, "outlier_var") <- outlier_var @@ -946,7 +948,7 @@ check_outliers.data.frame <- function(x, outlier_count <- lapply(outlier_count, function(x) { num.df <- x[!names(x) %in% c("Row", ID)] if (isTRUE(nrow(num.df) >= 1)) { - num.df <- datawizard::change_code( + num.df <- datawizard::recode_values( num.df, recode = list(`2` = "(Multivariate)") ) diff --git a/R/check_overdispersion.R b/R/check_overdispersion.R index fa46edfef..0d877b0ad 100644 --- a/R/check_overdispersion.R +++ b/R/check_overdispersion.R @@ -77,7 +77,6 @@ check_overdispersion <- function(x, ...) { #' @export check_overdispersion.default <- function(x, ...) { - # check for valid input .is_model_valid(x) insight::format_error( paste0("`check_overdisperion()` not yet implemented for models of class `", class(x)[1], "`.") @@ -289,7 +288,6 @@ check_overdispersion.glmmTMB <- check_overdispersion.merMod #' @rdname check_overdispersion #' @export check_overdispersion.performance_simres <- function(x, alternative = c("two.sided", "less", "greater"), ...) { - # match arguments alternative <- match.arg(alternative) # check for special arguments - we may pass "object_name" from other methods diff --git a/R/check_predictions.R b/R/check_predictions.R index 682047b50..e9d05fdfd 100644 --- a/R/check_predictions.R +++ b/R/check_predictions.R @@ -100,7 +100,6 @@ check_predictions.default <- function(object, type = "density", verbose = TRUE, ...) { - # check for valid input .is_model_valid(object) # retrieve model information @@ -223,7 +222,9 @@ check_predictions.BFBayesFactor <- function(object, if (isTRUE(is.na(re_formula))) { yy <- everything_we_need[["y_pred_marginal"]] } else { - if (!is.null(re_formula)) warning("re_formula can only be NULL or NA", call. 
= FALSE) + if (!is.null(re_formula)) { + insight::format_warning("`re_formula` can only be `NULL` or `NA`.") + } yy <- everything_we_need[["y_pred"]] } diff --git a/R/check_singularity.R b/R/check_singularity.R index 44d66d221..e2e369f61 100644 --- a/R/check_singularity.R +++ b/R/check_singularity.R @@ -99,8 +99,11 @@ #' #' # Fixing singularity issues using priors in glmmTMB #' # Example taken from `vignette("priors", package = "glmmTMB")` -#' dat <- readRDS(system.file("vignette_data", "gophertortoise.rds", -#' package = "glmmTMB")) +#' dat <- readRDS(system.file( +#' "vignette_data", +#' "gophertortoise.rds", +#' package = "glmmTMB" +#' )) #' model <- glmmTMB::glmmTMB( #' shells ~ prev + offset(log(Area)) + factor(year) + (1 | Site), #' family = poisson, diff --git a/R/check_zeroinflation.R b/R/check_zeroinflation.R index 5f87941f3..a8e34ad79 100644 --- a/R/check_zeroinflation.R +++ b/R/check_zeroinflation.R @@ -120,7 +120,6 @@ check_zeroinflation.performance_simres <- function(x, tolerance = 0.1, alternative = c("two.sided", "less", "greater"), ...) 
{ - # match arguments alternative <- match.arg(alternative) # compute test results diff --git a/R/display.R b/R/display.R index 0abc992e2..4e3b68b97 100644 --- a/R/display.R +++ b/R/display.R @@ -44,12 +44,3 @@ display.compare_performance <- display.performance_model #' @export display.check_itemscale <- display.performance_model - - - - -# Reexports models ------------------------ - -#' @importFrom insight display -#' @export -insight::display diff --git a/R/helpers.R b/R/helpers.R index c231d6226..09b211678 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -1,6 +1,6 @@ # small wrapper around this commonly used try-catch .safe <- function(code, on_error = NULL) { - if (getOption("easystats_erros", FALSE) && is.null(on_error)) { + if (isTRUE(getOption("easystats_errors", FALSE) && is.null(on_error))) { code } else { tryCatch(code, error = function(e) on_error) diff --git a/R/icc.R b/R/icc.R index 47fbb9a45..420893f1e 100644 --- a/R/icc.R +++ b/R/icc.R @@ -1,9 +1,11 @@ -#' Intraclass Correlation Coefficient (ICC) +#' @title Intraclass Correlation Coefficient (ICC) +#' @name icc #' +#' @description #' This function calculates the intraclass-correlation coefficient (ICC) - #' sometimes also called *variance partition coefficient* (VPC) or #' *repeatability* - for mixed effects models. The ICC can be calculated for all -#' models supported by `insight::get_variance()`. For models fitted with the +#' models supported by [`insight::get_variance()`]. For models fitted with the #' **brms**-package, `icc()` might fail due to the large variety of #' models and families supported by the **brms**-package. In such cases, an #' alternative to the ICC is the `variance_decomposition()`, which is based @@ -11,31 +13,36 @@ #' #' @param model A (Bayesian) mixed effects model. #' @param re_formula Formula containing group-level effects to be considered in -#' the prediction. If `NULL` (default), include all group-level effects. 
-#' Else, for instance for nested models, name a specific group-level effect -#' to calculate the variance decomposition for this group-level. See 'Details' -#' and `?brms::posterior_predict`. -#' @param ci Confidence resp. credible interval level. For `icc()` and `r2()`, -#' confidence intervals are based on bootstrapped samples from the ICC resp. -#' R2 value. See `iterations`. -#' @param by_group Logical, if `TRUE`, `icc()` returns the variance -#' components for each random-effects level (if there are multiple levels). -#' See 'Details'. +#' the prediction. If `NULL` (default), include all group-level effects. +#' Else, for instance for nested models, name a specific group-level effect +#' to calculate the variance decomposition for this group-level. See 'Details' +#' and `?brms::posterior_predict`. +#' @param ci Confidence resp. credible interval level. For `icc()`, `r2()`, and +#' `rmse()`, confidence intervals are based on bootstrapped samples from the +#' ICC, R2 or RMSE value. See `iterations`. +#' @param by_group Logical, if `TRUE`, `icc()` returns the variance components +#' for each random-effects level (if there are multiple levels). See 'Details'. #' @param iterations Number of bootstrap-replicates when computing confidence -#' intervals for the ICC or R2. +#' intervals for the ICC, R2, RMSE etc. #' @param ci_method Character string, indicating the bootstrap-method. Should -#' be `NULL` (default), in which case `lme4::bootMer()` is used for -#' bootstrapped confidence intervals. However, if bootstrapped intervals cannot -#' be calculated this was, try `ci_method = "boot"`, which falls back to -#' `boot::boot()`. This may successfully return bootstrapped confidence intervals, -#' but bootstrapped samples may not be appropriate for the multilevel structure -#' of the model. There is also an option `ci_method = "analytical"`, which tries -#' to calculate analytical confidence assuming a chi-squared distribution. 
-#' However, these intervals are rather inaccurate and often too narrow. It is -#' recommended to calculate bootstrapped confidence intervals for mixed models. +#' be `NULL` (default), in which case `lme4::bootMer()` is used for bootstrapped +#' confidence intervals. However, if bootstrapped intervals cannot be calculated +#' this way, try `ci_method = "boot"`, which falls back to `boot::boot()`. This +#' may successfully return bootstrapped confidence intervals, but bootstrapped +#' samples may not be appropriate for the multilevel structure of the model. +#' There is also an option `ci_method = "analytical"`, which tries to calculate +#' analytical confidence assuming a chi-squared distribution. However, these +#' intervals are rather inaccurate and often too narrow. It is recommended to +#' calculate bootstrapped confidence intervals for mixed models. #' @param verbose Toggle warnings and messages. +#' @param null_model Optional, a null model to compute the random effect variances, +#' which is passed to [`insight::get_variance()`]. Usually only required if +#' calculation of r-squared or ICC fails when `null_model` is not specified. If +#' calculating the null model takes longer and you already have fit the null +#' model, you can pass it here, too, to speed up the process. #' @param ... Arguments passed down to `lme4::bootMer()` or `boot::boot()` -#' for bootstrapped ICC or R2. +#' for bootstrapped ICC, R2, RMSE etc.; for `variance_decomposition()`, +#' arguments are passed down to `brms::posterior_predict()`. #' #' @inheritParams r2_bayes #' @inheritParams insight::get_variance @@ -58,6 +65,8 @@ #' applications and data analysis methods (2nd ed). Thousand Oaks: Sage #' Publications. 
#' +#' @inheritSection r2_nakagawa Supported models and model families +#' #' @details #' ## Interpretation #' The ICC can be interpreted as "the proportion of the variance explained by @@ -169,6 +178,9 @@ icc <- function(model, ci = NULL, iterations = 100, ci_method = NULL, + null_model = NULL, + approximation = "lognormal", + model_component = NULL, verbose = TRUE, ...) { # special handling for smicd::semLme() @@ -197,7 +209,14 @@ icc <- function(model, } # calculate random effect variances - vars <- .compute_random_vars(model, tolerance) + vars <- .compute_random_vars( + model, + tolerance = tolerance, + null_model = null_model, + approximation = approximation, + model_component = model_component, + verbose = verbose + ) # return if ICC couldn't be computed if (is.null(vars) || all(is.na(vars))) { @@ -230,7 +249,12 @@ icc <- function(model, # iccs between groups # n_grps <- length(vars$var.intercept) # level_combinations <- utils::combn(1:n_grps, m = n_grps - 1, simplify = FALSE) - # icc_grp <- sapply(level_combinations, function(v) vars$var.intercept[v[1]] / (vars$var.intercept[v[1]] + vars$var.intercept[v[2]])) + # icc_grp <- sapply( + # level_combinations, + # function(v) { + # vars$var.intercept[v[1]] / (vars$var.intercept[v[1]] + vars$var.intercept[v[2]]) + # } + # ) # # out2 <- data.frame( # Group1 = group_names[sapply(level_combinations, function(i) i[1])], @@ -256,11 +280,11 @@ icc <- function(model, # this is experimental! if (identical(ci_method, "analytical")) { result <- .safe(.analytical_icc_ci(model, ci)) - if (!is.null(result)) { + if (is.null(result)) { + icc_ci_adjusted <- icc_ci_unadjusted <- NA + } else { icc_ci_adjusted <- result$ICC_adjusted icc_ci_unadjusted <- result$ICC_unadjusted - } else { - icc_ci_adjusted <- icc_ci_unadjusted <- NA } } else { result <- .bootstrap_icc(model, iterations, tolerance, ci_method, ...) @@ -304,8 +328,6 @@ icc <- function(model, -#' @param ... Arguments passed down to `brms::posterior_predict()`. 
-#' @inheritParams icc #' @rdname icc #' @export variance_decomposition <- function(model, @@ -411,7 +433,7 @@ print.icc <- function(x, digits = 3, ...) { } # separate lines for multiple R2 - out <- paste0(out, collapse = "\n") + out <- paste(out, collapse = "\n") cat(out) cat("\n") @@ -530,12 +552,18 @@ print.icc_decomposed <- function(x, digits = 2, ...) { components = c("var.fixed", "var.random", "var.residual"), name_fun = "icc()", name_full = "ICC", + null_model = NULL, + model_component = NULL, + approximation = "lognormal", verbose = TRUE) { vars <- tryCatch( insight::get_variance(model, name_fun = name_fun, name_full = name_full, tolerance = tolerance, + null_model = null_model, + approximation = approximation, + model_component = model_component, verbose = verbose ), error = function(e) { @@ -568,7 +596,11 @@ print.icc_decomposed <- function(x, digits = 2, ...) { .boot_icc_fun <- function(data, indices, model, tolerance) { d <- data[indices, ] # allows boot to select sample fit <- suppressWarnings(suppressMessages(stats::update(model, data = d))) - vars <- .compute_random_vars(fit, tolerance, verbose = FALSE) + vars <- .compute_random_vars( + fit, + tolerance, + verbose = isTRUE(getOption("easystats_errors", FALSE)) + ) if (is.null(vars) || all(is.na(vars))) { return(c(NA, NA)) } @@ -581,7 +613,11 @@ print.icc_decomposed <- function(x, digits = 2, ...) { # bootstrapping using "lme4::bootMer" .boot_icc_fun_lme4 <- function(model) { - vars <- .compute_random_vars(model, tolerance = 1e-05, verbose = FALSE) + vars <- .compute_random_vars( + model, + tolerance = 1e-10, + verbose = isTRUE(getOption("easystats_errors", FALSE)) + ) if (is.null(vars) || all(is.na(vars))) { return(c(NA, NA)) } @@ -662,10 +698,10 @@ print.icc_decomposed <- function(x, digits = 2, ...) 
{ } model_rank <- tryCatch( - if (!is.null(model$rank)) { - model$rank - df_int - } else { + if (is.null(model$rank)) { insight::n_parameters(model) - df_int + } else { + model$rank - df_int }, error = function(e) insight::n_parameters(model) - df_int ) diff --git a/R/item_discrimination.R b/R/item_discrimination.R index ae1f83661..b5d0d7bdf 100644 --- a/R/item_discrimination.R +++ b/R/item_discrimination.R @@ -16,8 +16,8 @@ #' This function calculates the item discriminations (corrected item-total #' correlations for each item of `x` with the remaining items) for each item #' of a scale. The absolute value of the item discrimination indices should be -#' above 0.2. An index between 0.2 and 0.4 is considered as "fair", while a -#' satisfactory index ranges from 0.4 to 0.7. Items with low discrimination +#' above `0.2`. An index between `0.2` and `0.4` is considered as "fair", while a +#' satisfactory index ranges from `0.4` to `0.7`. Items with low discrimination #' indices are often ambiguously worded and should be examined. Items with #' negative indices should be examined to determine why a negative value was #' obtained (e.g. reversed answer categories regarding positive and negative diff --git a/R/model_performance.bayesian.R b/R/model_performance.bayesian.R index 2ba4e5130..db923fc26 100644 --- a/R/model_performance.bayesian.R +++ b/R/model_performance.bayesian.R @@ -79,7 +79,7 @@ model_performance.stanreg <- function(model, metrics = "all", verbose = TRUE, .. 
metrics <- c("LOOIC", "WAIC", "R2", "RMSE") } - # check for valid input + metrics <- toupper(.check_bad_metrics(metrics, all_metrics, verbose)) algorithm <- insight::find_algorithm(model) @@ -232,7 +232,7 @@ model_performance.BFBayesFactor <- function(model, metrics <- all_metrics } - # check for valid input + metrics <- toupper(.check_bad_metrics(metrics, all_metrics, verbose)) # check for valid BFBayesFactor object diff --git a/R/model_performance.bife.R b/R/model_performance.bife.R index a9d4cbd24..633c91cbe 100644 --- a/R/model_performance.bife.R +++ b/R/model_performance.bife.R @@ -11,7 +11,7 @@ model_performance.bife <- function(model, metrics = "all", verbose = TRUE, ...) metrics <- c("AIC", "R2") } - # check for valid input + metrics <- .check_bad_metrics(metrics, all_metrics, verbose) info <- insight::model_info(model) diff --git a/R/model_performance.ivreg.R b/R/model_performance.ivreg.R index b76a621f9..02a4bdf13 100644 --- a/R/model_performance.ivreg.R +++ b/R/model_performance.ivreg.R @@ -24,7 +24,7 @@ model_performance.ivreg <- function(model, metrics = "all", verbose = TRUE, ...) metrics <- c("AIC", "BIC", "R2", "R2_adj", "RMSE") } - # check for valid input + metrics <- .check_bad_metrics(metrics, all_metrics, verbose) # the lm-method does not accept ivreg-specific metrics diff --git a/R/model_performance.lm.R b/R/model_performance.lm.R index e8e5b07cf..c797b396f 100644 --- a/R/model_performance.lm.R +++ b/R/model_performance.lm.R @@ -58,7 +58,7 @@ model_performance.lm <- function(model, metrics = "all", verbose = TRUE, ...) 
{ insight::formula_ok(model) } - # check for valid input + metrics <- .check_bad_metrics(metrics, all_metrics, verbose) info <- suppressWarnings(insight::model_info(model, verbose = FALSE)) diff --git a/R/model_performance.mixed.R b/R/model_performance.mixed.R index 499196dab..a96786618 100644 --- a/R/model_performance.mixed.R +++ b/R/model_performance.mixed.R @@ -57,7 +57,7 @@ model_performance.merMod <- function(model, metrics <- c("AIC", "BIC", "R2", "ICC", "RMSE") } - # check for valid input + metrics <- .check_bad_metrics(metrics, all_metrics, verbose) # check model formula diff --git a/R/model_performance.rma.R b/R/model_performance.rma.R index d7fd37e09..2390c25e3 100644 --- a/R/model_performance.rma.R +++ b/R/model_performance.rma.R @@ -36,7 +36,7 @@ #' or mixed effects model. #' #' - **CochransQ (QE)**: Test for (residual) Heterogeneity. Without -#' moderators in the model, this is simply Cochran's Q-test. +#' moderators in the model, this is simply Cochran's *Q*-test. #' #' - **Omnibus (QM)**: Omnibus test of parameters. #' diff --git a/R/model_performance_default.R b/R/model_performance_default.R index ce1935bd7..51f8c3afd 100644 --- a/R/model_performance_default.R +++ b/R/model_performance_default.R @@ -1,6 +1,5 @@ #' @export model_performance.default <- function(model, metrics = "all", verbose = TRUE, ...) { - # check for valid input .is_model_valid(model) if (any(tolower(metrics) == "log_loss")) { @@ -16,7 +15,7 @@ model_performance.default <- function(model, metrics = "all", verbose = TRUE, .. metrics <- c("AIC", "BIC", "R2", "R2_adj", "RMSE") } - # check for valid input + metrics <- .check_bad_metrics(metrics, all_metrics, verbose) if (!insight::is_model(model) || !insight::is_model_supported(model)) { @@ -32,7 +31,6 @@ model_performance.default <- function(model, metrics = "all", verbose = TRUE, .. 
.check_bad_metrics <- function(metrics, all_metrics, verbose = TRUE) { - # check for valid input bad_metrics <- which(!metrics %in% all_metrics) if (length(bad_metrics)) { if (verbose) { diff --git a/R/performance_aicc.R b/R/performance_aicc.R index e222dfc82..3c433161f 100644 --- a/R/performance_aicc.R +++ b/R/performance_aicc.R @@ -69,7 +69,6 @@ performance_aic <- function(x, ...) { #' @rdname performance_aicc #' @export performance_aic.default <- function(x, estimator = "ML", verbose = TRUE, ...) { - # check for valid input .is_model_valid(x) info <- list(...)$model_info @@ -203,7 +202,6 @@ AIC.bife <- function(object, ..., k = 2) { #' @export performance_aicc.default <- function(x, estimator = "ML", ...) { - # check for valid input .is_model_valid(x) # check ML estimator diff --git a/R/performance_cv.R b/R/performance_cv.R index c5e08be95..f4bfa87ef 100644 --- a/R/performance_cv.R +++ b/R/performance_cv.R @@ -69,7 +69,7 @@ performance_cv <- function(model, test_pred <- insight::get_predicted(model, ci = NULL, data = data) test_resd <- test_resp - test_pred } else if (method == "holdout") { - train_i <- sample(seq_len(nrow(model_data)), size = round((1 - prop) * nrow(model_data)), replace = FALSE) + train_i <- sample.int(nrow(model_data), size = round((1 - prop) * nrow(model_data)), replace = FALSE) model_upd <- stats::update(model, data = model_data[train_i, ]) test_resp <- model_data[-train_i, resp.name] test_pred <- insight::get_predicted(model_upd, ci = NULL, data = model_data[-train_i, ]) diff --git a/R/performance_logloss.R b/R/performance_logloss.R index 44f9954ef..3bbd14916 100644 --- a/R/performance_logloss.R +++ b/R/performance_logloss.R @@ -30,7 +30,6 @@ performance_logloss <- function(model, verbose = TRUE, ...) { #' @export performance_logloss.default <- function(model, verbose = TRUE, ...) 
{ - # check for valid input .is_model_valid(model) resp <- .recode_to_zero(insight::get_response(model, verbose = verbose)) diff --git a/R/performance_mae.R b/R/performance_mae.R index 0ce7a3023..0c7c4c978 100644 --- a/R/performance_mae.R +++ b/R/performance_mae.R @@ -25,7 +25,6 @@ mae <- performance_mae #' @export performance_mae.default <- function(model, verbose = TRUE, ...) { - # check for valid input .is_model_valid(model) pred <- .safe(insight::get_predicted(model, ci = NULL, verbose = verbose, ...)) diff --git a/R/performance_rmse.R b/R/performance_rmse.R index 0cc5eac90..2e98d5514 100644 --- a/R/performance_rmse.R +++ b/R/performance_rmse.R @@ -6,7 +6,7 @@ #' #' @param model A model. #' @param normalized Logical, use `TRUE` if normalized rmse should be returned. -#' @inheritParams model_performance.lm +#' @inheritParams icc #' #' @details The RMSE is the square root of the variance of the residuals and indicates #' the absolute fit of the model to the data (difference between observed data @@ -30,33 +30,138 @@ #' # normalized RMSE #' performance_rmse(m, normalized = TRUE) #' @export -performance_rmse <- function(model, normalized = FALSE, verbose = TRUE) { +performance_rmse <- function(model, + normalized = FALSE, + ci = NULL, + iterations = 100, + ci_method = NULL, + verbose = TRUE, + ...) { + # default result: returned as-is when the RMSE itself cannot be computed, + # because the error handler cannot assign into this function's frame + out <- NA tryCatch( { - # compute rmse - rmse_val <- sqrt(performance_mse(model, verbose = verbose)) - - # if normalized, divide by range of response - if (normalized) { - # get response - resp <- datawizard::to_numeric(insight::get_response(model, verbose = FALSE), dummy_factors = FALSE, preserve_levels = TRUE) - # compute rmse, normalized - rmse_val <- rmse_val / (max(resp, na.rm = TRUE) - min(resp, na.rm = TRUE)) + out <- .calculate_rmse(model, normalized, verbose) + # check if CIs are requested, and compute CIs + if (!is.null(ci) && !is.na(ci)) { + # analytical CI? 
+ if (identical(ci_method, "analytical")) { + out <- .analytical_rmse_ci(out, model, ci) + } else { + # bootstrapped CI + result <- .bootstrap_rmse(model, iterations, normalized, ci_method, ...) + # CI for RMSE + rmse_ci <- as.vector(result$t[, 1]) + rmse_ci <- rmse_ci[!is.na(rmse_ci)] + # validation check + if (length(rmse_ci) > 0) { + rmse_ci <- bayestestR::eti(rmse_ci, ci = ci) + out <- cbind(data.frame(RMSE = out), rmse_ci) + class(out) <- c("performance_rmse", "data.frame") + } else { + insight::format_warning("Could not compute confidence intervals for RMSE.") + } + } } - - rmse_val }, error = function(e) { if (inherits(e, c("simpleError", "error")) && verbose) { insight::print_color(e$message, "red") cat("\n") } - NA } ) + + out } #' @rdname performance_rmse #' @export rmse <- performance_rmse + + +# methods --------------------------------------------------------------------- + +#' @export +format.performance_rmse <- function(x, ...) { + insight::format_table(x, ...) +} + +#' @export +print.performance_rmse <- function(x, ...) { + cat(insight::export_table(format(x, ...), ...)) +} + + +# helper function to compute RMSE ---------------------------------------------- + +.calculate_rmse <- function(model, normalized = FALSE, verbose = FALSE, ...) { + # compute rmse + rmse_val <- sqrt(performance_mse(model, verbose = verbose)) + + # if normalized, divide by range of response + if (normalized) { + # get response + resp <- datawizard::to_numeric( + insight::get_response(model, verbose = FALSE), + dummy_factors = FALSE, + preserve_levels = TRUE + ) + # compute rmse, normalized + rmse_val <- rmse_val / (max(resp, na.rm = TRUE) - min(resp, na.rm = TRUE)) + } + + rmse_val +} + + +# analytical CIs -------------------------------------------------------------- + +.analytical_rmse_ci <- function(out, model, ci, ...) 
{ + s <- insight::get_sigma(model, ci = ci, verbose = FALSE) + n <- insight::n_obs(model) + conf_ints <- c(attr(s, "CI_low"), attr(s, "CI_high")) * ((n - 1) / n) + out <- data.frame( + RMSE = out, + CI = ci, + CI_low = conf_ints[1], + CI_high = conf_ints[2] + ) + class(out) <- c("performance_rmse", "data.frame") + out +} + + +# bootstrapping CIs ----------------------------------------------------------- + +.boot_calculate_rmse <- function(data, indices, model, normalized, ...) { + d <- data[indices, ] # allows boot to select sample + fit <- suppressWarnings(suppressMessages(stats::update(model, data = d))) + .calculate_rmse(model = fit, normalized = normalized) +} + +.bootstrap_rmse <- function(model, iterations = 100, normalized = FALSE, ci_method = NULL, ...) { + if (inherits(model, c("merMod", "lmerMod", "glmmTMB")) && !identical(ci_method, "boot")) { + # cannot pass argument "normalized" to "lme4::bootMer()" + if (isTRUE(normalized)) { + insight::format_error("Normalized RMSE cannot be used with confidence intervals. Please use `ci_method = \"boot\"`.") # nolint + } + result <- .do_lme4_bootmer( + model, + .calculate_rmse, + iterations, + dots = list(...) + ) + } else { + insight::check_if_installed("boot") + result <- boot::boot( + data = insight::get_data(model, verbose = FALSE), + statistic = .boot_calculate_rmse, + R = iterations, + sim = "ordinary", + model = model, + normalized = normalized + ) + } + result +} diff --git a/R/print_md.R b/R/print_md.R index 926696006..9c4f47ce8 100644 --- a/R/print_md.R +++ b/R/print_md.R @@ -113,15 +113,3 @@ print_md.check_itemscale <- function(x, digits = 2, ...) 
{ align = "firstleft" ) } - - -# Reexports models ------------------------ - -#' @importFrom insight print_md -#' @export -insight::print_md - - -#' @importFrom insight print_html -#' @export -insight::print_html diff --git a/R/r2.R b/R/r2.R index 94982b401..dbfc437fc 100644 --- a/R/r2.R +++ b/R/r2.R @@ -23,14 +23,15 @@ #' - Mixed models: [Nakagawa's R2][r2_nakagawa] #' - Bayesian models: [R2 bayes][r2_bayes] #' -#' @note If there is no `r2()`-method defined for the given model class, -#' `r2()` tries to return a "generic" r-quared value, calculated as following: -#' `1-sum((y-y_hat)^2)/sum((y-y_bar)^2))` #' -#' @seealso [`r2_bayes()`], [`r2_coxsnell()`], [`r2_kullback()`], -#' [`r2_loo()`], [`r2_mcfadden()`], [`r2_nagelkerke()`], -#' [`r2_nakagawa()`], [`r2_tjur()`], [`r2_xu()`] and -#' [`r2_zeroinflated()`]. +#' @note +#' If there is no `r2()`-method defined for the given model class, `r2()` tries +#' to return a "generic" r-squared value, calculated as follows: +#' `1-sum((y-y_hat)^2)/sum((y-y_bar)^2)` #' +#' @seealso +#' [`r2_bayes()`], [`r2_coxsnell()`], [`r2_kullback()`], [`r2_loo()`], +#' [`r2_mcfadden()`], [`r2_nagelkerke()`], [`r2_nakagawa()`], [`r2_tjur()`], +#' [`r2_xu()`] and [`r2_zeroinflated()`]. #' #' @examplesIf require("lme4") #' # Pseudo r-quared for GLM @@ -291,6 +292,12 @@ r2.glm <- function(model, ci = NULL, verbose = TRUE, ...) { insight::format_warning("Can't calculate accurate R2 for binomial models that are not Bernoulli models.") } out <- NULL + } else if (info$is_orderedbeta) { + # ordered-beta-regression + out <- r2_ferrari(model, correct_bounds = TRUE) + } else if (info$is_beta) { + # beta-regression + out <- r2_ferrari(model) } else { out <- list(R2_Nagelkerke = r2_nagelkerke(model, ...)) names(out$R2_Nagelkerke) <- "Nagelkerke's R2" @@ -527,8 +534,20 @@ r2.glmmTMB <- function(model, ci = NULL, tolerance = 1e-5, verbose = TRUE, ...) 
} else if (info$is_zero_inflated) { # zero-inflated models use the default method out <- r2_zeroinflated(model) + } else if (info$is_orderedbeta) { + # ordered-beta-regression + out <- r2_ferrari(model, correct_bounds = TRUE) + } else if (info$is_beta) { + # beta-regression + out <- r2_ferrari(model) } else { - insight::format_error("`r2()` does not support models of class `glmmTMB` without random effects and this link-function.") # nolint + insight::format_error(paste0( + "`r2()` does not support models of class `glmmTMB` without random effects and from ", + info$family, + "-family with ", + info$link_function, + "-link-function." + )) } } out diff --git a/R/r2_bayes.R b/R/r2_bayes.R index ec98f754b..f96e98b41 100644 --- a/R/r2_bayes.R +++ b/R/r2_bayes.R @@ -1,36 +1,37 @@ #' @title Bayesian R2 #' @name r2_bayes #' -#' @description Compute R2 for Bayesian models. For mixed models (including a -#' random part), it additionally computes the R2 related to the fixed effects -#' only (marginal R2). While `r2_bayes()` returns a single R2 value, -#' `r2_posterior()` returns a posterior sample of Bayesian R2 values. +#' @description +#' Compute R2 for Bayesian models. For mixed models (including a random part), +#' it additionally computes the R2 related to the fixed effects only (marginal +#' R2). While `r2_bayes()` returns a single R2 value, `r2_posterior()` returns a +#' posterior sample of Bayesian R2 values. #' #' @param model A Bayesian regression model (from **brms**, -#' **rstanarm**, **BayesFactor**, etc). +#' **rstanarm**, **BayesFactor**, etc). #' @param robust Logical, if `TRUE`, the median instead of mean is used to -#' calculate the central tendency of the variances. +#' calculate the central tendency of the variances. #' @param ci Value or vector of probability of the CI (between 0 and 1) to be -#' estimated. +#' estimated. #' @param ... Arguments passed to `r2_posterior()`. 
#' @inheritParams model_performance.lm #' #' @return A list with the Bayesian R2 value. For mixed models, a list with the -#' Bayesian R2 value and the marginal Bayesian R2 value. The standard errors -#' and credible intervals for the R2 values are saved as attributes. +#' Bayesian R2 value and the marginal Bayesian R2 value. The standard errors and +#' credible intervals for the R2 values are saved as attributes. #' -#' @details `r2_bayes()` returns an "unadjusted" R2 value. See -#' [r2_loo()] to calculate a LOO-adjusted R2, which comes -#' conceptually closer to an adjusted R2 measure. +#' @details +#' `r2_bayes()` returns an "unadjusted" R2 value. See [r2_loo()] to calculate a +#' LOO-adjusted R2, which comes conceptually closer to an adjusted R2 measure. #' -#' For mixed models, the conditional and marginal R2 are returned. The marginal -#' R2 considers only the variance of the fixed effects, while the conditional -#' R2 takes both the fixed and random effects into account. +#' For mixed models, the conditional and marginal R2 are returned. The marginal +#' R2 considers only the variance of the fixed effects, while the conditional R2 +#' takes both the fixed and random effects into account. #' -#' `r2_posterior()` is the actual workhorse for `r2_bayes()` and -#' returns a posterior sample of Bayesian R2 values. +#' `r2_posterior()` is the actual workhorse for `r2_bayes()` and returns a +#' posterior sample of Bayesian R2 values. #' -#' @examplesIf require("rstanarm") && require("rstantools") && require("brms") +#' @examplesIf require("rstanarm") && require("rstantools") && require("brms") && require("RcppEigen") #' library(performance) #' \donttest{ #' model <- suppressWarnings(rstanarm::stan_glm( @@ -71,8 +72,8 @@ #' r2_bayes(model) #' } #' @references -#' Gelman, A., Goodrich, B., Gabry, J., and Vehtari, A. (2018). -#' R-squared for Bayesian regression models. The American Statistician, 1–6. +#' Gelman, A., Goodrich, B., Gabry, J., and Vehtari, A. (2018). 
R-squared for +#' Bayesian regression models. The American Statistician, 1–6. #' \doi{10.1080/00031305.2018.1549100} #' @export r2_bayes <- function(model, robust = TRUE, ci = 0.95, verbose = TRUE, ...) { diff --git a/R/r2_ferarri.R b/R/r2_ferarri.R new file mode 100644 index 000000000..402c95fdf --- /dev/null +++ b/R/r2_ferarri.R @@ -0,0 +1,81 @@ +#' @title Ferrari's and Cribari-Neto's R2 +#' @name r2_ferrari +#' +#' @description Calculates Ferrari's and Cribari-Neto's pseudo R2 (for +#' beta-regression models). +#' +#' @param model Generalized linear, in particular beta-regression model. +#' @param correct_bounds Logical, whether to correct the bounds of the response +#' variable to avoid 0 and 1. If `TRUE`, the response variable is normalized +#' and "compressed", i.e. zeros and ones are excluded. +#' @param ... Currently not used. +#' +#' @return A list with the pseudo R2 value. +#' +#' @references +#' - Ferrari, S., and Cribari-Neto, F. (2004). Beta Regression for Modelling Rates +#' and Proportions. Journal of Applied Statistics, 31(7), 799–815. +#' \doi{10.1080/0266476042000214501} +#' +#' @examplesIf require("betareg") +#' data("GasolineYield", package = "betareg") +#' model <- betareg::betareg(yield ~ batch + temp, data = GasolineYield) +#' r2_ferrari(model) +#' @export +r2_ferrari <- function(model, ...) { + UseMethod("r2_ferrari") +} + +#' @rdname r2_ferrari +#' @export +r2_ferrari.default <- function(model, correct_bounds = FALSE, ...) 
{ + # on the response scale, beta regression link doesn't work + predictions <- stats::predict(model, type = "response") + eta <- insight::link_function(model)(predictions) + y <- insight::get_response(model) + + # for ordered beta, fix 0 and 1 to specific bounds + if (correct_bounds) { + y <- datawizard::normalize(y, include_bounds = FALSE) + } + + ferrari <- stats::cor(eta, insight::link_function(model)(y))^2 + out <- list(R2 = c(`Ferrari's R2` = ferrari)) + + attr(out, "model_type") <- "Generalized Linear" + structure(class = "r2_generic", out) +} + + +# helper ----------------------------- + +# .r2_ferrari <- function(model, x) { +# if (inherits(model, "glmmTMB")) { +# insight::check_if_installed("lme4") +# # coefficients, but remove phi parameter +# x <- .collapse_cond(lme4::fixef(model)) +# x <- x[names(x) != "(phi)"] +# } else { +# # coefficients, but remove phi parameter +# x <- stats::coef(model) +# x <- x[names(x) != "(phi)"] +# } + +# # model matrix, check dimensions / length +# mm <- insight::get_modelmatrix(model) + +# if (length(x) != ncol(mm)) { +# insight::format_warning("Model matrix and coefficients do not match.") +# return(NULL) +# } + +# # linear predictor for the mean +# eta <- as.vector(x %*% t(mm)) +# y <- insight::get_response(model) + +# ferrari <- stats::cor(eta, insight::link_function(model)(y))^2 +# out <- list(R2 = c(`Ferrari's R2` = ferrari)) + +# attr(out, "model_type") <- "Generalized Linear" +# structure(class = "r2_generic", out) +# } diff --git a/R/r2_mcfadden.R b/R/r2_mcfadden.R index b04977517..de61fac87 100644 --- a/R/r2_mcfadden.R +++ b/R/r2_mcfadden.R @@ -63,15 +63,16 @@ r2_mcfadden.glm <- function(model, verbose = TRUE, ...) 
{ if (is.null(info)) { info <- suppressWarnings(insight::model_info(model, verbose = FALSE)) } + if (info$is_binomial && !info$is_bernoulli && class(model)[1] == "glm") { if (verbose) { insight::format_warning("Can't calculate accurate R2 for binomial models that are not Bernoulli models.") } return(NULL) - } else { - l_null <- insight::get_loglikelihood(stats::update(model, ~1)) - .r2_mcfadden(model, l_null) } + + l_null <- insight::get_loglikelihood(stats::update(model, ~1)) + .r2_mcfadden(model, l_null) } #' @export diff --git a/R/r2_nakagawa.R b/R/r2_nakagawa.R index 3e72a9457..dfa1eb6dc 100644 --- a/R/r2_nakagawa.R +++ b/R/r2_nakagawa.R @@ -18,10 +18,48 @@ #' #' @return A list with the conditional and marginal R2 values. #' +#' @section Supported models and model families: +#' The single variance components that are required to calculate the marginal +#' and conditional r-squared values are calculated using the [`insight::get_variance()`] +#' function. The results are validated against the solutions provided by +#' _Nakagawa et al. (2017)_, in particular examples shown in the Supplement 2 +#' of the paper. Other model families are validated against results from the +#' **MuMIn** package. 
This means that the r-squared values returned by `r2_nakagawa()` +#' should be accurate and reliable for the following mixed models or model families: +#' +#' - Bernoulli (logistic) regression +#' - Binomial regression (with other than binary outcomes) +#' - Poisson and Quasi-Poisson regression +#' - Negative binomial regression (including nbinom1 and nbinom2 families) +#' - Gaussian regression (linear models) +#' - Gamma regression +#' - Tweedie regression +#' - Beta regression +#' - Ordered beta regression +#' +#' The following model families are not yet validated, but should work: +#' +#' - Zero-inflated and hurdle models +#' - Beta-binomial regression +#' - Compound Poisson regression +#' - Generalized Poisson regression +#' - Log-normal regression +#' +#' Extracting variance components for models with a zero-inflation part is not +#' straightforward, because it is not entirely clear how the distribution-specific +#' variance should be calculated. Therefore, it is recommended to carefully +#' inspect the results, and possibly validate against other models, e.g. Bayesian +#' models (although results may be only roughly comparable). +#' +#' Log-normal regressions (e.g. `lognormal()` family in **glmmTMB** or `gaussian("log")`) +#' often have a very low fixed effects variance (if it is calculated as +#' suggested by _Nakagawa et al. 2017_). This results in very low ICC or +#' r-squared values, which may not be meaningful. +#' #' @details #' Marginal and conditional r-squared values for mixed models are calculated #' based on _Nakagawa et al. (2017)_. For more details on the computation of -#' the variances, see `?insight::get_variance`. The random effect variances are +#' the variances, see [`insight::get_variance()`]. The random effect variances are #' actually the mean random effect variances, thus the r-squared value is also #' appropriate for mixed models with random slopes or nested random effects #' (see _Johnson, 2014_). 
@@ -53,10 +91,13 @@ #' @export r2_nakagawa <- function(model, by_group = FALSE, - tolerance = 1e-5, + tolerance = 1e-8, ci = NULL, iterations = 100, ci_method = NULL, + null_model = NULL, + approximation = "lognormal", + model_component = NULL, verbose = TRUE, ...) { # calculate random effect variances @@ -64,8 +105,12 @@ r2_nakagawa <- function(model, model, tolerance, components = c("var.fixed", "var.residual"), + null_model = null_model, + approximation = approximation, name_fun = "r2()", - name_full = "r-squared" + name_full = "r-squared", + model_component = model_component, + verbose = verbose ) # return if R2 couldn't be computed @@ -85,7 +130,9 @@ r2_nakagawa <- function(model, } # null-model - null_model <- insight::null_model(model) + if (is.null(null_model)) { + null_model <- insight::null_model(model) + } vars_null <- insight::get_variance(null_model, tolerance = tolerance) # names of group levels @@ -107,7 +154,7 @@ r2_nakagawa <- function(model, if (insight::is_empty_object(vars$var.random) || is.na(vars$var.random)) { if (verbose) { # if no random effect variance, return simple R2 - insight::print_color("Random effect variances not available. Returned R2 does not account for random effects.\n", "red") + insight::print_color("Random effect variances not available. Returned R2 does not account for random effects.\n", "red") # nolint } r2_marginal <- vars$var.fixed / (vars$var.fixed + vars$var.residual) r2_conditional <- NA @@ -125,11 +172,11 @@ r2_nakagawa <- function(model, # this is experimental! if (identical(ci_method, "analytical")) { result <- .safe(.analytical_icc_ci(model, ci, fun = "r2_nakagawa")) - if (!is.null(result)) { + if (is.null(result)) { + r2_ci_marginal <- r2_ci_conditional <- NA + } else { r2_ci_marginal <- result$R2_marginal r2_ci_conditional <- result$R2_conditional - } else { - r2_ci_marginal <- r2_ci_conditional <- NA } } else { result <- .bootstrap_r2_nakagawa(model, iterations, tolerance, ci_method, ...) 
@@ -219,7 +266,7 @@ print.r2_nakagawa <- function(x, digits = 3, ...) { } # separate lines for multiple R2 - out <- paste0(out, collapse = "\n") + out <- paste(out, collapse = "\n") cat(out) cat("\n") @@ -234,7 +281,11 @@ print.r2_nakagawa <- function(x, digits = 3, ...) { .boot_r2_fun <- function(data, indices, model, tolerance) { d <- data[indices, ] # allows boot to select sample fit <- suppressWarnings(suppressMessages(stats::update(model, data = d))) - vars <- .compute_random_vars(fit, tolerance, verbose = FALSE) + vars <- .compute_random_vars( + fit, + tolerance, + verbose = isTRUE(getOption("easystats_errors", FALSE)) + ) if (is.null(vars) || all(is.na(vars))) { return(c(NA, NA)) } @@ -251,7 +302,11 @@ print.r2_nakagawa <- function(x, digits = 3, ...) { # bootstrapping using "lme4::bootMer" .boot_r2_fun_lme4 <- function(model) { - vars <- .compute_random_vars(model, tolerance = 1e-05, verbose = FALSE) + vars <- .compute_random_vars( + model, + tolerance = 1e-10, + verbose = isTRUE(getOption("easystats_errors", FALSE)) + ) if (is.null(vars) || all(is.na(vars))) { return(c(NA, NA)) } diff --git a/R/reexports.R b/R/reexports.R new file mode 100644 index 000000000..98700acfd --- /dev/null +++ b/R/reexports.R @@ -0,0 +1,12 @@ +#' @importFrom insight display +#' @export +insight::display + +#' @importFrom insight print_md +#' @export +insight::print_md + + +#' @importFrom insight print_html +#' @export +insight::print_html diff --git a/R/sysdata.rda b/R/sysdata.rda index b3f659f71..cff603cc1 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/R/test_likelihoodratio.R b/R/test_likelihoodratio.R index 529516487..21ae2694d 100644 --- a/R/test_likelihoodratio.R +++ b/R/test_likelihoodratio.R @@ -97,7 +97,6 @@ print.test_likelihoodratio <- function(x, digits = 2, ...) 
{ } - # other classes --------------------------- #' @export diff --git a/README.Rmd b/README.Rmd index 895fa02a8..e7c540577 100644 --- a/README.Rmd +++ b/README.Rmd @@ -26,7 +26,7 @@ library(performance) ``` [![DOI](https://joss.theoj.org/papers/10.21105/joss.03139/status.svg)](https://doi.org/10.21105/joss.03139) -[![downloads](http://cranlogs.r-pkg.org/badges/performance)](https://cran.r-project.org/package=performance) [![total](https://cranlogs.r-pkg.org/badges/grand-total/performance)](https://cranlogs.r-pkg.org/) [![status](https://tinyverse.netlify.com/badge/performance)](https://CRAN.R-project.org/package=performance) +[![downloads](http://cranlogs.r-pkg.org/badges/performance)](https://cran.r-project.org/package=performance) [![total](https://cranlogs.r-pkg.org/badges/grand-total/performance)](https://cranlogs.r-pkg.org/) ***Test if your model is a good model!*** @@ -145,7 +145,7 @@ icc(model) ``` ...and models of class `brmsfit`. - + ```{r, echo=FALSE, eval=curl::has_internet()} model <- insight::download_model("brms_mixed_1") ``` @@ -224,7 +224,7 @@ model <- lmer( check_singularity(model) ``` -Remedies to cure issues with singular fits can be found [here](https://easystats.github.io/performance/reference/check_singularity.html). +Remedies to cure issues with singular fits can be found [here](https://easystats.github.io/performance/reference/check_singularity.html). 
#### Check for heteroskedasticity diff --git a/README.md b/README.md index 57e7cd9f0..5153170da 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,6 @@ [![DOI](https://joss.theoj.org/papers/10.21105/joss.03139/status.svg)](https://doi.org/10.21105/joss.03139) [![downloads](http://cranlogs.r-pkg.org/badges/performance)](https://cran.r-project.org/package=performance) [![total](https://cranlogs.r-pkg.org/badges/grand-total/performance)](https://cranlogs.r-pkg.org/) -[![status](https://tinyverse.netlify.com/badge/performance)](https://CRAN.R-project.org/package=performance) ***Test if your model is a good model!*** @@ -58,13 +57,13 @@ To cite performance in publications use: ``` r citation("performance") #> To cite package 'performance' in publications use: -#> +#> #> Lüdecke et al., (2021). performance: An R Package for Assessment, Comparison and #> Testing of Statistical Models. Journal of Open Source Software, 6(60), 3139. #> https://doi.org/10.21105/joss.03139 -#> +#> #> A BibTeX entry for LaTeX users is -#> +#> #> @Article{, #> title = {{performance}: An {R} Package for Assessment, Comparison and Testing of Statistical Models}, #> author = {Daniel Lüdecke and Mattan S. 
Ben-Shachar and Indrajeet Patil and Philip Waggoner and Dominique Makowski}, @@ -146,15 +145,15 @@ model <- stan_glmer( r2(model) #> # Bayesian R2 with Compatibility Interval -#> -#> Conditional R2: 0.953 (95% CI [0.941, 0.963]) -#> Marginal R2: 0.823 (95% CI [0.710, 0.898]) +#> +#> Conditional R2: 0.953 (95% CI [0.942, 0.964]) +#> Marginal R2: 0.826 (95% CI [0.724, 0.900]) library(lme4) model <- lmer(Reaction ~ Days + (1 + Days | Subject), data = sleepstudy) r2(model) #> # R2 for Mixed Models -#> +#> #> Conditional R2: 0.799 #> Marginal R2: 0.279 ``` @@ -173,7 +172,7 @@ library(lme4) model <- lmer(Reaction ~ Days + (1 + Days | Subject), data = sleepstudy) icc(model) #> # Intraclass Correlation Coefficient -#> +#> #> Adjusted ICC: 0.722 #> Unadjusted ICC: 0.521 ``` @@ -189,9 +188,9 @@ model <- brm(mpg ~ wt + (1 | cyl) + (1 + wt | gear), data = mtcars) ``` r icc(model) #> # Intraclass Correlation Coefficient -#> -#> Adjusted ICC: 0.930 -#> Unadjusted ICC: 0.771 +#> +#> Adjusted ICC: 0.941 +#> Unadjusted ICC: 0.779 ``` ### Model diagnostics @@ -210,7 +209,7 @@ data(Salamanders) model <- glm(count ~ spp + mined, family = poisson, data = Salamanders) check_overdispersion(model) #> # Overdispersion test -#> +#> #> dispersion ratio = 2.946 #> Pearson's Chi-Squared = 1873.710 #> p-value = < 0.001 @@ -235,7 +234,7 @@ fitted model. model <- glm(count ~ spp + mined, family = poisson, data = Salamanders) check_zeroinflation(model) #> # Check for zero-inflation -#> +#> #> Observed zeros: 387 #> Predicted zeros: 298 #> Ratio: 0.77 @@ -323,7 +322,7 @@ be r-squared, AIC, BIC, RMSE, ICC or LOOIC. m1 <- lm(mpg ~ wt + cyl, data = mtcars) model_performance(m1) #> # Indices of model performance -#> +#> #> AIC | AICc | BIC | R2 | R2 (adj.) 
| RMSE | Sigma #> --------------------------------------------------------------- #> 156.010 | 157.492 | 161.873 | 0.830 | 0.819 | 2.444 | 2.568 @@ -335,7 +334,7 @@ model_performance(m1) m2 <- glm(vs ~ wt + mpg, data = mtcars, family = "binomial") model_performance(m2) #> # Indices of model performance -#> +#> #> AIC | AICc | BIC | Tjur's R2 | RMSE | Sigma | Log_loss | Score_log | Score_spherical | PCP #> ----------------------------------------------------------------------------------------------------- #> 31.298 | 32.155 | 35.695 | 0.478 | 0.359 | 1.000 | 0.395 | -14.903 | 0.095 | 0.743 @@ -348,7 +347,7 @@ library(lme4) m3 <- lmer(Reaction ~ Days + (1 + Days | Subject), data = sleepstudy) model_performance(m3) #> # Indices of model performance -#> +#> #> AIC | AICc | BIC | R2 (cond.) | R2 (marg.) | ICC | RMSE | Sigma #> ---------------------------------------------------------------------------------- #> 1755.628 | 1756.114 | 1774.786 | 0.799 | 0.279 | 0.722 | 23.438 | 25.592 @@ -368,12 +367,12 @@ m4 <- glm(counts ~ outcome + treatment, family = poisson()) compare_performance(m1, m2, m3, m4, verbose = FALSE) #> # Comparison of Model Performance Indices -#> +#> #> Name | Model | AIC (weights) | AICc (weights) | BIC (weights) | RMSE | Sigma | Score_log | Score_spherical | R2 | R2 (adj.) | Tjur's R2 | Log_loss | PCP | R2 (cond.) | R2 (marg.) 
| ICC | Nagelkerke's R2 #> ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ -#> m1 | lm | 156.0 (<.001) | 157.5 (<.001) | 161.9 (<.001) | 2.444 | 2.568 | | | 0.830 | 0.819 | | | | | | | -#> m2 | glm | 31.3 (>.999) | 32.2 (>.999) | 35.7 (>.999) | 0.359 | 1.000 | -14.903 | 0.095 | | | 0.478 | 0.395 | 0.743 | | | | -#> m3 | lmerMod | 1764.0 (<.001) | 1764.5 (<.001) | 1783.1 (<.001) | 23.438 | 25.592 | | | | | | | | 0.799 | 0.279 | 0.722 | +#> m1 | lm | 156.0 (<.001) | 157.5 (<.001) | 161.9 (<.001) | 2.444 | 2.568 | | | 0.830 | 0.819 | | | | | | | +#> m2 | glm | 31.3 (>.999) | 32.2 (>.999) | 35.7 (>.999) | 0.359 | 1.000 | -14.903 | 0.095 | | | 0.478 | 0.395 | 0.743 | | | | +#> m3 | lmerMod | 1764.0 (<.001) | 1764.5 (<.001) | 1783.1 (<.001) | 23.438 | 25.592 | | | | | | | | 0.799 | 0.279 | 0.722 | #> m4 | glm | 56.8 (<.001) | 76.8 (<.001) | 57.7 (<.001) | 3.043 | 1.000 | -2.598 | 0.324 | | | | | | | | | 0.657 ``` @@ -386,7 +385,7 @@ of model performance and sort the models from the best one to the worse. 
``` r compare_performance(m1, m2, m3, m4, rank = TRUE, verbose = FALSE) #> # Comparison of Model Performance Indices -#> +#> #> Name | Model | RMSE | Sigma | AIC weights | AICc weights | BIC weights | Performance-Score #> ----------------------------------------------------------------------------------------------- #> m2 | glm | 0.359 | 1.000 | 1.000 | 1.000 | 1.000 | 100.00% @@ -424,7 +423,7 @@ lm4 <- lm(Sepal.Length ~ Species * Sepal.Width + Petal.Length + Petal.Width, dat test_performance(lm1, lm2, lm3, lm4) #> Name | Model | BF | Omega2 | p (Omega2) | LR | p (LR) #> ------------------------------------------------------------ -#> lm1 | lm | | | | | +#> lm1 | lm | | | | | #> lm2 | lm | > 1000 | 0.69 | < .001 | -6.25 | < .001 #> lm3 | lm | > 1000 | 0.36 | < .001 | -3.44 | < .001 #> lm4 | lm | > 1000 | 0.73 | < .001 | -7.77 | < .001 @@ -432,12 +431,12 @@ test_performance(lm1, lm2, lm3, lm4) test_bf(lm1, lm2, lm3, lm4) #> Bayes Factors for Model Comparison -#> +#> #> Model BF #> [lm2] Species + Petal.Length 3.45e+26 #> [lm3] Species * Sepal.Width 4.69e+07 #> [lm4] Species * Sepal.Width + Petal.Length + Petal.Width 7.58e+29 -#> +#> #> * Against Denominator: [lm1] Species #> * Bayes Factor Type: BIC approximation ``` diff --git a/WIP/generate_distribution.R b/WIP/generate_distribution.R index 78ff59b2c..b42e2d70d 100644 --- a/WIP/generate_distribution.R +++ b/WIP/generate_distribution.R @@ -134,12 +134,29 @@ for (di in seq_along(distrs)) { # x_scaled <- parameters::normalize(x, verbose = FALSE) if (length(x) >= 10) { + if (all(.is.integer(x))) { + mode <- datawizard::distribution_mode(x) + } else { + mode <- tryCatch( + as.numeric(bayestestR::map_estimate(x, bw = "nrd0")), + error = function(e) NULL + ) + if (is.null(mode)) { + mode <- tryCatch( + as.numeric(bayestestR::map_estimate(x, bw = "kernel")), + error = function(e) NULL + ) + } + if (is.null(mode)) { + mode <- datawizard::distribution_mode(x) + } + } # Extract features data <- data.frame( "SD" = sd(x), 
"MAD" = mad(x, constant = 1), "Mean_Median_Distance" = mean(x) - median(x), - "Mean_Mode_Distance" = mean(x) - as.numeric(bayestestR::map_estimate(x, bw = "nrd0")), + "Mean_Mode_Distance" = mean(x) - as.numeric(mode), "SD_MAD_Distance" = sd(x) - mad(x, constant = 1), "Var_Mean_Distance" = var(x) - mean(x), "Range_SD" = diff(range(x)) / sd(x), @@ -152,8 +169,8 @@ for (di in seq_along(distrs)) { "Min" = min(x), "Max" = max(x), "Proportion_Positive" = sum(x >= 0) / length(x), - "Integer" = all(.is.integer(x)) - # "Proportion_Zero" = sum(x == 0) / length(x) + "Integer" = all(.is.integer(x)), + "Proportion_Zero" = sum(x == 0) / length(x) # "Proportion_Minimum" = sum(x == min(x)) / length(x), # "Proportion_Maximum" = sum(x == max(x)) / length(x) ) diff --git a/cran-comments.md b/cran-comments.md index fa6f2d4c2..010c0c091 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1 +1,3 @@ -Maintainance release. \ No newline at end of file +This release fixes errors in CRAN checks. + +Additionally, in the process of stabilizing the API/user interface for packages from the 'easystats' project, some argument names were renamed and old names have been deprecated. This will not break downstream dependent packages, however, reverse-dependency checks will raise warnings. We have already patched all affected downstream packages and will submit them to CRAN in the next few days, after the release of 'performance'. Once this release-cycle is complete, all warnings due to deprecated argument names should be resolved. 
\ No newline at end of file diff --git a/inst/WORDLIST b/inst/WORDLIST index ca43db445..e5e90d844 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -39,6 +39,7 @@ Chisq CochransQ CompQuadForm Concurvity +Cribari Cronbach's Crujeiras Csaki @@ -147,12 +148,15 @@ Michalos Moosbrugger Mora Multicollinearity +MuMIn NFI NNFI Nagelkerke Nagelkerke's Nakagawa Nakagawa's +Neto +Neto's Nondegenerate Nordhausen Normed @@ -298,6 +302,7 @@ multicollinearity multimodel multiresponse multivariable +nbinom nd nonnest overfitted diff --git a/man/check_clusterstructure.Rd b/man/check_clusterstructure.Rd index 577687335..cd15897b4 100644 --- a/man/check_clusterstructure.Rd +++ b/man/check_clusterstructure.Rd @@ -9,7 +9,7 @@ check_clusterstructure(x, standardize = TRUE, distance = "euclidean", ...) \arguments{ \item{x}{A data frame.} -\item{standardize}{Standardize the dataframe before clustering (default).} +\item{standardize}{Standardize the data frame before clustering (default).} \item{distance}{Distance method used. Other methods than "euclidean" (default) are exploratory in the context of clustering tendency. See diff --git a/man/check_distribution.Rd b/man/check_distribution.Rd index d8fd6c949..cb80405d9 100644 --- a/man/check_distribution.Rd +++ b/man/check_distribution.Rd @@ -21,11 +21,11 @@ model family, consider this function as somewhat experimental. This function uses an internal random forest model to classify the distribution from a model-family. Currently, following distributions are trained (i.e. results of \code{check_distribution()} may be one of the -following): \code{"bernoulli"}, \code{"beta"}, \code{"beta-binomial"}, -\code{"binomial"}, \code{"chi"}, \code{"exponential"}, \code{"F"}, -\code{"gamma"}, \code{"lognormal"}, \code{"normal"}, \code{"negative binomial"}, \code{"negative binomial (zero-inflated)"}, \code{"pareto"}, -\code{"poisson"}, \code{"poisson (zero-inflated)"}, \code{"uniform"} and -\code{"weibull"}. 
+following): \code{"bernoulli"}, \code{"beta"}, \code{"beta-binomial"}, \code{"binomial"}, +\code{"cauchy"}, \code{"chi"}, \code{"exponential"}, \code{"F"}, \code{"gamma"}, \code{"half-cauchy"}, +\code{"inverse-gamma"}, \code{"lognormal"}, \code{"normal"}, \code{"negative binomial"}, +\code{"negative binomial (zero-inflated)"}, \code{"pareto"}, \code{"poisson"}, +\code{"poisson (zero-inflated)"}, \code{"tweedie"}, \code{"uniform"} and \code{"weibull"}. \cr \cr Note the similarity between certain distributions according to shape, skewness, etc. Thus, the predicted distribution may not be perfectly representing the diff --git a/man/check_factorstructure.Rd b/man/check_factorstructure.Rd index 471b93bb4..960549d62 100644 --- a/man/check_factorstructure.Rd +++ b/man/check_factorstructure.Rd @@ -13,7 +13,7 @@ check_kmo(x, n = NULL, ...) check_sphericity_bartlett(x, n = NULL, ...) } \arguments{ -\item{x}{A dataframe or a correlation matrix. If the latter is passed, \code{n} +\item{x}{A data frame or a correlation matrix. If the latter is passed, \code{n} must be provided.} \item{n}{If a correlation matrix was passed, the number of observations must diff --git a/man/check_heterogeneity_bias.Rd b/man/check_heterogeneity_bias.Rd index 20c0bba4c..21534540e 100644 --- a/man/check_heterogeneity_bias.Rd +++ b/man/check_heterogeneity_bias.Rd @@ -4,7 +4,7 @@ \alias{check_heterogeneity_bias} \title{Check model predictor for heterogeneity bias} \usage{ -check_heterogeneity_bias(x, select = NULL, group = NULL) +check_heterogeneity_bias(x, select = NULL, by = NULL, group = NULL) } \arguments{ \item{x}{A data frame or a mixed model object.} @@ -13,9 +13,11 @@ check_heterogeneity_bias(x, select = NULL, group = NULL) that should be checked. 
If \code{x} is a mixed model object, this argument will be ignored.} -\item{group}{Character vector (or formula) with the name of the variable that +\item{by}{Character vector (or formula) with the name of the variable that indicates the group- or cluster-ID. If \code{x} is a model object, this argument will be ignored.} + +\item{group}{Deprecated. Use \code{by} instead.} } \description{ \code{check_heterogeneity_bias()} checks if model predictors or variables may @@ -25,7 +27,7 @@ between-effect (\emph{Bell and Jones, 2015}). \examples{ data(iris) iris$ID <- sample(1:4, nrow(iris), replace = TRUE) # fake-ID -check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), group = "ID") +check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), by = "ID") } \references{ \itemize{ diff --git a/man/check_outliers.Rd b/man/check_outliers.Rd index c75d5fc4a..623eae4b2 100644 --- a/man/check_outliers.Rd +++ b/man/check_outliers.Rd @@ -150,7 +150,7 @@ the median (which are robust measures of dispersion and centrality). The default threshold to classify outliers is 1.959 (\code{threshold = list("zscore" = 1.959)}), corresponding to the 2.5\% (\code{qnorm(0.975)}) most extreme observations (assuming the data is normally distributed). Importantly, the Z-score -method is univariate: it is computed column by column. If a dataframe is +method is univariate: it is computed column by column. If a data frame is passed, the Z-score is calculated for each variable separately, and the maximum (absolute) Z-score is kept for each observations. Thus, all observations that are extreme on at least one variable might be detected @@ -303,14 +303,14 @@ data <- mtcars # Size nrow(data) = 32 outliers_list <- check_outliers(data$mpg) # Find outliers outliers_list # Show the row index of the outliers as.numeric(outliers_list) # The object is a binary vector... 
-filtered_data <- data[!outliers_list, ] # And can be used to filter a dataframe +filtered_data <- data[!outliers_list, ] # And can be used to filter a data frame nrow(filtered_data) # New size, 28 (4 outliers removed) # Find all observations beyond +/- 2 SD check_outliers(data$mpg, method = "zscore", threshold = 2) # For dataframes ------------------------------------------------------ -check_outliers(data) # It works the same way on dataframes +check_outliers(data) # It works the same way on data frames # You can also use multiple methods at once outliers_list <- check_outliers(data, method = c( @@ -331,8 +331,9 @@ filtered_data <- data[outliers_info$Outlier < 0.1, ] # We can run the function stratified by groups using `{datawizard}` package: group_iris <- datawizard::data_group(iris, "Species") check_outliers(group_iris) - +# nolint start \dontshow{if (require("see") && require("bigutilsr") && require("loo") && require("MASS") && require("ICSOutlier") && require("ICS") && require("dbscan")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +# nolint end \donttest{ # You can also run all the methods check_outliers(data, method = "all", verbose = FALSE) @@ -359,36 +360,37 @@ insight::get_data(model)[outliers_list, ] # Show outliers data \itemize{ \item Archimbaud, A., Nordhausen, K., and Ruiz-Gazen, A. (2018). ICS for multivariate outlier detection with application to quality control. -Computational Statistics and Data Analysis, 128, 184-199. +\emph{Computational Statistics and Data Analysis}, \emph{128}, 184-199. \doi{10.1016/j.csda.2018.06.011} \item Gnanadesikan, R., and Kettenring, J. R. (1972). Robust estimates, residuals, -and outlier detection with multiresponse data. Biometrics, 81-124. +and outlier detection with multiresponse data. \emph{Biometrics}, 81-124. \item Bollen, K. A., and Jackman, R. W. (1985). Regression diagnostics: An -expository treatment of outliers and influential cases. 
Sociological Methods -and Research, 13(4), 510-542. +expository treatment of outliers and influential cases. \emph{Sociological Methods +and Research}, \emph{13}(4), 510-542. \item Cabana, E., Lillo, R. E., and Laniado, H. (2019). Multivariate outlier detection based on a robust Mahalanobis distance with shrinkage estimators. arXiv preprint arXiv:1904.02596. \item Cook, R. D. (1977). Detection of influential observation in linear -regression. Technometrics, 19(1), 15-18. +regression. \emph{Technometrics}, \emph{19}(1), 15-18. \item Iglewicz, B., and Hoaglin, D. C. (1993). How to detect and handle outliers (Vol. 16). Asq Press. \item Leys, C., Klein, O., Dominicy, Y., and Ley, C. (2018). Detecting -multivariate outliers: Use a robust variant of Mahalanobis distance. Journal -of Experimental Social Psychology, 74, 150-156. +multivariate outliers: Use a robust variant of Mahalanobis distance. \emph{Journal +of Experimental Social Psychology}, 74, 150-156. \item Liu, F. T., Ting, K. M., and Zhou, Z. H. (2008, December). Isolation forest. In 2008 Eighth IEEE International Conference on Data Mining (pp. 413-422). IEEE. \item Lüdecke, D., Ben-Shachar, M. S., Patil, I., Waggoner, P., and Makowski, D. (2021). performance: An R package for assessment, comparison and testing of -statistical models. Journal of Open Source Software, 6(60), 3139. +statistical models. \emph{Journal of Open Source Software}, \emph{6}(60), 3139. \doi{10.21105/joss.03139} \item Thériault, R., Ben-Shachar, M. S., Patil, I., Lüdecke, D., Wiernik, B. M., and Makowski, D. (2023). Check your outliers! An introduction to identifying -statistical outliers in R with easystats. \doi{10.31234/osf.io/bu6nt} +statistical outliers in R with easystats. \emph{Behavior Research Methods}, 1-11. +\doi{10.3758/s13428-024-02356-w} \item Rousseeuw, P. J., and Van Zomeren, B. C. (1990). Unmasking multivariate -outliers and leverage points. Journal of the American Statistical -association, 85(411), 633-639. 
+outliers and leverage points. \emph{Journal of the American Statistical +association}, \emph{85}(411), 633-639. } } \seealso{ diff --git a/man/check_singularity.Rd b/man/check_singularity.Rd index e4cafa99a..cfe2dca0c 100644 --- a/man/check_singularity.Rd +++ b/man/check_singularity.Rd @@ -94,8 +94,11 @@ check_singularity(model) # Fixing singularity issues using priors in glmmTMB # Example taken from `vignette("priors", package = "glmmTMB")` -dat <- readRDS(system.file("vignette_data", "gophertortoise.rds", - package = "glmmTMB")) +dat <- readRDS(system.file( + "vignette_data", + "gophertortoise.rds", + package = "glmmTMB" +)) model <- glmmTMB::glmmTMB( shells ~ prev + offset(log(Area)) + factor(year) + (1 | Site), family = poisson, diff --git a/man/figures/unnamed-chunk-14-1.png b/man/figures/unnamed-chunk-14-1.png index 89a6592ba..c8ac9bd3a 100644 Binary files a/man/figures/unnamed-chunk-14-1.png and b/man/figures/unnamed-chunk-14-1.png differ diff --git a/man/figures/unnamed-chunk-20-1.png b/man/figures/unnamed-chunk-20-1.png index 063ce5313..17d827a8a 100644 Binary files a/man/figures/unnamed-chunk-20-1.png and b/man/figures/unnamed-chunk-20-1.png differ diff --git a/man/icc.Rd b/man/icc.Rd index d25004e07..6b6edbcb8 100644 --- a/man/icc.Rd +++ b/man/icc.Rd @@ -12,6 +12,9 @@ icc( ci = NULL, iterations = 100, ci_method = NULL, + null_model = NULL, + approximation = "lognormal", + model_component = NULL, verbose = TRUE, ... ) @@ -21,36 +24,55 @@ variance_decomposition(model, re_formula = NULL, robust = TRUE, ci = 0.95, ...) \arguments{ \item{model}{A (Bayesian) mixed effects model.} -\item{by_group}{Logical, if \code{TRUE}, \code{icc()} returns the variance -components for each random-effects level (if there are multiple levels). -See 'Details'.} +\item{by_group}{Logical, if \code{TRUE}, \code{icc()} returns the variance components +for each random-effects level (if there are multiple levels). 
See 'Details'.} \item{tolerance}{Tolerance for singularity check of random effects, to decide whether to compute random effect variances or not. Indicates up to which value the convergence result is accepted. The larger tolerance is, the stricter the test will be. See \code{\link[performance:check_singularity]{performance::check_singularity()}}.} -\item{ci}{Confidence resp. credible interval level. For \code{icc()} and \code{r2()}, -confidence intervals are based on bootstrapped samples from the ICC resp. -R2 value. See \code{iterations}.} +\item{ci}{Confidence resp. credible interval level. For \code{icc()}, \code{r2()}, and +\code{rmse()}, confidence intervals are based on bootstrapped samples from the +ICC, R2 or RMSE value. See \code{iterations}.} \item{iterations}{Number of bootstrap-replicates when computing confidence -intervals for the ICC or R2.} +intervals for the ICC, R2, RMSE etc.} \item{ci_method}{Character string, indicating the bootstrap-method. Should -be \code{NULL} (default), in which case \code{lme4::bootMer()} is used for -bootstrapped confidence intervals. However, if bootstrapped intervals cannot -be calculated this was, try \code{ci_method = "boot"}, which falls back to -\code{boot::boot()}. This may successfully return bootstrapped confidence intervals, -but bootstrapped samples may not be appropriate for the multilevel structure -of the model. There is also an option \code{ci_method = "analytical"}, which tries -to calculate analytical confidence assuming a chi-squared distribution. -However, these intervals are rather inaccurate and often too narrow. It is -recommended to calculate bootstrapped confidence intervals for mixed models.} +be \code{NULL} (default), in which case \code{lme4::bootMer()} is used for bootstrapped +confidence intervals. However, if bootstrapped intervals cannot be calculated +this way, try \code{ci_method = "boot"}, which falls back to \code{boot::boot()}. 
This +may successfully return bootstrapped confidence intervals, but bootstrapped +samples may not be appropriate for the multilevel structure of the model. +There is also an option \code{ci_method = "analytical"}, which tries to calculate +analytical confidence assuming a chi-squared distribution. However, these +intervals are rather inaccurate and often too narrow. It is recommended to +calculate bootstrapped confidence intervals for mixed models.} + +\item{null_model}{Optional, a null model to compute the random effect variances, +which is passed to \code{\link[insight:get_variance]{insight::get_variance()}}. Usually only required if +calculation of r-squared or ICC fails when \code{null_model} is not specified. If +calculating the null model takes longer and you already have fit the null +model, you can pass it here, too, to speed up the process.} + +\item{approximation}{Character string, indicating the approximation method +for the distribution-specific (observation level, or residual) variance. Only +applies to non-Gaussian models. Can be \code{"lognormal"} (default), \code{"delta"} or +\code{"trigamma"}. For binomial models, the default is the \emph{theoretical} distribution +specific variance, however, it can also be \code{"observation_level"}. See +\emph{Nakagawa et al. 2017}, in particular supplement 2, for details.} + +\item{model_component}{For models that can have a zero-inflation component, +specify for which component variances should be returned. If \code{NULL} or \code{"full"} +(the default), both the conditional and the zero-inflation component are taken +into account. 
If \code{"conditional"}, only the conditional component is considered.} \item{verbose}{Toggle warnings and messages.} -\item{...}{Arguments passed down to \code{brms::posterior_predict()}.} +\item{...}{Arguments passed down to \code{lme4::bootMer()} or \code{boot::boot()} +for bootstrapped ICC, R2, RMSE etc.; for \code{variance_decomposition()}, +arguments are passed down to \code{brms::posterior_predict()}.} \item{re_formula}{Formula containing group-level effects to be considered in the prediction. If \code{NULL} (default), include all group-level effects. @@ -70,7 +92,7 @@ ICC as well as the credible intervals for this ICC. This function calculates the intraclass-correlation coefficient (ICC) - sometimes also called \emph{variance partition coefficient} (VPC) or \emph{repeatability} - for mixed effects models. The ICC can be calculated for all -models supported by \code{insight::get_variance()}. For models fitted with the +models supported by \code{\link[insight:get_variance]{insight::get_variance()}}. For models fitted with the \strong{brms}-package, \code{icc()} might fail due to the large variety of models and families supported by the \strong{brms}-package. In such cases, an alternative to the ICC is the \code{variance_decomposition()}, which is based @@ -175,6 +197,48 @@ very large, the variance ratio in the output makes no sense, e.g. because it is negative. In such cases, it might help to use \code{robust = TRUE}. } } +\section{Supported models and model families}{ + +The single variance components that are required to calculate the marginal +and conditional r-squared values are calculated using the \code{\link[insight:get_variance]{insight::get_variance()}} +function. The results are validated against the solutions provided by +\emph{Nakagawa et al. (2017)}, in particular examples shown in the Supplement 2 +of the paper. Other model families are validated against results from the +\strong{MuMIn} package. 
This means that the r-squared values returned by \code{r2_nakagawa()} +should be accurate and reliable for the following mixed models or model families: +\itemize{ +\item Bernoulli (logistic) regression +\item Binomial regression (with outcomes other than binary) +\item Poisson and Quasi-Poisson regression +\item Negative binomial regression (including nbinom1 and nbinom2 families) +\item Gaussian regression (linear models) +\item Gamma regression +\item Tweedie regression +\item Beta regression +\item Ordered beta regression +} + +The following model families are not yet validated, but should work: +\itemize{ +\item Zero-inflated and hurdle models +\item Beta-binomial regression +\item Compound Poisson regression +\item Generalized Poisson regression +\item Log-normal regression +} + +Extracting variance components for models with a zero-inflation part is not +straightforward, because it is not entirely clear how the distribution-specific +variance should be calculated. Therefore, it is recommended to carefully +inspect the results, and possibly validate against other models, e.g. Bayesian +models (although results may be only roughly comparable). + +Log-normal regressions (e.g. \code{lognormal()} family in \strong{glmmTMB} or \code{gaussian("log")}) +often have a very low fixed effects variance (if they were calculated as +suggested by \emph{Nakagawa et al. 2017}). This results in very low ICC or +r-squared values, which may not be meaningful. +} + \examples{ \dontshow{if (require("lme4")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} model <- lme4::lmer(Sepal.Length ~ Petal.Length + (1 | Species), data = iris) diff --git a/man/item_discrimination.Rd b/man/item_discrimination.Rd index 503ebadb7..37292f905 100644 --- a/man/item_discrimination.Rd +++ b/man/item_discrimination.Rd @@ -25,8 +25,8 @@ for tests or item-scales of questionnaires.
This function calculates the item discriminations (corrected item-total correlations for each item of \code{x} with the remaining items) for each item of a scale. The absolute value of the item discrimination indices should be -above 0.2. An index between 0.2 and 0.4 is considered as "fair", while a -satisfactory index ranges from 0.4 to 0.7. Items with low discrimination +above \code{0.2}. An index between \code{0.2} and \code{0.4} is considered as "fair", while a +satisfactory index ranges from \code{0.4} to \code{0.7}. Items with low discrimination indices are often ambiguously worded and should be examined. Items with negative indices should be examined to determine why a negative value was obtained (e.g. reversed answer categories regarding positive and negative diff --git a/man/model_performance.rma.Rd b/man/model_performance.rma.Rd index f6489005b..69747fd6d 100644 --- a/man/model_performance.rma.Rd +++ b/man/model_performance.rma.Rd @@ -55,7 +55,7 @@ estimates to the amount of sampling variability. \item \strong{TAU2}: The amount of (residual) heterogeneity in the random or mixed effects model. \item \strong{CochransQ (QE)}: Test for (residual) Heterogeneity. Without -moderators in the model, this is simply Cochran's Q-test. +moderators in the model, this is simply Cochran's \emph{Q}-test. \item \strong{Omnibus (QM)}: Omnibus test of parameters. \item \strong{R2}: Pseudo-R2-statistic, which indicates the amount of heterogeneity accounted for by the moderators included in a fixed-effects diff --git a/man/performance_mae.Rd b/man/performance_mae.Rd index 2d68e1606..20874f5ba 100644 --- a/man/performance_mae.Rd +++ b/man/performance_mae.Rd @@ -12,7 +12,9 @@ mae(model, ...) 
\arguments{ \item{model}{A model.} -\item{...}{Arguments passed to or from other methods.} +\item{...}{Arguments passed down to \code{lme4::bootMer()} or \code{boot::boot()} +for bootstrapped ICC, R2, RMSE etc.; for \code{variance_decomposition()}, +arguments are passed down to \code{brms::posterior_predict()}.} } \value{ Numeric, the mean absolute error of \code{model}. diff --git a/man/performance_mse.Rd b/man/performance_mse.Rd index ef4d6781a..e81327c14 100644 --- a/man/performance_mse.Rd +++ b/man/performance_mse.Rd @@ -12,7 +12,9 @@ mse(model, ...) \arguments{ \item{model}{A model.} -\item{...}{Arguments passed to or from other methods.} +\item{...}{Arguments passed down to \code{lme4::bootMer()} or \code{boot::boot()} +for bootstrapped ICC, R2, RMSE etc.; for \code{variance_decomposition()}, +arguments are passed down to \code{brms::posterior_predict()}.} } \value{ Numeric, the mean square error of \code{model}. diff --git a/man/performance_rmse.Rd b/man/performance_rmse.Rd index bea4534b5..124570c29 100644 --- a/man/performance_rmse.Rd +++ b/man/performance_rmse.Rd @@ -5,16 +5,54 @@ \alias{rmse} \title{Root Mean Squared Error} \usage{ -performance_rmse(model, normalized = FALSE, verbose = TRUE) +performance_rmse( + model, + normalized = FALSE, + ci = NULL, + iterations = 100, + ci_method = NULL, + verbose = TRUE, + ... +) -rmse(model, normalized = FALSE, verbose = TRUE) +rmse( + model, + normalized = FALSE, + ci = NULL, + iterations = 100, + ci_method = NULL, + verbose = TRUE, + ... +) } \arguments{ \item{model}{A model.} \item{normalized}{Logical, use \code{TRUE} if normalized rmse should be returned.} -\item{verbose}{Toggle off warnings.} +\item{ci}{Confidence resp. credible interval level. For \code{icc()}, \code{r2()}, and +\code{rmse()}, confidence intervals are based on bootstrapped samples from the +ICC, R2 or RMSE value. 
See \code{iterations}.} + +\item{iterations}{Number of bootstrap-replicates when computing confidence +intervals for the ICC, R2, RMSE etc.} + +\item{ci_method}{Character string, indicating the bootstrap-method. Should +be \code{NULL} (default), in which case \code{lme4::bootMer()} is used for bootstrapped +confidence intervals. However, if bootstrapped intervals cannot be calculated +this way, try \code{ci_method = "boot"}, which falls back to \code{boot::boot()}. This +may successfully return bootstrapped confidence intervals, but bootstrapped +samples may not be appropriate for the multilevel structure of the model. +There is also an option \code{ci_method = "analytical"}, which tries to calculate +analytical confidence assuming a chi-squared distribution. However, these +intervals are rather inaccurate and often too narrow. It is recommended to +calculate bootstrapped confidence intervals for mixed models.} + +\item{verbose}{Toggle warnings and messages.} + +\item{...}{Arguments passed down to \code{lme4::bootMer()} or \code{boot::boot()} +for bootstrapped ICC, R2, RMSE etc.; for \code{variance_decomposition()}, +arguments are passed down to \code{brms::posterior_predict()}.} } \value{ Numeric, the root mean squared error. diff --git a/man/r2.Rd b/man/r2.Rd index 9c5c648c3..bf783e8d9 100644 --- a/man/r2.Rd +++ b/man/r2.Rd @@ -49,9 +49,9 @@ determination, value for different model objects. Depending on the model, R2, pseudo-R2, or marginal / adjusted R2 values are returned. 
} \note{ -If there is no \code{r2()}-method defined for the given model class, -\code{r2()} tries to return a "generic" r-quared value, calculated as following: -\verb{1-sum((y-y_hat)^2)/sum((y-y_bar)^2))} +If there is no \code{r2()}-method defined for the given model class, \code{r2()} tries +to return a "generic" r-squared value, calculated as follows: +\code{1-sum((y-y_hat)^2)/sum((y-y_bar)^2)} } \examples{ \dontshow{if (require("lme4")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} @@ -68,8 +68,7 @@ r2(model) \dontshow{\}) # examplesIf} } \seealso{ -\code{\link[=r2_bayes]{r2_bayes()}}, \code{\link[=r2_coxsnell]{r2_coxsnell()}}, \code{\link[=r2_kullback]{r2_kullback()}}, -\code{\link[=r2_loo]{r2_loo()}}, \code{\link[=r2_mcfadden]{r2_mcfadden()}}, \code{\link[=r2_nagelkerke]{r2_nagelkerke()}}, -\code{\link[=r2_nakagawa]{r2_nakagawa()}}, \code{\link[=r2_tjur]{r2_tjur()}}, \code{\link[=r2_xu]{r2_xu()}} and -\code{\link[=r2_zeroinflated]{r2_zeroinflated()}}. +\code{\link[=r2_bayes]{r2_bayes()}}, \code{\link[=r2_coxsnell]{r2_coxsnell()}}, \code{\link[=r2_kullback]{r2_kullback()}}, \code{\link[=r2_loo]{r2_loo()}}, +\code{\link[=r2_mcfadden]{r2_mcfadden()}}, \code{\link[=r2_nagelkerke]{r2_nagelkerke()}}, \code{\link[=r2_nakagawa]{r2_nakagawa()}}, \code{\link[=r2_tjur]{r2_tjur()}}, +\code{\link[=r2_xu]{r2_xu()}} and \code{\link[=r2_zeroinflated]{r2_zeroinflated()}}. } diff --git a/man/r2_bayes.Rd b/man/r2_bayes.Rd index 67200fb7d..63f9843ba 100644 --- a/man/r2_bayes.Rd +++ b/man/r2_bayes.Rd @@ -40,26 +40,25 @@ the first model (or the denominator, for \code{BFBayesFactor} objects). For } \value{ A list with the Bayesian R2 value. For mixed models, a list with the -Bayesian R2 value and the marginal Bayesian R2 value. The standard errors -and credible intervals for the R2 values are saved as attributes. +Bayesian R2 value and the marginal Bayesian R2 value. The standard errors and +credible intervals for the R2 values are saved as attributes.
} \description{ -Compute R2 for Bayesian models. For mixed models (including a -random part), it additionally computes the R2 related to the fixed effects -only (marginal R2). While \code{r2_bayes()} returns a single R2 value, -\code{r2_posterior()} returns a posterior sample of Bayesian R2 values. +Compute R2 for Bayesian models. For mixed models (including a random part), +it additionally computes the R2 related to the fixed effects only (marginal +R2). While \code{r2_bayes()} returns a single R2 value, \code{r2_posterior()} returns a +posterior sample of Bayesian R2 values. } \details{ -\code{r2_bayes()} returns an "unadjusted" R2 value. See -\code{\link[=r2_loo]{r2_loo()}} to calculate a LOO-adjusted R2, which comes -conceptually closer to an adjusted R2 measure. +\code{r2_bayes()} returns an "unadjusted" R2 value. See \code{\link[=r2_loo]{r2_loo()}} to calculate a +LOO-adjusted R2, which comes conceptually closer to an adjusted R2 measure. For mixed models, the conditional and marginal R2 are returned. The marginal -R2 considers only the variance of the fixed effects, while the conditional -R2 takes both the fixed and random effects into account. +R2 considers only the variance of the fixed effects, while the conditional R2 +takes both the fixed and random effects into account. -\code{r2_posterior()} is the actual workhorse for \code{r2_bayes()} and -returns a posterior sample of Bayesian R2 values. +\code{r2_posterior()} is the actual workhorse for \code{r2_bayes()} and returns a +posterior sample of Bayesian R2 values. } \examples{ \dontshow{if (require("rstanarm") && require("rstantools") && require("brms")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} @@ -105,7 +104,7 @@ r2_bayes(model) \dontshow{\}) # examplesIf} } \references{ -Gelman, A., Goodrich, B., Gabry, J., and Vehtari, A. (2018). -R-squared for Bayesian regression models. The American Statistician, 1–6. +Gelman, A., Goodrich, B., Gabry, J., and Vehtari, A. (2018). 
R-squared for +Bayesian regression models. The American Statistician, 1–6. \doi{10.1080/00031305.2018.1549100} } diff --git a/man/r2_ferrari.Rd b/man/r2_ferrari.Rd new file mode 100644 index 000000000..78635539b --- /dev/null +++ b/man/r2_ferrari.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/r2_ferarri.R +\name{r2_ferrari} +\alias{r2_ferrari} +\alias{r2_ferrari.default} +\title{Ferrari's and Cribari-Neto's R2} +\usage{ +r2_ferrari(model, ...) + +\method{r2_ferrari}{default}(model, correct_bounds = FALSE, ...) +} +\arguments{ +\item{model}{Generalized linear, in particular beta-regression model.} + +\item{...}{Currently not used.} + +\item{correct_bounds}{Logical, whether to correct the bounds of the response +variable to avoid 0 and 1. If \code{TRUE}, the response variable is normalized +and "compressed", i.e. zeros and ones are excluded.} +} +\value{ +A list with the pseudo R2 value. +} +\description{ +Calculates Ferrari's and Cribari-Neto's pseudo R2 (for +beta-regression models). +} +\examples{ +\dontshow{if (require("betareg")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +data("GasolineYield", package = "betareg") +model <- betareg::betareg(yield ~ batch + temp, data = GasolineYield) +r2_ferrari(model) +\dontshow{\}) # examplesIf} +} +\references{ +\itemize{ +\item Ferrari, S., and Cribari-Neto, F. (2004). Beta Regression for Modelling Rates +and Proportions. Journal of Applied Statistics, 31(7), 799–815. +\doi{10.1080/0266476042000214501} +} +} diff --git a/man/r2_nakagawa.Rd b/man/r2_nakagawa.Rd index 357c2a3e9..b8c8601e3 100644 --- a/man/r2_nakagawa.Rd +++ b/man/r2_nakagawa.Rd @@ -7,10 +7,13 @@ r2_nakagawa( model, by_group = FALSE, - tolerance = 1e-05, + tolerance = 1e-08, ci = NULL, iterations = 100, ci_method = NULL, + null_model = NULL, + approximation = "lognormal", + model_component = NULL, verbose = TRUE, ... ) @@ -29,27 +32,47 @@ or not. 
Indicates up to which value the convergence result is accepted. When can't be computed (and thus, the conditional r-squared is \code{NA}), decrease the tolerance-level. See also \code{\link[=check_singularity]{check_singularity()}}.} -\item{ci}{Confidence resp. credible interval level. For \code{icc()} and \code{r2()}, -confidence intervals are based on bootstrapped samples from the ICC resp. -R2 value. See \code{iterations}.} +\item{ci}{Confidence resp. credible interval level. For \code{icc()}, \code{r2()}, and +\code{rmse()}, confidence intervals are based on bootstrapped samples from the +ICC, R2 or RMSE value. See \code{iterations}.} \item{iterations}{Number of bootstrap-replicates when computing confidence -intervals for the ICC or R2.} +intervals for the ICC, R2, RMSE etc.} \item{ci_method}{Character string, indicating the bootstrap-method. Should -be \code{NULL} (default), in which case \code{lme4::bootMer()} is used for -bootstrapped confidence intervals. However, if bootstrapped intervals cannot -be calculated this was, try \code{ci_method = "boot"}, which falls back to -\code{boot::boot()}. This may successfully return bootstrapped confidence intervals, -but bootstrapped samples may not be appropriate for the multilevel structure -of the model. There is also an option \code{ci_method = "analytical"}, which tries -to calculate analytical confidence assuming a chi-squared distribution. -However, these intervals are rather inaccurate and often too narrow. It is -recommended to calculate bootstrapped confidence intervals for mixed models.} +be \code{NULL} (default), in which case \code{lme4::bootMer()} is used for bootstrapped +confidence intervals. However, if bootstrapped intervals cannot be calculated +this way, try \code{ci_method = "boot"}, which falls back to \code{boot::boot()}. This +may successfully return bootstrapped confidence intervals, but bootstrapped +samples may not be appropriate for the multilevel structure of the model. 
+There is also an option \code{ci_method = "analytical"}, which tries to calculate +analytical confidence assuming a chi-squared distribution. However, these +intervals are rather inaccurate and often too narrow. It is recommended to +calculate bootstrapped confidence intervals for mixed models.} + +\item{null_model}{Optional, a null model to compute the random effect variances, +which is passed to \code{\link[insight:get_variance]{insight::get_variance()}}. Usually only required if +calculation of r-squared or ICC fails when \code{null_model} is not specified. If +calculating the null model takes longer and you already have fit the null +model, you can pass it here, too, to speed up the process.} + +\item{approximation}{Character string, indicating the approximation method +for the distribution-specific (observation level, or residual) variance. Only +applies to non-Gaussian models. Can be \code{"lognormal"} (default), \code{"delta"} or +\code{"trigamma"}. For binomial models, the default is the \emph{theoretical} distribution +specific variance, however, it can also be \code{"observation_level"}. See +\emph{Nakagawa et al. 2017}, in particular supplement 2, for details.} + +\item{model_component}{For models that can have a zero-inflation component, +specify for which component variances should be returned. If \code{NULL} or \code{"full"} +(the default), both the conditional and the zero-inflation component are taken +into account. If \code{"conditional"}, only the conditional component is considered.} \item{verbose}{Toggle warnings and messages.} -\item{...}{Arguments passed down to \code{brms::posterior_predict()}.} +\item{...}{Arguments passed down to \code{lme4::bootMer()} or \code{boot::boot()} +for bootstrapped ICC, R2, RMSE etc.; for \code{variance_decomposition()}, +arguments are passed down to \code{brms::posterior_predict()}.} } \value{ A list with the conditional and marginal R2 values. 
@@ -61,7 +84,7 @@ mixed effects models with complex random effects structures. \details{ Marginal and conditional r-squared values for mixed models are calculated based on \emph{Nakagawa et al. (2017)}. For more details on the computation of -the variances, see \code{?insight::get_variance}. The random effect variances are +the variances, see \code{\link[insight:get_variance]{insight::get_variance()}}. The random effect variances are actually the mean random effect variances, thus the r-squared value is also appropriate for mixed models with random slopes or nested random effects (see \emph{Johnson, 2014}). @@ -73,6 +96,48 @@ appropriate for mixed models with random slopes or nested random effects The contribution of random effects can be deduced by subtracting the marginal R2 from the conditional R2 or by computing the \code{\link[=icc]{icc()}}. } +\section{Supported models and model families}{ + +The single variance components that are required to calculate the marginal +and conditional r-squared values are calculated using the \code{\link[insight:get_variance]{insight::get_variance()}} +function. The results are validated against the solutions provided by +\emph{Nakagawa et al. (2017)}, in particular examples shown in the Supplement 2 +of the paper. Other model families are validated against results from the +\strong{MuMIn} package. 
This means that the r-squared values returned by \code{r2_nakagawa()} +should be accurate and reliable for the following mixed models or model families: +\itemize{ +\item Bernoulli (logistic) regression +\item Binomial regression (with other than binary outcomes) +\item Poisson and Quasi-Poisson regression +\item Negative binomial regression (including nbinom1 and nbinom2 families) +\item Gaussian regression (linear models) +\item Gamma regression +\item Tweedie regression +\item Beta regression +\item Ordered beta regression +} + +The following model families are not yet validated, but should work: +\itemize{ +\item Zero-inflated and hurdle models +\item Beta-binomial regression +\item Compound Poisson regression +\item Generalized Poisson regression +\item Log-normal regression +} + +Extracting variance components for models with zero-inflation part is not +straightforward, because it is not entirely clear how the distribution-specific +variance should be calculated. Therefore, it is recommended to carefully +inspect the results, and probably validate against other models, e.g. Bayesian +models (although results may be only roughly comparable). + +Log-normal regressions (e.g. \code{lognormal()} family in \strong{glmmTMB} or \code{gaussian("log")}) +often have a very low fixed effects variance (if they were calculated as +suggested by \emph{Nakagawa et al. 2017}). This results in very low ICC or +r-squared values, which may not be meaningful.
+} + \examples{ \dontshow{if (require("lme4")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} model <- lme4::lmer(Sepal.Length ~ Petal.Length + (1 | Species), data = iris) diff --git a/man/reexports.Rd b/man/reexports.Rd index eacc26d6d..392beca45 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/display.R, R/print_md.R +% Please edit documentation in R/reexports.R \docType{import} \name{reexports} \alias{reexports} diff --git a/_pkgdown.yaml b/pkgdown/_pkgdown.yaml similarity index 100% rename from _pkgdown.yaml rename to pkgdown/_pkgdown.yaml diff --git a/pkgdown/favicon/apple-touch-icon-120x120.png b/pkgdown/favicon/apple-touch-icon-120x120.png new file mode 100644 index 000000000..6296e8cc0 Binary files /dev/null and b/pkgdown/favicon/apple-touch-icon-120x120.png differ diff --git a/pkgdown/favicon/apple-touch-icon-152x152.png b/pkgdown/favicon/apple-touch-icon-152x152.png new file mode 100644 index 000000000..4161c6943 Binary files /dev/null and b/pkgdown/favicon/apple-touch-icon-152x152.png differ diff --git a/pkgdown/favicon/apple-touch-icon-180x180.png b/pkgdown/favicon/apple-touch-icon-180x180.png new file mode 100644 index 000000000..b8fab8cb7 Binary files /dev/null and b/pkgdown/favicon/apple-touch-icon-180x180.png differ diff --git a/pkgdown/favicon/apple-touch-icon-60x60.png b/pkgdown/favicon/apple-touch-icon-60x60.png new file mode 100644 index 000000000..b3cff4a07 Binary files /dev/null and b/pkgdown/favicon/apple-touch-icon-60x60.png differ diff --git a/pkgdown/favicon/apple-touch-icon-76x76.png b/pkgdown/favicon/apple-touch-icon-76x76.png new file mode 100644 index 000000000..af9a1c21b Binary files /dev/null and b/pkgdown/favicon/apple-touch-icon-76x76.png differ diff --git a/pkgdown/favicon/apple-touch-icon.png b/pkgdown/favicon/apple-touch-icon.png new file mode 100644 index 000000000..17f9d62aa Binary files /dev/null and 
b/pkgdown/favicon/apple-touch-icon.png differ diff --git a/pkgdown/favicon/favicon-16x16.png b/pkgdown/favicon/favicon-16x16.png new file mode 100644 index 000000000..b19cc9d60 Binary files /dev/null and b/pkgdown/favicon/favicon-16x16.png differ diff --git a/pkgdown/favicon/favicon-32x32.png b/pkgdown/favicon/favicon-32x32.png new file mode 100644 index 000000000..7c2ce1b08 Binary files /dev/null and b/pkgdown/favicon/favicon-32x32.png differ diff --git a/pkgdown/favicon/favicon.ico b/pkgdown/favicon/favicon.ico new file mode 100644 index 000000000..1f36b217a Binary files /dev/null and b/pkgdown/favicon/favicon.ico differ diff --git a/tests/testthat/_snaps/check_distribution.md b/tests/testthat/_snaps/check_distribution.md index 11187a23b..38a2fdca7 100644 --- a/tests/testthat/_snaps/check_distribution.md +++ b/tests/testthat/_snaps/check_distribution.md @@ -7,15 +7,14 @@ Predicted Distribution of Residuals - Distribution Probability - cauchy 94% - lognormal 3% - weibull 3% + Distribution Probability + cauchy 91% + gamma 6% + neg. binomial (zero-infl.) 
3% Predicted Distribution of Response - Distribution Probability - lognormal 47% - gamma 44% - beta-binomial 3% + Distribution Probability + lognormal 66% + gamma 34% diff --git a/tests/testthat/test-binned_residuals.R b/tests/testthat/test-binned_residuals.R index e23e05d01..c0c6a4be7 100644 --- a/tests/testthat/test-binned_residuals.R +++ b/tests/testthat/test-binned_residuals.R @@ -162,7 +162,6 @@ test_that("binned_residuals, bootstrapped CI", { test_that("binned_residuals, msg for non-bernoulli", { skip_on_cran() - skip_if(packageVersion("insight") < "0.19.7") tot <- rep(10, 100) suc <- rbinom(100, prob = 0.9, size = tot) diff --git a/tests/testthat/test-check_collinearity.R b/tests/testthat/test-check_collinearity.R index 042142073..ba456ae6f 100644 --- a/tests/testthat/test-check_collinearity.R +++ b/tests/testthat/test-check_collinearity.R @@ -216,7 +216,6 @@ test_that("check_collinearity, hurdle/zi models w/o zi-formula", { }) test_that("check_collinearity, invalid data", { - skip_if(packageVersion("insight") < "0.19.8.2") dd <- data.frame(y = as.difftime(0:5, units = "days")) m1 <- lm(y ~ 1, data = dd) expect_error(check_collinearity(m1), "Can't extract variance-covariance matrix") diff --git a/tests/testthat/test-check_distribution.R b/tests/testthat/test-check_distribution.R index e8ab2835f..ca2e134be 100644 --- a/tests/testthat/test-check_distribution.R +++ b/tests/testthat/test-check_distribution.R @@ -18,16 +18,16 @@ test_that("check_distribution", { expect_equal( out$p_Residuals, c( - 0, 0, 0, 0, 0.9375, 0, 0, 0, 0, 0, 0, 0.03125, 0, 0, 0, 0, - 0, 0, 0, 0, 0.03125 + 0, 0, 0, 0, 0.90625, 0, 0, 0, 0.0625, 0, 0, 0, 0.03125, 0, 0, + 0, 0, 0, 0, 0, 0 ), tolerance = 1e-4 ) expect_equal( out$p_Response, c( - 0, 0, 0.03125, 0, 0, 0, 0, 0, 0.4375, 0.03125, 0, 0.46875, - 0.03125, 0, 0, 0, 0, 0, 0, 0, 0 + 0, 0, 0, 0, 0, 0, 0, 0, 0.34375, 0, 0, 0.65625, 0, 0, 0, 0, + 0, 0, 0, 0, 0 ), tolerance = 1e-4 ) diff --git a/tests/testthat/test-check_heterogeneity_bias.R 
b/tests/testthat/test-check_heterogeneity_bias.R index 2bd63856e..7042f3064 100644 --- a/tests/testthat/test-check_heterogeneity_bias.R +++ b/tests/testthat/test-check_heterogeneity_bias.R @@ -2,29 +2,29 @@ test_that("check_heterogeneity_bias", { data(iris) set.seed(123) iris$ID <- sample.int(4, nrow(iris), replace = TRUE) # fake-ID - out <- check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), group = "ID") + out <- check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), by = "ID") expect_equal(out, c("Sepal.Length", "Petal.Length"), ignore_attr = TRUE) expect_output(print(out), "Possible heterogeneity bias due to following predictors: Sepal\\.Length, Petal\\.Length") - out <- check_heterogeneity_bias(iris, select = ~ Sepal.Length + Petal.Length, group = ~ID) + out <- check_heterogeneity_bias(iris, select = ~ Sepal.Length + Petal.Length, by = ~ID) expect_equal(out, c("Sepal.Length", "Petal.Length"), ignore_attr = TRUE) expect_output(print(out), "Possible heterogeneity bias due to following predictors: Sepal\\.Length, Petal\\.Length") m <- lm(Sepal.Length ~ Petal.Length + Petal.Width + Species + ID, data = iris) expect_error( - check_heterogeneity_bias(m, select = c("Sepal.Length", "Petal.Length"), group = "ID"), + check_heterogeneity_bias(m, select = c("Sepal.Length", "Petal.Length"), by = "ID"), regex = "no mixed model" ) skip_if_not_installed("lme4") m <- lme4::lmer(Sepal.Length ~ Petal.Length + Petal.Width + Species + (1 | ID), data = iris) - out <- check_heterogeneity_bias(m, select = c("Sepal.Length", "Petal.Length"), group = "ID") + out <- check_heterogeneity_bias(m, select = c("Sepal.Length", "Petal.Length"), by = "ID") expect_equal(out, c("Petal.Length", "Petal.Width", "Species"), ignore_attr = TRUE) expect_output( print(out), "Possible heterogeneity bias due to following predictors: Petal\\.Length, Petal\\.Width, Species" ) - out <- check_heterogeneity_bias(m, select = ~ Sepal.Length + Petal.Length, group = ~ID) + 
out <- check_heterogeneity_bias(m, select = ~ Sepal.Length + Petal.Length, by = ~ID) expect_equal(out, c("Petal.Length", "Petal.Width", "Species"), ignore_attr = TRUE) expect_output( print(out), diff --git a/tests/testthat/test-check_model.R b/tests/testthat/test-check_model.R index 6543e5065..4e23653b3 100644 --- a/tests/testthat/test-check_model.R +++ b/tests/testthat/test-check_model.R @@ -38,7 +38,6 @@ test_that("`check_outliers()` works if convergence issues", { }) test_that("`check_model()` for invalid models", { - skip_if(packageVersion("insight") < "0.19.8.2") dd <- data.frame(y = as.difftime(0:5, units = "days")) m1 <- lm(y ~ 1, data = dd) expect_error(check_model(m1)) diff --git a/tests/testthat/test-check_outliers.R b/tests/testthat/test-check_outliers.R index 704b27feb..6aa64516f 100644 --- a/tests/testthat/test-check_outliers.R +++ b/tests/testthat/test-check_outliers.R @@ -255,6 +255,7 @@ test_that("multiple methods with ID", { x$outlier_var$zscore$mpg$car, "Toyota Corolla" ) + skip_if(getRversion() > "4.4.0") expect_identical( x$outlier_count$all$car[1], "Maserati Bora" diff --git a/tests/testthat/test-check_predictions.R b/tests/testthat/test-check_predictions.R index 1f7774c8a..5eb44658f 100644 --- a/tests/testthat/test-check_predictions.R +++ b/tests/testthat/test-check_predictions.R @@ -82,7 +82,6 @@ test_that("check_predictions, glmmTMB", { test_that("check_predictions, glm, binomial", { - skip_if(packageVersion("insight") <= "0.19.6") data(mtcars) set.seed(1) tot <- rep(10, 100) diff --git a/tests/testthat/test-cronbachs_alpha.R b/tests/testthat/test-cronbachs_alpha.R index ecf244745..fcc6e743d 100644 --- a/tests/testthat/test-cronbachs_alpha.R +++ b/tests/testthat/test-cronbachs_alpha.R @@ -1,4 +1,6 @@ test_that("cronbachs_alpha, data frame", { + skip_if_not_installed("parameters") + data(mtcars) x <- mtcars[, c("cyl", "gear", "carb", "hp")] expect_equal(cronbachs_alpha(x), 0.09463206, tolerance = 1e-3) @@ -9,7 +11,8 @@ 
test_that("cronbachs_alpha", { }) test_that("cronbachs_alpha, principal_components", { - skip_if_not_installed("parameters", minimum_version = "0.21.3") + skip_if_not_installed("parameters") + pca <- parameters::principal_components(mtcars[, c("cyl", "gear", "carb", "hp")], n = 2) expect_equal(cronbachs_alpha(pca, verbose = FALSE), c(PC1 = 0.1101384), tolerance = 1e-3) expect_message(cronbachs_alpha(pca), regex = "Too few") @@ -20,12 +23,15 @@ test_that("cronbachs_alpha, principal_components", { }) test_that("cronbachs_alpha, principal_components", { - skip_if_not_installed("parameters", minimum_version = "0.20.3") + skip_if_not_installed("parameters") + pca <- parameters::principal_components(mtcars, n = 2) expect_equal(cronbachs_alpha(pca), c(PC1 = 0.4396, PC2 = -1.44331), tolerance = 1e-3) }) test_that("cronbachs_alpha, matrix", { + skip_if_not_installed("parameters") + m <- as.matrix(mtcars[c("cyl", "gear", "carb", "hp")]) expect_equal(cronbachs_alpha(m), 0.09463206, tolerance = 1e-3) }) diff --git a/tests/testthat/test-helpers.R b/tests/testthat/test-helpers.R index d1d6a5545..91b09db34 100644 --- a/tests/testthat/test-helpers.R +++ b/tests/testthat/test-helpers.R @@ -1,7 +1,7 @@ skip_on_cran() skip_if_not_installed("withr") withr::with_options( - list(easystats_erros = TRUE), + list(easystats_errors = TRUE), test_that(".safe works with options", { expect_error(performance:::.safe(mean(fd)), regex = "object 'fd' not found") expect_identical(performance:::.safe(mean(fd), 1L), 1L) diff --git a/tests/testthat/test-icc.R b/tests/testthat/test-icc.R index 68624ff09..e537b8b1f 100644 --- a/tests/testthat/test-icc.R +++ b/tests/testthat/test-icc.R @@ -40,13 +40,13 @@ test_that("icc", { skip_on_cran() skip_if_not_installed("curl") skip_if_offline() - skip_if_not_installed("httr") + skip_if_not_installed("httr2") m2 <- insight::download_model("stanreg_lmerMod_1") expect_equal( icc(m2), data.frame( - ICC_adjusted = 0.399303562702568, ICC_conditional = 0.216907586891627, 
- ICC_unadjusted = 0.216907586891627 + ICC_adjusted = 0.40579, ICC_conditional = 0.21881, + ICC_unadjusted = 0.21881 ), tolerance = 1e-2, ignore_attr = TRUE @@ -57,7 +57,7 @@ test_that("icc", { skip_on_cran() skip_if_not_installed("curl") skip_if_offline() - skip_if_not_installed("httr") + skip_if_not_installed("httr2") m3 <- insight::download_model("brms_mixed_1") set.seed(123) expect_equal( @@ -71,7 +71,7 @@ test_that("icc", { skip_on_cran() skip_if_not_installed("curl") skip_if_offline() - skip_if_not_installed("httr") + skip_if_not_installed("httr2") m3 <- insight::download_model("brms_mixed_1") set.seed(123) expect_equal( diff --git a/tests/testthat/test-model_performance.bayesian.R b/tests/testthat/test-model_performance.bayesian.R index a15e41a10..da4b6ff18 100644 --- a/tests/testthat/test-model_performance.bayesian.R +++ b/tests/testthat/test-model_performance.bayesian.R @@ -2,9 +2,9 @@ test_that("model_performance.stanreg", { skip_on_cran() skip_if_not_installed("curl") skip_if_offline() - skip_if_not_installed("httr") + skip_if_not_installed("httr2") set.seed(333) - model <- tryCatch(insight::download_model("stanreg_lm_1"), error = function(e) NULL) + model <- insight::download_model("stanreg_lm_1") skip_if(is.null(model)) perf <- model_performance(model) @@ -12,7 +12,7 @@ test_that("model_performance.stanreg", { expect_equal(perf$R2_adjusted, 0.7162912, tolerance = 1e-3) expect_equal(perf$ELPD, -83.49838, tolerance = 1e-3) - model <- tryCatch(insight::download_model("stanreg_lm_2"), error = function(e) NULL) + model <- insight::download_model("stanreg_lm_2") skip_if(is.null(model)) perf <- model_performance(model) @@ -20,7 +20,7 @@ test_that("model_performance.stanreg", { expect_equal(perf$R2_adjusted, 0.7979026, tolerance = 1e-3) expect_equal(perf$ELPD, -78.38735, tolerance = 1e-3) - model <- tryCatch(insight::download_model("stanreg_lmerMod_1"), error = function(e) NULL) + model <- insight::download_model("stanreg_lmerMod_1") skip_if(is.null(model)) 
perf <- model_performance(model) @@ -34,10 +34,10 @@ test_that("model_performance.brmsfit", { skip_on_cran() skip_if_not_installed("curl") skip_if_offline() - skip_if_not_installed("httr") + skip_if_not_installed("httr2") set.seed(333) - model <- tryCatch(insight::download_model("brms_1"), error = function(e) NULL) + model <- insight::download_model("brms_1") skip_if(is.null(model)) expect_message({ perf <- model_performance(model) @@ -50,7 +50,7 @@ test_that("model_performance.brmsfit", { "RMSE", "Sigma" )) - model <- tryCatch(insight::download_model("brms_mixed_4"), error = function(e) NULL) + model <- insight::download_model("brms_mixed_4") skip_if(is.null(model)) expect_message({ perf <- model_performance(model) @@ -63,7 +63,7 @@ test_that("model_performance.brmsfit", { "R2_adjusted", "R2_adjusted_marginal", "ICC", "RMSE", "Sigma" )) - model <- tryCatch(insight::download_model("brms_ordinal_1"), error = function(e) NULL) + model <- insight::download_model("brms_ordinal_1") skip_if(is.null(model)) perf <- suppressWarnings(model_performance(model)) expect_equal(perf$R2, 0.8760015, tolerance = 1e-3) diff --git a/tests/testthat/test-model_performance.merMod.R b/tests/testthat/test-model_performance.merMod.R index 0c70da3c3..82360de46 100644 --- a/tests/testthat/test-model_performance.merMod.R +++ b/tests/testthat/test-model_performance.merMod.R @@ -2,7 +2,7 @@ test_that("model_performance.merMod", { skip_on_cran() skip_if_not_installed("curl") skip_if_offline() - skip_if_not_installed("httr") + skip_if_not_installed("httr2") model <- insight::download_model("lmerMod_1") expect_equal(model_performance(model, estimator = "ML")$AIC, AIC(logLik(model, REML = FALSE)), tolerance = 0.01) diff --git a/tests/testthat/test-nestedLogit.R b/tests/testthat/test-nestedLogit.R index 47a852b04..52ffaddb9 100644 --- a/tests/testthat/test-nestedLogit.R +++ b/tests/testthat/test-nestedLogit.R @@ -1,5 +1,4 @@ skip_on_os(c("mac", "linux")) -skip_if(packageVersion("insight") <= 
"0.19.5.10") skip_if_not_installed("nestedLogit") skip_if_not_installed("carData") diff --git a/tests/testthat/test-pkg-ivreg.R b/tests/testthat/test-pkg-ivreg.R index 310af142c..83b534938 100644 --- a/tests/testthat/test-pkg-ivreg.R +++ b/tests/testthat/test-pkg-ivreg.R @@ -1,6 +1,5 @@ test_that("Issue #530 from the `modelsummary` repo", { skip_if_not_installed("ivreg") - skip_if_not(packageVersion("insight") >= "0.19.1.3") # formatting of results # for ivreg diagnostics data(mtcars) iv_model <- suppressMessages(ivreg::ivreg(mpg ~ qsec + cyl + drat | disp | wt, data = mtcars)) diff --git a/tests/testthat/test-r2_ferrari.R b/tests/testthat/test-r2_ferrari.R new file mode 100644 index 000000000..35aab72c1 --- /dev/null +++ b/tests/testthat/test-r2_ferrari.R @@ -0,0 +1,37 @@ +test_that("r2_ferarri", { + skip_if_not_installed("betareg") + data("GasolineYield", package = "betareg") + model <- betareg::betareg(yield ~ batch + temp, data = GasolineYield) + out <- r2_ferrari(model) + expect_equal(out$R2, summary(model)$pseudo.r.squared, tolerance = 1e-3, ignore_attr = TRUE) +}) + + +test_that("r2_ferarri", { + skip_if_not_installed("betareg") + skip_if_not_installed("glmmTMB") + data("GasolineYield", package = "betareg") + model <- glmmTMB::glmmTMB( + yield ~ batch + temp, + data = GasolineYield, + family = glmmTMB::beta_family() + ) + out <- r2_ferrari(model) + expect_equal(out$R2, c(`Ferrari's R2` = 0.96173), tolerance = 1e-3, ignore_attr = TRUE) +}) + + +test_that("r2_ferarri", { + skip_if_not_installed("betareg") + skip_if_not_installed("glmmTMB") + skip_if_not_installed("lme4") + data(sleepstudy, package = "lme4") + sleepstudy$y <- datawizard::normalize(sleepstudy$Reaction) + m <- glmmTMB::glmmTMB( + y ~ Days, + data = sleepstudy, + family = glmmTMB::ordbeta() + ) + out <- r2(m) + expect_equal(out$R2, c(`Ferrari's R2` = 0.2354701), tolerance = 1e-3, ignore_attr = TRUE) +}) diff --git a/tests/testthat/test-r2_nakagawa.R b/tests/testthat/test-r2_nakagawa.R index 
541109944..12ea23f00 100644 --- a/tests/testthat/test-r2_nakagawa.R +++ b/tests/testthat/test-r2_nakagawa.R @@ -1,3 +1,4 @@ +skip_on_os("mac") skip_if_not_installed("lme4") model <- lme4::lmer(Sepal.Length ~ Petal.Length + (1 | Species), data = iris) diff --git a/tests/testthat/test-rmse.R b/tests/testthat/test-rmse.R index 151aabb34..e19bd49b3 100644 --- a/tests/testthat/test-rmse.R +++ b/tests/testthat/test-rmse.R @@ -16,3 +16,28 @@ test_that("rmse", { ) expect_equal(cp$RMSE, c(47.4489, 47.39881, 47.38701, 47.41375, 47.39979, 47.38705), tolerance = 1e-3) }) + +test_that("rmse, ci", { + data(mtcars) + model <- lm(mpg ~ hp + gear, data = mtcars) + # analytical + out <- performance_rmse(model, ci = 0.95, ci_method = "analytical") + expect_equal(out$CI_low, 2.30486, tolerance = 1e-4) + expect_equal(out$CI_high, 3.79093, tolerance = 1e-4) + + # bootstrapped + set.seed(123) + out <- performance_rmse(model, ci = 0.95, ci_method = "boot") + expect_equal(out$CI_low, 1.9494, tolerance = 1e-3) + expect_equal(out$CI_high, 3.38406, tolerance = 1e-3) + + # bootstrapped, mixed models + skip_on_cran() + skip_if_not_installed("lme4") + data(sleepstudy, package = "lme4") + m <- lme4::lmer(Reaction ~ Days + (1 | Subject), data = sleepstudy) + set.seed(123) + out <- performance_rmse(m, ci = 0.95, iterations = 100) + expect_equal(out$CI_low, 26.26066, tolerance = 1e-3) + expect_equal(out$CI_high, 32.5642, tolerance = 1e-3) +})