
Commit

Merge branch 'main' into check_smooth
strengejacke committed Jul 13, 2024
2 parents 0a5bae4 + 413d85b commit b0af181
Showing 100 changed files with 1,025 additions and 400 deletions.
17 changes: 2 additions & 15 deletions .gitignore
@@ -1,65 +1,52 @@
# History files
.Rhistory
.Rapp.history

# Session Data files
.RData

# Example code in package build process
*-Ex.R

# Output files from R CMD build
/*.tar.gz

# Output files from R CMD check
/*.Rcheck/
/revdep/
revdep

# RStudio files
.Rproj.user/
*.Rproj

# produced vignettes
vignettes/*.html
vignettes/*.pdf

# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
.httr-oauth

# knitr and R markdown default cache directories
/*_cache/
/cache/

# Temporary files created by R markdown
*.utf8.md
*.knit.md

# Shiny token, see https://shiny.rstudio.com/articles/shinyapps.html
rsconnect/
inst/doc

=========================
# Operating System Files
# OSX
.DS_Store
.AppleDouble
.LSOverride

# Thumbnails
._*

# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns

# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
.apdisk
.Rprofile
6 changes: 3 additions & 3 deletions CRAN-SUBMISSION
@@ -1,3 +1,3 @@
Version: 0.11.0
Date: 2024-03-22 21:30:58 UTC
SHA: 051b9bb2b7721c632ce145f85c55aa55c8eebf90
Version: 0.12.0
Date: 2024-06-07 17:11:44 UTC
SHA: cb1c46609c8f943a736f3c76b5cadd4272e7bdf2
26 changes: 13 additions & 13 deletions DESCRIPTION
@@ -1,8 +1,8 @@
Type: Package
Package: performance
Title: Assessment of Regression Models Performance
Version: 0.11.0.5
Authors@R:
Version: 0.12.0.9
Authors@R:
c(person(given = "Daniel",
family = "Lüdecke",
role = c("aut", "cre"),
@@ -39,18 +39,18 @@ Authors@R:
email = "[email protected]",
comment = c(ORCID = "0000-0003-4315-6788", Twitter = "@rempsyc")),
person(given = "Vincent",
family = "Arel-Bundock",
email = "[email protected]",
family = "Arel-Bundock",
email = "[email protected]",
role = "ctb",
comment = c(ORCID = "0000-0003-2042-7063")),
person(given = "Martin",
family = "Jullum",
role = "rev"),
person(given = "gjo11",
role = "rev"),
person("Etienne",
"Bacher", ,
"[email protected]",
person("Etienne",
"Bacher", ,
"[email protected]",
role = "ctb",
comment = c(ORCID = "0000-0002-9271-5075")))
Maintainer: Daniel Lüdecke <[email protected]>
@@ -70,7 +70,7 @@ Depends:
R (>= 3.6)
Imports:
bayestestR (>= 0.13.2),
insight (>= 0.19.10),
insight (>= 0.20.2),
datawizard (>= 0.10.0),
stats,
utils
@@ -93,15 +93,15 @@ Suggests:
DHARMa,
estimatr,
fixest,
flextable,
flextable,
forecast,
ftExtra,
gamm4,
ggplot2,
glmmTMB,
graphics,
Hmisc,
httr,
httr2,
ICS,
ICSOutlier,
ISLR,
@@ -124,13 +124,14 @@ Suggests:
nonnest2,
ordinal,
parallel,
parameters (>= 0.21.4),
parameters (>= 0.21.6),
patchwork,
pscl,
psych,
quantreg,
qqplotr (>= 0.0.6),
randomForest,
RcppEigen,
rempsyc,
rmarkdown,
rstanarm,
@@ -145,7 +146,7 @@ Suggests:
withr (>= 3.0.0)
Encoding: UTF-8
Language: en-US
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
Roxygen: list(markdown = TRUE)
Config/testthat/edition: 3
Config/testthat/parallel: true
@@ -154,4 +155,3 @@ Config/Needs/website:
r-lib/pkgdown,
easystats/easystatstemplate
Config/rcmdcheck/ignore-inconsequential-notes: true
Remotes: easystats/see
4 changes: 4 additions & 0 deletions NAMESPACE
@@ -148,6 +148,7 @@ S3method(display,test_performance)
S3method(fitted,BFBayesFactor)
S3method(format,compare_performance)
S3method(format,performance_model)
S3method(format,performance_rmse)
S3method(format,test_performance)
S3method(logLik,cpglm)
S3method(logLik,iv_robust)
@@ -319,6 +320,7 @@ S3method(print,performance_hosmer)
S3method(print,performance_model)
S3method(print,performance_pcp)
S3method(print,performance_pp_check)
S3method(print,performance_rmse)
S3method(print,performance_roc)
S3method(print,performance_score)
S3method(print,performance_simres)
@@ -451,6 +453,7 @@ S3method(r2_coxsnell,survreg)
S3method(r2_coxsnell,svycoxph)
S3method(r2_coxsnell,truncreg)
S3method(r2_efron,default)
S3method(r2_ferrari,default)
S3method(r2_kullback,default)
S3method(r2_kullback,glm)
S3method(r2_loo_posterior,BFBayesFactor)
@@ -598,6 +601,7 @@ export(r2)
export(r2_bayes)
export(r2_coxsnell)
export(r2_efron)
export(r2_ferrari)
export(r2_kullback)
export(r2_loo)
export(r2_loo_posterior)
32 changes: 31 additions & 1 deletion NEWS.md
@@ -1,10 +1,37 @@
# performance 0.11.1
# performance 0.12.1

## General

* `icc()` and `r2_nakagawa()` get a `null_model` argument. This can be useful
when computing R2 or ICC for mixed models, where the internal computation of
the null model fails, or when you already have fit the null model and want
to save time.

* `icc()` and `r2_nakagawa()` get an `approximation` argument indicating the
approximation method for the distribution-specific (residual) variance. See
Nakagawa et al. 2017 for details.

* `icc()` and `r2_nakagawa()` get a `model_component` argument indicating the
component for zero-inflation or hurdle models.

* `performance_rmse()` (resp. `rmse()`) can now compute analytical and
bootstrapped confidence intervals. The function gains the following new
arguments: `ci`, `ci_method` and `iterations` (see the usage sketch after
this list).

* New function `r2_ferrari()` to compute Ferrari & Cribari-Neto's R2 for
generalized linear models, in particular beta-regression.
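
To make the entries above concrete, here is a minimal usage sketch based only on the argument names listed in this changelog. The fitted model objects are placeholders, and the exact values accepted by `ci_method` are an assumption to be checked against the function documentation.

```r
library(performance)
library(lme4)     # example mixed model and the `sleepstudy` data
library(betareg)  # example beta-regression and the `GasolineYield` data

# Mixed model plus a pre-fitted null model; the new `null_model` argument
# lets icc()/r2_nakagawa() skip refitting the null model internally.
m <- lmer(Reaction ~ Days + (1 + Days | Subject), data = sleepstudy)
m_null <- lmer(Reaction ~ 1 + (1 + Days | Subject), data = sleepstudy)
icc(m, null_model = m_null)
r2_nakagawa(m, null_model = m_null)

# RMSE with bootstrapped confidence intervals via the new `ci`, `ci_method`
# and `iterations` arguments (the value "boot" is an assumption).
performance_rmse(m, ci = 0.95, ci_method = "boot", iterations = 200)

# Ferrari & Cribari-Neto's R2 for a beta-regression model.
m_beta <- betareg(yield ~ batch + temp, data = GasolineYield)
r2_ferrari(m_beta)
```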

# performance 0.12.0

## Breaking

* Aliases `posterior_predictive_check()` and `check_posterior_predictions()` for
`check_predictions()` are deprecated.

* Arguments named `group` or `group_by` will be deprecated in a future release.
Please use `by` instead. This affects `check_heterogeneity_bias()` in
*performance*.
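
As a brief illustration of the renaming (the `select` argument and the toy grouping variable are illustrative, not taken from this commit):

```r
library(performance)

# Toy data: add an artificial grouping variable to iris
set.seed(123)
iris$ID <- sample(1:4, nrow(iris), replace = TRUE)

# Old spelling, to be deprecated:
# check_heterogeneity_bias(iris, select = "Sepal.Length", group = "ID")

# New spelling, using `by`:
check_heterogeneity_bias(iris, select = "Sepal.Length", by = "ID")
```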

## General

* Improved documentation and new vignettes added.
@@ -15,6 +42,9 @@
the usual style as for other models and no longer returns plots from
`bayesplot::pp_check()`.

* Updated the trained model that is used to predict distributions in
`check_distribution()`.

## Bug fixes

* `check_model()` now falls back on normal Q-Q plots when a model is not supported
1 change: 0 additions & 1 deletion R/binned_residuals.R
@@ -86,7 +86,6 @@ binned_residuals <- function(model,
iterations = 1000,
verbose = TRUE,
...) {
# match arguments
ci_type <- match.arg(ci_type)
residuals <- match.arg(residuals)

1 change: 0 additions & 1 deletion R/check_autocorrelation.R
@@ -29,7 +29,6 @@ check_autocorrelation <- function(x, ...) {
#' @rdname check_autocorrelation
#' @export
check_autocorrelation.default <- function(x, nsim = 1000, ...) {
# check for valid input
.is_model_valid(x)

.residuals <- stats::residuals(x)
2 changes: 1 addition & 1 deletion R/check_clusterstructure.R
@@ -11,7 +11,7 @@
#' number of square shaped blocks along the diagonal.
#'
#' @param x A data frame.
#' @param standardize Standardize the dataframe before clustering (default).
#' @param standardize Standardize the data frame before clustering (default).
#' @param distance Distance method used. Other methods than "euclidean"
#' (default) are exploratory in the context of clustering tendency. See
#' [stats::dist()] for list of available methods.
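
A short usage sketch for the parameters documented above (the choice of the numeric `iris` columns is illustrative):

```r
library(performance)

# Assess the clustering tendency of a numeric data frame
check_clusterstructure(iris[, 1:4], standardize = TRUE, distance = "euclidean")
```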
1 change: 0 additions & 1 deletion R/check_collinearity.R
@@ -145,7 +145,6 @@ multicollinearity <- check_collinearity
#' @rdname check_collinearity
#' @export
check_collinearity.default <- function(x, ci = 0.95, verbose = TRUE, ...) {
# check for valid input
.is_model_valid(x)
.check_collinearity(x, component = "conditional", ci = ci, verbose = verbose)
}
1 change: 0 additions & 1 deletion R/check_convergence.R
@@ -76,7 +76,6 @@ check_convergence <- function(x, tolerance = 0.001, ...) {

#' @export
check_convergence.default <- function(x, tolerance = 0.001, ...) {
# check for valid input
.is_model_valid(x)
message(sprintf("`check_convergence()` does not work for models of class '%s'.", class(x)[1]))
}
40 changes: 28 additions & 12 deletions R/check_distribution.R
@@ -34,12 +34,11 @@ NULL
#' This function uses an internal random forest model to classify the
#' distribution from a model-family. Currently, the following distributions are
#' trained (i.e. results of `check_distribution()` may be one of the
#' following): `"bernoulli"`, `"beta"`, `"beta-binomial"`,
#' `"binomial"`, `"chi"`, `"exponential"`, `"F"`,
#' `"gamma"`, `"lognormal"`, `"normal"`, `"negative
#' binomial"`, `"negative binomial (zero-inflated)"`, `"pareto"`,
#' `"poisson"`, `"poisson (zero-inflated)"`, `"uniform"` and
#' `"weibull"`.
#' following): `"bernoulli"`, `"beta"`, `"beta-binomial"`, `"binomial"`,
#' `"cauchy"`, `"chi"`, `"exponential"`, `"F"`, `"gamma"`, `"half-cauchy"`,
#' `"inverse-gamma"`, `"lognormal"`, `"normal"`, `"negative binomial"`,
#' `"negative binomial (zero-inflated)"`, `"pareto"`, `"poisson"`,
#' `"poisson (zero-inflated)"`, `"tweedie"`, `"uniform"` and `"weibull"`.
#' \cr \cr
#' Note the similarity between certain distributions according to shape, skewness,
#' etc. Thus, the predicted distribution may not be perfectly representing the
@@ -67,7 +66,6 @@ check_distribution <- function(model) {

#' @export
check_distribution.default <- function(model) {
# check for valid input
.is_model_valid(model)

insight::check_if_installed("randomForest")
@@ -193,23 +191,40 @@ check_distribution.numeric <- function(model) {
# validation check, remove missings
x <- x[!is.na(x)]

# this might fail, so we wrap in ".safe()"
map_est <- .safe(mean(x) - as.numeric(bayestestR::map_estimate(x, bw = "nrd0")))
mode_value <- NULL
# find mode for integer, or MAP for distributions
if (all(.is_integer(x))) {
mode_value <- datawizard::distribution_mode(x)
} else {
# this might fail, so we wrap in ".safe()"
mode_value <- tryCatch(
as.numeric(bayestestR::map_estimate(x, bw = "nrd0")),
error = function(e) NULL
)
if (is.null(mode_value)) {
mode_value <- tryCatch(
as.numeric(bayestestR::map_estimate(x, bw = "kernel")),
error = function(e) NULL
)
}
}

if (is.null(map_est)) {
map_est <- mean(x) - datawizard::distribution_mode(x)
if (is.null(mode_value)) {
mean_mode_diff <- mean(x) - datawizard::distribution_mode(x)
msg <- "Could not accurately estimate the mode."
if (!is.null(type)) {
msg <- paste(msg, "Predicted distribution of the", type, "may be less accurate.")
}
insight::format_alert(msg)
} else {
mean_mode_diff <- .safe(mean(x) - mode_value)
}

data.frame(
SD = stats::sd(x),
MAD = stats::mad(x, constant = 1),
Mean_Median_Distance = mean(x) - stats::median(x),
Mean_Mode_Distance = map_est,
Mean_Mode_Distance = mean_mode_diff,
SD_MAD_Distance = stats::sd(x) - stats::mad(x, constant = 1),
Var_Mean_Distance = stats::var(x) - mean(x),
Range_SD = diff(range(x)) / stats::sd(x),
@@ -222,6 +237,7 @@
Min = min(x),
Max = max(x),
Proportion_Positive = sum(x >= 0) / length(x),
Proportion_Zero = sum(x == 0) / length(x),
Integer = all(.is_integer(x))
)
}
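
As an aside to the docstring and feature-extraction changes above, a minimal sketch of how `check_distribution()` is typically called (the Poisson model is an arbitrary illustration; the suggested 'randomForest' package must be installed):

```r
library(performance)

# Simulate a count outcome and fit a Poisson GLM
set.seed(1)
d <- data.frame(x = rnorm(200))
d$y <- rpois(200, lambda = exp(0.5 + 0.8 * d$x))
m <- glm(y ~ x, data = d, family = poisson())

# Classify the most likely response and residual distributions
check_distribution(m)
```
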
2 changes: 1 addition & 1 deletion R/check_factorstructure.R
@@ -50,7 +50,7 @@
#' excluding them from the analysis (note that you would need to re-compute the
#' KMO indices as they are dependent on the whole dataset).
#'
#' @param x A dataframe or a correlation matrix. If the latter is passed, `n`
#' @param x A data frame or a correlation matrix. If the latter is passed, `n`
#' must be provided.
#' @param n If a correlation matrix was passed, the number of observations must
#' be specified.
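
A sketch of the two input modes described above, either raw data or a correlation matrix plus `n` (the dataset choice is illustrative):

```r
library(performance)

# Using a raw data frame
check_factorstructure(mtcars)

# Equivalently, using a correlation matrix plus the number of observations
r <- cor(mtcars)
check_factorstructure(r, n = nrow(mtcars))
```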