Merge branch 'main' into JOSE_paper

easystats · Sep 19, 2023 · 409ed4c · 409ed4c
2 parents 0084952 + d555b2a
commit 409ed4c
Show file tree

Hide file tree

Showing 83 changed files with 1,074 additions and 634 deletions.
diff --git a/.github/SUPPORT.md b/.github/SUPPORT.md
@@ -0,0 +1,29 @@
+# Getting help with `{performance}`
+
+Thanks for using `{performance}`. Before filing an issue, there are a few places
+to explore and pieces to put together to make the process as smooth as possible.
+
+Start by making a minimal **repr**oducible **ex**ample using the
+[reprex](https://reprex.tidyverse.org/) package. If you haven't heard of or used
+reprex before, you're in for a treat! Seriously, reprex will make all of your
+R-question-asking endeavors easier (which is a pretty insane ROI for the five to
+ten minutes it'll take you to learn what it's all about). For additional reprex
+pointers, check out the [Get help!](https://www.tidyverse.org/help/) resource
+used by the tidyverse team.
+
+Armed with your reprex, the next step is to figure out where to ask:
+
+  * If it's a question: start with StackOverflow. There are more people there to answer questions.
+  * If it's a bug: you're in the right place, file an issue.
+  * If you're not sure: let's [discuss](https://github.com/easystats/performance/discussions) it and try to figure it out! If your
+    problem _is_ a bug or a feature request, you can easily return here and
+    report it.
+
+Before opening a new issue, be sure to [search issues and pull requests](https://github.com/easystats/performance/issues) to make sure the
+bug hasn't been reported and/or already fixed in the development version. By
+default, the search will be pre-populated with `is:issue is:open`. You can
+[edit the qualifiers](https://help.github.com/articles/searching-issues-and-pull-requests/)
+(e.g. `is:pr`, `is:closed`) as needed. For example, you'd simply
+remove `is:open` to search _all_ issues in the repo, open or closed.
+
+Thanks for your help!
diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION
@@ -1,3 +1,3 @@
-Version: 0.10.3
-Date: 2023-04-06 14:07:07 UTC
-SHA: 3198a3d95e27c0bc6470733dacf0496be7f96f43
+Version: 0.10.5
+Date: 2023-09-11 21:16:32 UTC
+SHA: c3348f5c1183042544ebdfc7dbaa9489186c71ea
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: performance
 Title: Assessment of Regression Models Performance
-Version: 0.10.3.1
+Version: 0.10.5.2
 Authors@R: 
     c(person(given = "Daniel",
              family = "Lüdecke",
@@ -70,7 +70,7 @@ Depends:
     R (>= 3.6)
 Imports:
     bayestestR (>= 0.13.0),
-    insight (>= 0.19.1),
+    insight (>= 0.19.4),
     datawizard (>= 0.7.0),
     methods,
     stats,
@@ -86,6 +86,7 @@ Suggests:
     boot,
     brms,
     car,
+    carData,
     CompQuadForm,
     correlation,
     cplm,
@@ -124,6 +125,7 @@ Suggests:
     patchwork,
     pscl,
     psych,
+    qqplotr (>= 0.0.6),
     randomForest,
     rmarkdown,
     rstanarm,
@@ -147,4 +149,4 @@ Config/Needs/website:
     r-lib/pkgdown,
     easystats/easystatstemplate
 Config/rcmdcheck/ignore-inconsequential-notes: true
-Remotes: easystats/insight, easystats/see
+Remotes: easystats/see, easystats/parameters
diff --git a/NAMESPACE b/NAMESPACE
@@ -45,6 +45,7 @@ S3method(check_collinearity,probitmfx)
 S3method(check_collinearity,zerocount)
 S3method(check_collinearity,zeroinfl)
 S3method(check_concurvity,gam)
+S3method(check_convergence,"_glm")
 S3method(check_convergence,default)
 S3method(check_convergence,glmmTMB)
 S3method(check_convergence,merMod)

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,28 @@
+# performance (development version)
+
+# performance 0.10.5
+
+## Changes to functions
+
+* More informative message for `test_*()` functions, clarifying that "nesting"
+  only refers to fixed effects parameters and that random effects are currently
+  ignored when detecting nested models.
+
+* `check_outliers()` for `"ICS"` method is now more stable and less likely to
+  fail.
+
+* `check_convergence()` now works for *parsnip* `_glm` models.
+
+## Bug fixes
+
+* `check_collinearity()` did not work for hurdle- or zero-inflated models of
+  package *pscl* when the model had no explicitly defined formula for the
+  zero-inflation model.
+
 # performance 0.10.4
 
+## Changes to functions
+
 * `icc()` and `r2_nakagawa()` gain a `ci_method` argument, to either calculate
   confidence intervals using `boot::boot()` (instead of `lme4::bootMer()`) when
   `ci_method = "boot"` or analytical confidence intervals
@@ -8,6 +31,22 @@
   bootstrapped intervals cannot be calculated at all. Note that the default
   computation method is preferred.
 
+* `check_predictions()` accepts a `bandwidth` argument (smoothing bandwidth),
+  which is passed down to the density estimation of the `plot()` method.
+
+* `check_predictions()` gains a `type` argument, which is passed down to the
+  `plot()` method to change plot-type (density or discrete dots/intervals).
+  By default, `type` is set to `"default"` for models without discrete outcomes,
+  and to `"discrete_interval"` otherwise.
+
+* `performance_accuracy()` now includes confidence intervals, and reports those
+  by default (the standard error is no longer reported, but still included).
+
+## Bug fixes
+
+* Fixed issue in `check_collinearity()` for _fixest_ models that used `i()`
+  to create interactions in formulas.
+
 # performance 0.10.3
 
 ## New functions
@@ -79,11 +118,6 @@
 
 * `r2()` gets `ci`, to compute (analytical) confidence intervals for the R2.
 
-* `check_predictions()` accepts a `bw` argument (smoothing bandwidth), which is
-  passed down to the `plot()` methods density-estimation. The default for the
-  smoothing bandwidth `bw` has changed from `"nrd0"` to `"nrd"`, which seems
-  to produce better fitting plots for non-gaussian models.
-
 * The model underlying `check_distribution()` was now also trained to detect
   cauchy, half-cauchy and inverse-gamma distributions.
 

diff --git a/R/binned_residuals.R b/R/binned_residuals.R
@@ -49,11 +49,13 @@
 #' # look at the data frame
 #' as.data.frame(result)
 #'
-#' \dontrun{
+#' \donttest{
 #' # plot
 #' if (require("see")) {
-#'   plot(result)
-#' }}
+#'   plot(result, show_dots = TRUE)
+#' }
+#' }
+#'
 #' @export
 binned_residuals <- function(model, term = NULL, n_bins = NULL, ...) {
   fv <- stats::fitted(model)

diff --git a/R/check_collinearity.R b/R/check_collinearity.R
@@ -104,7 +104,7 @@
 #' examples in R and Stan. 2nd edition. Chapman and Hall/CRC.
 #'
 #' - Vanhove, J. (2019). Collinearity isn't a disease that needs curing.
-#' [webpage](https://janhove.github.io/analysis/2019/09/11/collinearity)
+#' [webpage](https://janhove.github.io/posts/2019-09-11-collinearity/)
 #'
 #' - Zuur AF, Ieno EN, Elphick CS. A protocol for data exploration to avoid
 #' common statistical problems: Data exploration. Methods in Ecology and
@@ -190,7 +190,12 @@ plot.check_collinearity <- function(x, ...) {
 
   # format table for each "ViF" group - this ensures that CIs are properly formatted
   x <- insight::format_table(x)
-  colnames(x)[4] <- "Increased SE"
+  x <- datawizard::data_rename(
+    x,
+    pattern = "SE_factor",
+    replacement = "Increased SE",
+    verbose = FALSE
+  )
 
   if (length(low_vif)) {
     cat("\n")
@@ -435,6 +440,14 @@ check_collinearity.zerocount <- function(x,
 
   f <- insight::find_formula(x)
 
+  # hurdle or zeroinfl models may have no explicit zero-inflation formula, in
+  # which case the zero-inflation part uses the conditional formula
+  if (inherits(x, c("hurdle", "zeroinfl", "zerocount")) &&
+    component == "zero_inflated" &&
+    is.null(f[["zero_inflated"]])) {
+    f$zero_inflated <- f$conditional
+  }
+
   if (inherits(x, "mixor")) {
     terms <- labels(x$terms)
   } else {

diff --git a/R/check_convergence.R b/R/check_convergence.R
@@ -46,31 +46,27 @@
 #'
 #' @family functions to check model assumptions and and assess model quality
 #'
-#' @examples
-#' if (require("lme4")) {
-#'   data(cbpp)
-#'   set.seed(1)
-#'   cbpp$x <- rnorm(nrow(cbpp))
-#'   cbpp$x2 <- runif(nrow(cbpp))
+#' @examplesIf require("lme4") && require("glmmTMB")
+#' data(cbpp, package = "lme4")
+#' set.seed(1)
+#' cbpp$x <- rnorm(nrow(cbpp))
+#' cbpp$x2 <- runif(nrow(cbpp))
 #'
-#'   model <- glmer(
-#'     cbind(incidence, size - incidence) ~ period + x + x2 + (1 + x | herd),
-#'     data = cbpp,
-#'     family = binomial()
-#'   )
+#' model <- lme4::glmer(
+#'   cbind(incidence, size - incidence) ~ period + x + x2 + (1 + x | herd),
+#'   data = cbpp,
+#'   family = binomial()
+#' )
 #'
-#'   check_convergence(model)
-#' }
+#' check_convergence(model)
 #'
-#' \dontrun{
-#' if (require("glmmTMB")) {
-#'   model <- glmmTMB(
-#'     Sepal.Length ~ poly(Petal.Width, 4) * poly(Petal.Length, 4) +
-#'       (1 + poly(Petal.Width, 4) | Species),
-#'     data = iris
-#'   )
-#'   check_convergence(model)
-#' }
+#' \donttest{
+#' model <- suppressWarnings(glmmTMB::glmmTMB(
+#'   Sepal.Length ~ poly(Petal.Width, 4) * poly(Petal.Length, 4) +
+#'     (1 + poly(Petal.Width, 4) | Species),
+#'   data = iris
+#' ))
+#' check_convergence(model)
 #' }
 #' @export
 check_convergence <- function(x, tolerance = 0.001, ...) {
@@ -107,3 +103,9 @@ check_convergence.glmmTMB <- function(x, ...) {
   # https://github.com/glmmTMB/glmmTMB/issues/275
   isTRUE(x$sdr$pdHess)
 }
+
+
+#' @export
+check_convergence._glm <- function(x, ...) {
+  isTRUE(x$fit$converged)
+}
diff --git a/R/check_distribution.R b/R/check_distribution.R
@@ -48,15 +48,14 @@ NULL
 #' There is a `plot()` method, which shows the probabilities of all predicted
 #' distributions, however, only if the probability is greater than zero.
 #'
-#' @examples
-#' if (require("lme4") && require("parameters") &&
-#'   require("see") && require("patchwork") && require("randomForest")) {
-#'   data(sleepstudy)
+#' @examplesIf require("lme4") && require("parameters") && require("randomForest")
+#' data(sleepstudy, package = "lme4")
+#' model <<- lme4::lmer(Reaction ~ Days + (Days | Subject), sleepstudy)
+#' check_distribution(model)
+#'
+#' @examplesIf require("see") && require("patchwork") && require("randomForest")
+#' plot(check_distribution(model))
 #'
-#'   model <<- lmer(Reaction ~ Days + (Days | Subject), sleepstudy)
-#'   check_distribution(model)
-#'   plot(check_distribution(model))
-#' }
 #' @export
 check_distribution <- function(model) {
   UseMethod("check_distribution")
@@ -196,23 +195,23 @@ check_distribution.numeric <- function(model) {
   x <- x[!is.na(x)]
 
   data.frame(
-    "SD" = stats::sd(x),
-    "MAD" = stats::mad(x, constant = 1),
-    "Mean_Median_Distance" = mean(x) - stats::median(x),
-    "Mean_Mode_Distance" = mean(x) - as.numeric(bayestestR::map_estimate(x, bw = "nrd0")),
-    "SD_MAD_Distance" = stats::sd(x) - stats::mad(x, constant = 1),
-    "Var_Mean_Distance" = stats::var(x) - mean(x),
-    "Range_SD" = diff(range(x)) / stats::sd(x),
-    "Range" = diff(range(x)),
-    "IQR" = stats::IQR(x),
-    "Skewness" = .skewness(x),
-    "Kurtosis" = .kurtosis(x),
-    "Uniques" = length(unique(x)) / length(x),
-    "N_Uniques" = length(unique(x)),
-    "Min" = min(x),
-    "Max" = max(x),
-    "Proportion_Positive" = sum(x >= 0) / length(x),
-    "Integer" = all(.is_integer(x))
+    SD = stats::sd(x),
+    MAD = stats::mad(x, constant = 1),
+    Mean_Median_Distance = mean(x) - stats::median(x),
+    Mean_Mode_Distance = mean(x) - as.numeric(bayestestR::map_estimate(x, bw = "nrd0")),
+    SD_MAD_Distance = stats::sd(x) - stats::mad(x, constant = 1),
+    Var_Mean_Distance = stats::var(x) - mean(x),
+    Range_SD = diff(range(x)) / stats::sd(x),
+    Range = diff(range(x)),
+    IQR = stats::IQR(x),
+    Skewness = .skewness(x),
+    Kurtosis = .kurtosis(x),
+    Uniques = length(unique(x)) / length(x),
+    N_Uniques = length(unique(x)),
+    Min = min(x),
+    Max = max(x),
+    Proportion_Positive = sum(x >= 0) / length(x),
+    Integer = all(.is_integer(x))
   )
 }
 

diff --git a/R/check_factorstructure.R b/R/check_factorstructure.R
@@ -189,10 +189,22 @@ check_sphericity_bartlett <- function(x, n = NULL, ...) {
   out <- list(chisq = statistic, p = pval, dof = df)
 
   if (pval < 0.001) {
-    text <- sprintf("Bartlett's test of sphericity suggests that there is sufficient significant correlation in the data for factor analysis (Chisq(%i) = %.2f, %s).", df, statistic, insight::format_p(pval))
+    text <-
+      sprintf(
+        "Bartlett's test of sphericity suggests that there is sufficient significant correlation in the data for factor analysis (Chisq(%i) = %.2f, %s).",
+        df,
+        statistic,
+        insight::format_p(pval)
+      )
     color <- "green"
   } else {
-    text <- sprintf("Bartlett's test of sphericity suggests that there is not enough significant correlation in the data for factor analysis (Chisq(%i) = %.2f, %s).", df, statistic, insight::format_p(pval))
+    text <-
+      sprintf(
+        "Bartlett's test of sphericity suggests that there is not enough significant correlation in the data for factor analysis (Chisq(%i) = %.2f, %s).",
+        df,
+        statistic,
+        insight::format_p(pval)
+      )
     color <- "red"
   }
 

diff --git a/R/check_heterogeneity_bias.R b/R/check_heterogeneity_bias.R
@@ -2,7 +2,7 @@
 #'
 #' `check_heterogeneity_bias()` checks if model predictors or variables may
 #' cause a heterogeneity bias, i.e. if variables have a within- and/or
-#' between-effect.
+#' between-effect (_Bell and Jones, 2015_).
 #'
 #' @param x A data frame or a mixed model object.
 #' @param select Character vector (or formula) with names of variables to select
@@ -15,7 +15,12 @@
 #' @seealso
 #' For further details, read the vignette
 #' <https://easystats.github.io/parameters/articles/demean.html> and also
-#' see documentation for `?datawizard::demean`.
+#' see documentation for [`datawizard::demean()`].
+#'
+#' @references
+#' - Bell A, Jones K. 2015. Explaining Fixed Effects: Random Effects
+#'   Modeling of Time-Series Cross-Sectional and Panel Data. Political Science
+#'   Research and Methods, 3(1), 133–153.
 #'
 #' @examples
 #' data(iris)