From d18afa44b5ab228c1ed50dc1d9632d2651048541 Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Tue, 17 Sep 2024 12:07:01 +0200
Subject: [PATCH] allow named vectors

---
 R/p_significance.R                   | 57 +++++++++++++++++++++-------
 R/rope.R                             | 25 +++++++-----
 man/equivalence_test.Rd              | 23 +++++++----
 man/p_rope.Rd                        | 23 +++++++----
 man/p_significance.Rd                |  7 +++-
 man/rope.Rd                          | 25 ++++++++----
 tests/testthat/test-p_significance.R | 27 ++++++++++++-
 tests/testthat/test-rope.R           |  4 ++
 8 files changed, 145 insertions(+), 46 deletions(-)

diff --git a/R/p_significance.R b/R/p_significance.R
index f2f9a34e2..6edd70c8d 100644
--- a/R/p_significance.R
+++ b/R/p_significance.R
@@ -16,7 +16,10 @@
 #'   interval)
 #' - a numeric vector of length two (e.g., `c(-0.2, 0.1)`), useful for
 #'   asymmetric intervals
-#' - a list of numeric vectors, where each vector corresponds to a parameter.
+#' - a list of numeric vectors, where each vector corresponds to a parameter
+#' - a list of *named* numeric vectors, where names correspond to parameter
+#'   names. In this case, all parameters that have no matching name in `threshold`
+#'   will be set to `"default"`.
 #' @inheritParams rope
 #' @inheritParams hdi
 #'
@@ -56,6 +59,8 @@
 #' p_significance(model)
 #' # multiple thresholds - asymmetric, symmetric, default
 #' p_significance(model, threshold = list(c(-10, 5), 0.2, "default"))
+#' # names thresholds
+#' p_significance(model, threshold = list(wt = 0.2, `(Intercept)` = c(-10, 5)))
 #' }
 #' @export
 p_significance <- function(x, ...) {
@@ -131,7 +136,7 @@ p_significance.data.frame <- function(x, threshold = "default", rvar_col = NULL,
   }
 
 
-  threshold <- .select_threshold_ps(threshold = threshold)
+  threshold <- .select_threshold_ps(threshold = threshold, params = x)
   x <- .select_nums(x)
 
   if (ncol(x) == 1) {
@@ -286,12 +291,15 @@ p_significance.stanreg <- function(x,
                                    ...) {
   effects <- match.arg(effects)
   component <- match.arg(component)
-  threshold <- .select_threshold_ps(model = x, threshold = threshold, verbose = verbose)
+  params <- insight::get_parameters(x, effects = effects, component = component, parameters = parameters)
 
-  result <- p_significance(
-    insight::get_parameters(x, effects = effects, component = component, parameters = parameters),
-    threshold = threshold
+  threshold <- .select_threshold_ps(
+    model = x,
+    threshold = threshold,
+    params = params,
+    verbose = verbose
   )
+  result <- p_significance(params, threshold = threshold)
 
   cleaned_parameters <- insight::clean_parameters(x)
   out <- .prepare_output(result, cleaned_parameters, inherits(x, "stanmvreg"))
@@ -322,12 +330,15 @@ p_significance.brmsfit <- function(x,
                                    ...) {
   effects <- match.arg(effects)
   component <- match.arg(component)
-  threshold <- .select_threshold_ps(model = x, threshold = threshold, verbose = verbose)
+  params <- insight::get_parameters(x, effects = effects, component = component, parameters = parameters)
 
-  result <- p_significance(
-    insight::get_parameters(x, effects = effects, component = component, parameters = parameters),
-    threshold = threshold
+  threshold <- .select_threshold_ps(
+    model = x,
+    threshold = threshold,
+    params = params,
+    verbose = verbose
   )
+  result <- p_significance(params, threshold = threshold)
 
   cleaned_parameters <- insight::clean_parameters(x)
   out <- .prepare_output(result, cleaned_parameters)
@@ -382,8 +393,28 @@ as.double.p_significance <- as.numeric.p_significance
 # helpers --------------------------
 
 #' @keywords internal
-.select_threshold_ps <- function(model = NULL, threshold = "default", verbose = TRUE) {
+.select_threshold_ps <- function(model = NULL, threshold = "default", params = NULL, verbose = TRUE) {
   if (is.list(threshold)) {
+    # if we have named elements, complete list
+    if (!is.null(params)) {
+      named_threshold <- names(threshold)
+      if (!is.null(named_threshold)) {
+        # find out which name belongs to which parameter
+        pos <- match(named_threshold, colnames(params))
+        # if not all element names were found, error
+        if (anyNA(pos)) {
+          insight::format_error(paste(
+            "Not all elements of `threshold` were found in the parameters. Please check following names:",
+            toString(named_threshold[is.na(pos)])
+          ))
+        }
+        # now "fill" non-specified elements with "default"
+        out <- as.list(rep("default", ncol(params)))
+        out[pos] <- threshold
+        # overwrite former threshold
+        threshold <- out
+      }
+    }
     lapply(threshold, function(i) {
       out <- .select_threshold_list(model = model, threshold = i, verbose = verbose)
       if (length(out) == 1) {
@@ -426,11 +457,11 @@ as.double.p_significance <- as.numeric.p_significance
     if (anyNA(pos)) {
       insight::format_error(paste(
         "Not all elements of `range` were found in the parameters. Please check following names:",
-        toString(names_range[is.na])
+        toString(named_range[is.na(pos)])
       ))
     }
     # now "fill" non-specified elements with "default"
-    out <- rep("default", ncol(params))
+    out <- as.list(rep("default", ncol(params)))
     out[pos] <- range
     # overwrite former range
     range <- out
diff --git a/R/rope.R b/R/rope.R
index 61eec4d32..b678c24eb 100644
--- a/R/rope.R
+++ b/R/rope.R
@@ -6,14 +6,21 @@
 #' @param x Vector representing a posterior distribution. Can also be a
 #' `stanreg` or `brmsfit` model.
 #' @param range ROPE's lower and higher bounds. Should be `"default"` or
-#' depending on the number of outcome variables a vector or a list. In models
-#' with one response, `range` can be a vector of length two (e.g., `c(-0.1,
-#' 0.1)`), or a list of numeric vector of the same length as numbers of
-#' parameters (see 'Examples'). In multivariate models, `range` should be a list
-#' with a numeric vectors for each response variable. Vector names should
-#' correspond to the name of the response variables. If `"default"` and input is
-#' a vector, the range is set to `c(-0.1, 0.1)`. If `"default"` and input is a
-#' Bayesian model, [`rope_range()`][rope_range] is used.
+#' depending on the number of outcome variables a vector or a list. For models
+#' with one response, `range` can be:
+#'
+#' - a vector of length two (e.g., `c(-0.1, 0.1)`),
+#' - a list of numeric vector of the same length as numbers of parameters (see
+#'   'Examples').
+#' - a list of *named* numeric vectors, where names correspond to parameter
+#'   names. In this case, all parameters that have no matching name in `range`
+#'   will be set to `"default"`.
+#'
+#' In multivariate models, `range` should be a list with a numeric vectors for
+#' each response variable. Vector names should correspond to the name of the
+#' response variables. If `"default"` and input is a vector, the range is set to
+#' `c(-0.1, 0.1)`. If `"default"` and input is a Bayesian model,
+#' [`rope_range()`] is used.
 #' @param ci The Credible Interval (CI) probability, corresponding to the
 #' proportion of HDI, to use for the percentage in ROPE.
 #' @param ci_method The type of interval to use to quantify the percentage in
@@ -34,7 +41,7 @@
 #' size according to Cohen, 1988). This could be generalized: For instance,
 #' for linear models, the ROPE could be set as `0 +/- .1 * sd(y)`.
 #' This ROPE range can be automatically computed for models using the
-#' [rope_range] function.
+#' [`rope_range()`] function.
 #'
 #' Kruschke (2010, 2011, 2014) suggests using the proportion of  the `95%`
 #' (or `89%`, considered more stable) [HDI][hdi] that falls within the
diff --git a/man/equivalence_test.Rd b/man/equivalence_test.Rd
index 81f5d42fd..adc86a51e 100644
--- a/man/equivalence_test.Rd
+++ b/man/equivalence_test.Rd
@@ -51,13 +51,22 @@ equivalence_test(x, ...)
 \item{...}{Currently not used.}
 
 \item{range}{ROPE's lower and higher bounds. Should be \code{"default"} or
-depending on the number of outcome variables a vector or a list. In models
-with one response, \code{range} can be a vector of length two (e.g., \code{c(-0.1, 0.1)}), or a list of numeric vector of the same length as numbers of
-parameters (see 'Examples'). In multivariate models, \code{range} should be a list
-with a numeric vectors for each response variable. Vector names should
-correspond to the name of the response variables. If \code{"default"} and input is
-a vector, the range is set to \code{c(-0.1, 0.1)}. If \code{"default"} and input is a
-Bayesian model, \code{\link[=rope_range]{rope_range()}} is used.}
+depending on the number of outcome variables a vector or a list. For models
+with one response, \code{range} can be:
+\itemize{
+\item a vector of length two (e.g., \code{c(-0.1, 0.1)}),
+\item a list of numeric vector of the same length as numbers of parameters (see
+'Examples').
+\item a list of \emph{named} numeric vectors, where names correspond to parameter
+names. In this case, all parameters that have no matching name in \code{range}
+will be set to \code{"default"}.
+}
+
+In multivariate models, \code{range} should be a list with a numeric vectors for
+each response variable. Vector names should correspond to the name of the
+response variables. If \code{"default"} and input is a vector, the range is set to
+\code{c(-0.1, 0.1)}. If \code{"default"} and input is a Bayesian model,
+\code{\link[=rope_range]{rope_range()}} is used.}
 
 \item{ci}{The Credible Interval (CI) probability, corresponding to the
 proportion of HDI, to use for the percentage in ROPE.}
diff --git a/man/p_rope.Rd b/man/p_rope.Rd
index aa76e2d1e..4d27fe375 100644
--- a/man/p_rope.Rd
+++ b/man/p_rope.Rd
@@ -42,13 +42,22 @@ p_rope(x, ...)
 \item{...}{Currently not used.}
 
 \item{range}{ROPE's lower and higher bounds. Should be \code{"default"} or
-depending on the number of outcome variables a vector or a list. In models
-with one response, \code{range} can be a vector of length two (e.g., \code{c(-0.1, 0.1)}), or a list of numeric vector of the same length as numbers of
-parameters (see 'Examples'). In multivariate models, \code{range} should be a list
-with a numeric vectors for each response variable. Vector names should
-correspond to the name of the response variables. If \code{"default"} and input is
-a vector, the range is set to \code{c(-0.1, 0.1)}. If \code{"default"} and input is a
-Bayesian model, \code{\link[=rope_range]{rope_range()}} is used.}
+depending on the number of outcome variables a vector or a list. For models
+with one response, \code{range} can be:
+\itemize{
+\item a vector of length two (e.g., \code{c(-0.1, 0.1)}),
+\item a list of numeric vector of the same length as numbers of parameters (see
+'Examples').
+\item a list of \emph{named} numeric vectors, where names correspond to parameter
+names. In this case, all parameters that have no matching name in \code{range}
+will be set to \code{"default"}.
+}
+
+In multivariate models, \code{range} should be a list with a numeric vectors for
+each response variable. Vector names should correspond to the name of the
+response variables. If \code{"default"} and input is a vector, the range is set to
+\code{c(-0.1, 0.1)}. If \code{"default"} and input is a Bayesian model,
+\code{\link[=rope_range]{rope_range()}} is used.}
 
 \item{verbose}{Toggle off warnings.}
 
diff --git a/man/p_significance.Rd b/man/p_significance.Rd
index 78e10864c..38abc8d2e 100644
--- a/man/p_significance.Rd
+++ b/man/p_significance.Rd
@@ -60,7 +60,10 @@ and based on \code{\link[=rope_range]{rope_range()}} if a (Bayesian) model is pr
 interval)
 \item a numeric vector of length two (e.g., \code{c(-0.2, 0.1)}), useful for
 asymmetric intervals
-\item a list of numeric vectors, where each vector corresponds to a parameter.
+\item a list of numeric vectors, where each vector corresponds to a parameter
+\item a list of \emph{named} numeric vectors, where names correspond to parameter
+names. In this case, all parameters that have no matching name in \code{threshold}
+will be set to \code{"default"}.
 }}
 
 \item{use_iterations}{Logical, if \code{TRUE} and \code{x} is a \code{get_predicted} object,
@@ -134,6 +137,8 @@ model <- rstanarm::stan_glm(mpg ~ wt + cyl,
 p_significance(model)
 # multiple thresholds - asymmetric, symmetric, default
 p_significance(model, threshold = list(c(-10, 5), 0.2, "default"))
+# names thresholds
+p_significance(model, threshold = list(wt = 0.2, `(Intercept)` = c(-10, 5)))
 }
 \dontshow{\}) # examplesIf}
 }
diff --git a/man/rope.Rd b/man/rope.Rd
index cf82a12e8..21f960185 100644
--- a/man/rope.Rd
+++ b/man/rope.Rd
@@ -54,13 +54,22 @@ rope(x, ...)
 \item{...}{Currently not used.}
 
 \item{range}{ROPE's lower and higher bounds. Should be \code{"default"} or
-depending on the number of outcome variables a vector or a list. In models
-with one response, \code{range} can be a vector of length two (e.g., \code{c(-0.1, 0.1)}), or a list of numeric vector of the same length as numbers of
-parameters (see 'Examples'). In multivariate models, \code{range} should be a list
-with a numeric vectors for each response variable. Vector names should
-correspond to the name of the response variables. If \code{"default"} and input is
-a vector, the range is set to \code{c(-0.1, 0.1)}. If \code{"default"} and input is a
-Bayesian model, \code{\link[=rope_range]{rope_range()}} is used.}
+depending on the number of outcome variables a vector or a list. For models
+with one response, \code{range} can be:
+\itemize{
+\item a vector of length two (e.g., \code{c(-0.1, 0.1)}),
+\item a list of numeric vector of the same length as numbers of parameters (see
+'Examples').
+\item a list of \emph{named} numeric vectors, where names correspond to parameter
+names. In this case, all parameters that have no matching name in \code{range}
+will be set to \code{"default"}.
+}
+
+In multivariate models, \code{range} should be a list with a numeric vectors for
+each response variable. Vector names should correspond to the name of the
+response variables. If \code{"default"} and input is a vector, the range is set to
+\code{c(-0.1, 0.1)}. If \code{"default"} and input is a Bayesian model,
+\code{\link[=rope_range]{rope_range()}} is used.}
 
 \item{ci}{The Credible Interval (CI) probability, corresponding to the
 proportion of HDI, to use for the percentage in ROPE.}
@@ -107,7 +116,7 @@ to the -0.1 to 0.1 range of a standardized parameter (negligible effect
 size according to Cohen, 1988). This could be generalized: For instance,
 for linear models, the ROPE could be set as \verb{0 +/- .1 * sd(y)}.
 This ROPE range can be automatically computed for models using the
-\link{rope_range} function.
+\code{\link[=rope_range]{rope_range()}} function.
 
 Kruschke (2010, 2011, 2014) suggests using the proportion of  the \verb{95\%}
 (or \verb{89\%}, considered more stable) \link[=hdi]{HDI} that falls within the
diff --git a/tests/testthat/test-p_significance.R b/tests/testthat/test-p_significance.R
index a03d42e0c..dbfae00a5 100644
--- a/tests/testthat/test-p_significance.R
+++ b/tests/testthat/test-p_significance.R
@@ -37,7 +37,6 @@ test_that("p_significance", {
 test_that("stanreg", {
   skip_if_offline()
   skip_if_not_or_load_if_installed("rstanarm")
-
   m <- insight::download_model("stanreg_merMod_5")
 
   expect_equal(
@@ -80,3 +79,29 @@ test_that("brms", {
     regex = "Length of"
   )
 })
+
+test_that("stan", {
+  skip_if_offline()
+  skip_if_not_or_load_if_installed("rstanarm")
+  m <- insight::download_model("stanreg_merMod_5")
+
+  expect_equal(
+    p_significance(m, threshold = list("(Intercept)" = 1, period4 = 1.5, period3 = 0.5))$ps,
+    p_significance(m, threshold = list(1, "default", "default", 0.5, 1.5))$ps,
+    tolerance = 1e-4
+  )
+
+  expect_error(
+    p_significance(m, threshold = list("(Intercept)" = 1, point = 1.5, period3 = 0.5)),
+    regex = "Not all elements"
+  )
+  expect_error(
+    p_significance(m, threshold = list(1, "a", 2), effects = "all"),
+    regex = "should be one of"
+  )
+  expect_error(
+    p_significance(m, threshold = list(1, 2, 3, 4), effects = "all"),
+    regex = "Length of"
+  )
+
+})
diff --git a/tests/testthat/test-rope.R b/tests/testthat/test-rope.R
index 48e71e7aa..c7c8eead5 100644
--- a/tests/testthat/test-rope.R
+++ b/tests/testthat/test-rope.R
@@ -72,6 +72,10 @@ test_that("rope", {
     rope(m, range = list(c(-0.1, 0.1), c(2, 2), "default", "a", c(1, 3))),
     regex = "should be 'default'"
   )
+  expect_error(
+    rope(m, range = list("(Intercept)" = c(-1, 0.1), pointout = c(-1.5, -1), period3 = c(-1, 1))),
+    regex = "Not all elements"
+  )
 })