From d18afa44b5ab228c1ed50dc1d9632d2651048541 Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 17 Sep 2024 12:07:01 +0200 Subject: [PATCH] allow named vectors --- R/p_significance.R | 57 +++++++++++++++++++++------- R/rope.R | 25 +++++++----- man/equivalence_test.Rd | 23 +++++++---- man/p_rope.Rd | 23 +++++++---- man/p_significance.Rd | 7 +++- man/rope.Rd | 25 ++++++++---- tests/testthat/test-p_significance.R | 27 ++++++++++++- tests/testthat/test-rope.R | 4 ++ 8 files changed, 145 insertions(+), 46 deletions(-) diff --git a/R/p_significance.R b/R/p_significance.R index f2f9a34e2..6edd70c8d 100644 --- a/R/p_significance.R +++ b/R/p_significance.R @@ -16,7 +16,10 @@ #' interval) #' - a numeric vector of length two (e.g., `c(-0.2, 0.1)`), useful for #' asymmetric intervals -#' - a list of numeric vectors, where each vector corresponds to a parameter. +#' - a list of numeric vectors, where each vector corresponds to a parameter +#' - a list of *named* numeric vectors, where names correspond to parameter +#' names. In this case, all parameters that have no matching name in `threshold` +#' will be set to `"default"`. #' @inheritParams rope #' @inheritParams hdi #' @@ -56,6 +59,8 @@ #' p_significance(model) #' # multiple thresholds - asymmetric, symmetric, default #' p_significance(model, threshold = list(c(-10, 5), 0.2, "default")) +#' # names thresholds +#' p_significance(model, threshold = list(wt = 0.2, `(Intercept)` = c(-10, 5))) #' } #' @export p_significance <- function(x, ...) { @@ -131,7 +136,7 @@ p_significance.data.frame <- function(x, threshold = "default", rvar_col = NULL, } - threshold <- .select_threshold_ps(threshold = threshold) + threshold <- .select_threshold_ps(threshold = threshold, params = x) x <- .select_nums(x) if (ncol(x) == 1) { @@ -286,12 +291,15 @@ p_significance.stanreg <- function(x, ...) { effects <- match.arg(effects) component <- match.arg(component) - threshold <- .select_threshold_ps(model = x, threshold = threshold, verbose = verbose) + params <- insight::get_parameters(x, effects = effects, component = component, parameters = parameters) - result <- p_significance( - insight::get_parameters(x, effects = effects, component = component, parameters = parameters), - threshold = threshold + threshold <- .select_threshold_ps( + model = x, + threshold = threshold, + params = params, + verbose = verbose ) + result <- p_significance(params, threshold = threshold) cleaned_parameters <- insight::clean_parameters(x) out <- .prepare_output(result, cleaned_parameters, inherits(x, "stanmvreg")) @@ -322,12 +330,15 @@ p_significance.brmsfit <- function(x, ...) { effects <- match.arg(effects) component <- match.arg(component) - threshold <- .select_threshold_ps(model = x, threshold = threshold, verbose = verbose) + params <- insight::get_parameters(x, effects = effects, component = component, parameters = parameters) - result <- p_significance( - insight::get_parameters(x, effects = effects, component = component, parameters = parameters), - threshold = threshold + threshold <- .select_threshold_ps( + model = x, + threshold = threshold, + params = params, + verbose = verbose ) + result <- p_significance(params, threshold = threshold) cleaned_parameters <- insight::clean_parameters(x) out <- .prepare_output(result, cleaned_parameters) @@ -382,8 +393,28 @@ as.double.p_significance <- as.numeric.p_significance # helpers -------------------------- #' @keywords internal -.select_threshold_ps <- function(model = NULL, threshold = "default", verbose = TRUE) { +.select_threshold_ps <- function(model = NULL, threshold = "default", params = NULL, verbose = TRUE) { if (is.list(threshold)) { + # if we have named elements, complete list + if (!is.null(params)) { + named_threshold <- names(threshold) + if (!is.null(named_threshold)) { + # find out which name belongs to which parameter + pos <- match(named_threshold, colnames(params)) + # if not all element names were found, error + if (anyNA(pos)) { + insight::format_error(paste( + "Not all elements of `threshold` were found in the parameters. Please check following names:", + toString(named_threshold[is.na(pos)]) + )) + } + # now "fill" non-specified elements with "default" + out <- as.list(rep("default", ncol(params))) + out[pos] <- threshold + # overwrite former threshold + threshold <- out + } + } lapply(threshold, function(i) { out <- .select_threshold_list(model = model, threshold = i, verbose = verbose) if (length(out) == 1) { @@ -426,11 +457,11 @@ as.double.p_significance <- as.numeric.p_significance if (anyNA(pos)) { insight::format_error(paste( "Not all elements of `range` were found in the parameters. Please check following names:", - toString(names_range[is.na]) + toString(named_range[is.na(pos)]) )) } # now "fill" non-specified elements with "default" - out <- rep("default", ncol(params)) + out <- as.list(rep("default", ncol(params))) out[pos] <- range # overwrite former range range <- out diff --git a/R/rope.R b/R/rope.R index 61eec4d32..b678c24eb 100644 --- a/R/rope.R +++ b/R/rope.R @@ -6,14 +6,21 @@ #' @param x Vector representing a posterior distribution. Can also be a #' `stanreg` or `brmsfit` model. #' @param range ROPE's lower and higher bounds. Should be `"default"` or -#' depending on the number of outcome variables a vector or a list. In models -#' with one response, `range` can be a vector of length two (e.g., `c(-0.1, -#' 0.1)`), or a list of numeric vector of the same length as numbers of -#' parameters (see 'Examples'). In multivariate models, `range` should be a list -#' with a numeric vectors for each response variable. Vector names should -#' correspond to the name of the response variables. If `"default"` and input is -#' a vector, the range is set to `c(-0.1, 0.1)`. If `"default"` and input is a -#' Bayesian model, [`rope_range()`][rope_range] is used. +#' depending on the number of outcome variables a vector or a list. For models +#' with one response, `range` can be: +#' +#' - a vector of length two (e.g., `c(-0.1, 0.1)`), +#' - a list of numeric vector of the same length as numbers of parameters (see +#' 'Examples'). +#' - a list of *named* numeric vectors, where names correspond to parameter +#' names. In this case, all parameters that have no matching name in `range` +#' will be set to `"default"`. +#' +#' In multivariate models, `range` should be a list with a numeric vectors for +#' each response variable. Vector names should correspond to the name of the +#' response variables. If `"default"` and input is a vector, the range is set to +#' `c(-0.1, 0.1)`. If `"default"` and input is a Bayesian model, +#' [`rope_range()`] is used. #' @param ci The Credible Interval (CI) probability, corresponding to the #' proportion of HDI, to use for the percentage in ROPE. #' @param ci_method The type of interval to use to quantify the percentage in @@ -34,7 +41,7 @@ #' size according to Cohen, 1988). This could be generalized: For instance, #' for linear models, the ROPE could be set as `0 +/- .1 * sd(y)`. #' This ROPE range can be automatically computed for models using the -#' [rope_range] function. +#' [`rope_range()`] function. #' #' Kruschke (2010, 2011, 2014) suggests using the proportion of the `95%` #' (or `89%`, considered more stable) [HDI][hdi] that falls within the diff --git a/man/equivalence_test.Rd b/man/equivalence_test.Rd index 81f5d42fd..adc86a51e 100644 --- a/man/equivalence_test.Rd +++ b/man/equivalence_test.Rd @@ -51,13 +51,22 @@ equivalence_test(x, ...) \item{...}{Currently not used.} \item{range}{ROPE's lower and higher bounds. Should be \code{"default"} or -depending on the number of outcome variables a vector or a list. In models -with one response, \code{range} can be a vector of length two (e.g., \code{c(-0.1, 0.1)}), or a list of numeric vector of the same length as numbers of -parameters (see 'Examples'). In multivariate models, \code{range} should be a list -with a numeric vectors for each response variable. Vector names should -correspond to the name of the response variables. If \code{"default"} and input is -a vector, the range is set to \code{c(-0.1, 0.1)}. If \code{"default"} and input is a -Bayesian model, \code{\link[=rope_range]{rope_range()}} is used.} +depending on the number of outcome variables a vector or a list. For models +with one response, \code{range} can be: +\itemize{ +\item a vector of length two (e.g., \code{c(-0.1, 0.1)}), +\item a list of numeric vector of the same length as numbers of parameters (see +'Examples'). +\item a list of \emph{named} numeric vectors, where names correspond to parameter +names. In this case, all parameters that have no matching name in \code{range} +will be set to \code{"default"}. +} + +In multivariate models, \code{range} should be a list with a numeric vectors for +each response variable. Vector names should correspond to the name of the +response variables. If \code{"default"} and input is a vector, the range is set to +\code{c(-0.1, 0.1)}. If \code{"default"} and input is a Bayesian model, +\code{\link[=rope_range]{rope_range()}} is used.} \item{ci}{The Credible Interval (CI) probability, corresponding to the proportion of HDI, to use for the percentage in ROPE.} diff --git a/man/p_rope.Rd b/man/p_rope.Rd index aa76e2d1e..4d27fe375 100644 --- a/man/p_rope.Rd +++ b/man/p_rope.Rd @@ -42,13 +42,22 @@ p_rope(x, ...) \item{...}{Currently not used.} \item{range}{ROPE's lower and higher bounds. Should be \code{"default"} or -depending on the number of outcome variables a vector or a list. In models -with one response, \code{range} can be a vector of length two (e.g., \code{c(-0.1, 0.1)}), or a list of numeric vector of the same length as numbers of -parameters (see 'Examples'). In multivariate models, \code{range} should be a list -with a numeric vectors for each response variable. Vector names should -correspond to the name of the response variables. If \code{"default"} and input is -a vector, the range is set to \code{c(-0.1, 0.1)}. If \code{"default"} and input is a -Bayesian model, \code{\link[=rope_range]{rope_range()}} is used.} +depending on the number of outcome variables a vector or a list. For models +with one response, \code{range} can be: +\itemize{ +\item a vector of length two (e.g., \code{c(-0.1, 0.1)}), +\item a list of numeric vector of the same length as numbers of parameters (see +'Examples'). +\item a list of \emph{named} numeric vectors, where names correspond to parameter +names. In this case, all parameters that have no matching name in \code{range} +will be set to \code{"default"}. +} + +In multivariate models, \code{range} should be a list with a numeric vectors for +each response variable. Vector names should correspond to the name of the +response variables. If \code{"default"} and input is a vector, the range is set to +\code{c(-0.1, 0.1)}. If \code{"default"} and input is a Bayesian model, +\code{\link[=rope_range]{rope_range()}} is used.} \item{verbose}{Toggle off warnings.} diff --git a/man/p_significance.Rd b/man/p_significance.Rd index 78e10864c..38abc8d2e 100644 --- a/man/p_significance.Rd +++ b/man/p_significance.Rd @@ -60,7 +60,10 @@ and based on \code{\link[=rope_range]{rope_range()}} if a (Bayesian) model is pr interval) \item a numeric vector of length two (e.g., \code{c(-0.2, 0.1)}), useful for asymmetric intervals -\item a list of numeric vectors, where each vector corresponds to a parameter. +\item a list of numeric vectors, where each vector corresponds to a parameter +\item a list of \emph{named} numeric vectors, where names correspond to parameter +names. In this case, all parameters that have no matching name in \code{threshold} +will be set to \code{"default"}. }} \item{use_iterations}{Logical, if \code{TRUE} and \code{x} is a \code{get_predicted} object, @@ -134,6 +137,8 @@ model <- rstanarm::stan_glm(mpg ~ wt + cyl, p_significance(model) # multiple thresholds - asymmetric, symmetric, default p_significance(model, threshold = list(c(-10, 5), 0.2, "default")) +# names thresholds +p_significance(model, threshold = list(wt = 0.2, `(Intercept)` = c(-10, 5))) } \dontshow{\}) # examplesIf} } diff --git a/man/rope.Rd b/man/rope.Rd index cf82a12e8..21f960185 100644 --- a/man/rope.Rd +++ b/man/rope.Rd @@ -54,13 +54,22 @@ rope(x, ...) \item{...}{Currently not used.} \item{range}{ROPE's lower and higher bounds. Should be \code{"default"} or -depending on the number of outcome variables a vector or a list. In models -with one response, \code{range} can be a vector of length two (e.g., \code{c(-0.1, 0.1)}), or a list of numeric vector of the same length as numbers of -parameters (see 'Examples'). In multivariate models, \code{range} should be a list -with a numeric vectors for each response variable. Vector names should -correspond to the name of the response variables. If \code{"default"} and input is -a vector, the range is set to \code{c(-0.1, 0.1)}. If \code{"default"} and input is a -Bayesian model, \code{\link[=rope_range]{rope_range()}} is used.} +depending on the number of outcome variables a vector or a list. For models +with one response, \code{range} can be: +\itemize{ +\item a vector of length two (e.g., \code{c(-0.1, 0.1)}), +\item a list of numeric vector of the same length as numbers of parameters (see +'Examples'). +\item a list of \emph{named} numeric vectors, where names correspond to parameter +names. In this case, all parameters that have no matching name in \code{range} +will be set to \code{"default"}. +} + +In multivariate models, \code{range} should be a list with a numeric vectors for +each response variable. Vector names should correspond to the name of the +response variables. If \code{"default"} and input is a vector, the range is set to +\code{c(-0.1, 0.1)}. If \code{"default"} and input is a Bayesian model, +\code{\link[=rope_range]{rope_range()}} is used.} \item{ci}{The Credible Interval (CI) probability, corresponding to the proportion of HDI, to use for the percentage in ROPE.} @@ -107,7 +116,7 @@ to the -0.1 to 0.1 range of a standardized parameter (negligible effect size according to Cohen, 1988). This could be generalized: For instance, for linear models, the ROPE could be set as \verb{0 +/- .1 * sd(y)}. This ROPE range can be automatically computed for models using the -\link{rope_range} function. +\code{\link[=rope_range]{rope_range()}} function. Kruschke (2010, 2011, 2014) suggests using the proportion of the \verb{95\%} (or \verb{89\%}, considered more stable) \link[=hdi]{HDI} that falls within the diff --git a/tests/testthat/test-p_significance.R b/tests/testthat/test-p_significance.R index a03d42e0c..dbfae00a5 100644 --- a/tests/testthat/test-p_significance.R +++ b/tests/testthat/test-p_significance.R @@ -37,7 +37,6 @@ test_that("p_significance", { test_that("stanreg", { skip_if_offline() skip_if_not_or_load_if_installed("rstanarm") - m <- insight::download_model("stanreg_merMod_5") expect_equal( @@ -80,3 +79,29 @@ test_that("brms", { regex = "Length of" ) }) + +test_that("stan", { + skip_if_offline() + skip_if_not_or_load_if_installed("rstanarm") + m <- insight::download_model("stanreg_merMod_5") + + expect_equal( + p_significance(m, threshold = list("(Intercept)" = 1, period4 = 1.5, period3 = 0.5))$ps, + p_significance(m, threshold = list(1, "default", "default", 0.5, 1.5))$ps, + tolerance = 1e-4 + ) + + expect_error( + p_significance(m, threshold = list("(Intercept)" = 1, point = 1.5, period3 = 0.5)), + regex = "Not all elements" + ) + expect_error( + p_significance(m, threshold = list(1, "a", 2), effects = "all"), + regex = "should be one of" + ) + expect_error( + p_significance(m, threshold = list(1, 2, 3, 4), effects = "all"), + regex = "Length of" + ) + +}) diff --git a/tests/testthat/test-rope.R b/tests/testthat/test-rope.R index 48e71e7aa..c7c8eead5 100644 --- a/tests/testthat/test-rope.R +++ b/tests/testthat/test-rope.R @@ -72,6 +72,10 @@ test_that("rope", { rope(m, range = list(c(-0.1, 0.1), c(2, 2), "default", "a", c(1, 3))), regex = "should be 'default'" ) + expect_error( + rope(m, range = list("(Intercept)" = c(-1, 0.1), pointout = c(-1.5, -1), period3 = c(-1, 1))), + regex = "Not all elements" + ) })