From d27281c0edcd5d742f52d79e83f9d31121cf8bcc Mon Sep 17 00:00:00 2001 From: Joseph Luchman Date: Wed, 28 Aug 2024 06:16:42 -0500 Subject: [PATCH] Adding `r2_mlm()` and possible update to `r2()` to work with it (#764) Co-authored-by: Daniel --- DESCRIPTION | 8 +++- NAMESPACE | 2 + R/print-methods.R | 40 ++++++++++++----- R/r2.R | 40 ++++++++++------- R/r2_mlm.R | 85 ++++++++++++++++++++++++++++++++++++ inst/WORDLIST | 4 ++ man/performance-package.Rd | 1 + man/r2.Rd | 9 +++- man/r2_mlm.Rd | 74 +++++++++++++++++++++++++++++++ tests/testthat/test-r2_mlm.R | 9 ++++ 10 files changed, 241 insertions(+), 31 deletions(-) create mode 100644 R/r2_mlm.R create mode 100644 man/r2_mlm.Rd create mode 100644 tests/testthat/test-r2_mlm.R diff --git a/DESCRIPTION b/DESCRIPTION index ee591ae47..593afc5b1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.12.2.12 +Version: 0.12.2.13 Authors@R: c(person(given = "Daniel", family = "Lüdecke", @@ -52,7 +52,11 @@ Authors@R: "Bacher", , "etienne.bacher@protonmail.com", role = "ctb", - comment = c(ORCID = "0000-0002-9271-5075"))) + comment = c(ORCID = "0000-0002-9271-5075")), + person(given = "Joseph", + family = "Luchman", + role = "ctb", + comment = c(ORCID = "0000-0002-8886-9717"))) Maintainer: Daniel Lüdecke Description: Utilities for computing measures to assess model quality, which are not directly provided by R's 'base' or 'stats' packages. diff --git a/NAMESPACE b/NAMESPACE index e0543f2b8..67add6a04 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -486,6 +486,7 @@ S3method(r2_mcfadden,serp) S3method(r2_mcfadden,truncreg) S3method(r2_mcfadden,vglm) S3method(r2_mckelvey,default) +S3method(r2_mlm,mlm) S3method(r2_nagelkerke,BBreg) S3method(r2_nagelkerke,DirichletRegModel) S3method(r2_nagelkerke,bife) @@ -610,6 +611,7 @@ export(r2_loo) export(r2_loo_posterior) export(r2_mcfadden) export(r2_mckelvey) +export(r2_mlm) export(r2_nagelkerke) export(r2_nakagawa) export(r2_posterior) diff --git a/R/print-methods.R b/R/print-methods.R index e3cdbbfed..1509188fa 100644 --- a/R/print-methods.R +++ b/R/print-methods.R @@ -68,23 +68,39 @@ print.r2_pseudo <- function(x, digits = 3, ...) { #' @export print.r2_mlm <- function(x, digits = 3, ...) { model_type <- attr(x, "model_type") - if (!is.null(model_type)) { - insight::print_color(sprintf("# R2 for %s Regression\n\n", model_type), "blue") + is_multivar_r2 <- all(names(x) == c("Symmetric Rxy", "Asymmetric Pxy")) + if (!is.null(model_type) && !is_multivar_r2) { + insight::print_color( + sprintf("# R2 for %s Regression\n\n", model_type), + "blue" + ) + } else if (!is.null(model_type) && is_multivar_r2) { + insight::print_color( + sprintf("# Multivariate R2 for %s Regression\n", model_type), + "blue" + ) } else { insight::print_color("# R2\n\n", "blue") } - for (i in names(x)) { - insight::print_color(sprintf("## %s\n", i), "cyan") - out <- paste0( - c( - sprintf(" R2: %.*f", digits, x[[i]]$R2), - sprintf(" adj. R2: %.*f", digits, x[[i]]$R2_adjusted) - ), - collapse = "\n" - ) - cat(out) + if (is_multivar_r2) { + cat(sprintf(" Symmetric Rxy: %.*f", digits, x[["Symmetric Rxy"]])) + cat("\n") + cat(sprintf("Asymmetric Pxy: %.*f", digits, x[["Asymmetric Pxy"]])) cat("\n\n") + } else { + for (i in names(x)) { + insight::print_color(sprintf("## %s\n", i), "cyan") + out <- paste( + c( + sprintf(" R2: %.*f", digits, x[[i]]$R2), + sprintf(" adj. R2: %.*f", digits, x[[i]]$R2_adjusted) + ), + collapse = "\n" + ) + cat(out) + cat("\n\n") + } } invisible(x) } diff --git a/R/r2.R b/R/r2.R index dbfc437fc..aeddd56ce 100644 --- a/R/r2.R +++ b/R/r2.R @@ -10,6 +10,9 @@ #' (`TRUE`) or not (`FALSE`)? #' @param ci Confidence interval level, as scalar. If `NULL` (default), no #' confidence intervals for R2 are calculated. +#' @param multivariate Logical. Should multiple R2 values be reported as +#' separated by response (FALSE) or should a single R2 be reported as +#' combined across responses computed by [`r2_mlm`] (TRUE). #' @param ... Arguments passed down to the related r2-methods. #' @inheritParams r2_nakagawa #' @@ -31,7 +34,7 @@ #' @seealso #' [`r2_bayes()`], [`r2_coxsnell()`], [`r2_kullback()`], [`r2_loo()`], #' [`r2_mcfadden()`], [`r2_nagelkerke()`], [`r2_nakagawa()`], [`r2_tjur()`], -#' [`r2_xu()`] and [`r2_zeroinflated()`]. +#' [`r2_xu()`], [`r2_zeroinflated()`], and [`r2_mlm()`]. #' #' @examplesIf require("lme4") #' # Pseudo r-quared for GLM @@ -245,24 +248,29 @@ r2.aov <- function(model, ci = NULL, ...) { structure(class = "r2_generic", out) } - +#' @rdname r2 #' @export -r2.mlm <- function(model, ...) { - model_summary <- summary(model) +r2.mlm <- function(model, multivariate = TRUE, ...) { - out <- lapply(names(model_summary), function(i) { - tmp <- list( - R2 = model_summary[[i]]$r.squared, - R2_adjusted = model_summary[[i]]$adj.r.squared, - Response = sub("Response ", "", i, fixed = TRUE) - ) - names(tmp$R2) <- "R2" - names(tmp$R2_adjusted) <- "adjusted R2" - names(tmp$Response) <- "Response" - tmp - }) + if (multivariate) { + out <- r2_mlm(model) + } else { + model_summary <- summary(model) + + out <- lapply(names(model_summary), function(i) { + tmp <- list( + R2 = model_summary[[i]]$r.squared, + R2_adjusted = model_summary[[i]]$adj.r.squared, + Response = sub("Response ", "", i, fixed = TRUE) + ) + names(tmp$R2) <- "R2" + names(tmp$R2_adjusted) <- "adjusted R2" + names(tmp$Response) <- "Response" + tmp + }) - names(out) <- names(model_summary) + names(out) <- names(model_summary) + } attr(out, "model_type") <- "Multivariate Linear" structure(class = "r2_mlm", out) diff --git a/R/r2_mlm.R b/R/r2_mlm.R new file mode 100644 index 000000000..0f08ad235 --- /dev/null +++ b/R/r2_mlm.R @@ -0,0 +1,85 @@ +#' @title Multivariate R2 +#' @name r2_mlm +#' +#' @description +#' Calculates two multivariate R2 values for multivariate linear regression. +#' +#' @param model Multivariate linear regression model. +#' @param ... Currently not used. +#' +#' @details +#' The two indexes returned summarize model fit for the set of predictors +#' given the system of responses. As compared to the default +#' [r2][performance::r2] index for multivariate linear models, the indexes +#' returned by this function provide a single fit value collapsed across +#' all responses. +#' +#' The two returned indexes were proposed by *Van den Burg and Lewis (1988)* +#' as an extension of the metrics proposed by *Cramer and Nicewander (1979)*. +#' Of the numerous indexes proposed across these two papers, only two metrics, +#' the \eqn{R_{xy}} and \eqn{P_{xy}}, are recommended for use +#' by *Azen and Budescu (2006)*. +#' +#' For a multivariate linear regression with \eqn{p} predictors and +#' \eqn{q} responses where \eqn{p > q}, the \eqn{R_{xy}} index is +#' computed as: +#' +#' \deqn{R_{xy} = 1 - \prod_{i=1}^p (1 - \rho_i^2)} +#' +#' Where \eqn{\rho} is a canonical variate from a +#' [canonical correlation][cancor] between the predictors and responses. +#' This metric is symmetric and its value does not change when the roles of +#' the variables as predictors or responses are swapped. +#' +#' The \eqn{P_{xy}} is computed as: +#' +#' \deqn{P_{xy} = \frac{q - trace(\bf{S}_{\bf{YY}}^{-1}\bf{S}_{\bf{YY|X}})}{q}} +#' +#' Where \eqn{\bf{S}_{\bf{YY}}} is the matrix of response covariances and +#' \eqn{\bf{S}_{\bf{YY|X}}} is the matrix of residual covariances given +#' the predictors. This metric is asymmetric and can change +#' depending on which variables are considered predictors versus responses. +#' +#' @return A named vector with the R2 values. +#' +#' @examples +#' model <- lm(cbind(qsec, drat) ~ wt + mpg + cyl, data = mtcars) +#' r2_mlm(model) +#' +#' model_swap <- lm(cbind(wt, mpg, cyl) ~ qsec + drat, data = mtcars) +#' r2_mlm(model_swap) +#' +#' @references +#' - Azen, R., & Budescu, D. V. (2006). Comparing predictors in +#' multivariate regression models: An extension of dominance analysis. +#' Journal of Educational and Behavioral Statistics, 31(2), 157-180. +#'- Cramer, E. M., & Nicewander, W. A. (1979). Some symmetric, +#' invariant measures of multivariate association. Psychometrika, 44, 43-54. +#' - Van den Burg, W., & Lewis, C. (1988). Some properties of two +#' measures of multivariate association. Psychometrika, 53, 109-122. +#' +#' @author Joseph Luchman +#' +#' @export +r2_mlm <- function(model, ...) { + UseMethod("r2_mlm") +} + +# methods --------------------------- + +#' @export +r2_mlm.mlm <- function(model, verbose = TRUE, ...) { + rho2_vec <- 1 - stats::cancor( + insight::get_predictors(model), + insight::get_response(model) + )$cor^2 + R_xy <- 1 - Reduce(`*`, rho2_vec, 1) + + resid_cov <- stats::cov(residuals(model)) + resp_cov <- stats::cov(insight::get_response(model)) + qq <- ncol(insight::get_response(model)) + V_xy <- qq - sum(diag(solve(resp_cov) %*% resid_cov)) + P_xy <- V_xy / qq + + c("Symmetric Rxy" = R_xy, "Asymmetric Pxy" = P_xy) +} diff --git a/inst/WORDLIST b/inst/WORDLIST index 2366773d8..2989faf72 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -9,6 +9,7 @@ Ankerst Archimbaud Arel Asq +Azen BCI BFBayesFactor BMJ @@ -27,6 +28,7 @@ Breunig Breusch BRM Bryk +Budescu Bundock Burnham Byrne @@ -40,6 +42,7 @@ CochransQ CompQuadForm Concurvity Confounder +Cramer Cribari Cronbach's Crujeiras @@ -162,6 +165,7 @@ Neto's Nondegenerate Nordhausen Normed +Nicewander ORCID OSF OSX diff --git a/man/performance-package.Rd b/man/performance-package.Rd index b3f1d5a3f..f7a05c369 100644 --- a/man/performance-package.Rd +++ b/man/performance-package.Rd @@ -52,6 +52,7 @@ Other contributors: \item Martin Jullum [reviewer] \item gjo11 [reviewer] \item Etienne Bacher \email{etienne.bacher@protonmail.com} (\href{https://orcid.org/0000-0002-9271-5075}{ORCID}) [contributor] + \item Joseph Luchman (\href{https://orcid.org/0000-0002-8886-9717}{ORCID}) [contributor] } } diff --git a/man/r2.Rd b/man/r2.Rd index bf783e8d9..a8d514908 100644 --- a/man/r2.Rd +++ b/man/r2.Rd @@ -3,6 +3,7 @@ \name{r2} \alias{r2} \alias{r2.default} +\alias{r2.mlm} \alias{r2.merMod} \title{Compute the model's R2} \usage{ @@ -10,6 +11,8 @@ r2(model, ...) \method{r2}{default}(model, ci = NULL, verbose = TRUE, ...) +\method{r2}{mlm}(model, multivariate = TRUE, ...) + \method{r2}{merMod}(model, ci = NULL, tolerance = 1e-05, ...) } \arguments{ @@ -23,6 +26,10 @@ confidence intervals for R2 are calculated.} \item{verbose}{Logical. Should details about R2 and CI methods be given (\code{TRUE}) or not (\code{FALSE})?} +\item{multivariate}{Logical. Should multiple R2 values be reported as +separated by response (FALSE) or should a single R2 be reported as +combined across responses computed by \code{\link{r2_mlm}} (TRUE).} + \item{tolerance}{Tolerance for singularity check of random effects, to decide whether to compute random effect variances for the conditional r-squared or not. Indicates up to which value the convergence result is accepted. When @@ -70,5 +77,5 @@ r2(model) \seealso{ \code{\link[=r2_bayes]{r2_bayes()}}, \code{\link[=r2_coxsnell]{r2_coxsnell()}}, \code{\link[=r2_kullback]{r2_kullback()}}, \code{\link[=r2_loo]{r2_loo()}}, \code{\link[=r2_mcfadden]{r2_mcfadden()}}, \code{\link[=r2_nagelkerke]{r2_nagelkerke()}}, \code{\link[=r2_nakagawa]{r2_nakagawa()}}, \code{\link[=r2_tjur]{r2_tjur()}}, -\code{\link[=r2_xu]{r2_xu()}} and \code{\link[=r2_zeroinflated]{r2_zeroinflated()}}. +\code{\link[=r2_xu]{r2_xu()}}, \code{\link[=r2_zeroinflated]{r2_zeroinflated()}}, and \code{\link[=r2_mlm]{r2_mlm()}}. } diff --git a/man/r2_mlm.Rd b/man/r2_mlm.Rd new file mode 100644 index 000000000..50b5389fd --- /dev/null +++ b/man/r2_mlm.Rd @@ -0,0 +1,74 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/r2_mlm.R +\name{r2_mlm} +\alias{r2_mlm} +\title{Multivariate R2} +\usage{ +r2_mlm(model, ...) +} +\arguments{ +\item{model}{Multivariate linear regression model.} + +\item{...}{Currently not used.} +} +\value{ +A named vector with the R2 values. +} +\description{ +Calculates two multivariate R2 values for multivariate linear regression. +} +\details{ +The two indexes returned summarize model fit for the set of predictors +given the system of responses. As compared to the default +\link[=r2]{r2} index for multivariate linear models, the indexes +returned by this function provide a single fit value collapsed across +all responses. + +The two returned indexes were proposed by \emph{Van den Burg and Lewis (1988)} +as an extension of the metrics proposed by \emph{Cramer and Nicewander (1979)}. +Of the numerous indexes proposed across these two papers, only two metrics, +the \eqn{R_{xy}} and \eqn{P_{xy}}, are recommended for use +by \emph{Azen and Budescu (2006)}. + +For a multivariate linear regression with \eqn{p} predictors and +\eqn{q} responses where \eqn{p > q}, the \eqn{R_{xy}} index is +computed as: + +\deqn{R_{xy} = 1 - \prod_{i=1}^p (1 - \rho_i^2)} + +Where \eqn{\rho} is a canonical variate from a +\link[=cancor]{canonical correlation} between the predictors and responses. +This metric is symmetric and its value does not change when the roles of +the variables as predictors or responses are swapped. + +The \eqn{P_{xy}} is computed as: + +\deqn{P_{xy} = \frac{q - trace(\bf{S}_{\bf{YY}}^{-1}\bf{S}_{\bf{YY|X}})}{q}} + +Where \eqn{\bf{S}_{\bf{YY}}} is the matrix of response covariances and +\eqn{\bf{S}_{\bf{YY|X}}} is the matrix of residual covariances given +the predictors. This metric is asymmetric and can change +depending on which variables are considered predictors versus responses. +} +\examples{ +model <- lm(cbind(qsec, drat) ~ wt + mpg + cyl, data = mtcars) +r2_mlm(model) + +model_swap <- lm(cbind(wt, mpg, cyl) ~ qsec + drat, data = mtcars) +r2_mlm(model_swap) + +} +\references{ +\itemize{ +\item Azen, R., & Budescu, D. V. (2006). Comparing predictors in +multivariate regression models: An extension of dominance analysis. +Journal of Educational and Behavioral Statistics, 31(2), 157-180. +\item Cramer, E. M., & Nicewander, W. A. (1979). Some symmetric, +invariant measures of multivariate association. Psychometrika, 44, 43-54. +\item Van den Burg, W., & Lewis, C. (1988). Some properties of two +measures of multivariate association. Psychometrika, 53, 109-122. +} +} +\author{ +Joseph Luchman +} diff --git a/tests/testthat/test-r2_mlm.R b/tests/testthat/test-r2_mlm.R new file mode 100644 index 000000000..a532b57a7 --- /dev/null +++ b/tests/testthat/test-r2_mlm.R @@ -0,0 +1,9 @@ +test_that("r2_mlm_Rxy", { + model <- lm(cbind(qsec, drat) ~ wt + mpg, data = mtcars) + expect_equal(r2_mlm(model)[["Symmetric Rxy"]], 0.68330688076502, tolerance = 1e-3) +}) + +test_that("r2_mlm_Pxy", { + model <- lm(cbind(qsec, drat) ~ wt + mpg, data = mtcars) + expect_equal(r2_mlm(model)[["Asymmetric Pxy"]], 0.407215267524997, tolerance = 1e-3) +})