diff --git a/DESCRIPTION b/DESCRIPTION index 8c7c196b6..702d65055 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: gtsummary Title: Presentation-Ready Data Summary and Analytic Result Tables -Version: 2.0.4.9009 +Version: 2.0.4.9010 Authors@R: c( person("Daniel D.", "Sjoberg", , "danield.sjoberg@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-0862-2018")), diff --git a/NEWS.md b/NEWS.md index c824172c4..a9b329500 100644 --- a/NEWS.md +++ b/NEWS.md @@ -22,6 +22,10 @@ * The `add_ci.tbl_summary()` function now works with categorical variables that were summarized using `tbl_summary(percent = c('row', 'cell'))`. (#1929) +* Adding the `tbl_merge(merge_vars)` argument. This argument allows users to specify any merging columns providing much more flexibility when merging unlike tables. Additionally, columns selected by `cards::all_ard_groups()` have been added to the default merging columns, which provides the functionality for merging the results from `tbl_hierarchical()` and `tbl_hierarchical_count()`. (#1861) + + This does, however, introduce one change in behavior from the previous version of `tbl_merge()`. Previously, merging on a table with the same variable, but with a different label would be reconciled silently in the background and the first label would be used in the final table. While this may have been useful in a few edge cases, it largely was an unintuitive result. This update performs more straightforward merging and the results are more aligned with users' expectations. 
+ # gtsummary 2.0.4 ### New Features and Functions diff --git a/R/add_glance.R b/R/add_glance.R index b5f1bb483..7ae426e74 100644 --- a/R/add_glance.R +++ b/R/add_glance.R @@ -42,8 +42,8 @@ #' To re-order the rows with glance statistics on bottom, use the script below: #' #' ```r -#' tbl_merge(list(tbl1, tbl2)) %>% -#' modify_table_body(~.x %>% arrange(row_type == "glance_statistic")) +#' tbl_merge(list(tbl1, tbl2)) |> +#' modify_table_body(~.x |> dplyr::arrange(row_type == "glance_statistic")) #' ```` #' #' @examplesIf (identical(Sys.getenv("NOT_CRAN"), "true") || identical(Sys.getenv("IN_PKGDOWN"), "true")) && gtsummary:::is_pkg_installed("cardx") diff --git a/R/tbl_merge.R b/R/tbl_merge.R index 222a8c85b..c5833d121 100644 --- a/R/tbl_merge.R +++ b/R/tbl_merge.R @@ -11,6 +11,12 @@ #' strings are interpreted with `gt::md`. #' Must be same length as `tbls` argument. Default is `NULL`, and places #' a default spanning header. If `FALSE`, no header will be placed. +#' @param merge_vars (`character`)\cr +#' Column names that are used as the merge IDs. +#' The default is `NULL`, which merges on +#' `c(any_of(c("variable", "row_type", "var_label", "label")), cards::all_ard_groups())`. +#' Any column name included here that does not appear in all tables will +#' be removed. #' #' @author Daniel D. Sjoberg #' @export @@ -51,7 +57,7 @@ #' #' tbl_merge(tbls = list(t3, t4)) %>% #' modify_spanning_header(everything() ~ NA_character_) -tbl_merge <- function(tbls, tab_spanner = NULL) { +tbl_merge <- function(tbls, tab_spanner = NULL, merge_vars = NULL) { set_cli_abort_call() # input checks --------------------------------------------------------------- @@ -67,30 +73,41 @@ tbl_merge <- function(tbls, tab_spanner = NULL) { predicate = \(x) inherits(x, "gtsummary"), error_msg = "All objects in {.arg tbls} list must be class {.cls gtsummary}." 
) + check_class(merge_vars, cls = "character", allow_empty = TRUE) - # check all tbls have the merging columns - if (some(tbls, ~ any(!c("variable", "row_type", "var_label", "label") %in% names(.x$table_body)))) { + if (!is_empty(tab_spanner) && !isFALSE(tab_spanner) && !is.character(tab_spanner)) { cli::cli_abort( - "All objects in the {.arg tbls} list must have columns - {.val {c('variable', 'row_type', 'var_label', 'label')}} - in {.code .$table_body} for merging", + "The {.arg tab_spanner} argument must be {.code NULL}, {.val {FALSE}}, or class {.cls character}.", call = get_cli_abort_call() ) } - if (!is_empty(tab_spanner) && !isFALSE(tab_spanner) && !is.character(tab_spanner)) { + # setting the merging columns ------------------------------------------------ + if (is_empty(merge_vars)) { + merge_vars <- + dplyr::select(tbls[[1]]$table_body, + any_of(c("variable", "row_type", "var_label", "label")), + cards::all_ard_groups()) |> + names() + } + # merge columns will be those that appear in all tbls + merge_vars <- + map(tbls, ~names(.x$table_body)) |> + reduce(.f = intersect, .init = merge_vars) + + if (is_empty(merge_vars)) { cli::cli_abort( - "The {.arg tab_spanner} argument must be {.val {NULL}}, {.val {FALSE}}, or class {.cls character}.", + "The tables in the {.arg tbls} argument do not share any columns specified in {.arg merge_vars} argument and merge cannot be performed.", call = get_cli_abort_call() ) } tbls_length <- length(tbls) - # adding tab spanners if requested + # adding tab spanners if requested ------------------------------------------- if (!isFALSE(tab_spanner)) { # if tab spanner is null, default is Table 1, Table 2, etc.... 
- if (is.null(tab_spanner)) { + if (is_empty(tab_spanner)) { tab_spanner <- paste0(c("**Table "), seq_len(tbls_length), "**") } @@ -106,124 +123,63 @@ tbl_merge <- function(tbls, tab_spanner = NULL) { tbls <- map2( tbls, seq_along(tbls), - ~ modify_spanning_header( - .x, c( - everything(), - # TODO: Use of the "ci" column was deprecated in v2.0 and it can be removed from here in the future - -any_of(c("variable", "row_type", "var_label", "label", "ci")) - ) ~ tab_spanner[.y] - ) + ~ modify_spanning_header(.x, -all_of(merge_vars) ~ tab_spanner[.y]) ) } # merging tables ------------------------------------------------------------- - # nesting data by variable (one line per variable), and renaming columns with number suffix - nested_table <- map2( - tbls, seq_along(tbls), - function(x, y) { - # creating a column that is the variable label - dplyr::group_by(x$table_body, .data$variable) %>% - dplyr::mutate( - var_label = ifelse(.data$row_type == "label", .data$label, NA) - ) %>% - tidyr::fill("var_label", .direction = "downup") %>% - dplyr::ungroup() %>% - dplyr::rename_at( - vars(-c("variable", "row_type", "var_label", "label")), - ~ glue("{.}_{y}") - ) - } - ) - - # checking that merging rows are unique -------------------------------------- - nested_table %>% - some( - ~ nrow(.x) != - dplyr::select(.x, all_of(c("variable", "row_type", "var_label", "label"))) %>% - dplyr::distinct() %>% - nrow() - ) %>% - switch( - cli::cli_inform( - "The merging columns (variable name, variable label, row type, and label column) - are not unique and the merge may fail or result in a malformed table. - If you previously called {.fun tbl_stack} on your tables, - then merging with {.fun tbl_merge} before calling {.arg tbl_stack} may resolve the issue." 
- ) + # first renaming columns with index suffix + lst_table_body <- + map( + seq_along(tbls), + \(i) { + tbls[[i]]$table_body |> + dplyr::rename_with( + .fn = ~paste(., i, sep = "_"), + .cols = -all_of(merge_vars) + ) + } ) - # nesting results within variable - nested_table <- map( - nested_table, - ~ tidyr::nest(.x, data = -any_of(c("variable", "var_label"))) - ) - - # merging formatted objects together - merged_table <- - nested_table[[1]] %>% - dplyr::rename(table = "data") - - if (tbls_length > 1) { - # cycling through all tbls, merging results into a column tibble - for (i in 2:tbls_length) { - merged_table <- - merged_table %>% - dplyr::full_join( - nested_table[[i]], - by = c("variable", "var_label") - ) %>% - dplyr::mutate( - table = map2( - .data$table, .data$data, - function(table, data) { - if (is.null(table)) { - return(data) - } - if (is.null(data)) { - return(table) - } - dplyr::full_join(table, data, by = c("row_type", "label")) - } - ) - ) %>% - select(-c("data", "table"), "table") - } + # check that the merge variables are unique in all table bodies + if (some(lst_table_body, ~anyDuplicated(.x[merge_vars]) > 0L)) { + cli::cli_inform(c( + "The merging columns ({.val {merge_vars}}) do not uniquely identify rows for + each table in {.arg tbls}, and the merge may fail or result in a malformed table.", + "i" = "If you previously called {.fun tbl_stack} on your tables, + then merging with {.fun tbl_merge} before calling {.arg tbl_stack} may resolve the issue." 
+ )) } - # unnesting results from within variable column tibbles - ends_with_selectors <- - map(seq_len(tbls_length), ~ rlang::expr(ends_with(!!paste0("_", .x)))) + # now merge all the table bodies together table_body <- - merged_table %>% - tidyr::unnest("table") %>% - dplyr::select( - "variable", "var_label", "row_type", "label", - !!!ends_with_selectors, everything() - ) + lst_table_body |> + reduce(.f = dplyr::full_join, by = merge_vars) |> + dplyr::relocate(all_of(merge_vars), .before = 1L) # renaming columns in stylings and updating ---------------------------------- x <- .create_gtsummary_object(table_body = table_body, tbls = tbls, call_list = list(tbl_merge = match.call())) - x <- .tbl_merge_update_table_styling(x, tbls) + x <- .tbl_merge_update_table_styling(x = x, tbls = tbls, merge_vars = merge_vars) - # returning results + # returning results ---------------------------------------------------------- class(x) <- c("tbl_merge", "gtsummary") x } -.tbl_merge_update_table_styling <- function(x, tbls) { +.tbl_merge_update_table_styling <- function(x, tbls, merge_vars) { # update table_styling$header x$table_styling$header <- map2( tbls, seq_along(tbls), ~ .x$table_styling$header %>% - dplyr::filter(!(.data$column %in% c("label", "variable", "var_label", "row_type") & .y != 1)) %>% + dplyr::filter(!(.data$column %in% .env$merge_vars & .y != 1)) %>% dplyr::mutate( column = ifelse( - .data$column %in% c("label", "variable", "var_label", "row_type") & .y == 1, + .data$column %in% .env$merge_vars & .y == 1, .data$column, paste0(.data$column, "_", .y) ) @@ -249,7 +205,7 @@ tbl_merge <- function(tbls, tab_spanner = NULL) { if ("column" %in% names(style_updated)) { style_updated$column <- ifelse( - style_updated$column %in% c("label", "variable", "var_label", "row_type") | is.na(style_updated$column), + style_updated$column %in% merge_vars | is.na(style_updated$column), style_updated$column, paste0(style_updated$column, "_", i) ) %>% diff --git a/inst/WORDLIST 
b/inst/WORDLIST index d553a93aa..984e04b3e 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,3 +1,4 @@ +AGEGR ANCOVA ARD ARDs @@ -82,11 +83,14 @@ mis nevent ng nnet +ons pkgdown pre pvalue quosure quosures +reproducibility +reusability saddlepoint smd srvyr @@ -108,6 +112,7 @@ un unhidden unhide unicode +unintuitive univariable unstratified usethis diff --git a/man/add_glance.Rd b/man/add_glance.Rd index eee497fb0..3a9998775 100644 --- a/man/add_glance.Rd +++ b/man/add_glance.Rd @@ -80,8 +80,8 @@ When combining \code{add_glance_table()} with \code{tbl_merge()}, the ordering of the model terms and the glance statistics may become jumbled. To re-order the rows with glance statistics on bottom, use the script below: -\if{html}{\out{<div class="sourceCode r">}}\preformatted{tbl_merge(list(tbl1, tbl2)) \%>\% - modify_table_body(~.x \%>\% arrange(row_type == "glance_statistic")) +\if{html}{\out{<div class="sourceCode r">}}\preformatted{tbl_merge(list(tbl1, tbl2)) |> + modify_table_body(~.x |> dplyr::arrange(row_type == "glance_statistic")) }\if{html}{\out{</div>}} } diff --git a/man/tbl_merge.Rd b/man/tbl_merge.Rd index 046c52572..4eb7b9852 100644 --- a/man/tbl_merge.Rd +++ b/man/tbl_merge.Rd @@ -4,7 +4,7 @@ \alias{tbl_merge} \title{Merge tables} \usage{ -tbl_merge(tbls, tab_spanner = NULL) +tbl_merge(tbls, tab_spanner = NULL, merge_vars = NULL) } \arguments{ \item{tbls}{(\code{list})\cr @@ -16,6 +16,13 @@ Must be the same length as \code{tbls}. The strings are interpreted with \code{gt::md}. Must be same length as \code{tbls} argument. Default is \code{NULL}, and places a default spanning header. If \code{FALSE}, no header will be placed.} + +\item{merge_vars}{(\code{character})\cr +Column names that are used as the merge IDs. +The default is \code{NULL}, which merges on +\verb{c(any_of(c("variable", "row_type", "var_label", "label")), cards::all_ard_groups())}. +Any column name included here that does not appear in all tables will +be removed.} } \value{ A \code{'tbl_merge'} object diff --git a/tests/testthat/_snaps/tbl_merge.md b/tests/testthat/_snaps/tbl_merge.md index fc68ee94e..0a9769410 100644 --- a/tests/testthat/_snaps/tbl_merge.md +++ b/tests/testthat/_snaps/tbl_merge.md @@ -37,7 +37,7 @@ tbl_merge(tbls = list(t0, t1), tab_spanner = 1) Condition Error in `tbl_merge()`: - ! The `tab_spanner` argument must be , FALSE, or class <character>. + ! The `tab_spanner` argument must be `NULL`, FALSE, or class <character>. --- @@ -53,5 +53,5 @@ tbl_merge(list(tbl, tbl)) Condition Error in `tbl_merge()`: - ! All objects in the `tbls` list must have columns "variable", "row_type", "var_label", and "label" in `.$table_body` for merging + ! The tables in the `tbls` argument do not share any columns specified in `merge_vars` argument and merge cannot be performed. 
diff --git a/tests/testthat/test-tbl_merge.R b/tests/testthat/test-tbl_merge.R index f9191f54f..d73c4461d 100644 --- a/tests/testthat/test-tbl_merge.R +++ b/tests/testthat/test-tbl_merge.R @@ -171,7 +171,7 @@ test_that("tbl_merge works with more complex merge", { test_that("tbl_merge returns expected message when nonunique columns present", { expect_message( tbl_merge(list(tbl_stack(list(t1, t1)))), - "not unique and the merge may fail or result in a malformed table" + "do not uniquely identify rows for each table.*merge may fail or result in a malformed table" ) }) @@ -239,3 +239,63 @@ test_that("tbl_merge() merges mixed-type from .$table_styling$header$modify_* co expect_silent(tbl_merge(tbls = list(t1, t2))) expect_silent(tbl_merge(tbls = list(t3, t4))) }) + +test_that("tbl_merge(merge_vars)", { + # no errors when merging + expect_silent( + tbl <- + as_gtsummary(mtcars[duplicated(mtcars$mpg),c("mpg", "cyl")]) %>% + list(., .) |> + tbl_merge(merge_vars = "mpg") + ) + + # check the headers are correct + expect_equal( + tbl$table_styling$header, + dplyr::tribble( + ~column, ~hide, ~align, ~interpret_label, ~label, + "mpg", FALSE, "center", "gt::md", "mpg", + "cyl_1", FALSE, "center", "gt::md", "cyl", + "cyl_2", FALSE, "center", "gt::md", "cyl" + ) + ) + + # check the spanning headers are correct + expect_equal( + tbl$table_styling$spanning_header, + dplyr::tribble( + ~level, ~column, ~spanning_header, ~text_interpret, ~remove, + 1L, "cyl_2", "**Table 2**", "gt::md", FALSE, + 1L, "cyl_1", "**Table 1**", "gt::md", FALSE + ) + ) +}) + +test_that("tbl_merge() works with tbl_hierarchical()", { + # check that AE table can be merged + expect_silent( + tbl <- + cards::ADAE |> + dplyr::filter( + AESOC %in% unique(cards::ADAE$AESOC)[1:5], + AETERM %in% unique(cards::ADAE$AETERM)[1:5] + ) |> + tbl_hierarchical( + variables = c(AESOC, AETERM), + denominator = cards::ADSL, + id = USUBJID, + digits = everything() ~ list(p = 1), + overall_row = TRUE, + label = 
list(..ard_hierarchical_overall.. = "Any Adverse Event") + ) + ) + expect_equal( + list(tbl, tbl) |> + tbl_merge() |> + as.data.frame(col_labels = FALSE) |> + dplyr::pull("label"), + tbl |> + as.data.frame(col_labels = FALSE) |> + dplyr::pull("label") + ) +})