Add ard_categorical_max() (#244)

**What changes are proposed in this pull request?** * Added function `ard_categorical_max()` to calculate categorical occurrence rates by maximum level per unique ID. (#240) Closes #240 -------------------------------------------------------------------------------- Pre-review Checklist (if item does not apply, mark it as complete) - [x] **All** GitHub Action workflows pass with a ✅ - [x] PR branch has pulled the most recent updates from master branch: `usethis::pr_merge_main()` - [x] If a bug was fixed, a unit test was added. - [x] If a new `ard_*()` function was added, it passes the ARD structural checks from `cards::check_ard_structure()`. - [x] If a new `ard_*()` function was added, `set_cli_abort_call()` has been set. - [x] If a new `ard_*()` function was added and it depends on another package (such as `broom`), `is_pkg_installed("broom")` has been set in the function call and the following added to the roxygen comments: `@examplesIf do.call(asNamespace("cardx")$is_pkg_installed, list(pkg = "broom"))` - [x] Code coverage is suitable for any new functions/features (generally, 100% coverage for new code): `devtools::test_coverage()` Reviewer Checklist (if item does not apply, mark it as complete) - [ ] If a bug was fixed, a unit test was added. - [ ] Code coverage is suitable for any new functions/features: `devtools::test_coverage()` When the branch is ready to be merged: - [ ] Update `NEWS.md` with the changes from this pull request under the heading "`# cardx (development version)`". If there is an issue associated with the pull request, reference it in parentheses at the end of the update (see `NEWS.md` for examples). - [ ] **All** GitHub Action workflows pass with a ✅ - [ ] Approve Pull Request - [ ] Merge the PR. Please use "Squash and merge" or "Rebase and merge". --------- Co-authored-by: Daniel Sjoberg <[email protected]>
insightsengineering · Jan 15, 2025 · 63391c0 · 63391c0
1 parent a9235b8
commit 63391c0
Show file tree

Hide file tree

Showing 7 changed files with 638 additions and 0 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -26,6 +26,7 @@ export(ard_car_anova)
 export(ard_car_vif)
 export(ard_categorical)
 export(ard_categorical_ci)
+export(ard_categorical_max)
 export(ard_continuous)
 export(ard_continuous_ci)
 export(ard_dichotomous)

diff --git a/NEWS.md b/NEWS.md
@@ -6,6 +6,8 @@
 
 * Fixed a bug in `ard_survival_survfit()` causing an error when "=" character is present in stratification variable level labels. (#252)
 
+* Added function `ard_categorical_max()` to calculate categorical occurrence rates by maximum level per unique ID. (#240)
+
 # cardx 0.2.2
 
 * Added a `data.frame` method to `ard_survival_survfit()`.

diff --git a/R/ard_categorical_max.R b/R/ard_categorical_max.R
@@ -0,0 +1,108 @@
+#' ARD to Calculate Categorical Occurrence Rates by Maximum Level Per Unique ID
+#'
+#' Function calculates categorical variable level occurrence rates by maximum level per unique ID.
+#' Each variable in `variables` is evaluated independently and then results for all variables are stacked.
+#' Only the highest-ordered level will be counted for each unique ID.
+#' Unordered, non-numeric variables will be converted to factor and the default level order used for ordering.
+#'
+#' @inheritParams cards::ard_categorical
+#' @inheritParams cards::ard_stack
+#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
+#'   The categorical variables for which occurrence rates per unique ID (by maximum level) will be calculated.
+#' @param id ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
+#'   Column(s) in `data` identifying each unique ID. For every variable, only the row holding the
+#'   highest-ordered level per unique ID (within each `by` group) is used to calculate occurrence rates.
+#' @param denominator (`data.frame`, `integer`)\cr
+#'   An optional argument to change the denominator used for `"N"` and `"p"` statistic calculations.
+#'   Defaults to `NULL`, in which case `dplyr::distinct(data, dplyr::pick(all_of(c(id, by))))` is used for these
+#'   calculations. See [cards::ard_categorical()] for more details on specifying denominators.
+#' @param quiet (scalar `logical`)\cr
+#'   Logical indicating whether to suppress additional messaging. Default is `FALSE`.
+#'
+#' @return an ARD data frame of class 'card'
+#' @name ard_categorical_max
+#'
+#' @examples
+#' # Occurrence Rates by Max Level (Highest Severity) --------------------------
+#' ard_categorical_max(
+#'   cards::ADAE,
+#'   variables = c(AESER, AESEV),
+#'   id = USUBJID,
+#'   by = TRTA,
+#'   denominator = cards::ADSL |> dplyr::rename(TRTA = ARM)
+#' )
+NULL
+
+#' @rdname ard_categorical_max
+#' @export
+ard_categorical_max <- function(data,
+                                variables,
+                                id,
+                                by = dplyr::group_vars(data),
+                                statistic = everything() ~ c("n", "p", "N"),
+                                denominator = NULL,
+                                fmt_fn = NULL,
+                                stat_label = everything() ~ cards::default_stat_labels(),
+                                quiet = FALSE,
+                                ...) {
+  # NOTE: `...` is accepted but is not referenced anywhere in this function body.
+  set_cli_abort_call()
+
+  # check inputs ---------------------------------------------------------------
+  check_not_missing(data)
+  check_not_missing(variables)
+  check_not_missing(id)
+  # after this call `variables`, `id`, and `by` are used below as vectors of column names
+  cards::process_selectors(data, variables = {{ variables }}, id = {{ id }}, by = {{ by }})
+  data <- dplyr::ungroup(data)
+
+  # check the id argument is not empty
+  if (is_empty(id)) {
+    cli::cli_abort("Argument {.arg id} cannot be empty.", call = get_cli_abort_call())
+  }
+
+  # return empty ARD if no variables selected ----------------------------------
+  if (is_empty(variables)) {
+    return(dplyr::tibble() |> cards::as_card())
+  }
+
+  # calculate rates on the max-level row per ID, one variable at a time --------
+  lst_results <- lapply(
+    variables,
+    function(x) {
+      ard_categorical(
+        data = data |>
+          # sort with NA first so a missing value is never selected as the
+          # "maximum" level for an ID that also has an observed level
+          arrange_using_order(c(id, by, x), na.last = FALSE) |>
+          # the last row within each id/by combination holds the highest-ordered level
+          dplyr::slice_tail(n = 1L, by = all_of(c(id, by))),
+        variables = all_of(x),
+        by = all_of(by),
+        statistic = statistic,
+        denominator = denominator,
+        fmt_fn = fmt_fn,
+        stat_label = stat_label
+      )
+    }
+  )
+
+  # print default order of variable levels -------------------------------------
+  # the level lookup is skipped entirely when messaging is suppressed
+  if (!quiet) {
+    for (v in variables) {
+      lvls <- .unique_and_sorted(data[[v]])
+      vec <- cli::cli_vec(
+        lvls,
+        style = list("vec-sep" = " < ", "vec-sep2" = " < ", "vec-last" = " < ", "vec-trunc" = 3)
+      )
+      cli::cli_inform("{.var {v}}: {.val {vec}}")
+    }
+  }
+
+  # combine results ------------------------------------------------------------
+  result <- lst_results |>
+    dplyr::bind_rows() |>
+    dplyr::mutate(context = "categorical_max") |>
+    cards::tidy_ard_column_order() |>
+    cards::tidy_ard_row_order()
+
+  # return final result --------------------------------------------------------
+  result
+}
+
+# internal function copied from cards (extended here with the `na.last` argument)
+# like `dplyr::arrange()`, but uses base R's `order()` to keep consistency in some edge cases
+#   data:    data frame to sort
+#   columns: character vector of column names to sort by, in priority order
+#   na.last: passed to `order()`; the default `TRUE` sorts NA last, `FALSE` sorts NA first
+arrange_using_order <- function(data, columns, na.last = TRUE) {
+  # `!!na.last` injects the value so a data column named `na.last` cannot mask the argument
+  inject(data[with(data, order(!!!syms(columns), na.last = !!na.last)), ])
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -88,6 +88,7 @@ reference:
       - ard_categorical_ci.data.frame
       - ard_regression
       - ard_regression_basic
+      - ard_categorical_max
 
   - title: "Helpers"
   - contents:

diff --git a/man/ard_categorical_max.Rd b/man/ard_categorical_max.Rd