Adds denom argument to count s_* functions (#1326)

# Pull Request

Fixes #1325

Adds `denom` to `s_count_occurrences_by_grade()` as well as `s_count_cumulative()` and `s_count_missed_doses()`.

Downstream changes:
- [ ] insightsengineering/scda.test#155
- [ ] insightsengineering/tlg-catalog#276

---------

Co-authored-by: shajoezhu <[email protected]>
Co-authored-by: 27856297+dependabot-preview[bot]@users.noreply.github.com <27856297+dependabot-preview[bot]@users.noreply.github.com>
insightsengineering · Oct 24, 2024 · d1609a6 · d1609a6
1 parent de68073
commit d1609a6
Show file tree

Hide file tree

Showing 29 changed files with 555 additions and 272 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,10 @@
 # tern 0.9.6.9007
 
+### Enhancements
+* Added the `denom` parameter to `s_count_cumulative()`, `s_count_missed_doses()`, and `s_count_occurrences_by_grade()`.
+* Added `"N_row"` as an optional input to `denom` in `s_count_occurrences()`.
+* Refactored `a_count_occurrences_by_grade()` to no longer use `make_afun()`.
+
 ### Bug Fixes
 * Fixed bug in `a_summary()` causing non-unique `row_name` values to occur when multiple statistics are selected for count variables.
 
@@ -15,7 +20,7 @@
 * Refactored `estimate_incidence_rate` to work as both an analyze function and a summarize function, controlled by the added `summarize` parameter. When `summarize = TRUE`, labels can be fine-tuned via the new `label_fmt` argument to the same function.
 * Added `fraction` statistic to the `analyze_var_count` method group.
 * Improved `summarize_glm_count()` documentation and all its associated functions to better describe the results and the functions' purpose.
-* Added `method` argument to `s_odds_ratio()` and `estimate_odds_ratio()` to control whether exact or approximate conditional likelihood calculations are used. 
+* Added `method` argument to `s_odds_ratio()` and `estimate_odds_ratio()` to control whether exact or approximate conditional likelihood calculations are used.
 
 ### Bug Fixes
 * Added defaults for `d_count_cumulative` parameters as described in the documentation.
@@ -72,7 +77,7 @@
 ### Miscellaneous
 * Added function `expect_snapshot_ggplot` to test setup file to process plot snapshot tests and allow plot dimensions to be set.
 * Adapted to argument renames introduced in `ggplot2` 3.5.0.
-* Renamed `individual_patient_plot.R` to `g_ipp.R`. 
+* Renamed `individual_patient_plot.R` to `g_ipp.R`.
 * Removed all instances of deprecated parameters `time_unit_input`, `time_unit_output`, `na_level` and `indent_mod`.
 * Removed deprecated functions `summarize_vars`, `control_summarize_vars`, `a_compare`, `create_afun_summary`, `create_afun_compare`, and `summary_custom`.
 * Removed `vdiffr` package from Suggests in DESCRIPTION file.

diff --git a/R/analyze_variables.R b/R/analyze_variables.R
@@ -238,11 +238,6 @@ s_summary.numeric <- function(x,
 
 #' @describeIn analyze_variables Method for `factor` class.
 #'
-#' @param denom (`string`)\cr choice of denominator for factor proportions. Options are:
-#'   * `n`: number of values in this row and column intersection.
-#'   * `N_row`: total number of values in this row across columns.
-#'   * `N_col`: total number of values in this column across rows.
-#'
 #' @return
 #'   * If `x` is of class `factor` or converted from `character`, returns a `list` with named `numeric` items:
 #'     * `n`: The [length()] of `x`.
@@ -283,12 +278,11 @@ s_summary.numeric <- function(x,
 #' @export
 s_summary.factor <- function(x,
                              na.rm = TRUE, # nolint
-                             denom = c("n", "N_row", "N_col"),
+                             denom = c("n", "N_col", "N_row"),
                              .N_row, # nolint
                              .N_col, # nolint
                              ...) {
   assert_valid_factor(x)
-  denom <- match.arg(denom)
 
   if (na.rm) {
     x <- x[!is.na(x)] %>% fct_discard("<Missing>")
@@ -301,20 +295,23 @@ s_summary.factor <- function(x,
   y$n <- length(x)
 
   y$count <- as.list(table(x, useNA = "ifany"))
-  dn <- switch(denom,
-    n = length(x),
-    N_row = .N_row,
-    N_col = .N_col
-  )
+
+  denom <- match.arg(denom) %>%
+    switch(
+      n = length(x),
+      N_row = .N_row,
+      N_col = .N_col
+    )
+
   y$count_fraction <- lapply(
     y$count,
     function(x) {
-      c(x, ifelse(dn > 0, x / dn, 0))
+      c(x, ifelse(denom > 0, x / denom, 0))
     }
   )
   y$fraction <- lapply(
     y$count,
-    function(count) c("num" = count, "denom" = dn)
+    function(count) c("num" = count, "denom" = denom)
   )
 
   y$n_blq <- sum(grepl("BLQ|LTR|<[1-9]|<PCLLOQ", x))
@@ -346,7 +343,7 @@ s_summary.factor <- function(x,
 #' @export
 s_summary.character <- function(x,
                                 na.rm = TRUE, # nolint
-                                denom = c("n", "N_row", "N_col"),
+                                denom = c("n", "N_col", "N_row"),
                                 .N_row, # nolint
                                 .N_col, # nolint
                                 .var,
@@ -370,11 +367,6 @@ s_summary.character <- function(x,
 
 #' @describeIn analyze_variables Method for `logical` class.
 #'
-#' @param denom (`string`)\cr choice of denominator for proportion. Options are:
-#'   * `n`: number of values in this row and column intersection.
-#'   * `N_row`: total number of values in this row across columns.
-#'   * `N_col`: total number of values in this column across rows.
-#'
 #' @return
 #'   * If `x` is of class `logical`, returns a `list` with named `numeric` items:
 #'     * `n`: The [length()] of `x` (possibly after removing `NA`s).
@@ -406,22 +398,22 @@ s_summary.character <- function(x,
 #' @export
 s_summary.logical <- function(x,
                               na.rm = TRUE, # nolint
-                              denom = c("n", "N_row", "N_col"),
+                              denom = c("n", "N_col", "N_row"),
                               .N_row, # nolint
                               .N_col, # nolint
                               ...) {
-  denom <- match.arg(denom)
   if (na.rm) x <- x[!is.na(x)]
   y <- list()
   y$n <- length(x)
   count <- sum(x, na.rm = TRUE)
-  dn <- switch(denom,
-    n = length(x),
-    N_row = .N_row,
-    N_col = .N_col
-  )
+  denom <- match.arg(denom) %>%
+    switch(
+      n = length(x),
+      N_row = .N_row,
+      N_col = .N_col
+    )
   y$count <- count
-  y$count_fraction <- c(count, ifelse(dn > 0, count / dn, 0))
+  y$count_fraction <- c(count, ifelse(denom > 0, count / denom, 0))
   y$n_blq <- 0L
   y
 }

diff --git a/R/argument_convention.R b/R/argument_convention.R
@@ -30,6 +30,10 @@
 #' @param col_by (`factor`)\cr defining column groups.
 #' @param conf_level (`proportion`)\cr confidence level of the interval.
 #' @param data (`data.frame`)\cr the dataset containing the variables to summarize.
+#' @param denom (`string`)\cr choice of denominator for proportion. Options are:
+#'   * `n`: number of values in this row and column intersection.
+#'   * `N_row`: total number of values in this row across columns.
+#'   * `N_col`: total number of values in this column across rows.
 #' @param df (`data.frame`)\cr data set containing all analysis variables.
 #' @param groups_lists (named `list` of `list`)\cr optionally contains for each `subgroups` variable a
 #'   list, which specifies the new group levels via the names and the

diff --git a/R/count_cumulative.R b/R/count_cumulative.R
@@ -78,7 +78,10 @@ h_count_cumulative <- function(x,
     length(x[is_keep & x > threshold])
   }
 
-  result <- c(count = count, fraction = count / .N_col)
+  result <- c(
+    count = count,
+    fraction = if (count == 0 && .N_col == 0) 0 else count / .N_col
+  )
   result
 }
 
@@ -112,11 +115,20 @@ s_count_cumulative <- function(x,
                                lower_tail = TRUE,
                                include_eq = TRUE,
                                .N_col, # nolint
+                               .N_row, # nolint
+                               denom = c("N_col", "n", "N_row"),
                                ...) {
   checkmate::assert_numeric(thresholds, min.len = 1, any.missing = FALSE)
 
+  denom <- match.arg(denom) %>%
+    switch(
+      n = length(x),
+      N_row = .N_row,
+      N_col = .N_col
+    )
+
   count_fraction_list <- Map(function(thres) {
-    result <- h_count_cumulative(x, thres, lower_tail, include_eq, .N_col = .N_col, ...)
+    result <- h_count_cumulative(x, thres, lower_tail, include_eq, .N_col = denom, ...)
     label <- d_count_cumulative(thres, lower_tail, include_eq)
     formatters::with_label(result, label)
   }, thresholds)

diff --git a/R/count_missed_doses.R b/R/count_missed_doses.R
@@ -58,13 +58,17 @@ d_count_missed_doses <- function(thresholds) {
 #' @keywords internal
 s_count_missed_doses <- function(x,
                                  thresholds,
-                                 .N_col) { # nolint
+                                 .N_col, # nolint
+                                 .N_row, # nolint
+                                 denom = c("N_col", "n", "N_row")) {
   stat <- s_count_cumulative(
     x = x,
     thresholds = thresholds,
     lower_tail = FALSE,
     include_eq = TRUE,
-    .N_col = .N_col
+    .N_col = .N_col,
+    .N_row = .N_row,
+    denom = denom
   )
   labels <- d_count_missed_doses(thresholds)
   for (i in seq_along(stat$count_fraction)) {

diff --git a/R/count_occurrences.R b/R/count_occurrences.R
@@ -51,9 +51,10 @@ NULL
 #' @describeIn count_occurrences Statistics function which counts number of patients that report an
 #' occurrence.
 #'
-#' @param denom (`string`)\cr choice of denominator for patient proportions. Can be:
-#'   - `N_col`: total number of patients in this column across rows
-#'   - `n`: number of patients with any occurrences
+#' @param denom (`string`)\cr choice of denominator for proportion. Options are:
+#'   * `N_col`: total number of patients in this column across rows.
+#'   * `n`: number of patients with any occurrences.
+#'   * `N_row`: total number of patients in this row across columns.
 #'
 #' @return
 #' * `s_count_occurrences()` returns a list with:
@@ -66,15 +67,17 @@ NULL
 #' s_count_occurrences(
 #'   df,
 #'   .N_col = 4L,
+#'   .N_row = 4L,
 #'   .df_row = df,
 #'   .var = "MHDECOD",
 #'   id = "USUBJID"
 #' )
 #'
 #' @export
 s_count_occurrences <- function(df,
-                                denom = c("N_col", "n"),
+                                denom = c("N_col", "n", "N_row"),
                                 .N_col, # nolint
+                                .N_row, # nolint
                                 .df_row,
                                 drop = TRUE,
                                 .var = "MHDECOD",
@@ -84,7 +87,6 @@ s_count_occurrences <- function(df,
   checkmate::assert_count(.N_col)
   checkmate::assert_multi_class(df[[.var]], classes = c("factor", "character"))
   checkmate::assert_multi_class(df[[id]], classes = c("factor", "character"))
-  denom <- match.arg(denom)
 
   occurrences <- if (drop) {
     # Note that we don't try to preserve original level order here since a) that would require
@@ -101,10 +103,12 @@ s_count_occurrences <- function(df,
     df[[.var]]
   }
   ids <- factor(df[[id]])
-  dn <- switch(denom,
-    n = nlevels(ids),
-    N_col = .N_col
-  )
+  denom <- match.arg(denom) %>%
+    switch(
+      n = nlevels(ids),
+      N_row = .N_row,
+      N_col = .N_col
+    )
   has_occurrence_per_id <- table(occurrences, ids) > 0
   n_ids_per_occurrence <- as.list(rowSums(has_occurrence_per_id))
   list(
@@ -118,12 +122,12 @@ s_count_occurrences <- function(df,
           c(i, i / denom)
         }
       },
-      denom = dn
+      denom = denom
     ),
     fraction = lapply(
       n_ids_per_occurrence,
       function(i, denom) c("num" = i, "denom" = denom),
-      denom = dn
+      denom = denom
     )
   )
 }
@@ -147,9 +151,10 @@ s_count_occurrences <- function(df,
 a_count_occurrences <- function(df,
                                 labelstr = "",
                                 id = "USUBJID",
-                                denom = c("N_col", "n"),
+                                denom = c("N_col", "n", "N_row"),
                                 drop = TRUE,
                                 .N_col, # nolint
+                                .N_row, # nolint
                                 .var = NULL,
                                 .df_row = NULL,
                                 .stats = NULL,
@@ -159,7 +164,7 @@ a_count_occurrences <- function(df,
                                 na_str = default_na_str()) {
   denom <- match.arg(denom)
   x_stats <- s_count_occurrences(
-    df = df, denom = denom, .N_col = .N_col, .df_row = .df_row, drop = drop, .var = .var, id = id
+    df = df, denom = denom, .N_col = .N_col, .N_row = .N_row, .df_row = .df_row, drop = drop, .var = .var, id = id
   )
   if (is.null(unlist(x_stats))) {
     return(NULL)