merge local branch 2427 with upstream

pharmaverse · Oct 2, 2024 · 1323f8c · 1323f8c
2 parents 678a46e + dd42a67
commit 1323f8c
Show file tree

Hide file tree

Showing 30 changed files with 1,163 additions and 354 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: admiral
 Title: ADaM in R Asset Library
-Version: 1.1.1.9010
+Version: 1.1.1.9011
 Authors@R: c(
     person("Ben", "Straub", , "[email protected]", role = c("aut", "cre")),
     person("Stefan", "Bundfuss", role = "aut",

diff --git a/NAMESPACE b/NAMESPACE
@@ -100,6 +100,7 @@ export(derive_var_trtdurd)
 export(derive_var_trtemfl)
 export(derive_vars_aage)
 export(derive_vars_atc)
+export(derive_vars_cat)
 export(derive_vars_computed)
 export(derive_vars_crit_flag)
 export(derive_vars_dt)

diff --git a/NEWS.md b/NEWS.md
@@ -2,6 +2,8 @@
 
 ## New Features
 
+- New function `derive_vars_cat()` for deriving pairs of variables or more, e.g. 
+`AVALCATx` & `AVALCAxN`. (#2480)
 - New function `derive_vars_crit_flag()` for deriving criterion flag variables
 (`CRITy`, `CRITyFL`, `CRITyFLN`). (#2468)
 

diff --git a/R/derive_vars_cat.R b/R/derive_vars_cat.R
@@ -0,0 +1,258 @@
+#' Derive Categorization Variables Like `AVALCATy` and `AVALCAyN`
+#' @param dataset
+#' `r roxygen_param_dataset(expected_vars = c("by_vars", "definition"))`
+#' @param definition List of expressions created by `exprs()`.
+#' Must be in rectangular format and specified using the same syntax as when creating
+#' a `tibble` using the `tribble()` function.
+#' The `definition` object will be converted to a `tibble` using `tribble()` inside this function.
+#'
+#' Must contain:
+#'  - the column `condition` which will be converted to a logical expression and
+#'  will be used on the `dataset` input.
+#'  - at least one additional column with the new column name and
+#'  the category value(s) used by the logical expression.
+#'  - the column specified in `by_vars` (if `by_vars` is specified)
+#'
+#' e.g. if `by_vars` is not specified:
+#'
+#' ```{r}
+#' #| eval: false
+#' exprs(~condition,   ~AVALCAT1, ~AVALCA1N,
+#'       AVAL >= 140, ">=140 cm",         1,
+#'       AVAL < 140,   "<140 cm",         2)
+#' ```
+#'
+#' e.g. if `by_vars` is specified as `exprs(VSTEST)`:
+#'
+#' ```{r}
+#' #| eval: false
+#' exprs(~VSTEST,   ~condition,  ~AVALCAT1, ~AVALCA1N,
+#'       "Height", AVAL >= 140, ">=140 cm",         1,
+#'       "Height",  AVAL < 140,  "<140 cm",         2)
+#' ```
+#'
+#' @param by_vars list of expressions with one element. `NULL` by default.
+#' Allows for specifying by groups, e.g. `exprs(PARAMCD)`.
+#' Variable must be present in both `dataset` and `definition`.
+#' The conditions in `definition` are applied only to those records that match `by_vars`.
+#' The categorization variables are set to `NA` for records
+#' not matching any of the by groups in `definition`.
+#'
+#'
+#' @details
+#' If conditions are overlapping, the row order of `definitions` must be carefully considered.
+#' The **first** match will determine the category.
+#' i.e. if
+#'
+#' `AVAL = 155`
+#'
+#' and the `definition` is:
+#'
+#' ```{r}
+#' #| eval: false
+#' definition <- exprs(
+#'   ~VSTEST,   ~condition,  ~AVALCAT1, ~AVALCA1N,
+#'   "Height",  AVAL > 170,  ">170 cm",         1,
+#'   "Height", AVAL <= 170, "<=170 cm",         2,
+#'   "Height", AVAL <= 160, "<=160 cm",         3
+#' )
+#' ```
+#' then `AVALCAT1` will be `"<=170 cm"`, as this is the first match for `AVAL`.
+#' If you specify:
+#'
+#' ```{r}
+#' #| eval: false
+#' definition <- exprs(
+#'   ~VSTEST,   ~condition,  ~AVALCAT1, ~AVALCA1N,
+#'   "Height", AVAL <= 160, "<=160 cm",         3,
+#'   "Height", AVAL <= 170, "<=170 cm",         2,
+#'   "Height",  AVAL > 170,  ">170 cm",         1
+#' )
+#' ```
+#'
+#' Then `AVAL <= 160` will lead to `AVALCAT1 == "<=160 cm"`,
+#' `AVAL` in-between `160` and `170` will lead to `AVALCAT1 == "<=170 cm"`,
+#' and `AVAL <= 170` will lead to `AVALCAT1 == ">170 cm"`.
+#'
+#' However, we suggest to be more explicit when defining the `condition`, to avoid overlap.
+#' In this case, the middle condition should be:
+#' `AVAL <= 170 & AVAL > 160`
+#'
+#' @return The input dataset with the new variables defined in `definition` added
+#' @family der_gen
+#' @keywords der_gen
+#' @export
+#'
+#' @examples
+#' library(dplyr)
+#' library(tibble)
+#'
+#' advs <- tibble::tribble(
+#'   ~USUBJID,       ~VSTEST,  ~AVAL,
+#'   "01-701-1015", "Height", 147.32,
+#'   "01-701-1015", "Weight",  53.98,
+#'   "01-701-1023", "Height", 162.56,
+#'   "01-701-1023", "Weight",     NA,
+#'   "01-701-1028", "Height",     NA,
+#'   "01-701-1028", "Weight",     NA,
+#'   "01-701-1033", "Height", 175.26,
+#'   "01-701-1033", "Weight",  88.45
+#' )
+#'
+#' definition <- exprs(
+#'   ~condition,                        ~AVALCAT1, ~AVALCA1N,  ~NEWCOL,
+#'   VSTEST == "Height" & AVAL > 160,   ">160 cm",         1, "extra1",
+#'   VSTEST == "Height" & AVAL <= 160, "<=160 cm",         2, "extra2"
+#' )
+#' derive_vars_cat(
+#'   dataset = advs,
+#'   definition = definition
+#' )
+#'
+#' # Using by_vars:
+#' definition2 <- exprs(
+#'   ~VSTEST,   ~condition,  ~AVALCAT1, ~AVALCA1N,
+#'   "Height",  AVAL > 160,  ">160 cm",         1,
+#'   "Height", AVAL <= 160, "<=160 cm",         2,
+#'   "Weight",   AVAL > 70,   ">70 kg",         1,
+#'   "Weight",  AVAL <= 70,  "<=70 kg",         2
+#' )
+#'
+#' derive_vars_cat(
+#'   dataset = advs,
+#'   definition = definition2,
+#'   by_vars = exprs(VSTEST)
+#' )
+#'
+#' # With three conditions:
+#' definition3 <- exprs(
+#'   ~VSTEST,                ~condition,  ~AVALCAT1, ~AVALCA1N,
+#'   "Height",               AVAL > 170,  ">170 cm",         1,
+#'   "Height", AVAL <= 170 & AVAL > 160, "<=170 cm",         2,
+#'   "Height",              AVAL <= 160, "<=160 cm",         3
+#' )
+#'
+#' derive_vars_cat(
+#'   dataset = advs,
+#'   definition = definition3,
+#'   by_vars = exprs(VSTEST)
+#' )
+#'
+#' # Let's derive both the MCRITyML and the MCRITyMN variables
+#' adlb <- tibble::tribble(
+#'   ~USUBJID,     ~PARAM, ~AVAL, ~AVALU,  ~ANRHI,
+#'   "01-701-1015", "ALT",   150,  "U/L",      40,
+#'   "01-701-1023", "ALT",    70,  "U/L",      40,
+#'   "01-701-1036", "ALT",   130,  "U/L",      40,
+#'   "01-701-1048", "ALT",    30,  "U/L",      40,
+#'   "01-701-1015", "AST",    50,  "U/L",      35
+#' )
+#'
+#' definition_mcrit <- exprs(
+#'   ~PARAM,                      ~condition,    ~MCRIT1ML, ~MCRIT1MN,
+#'   "ALT",                    AVAL <= ANRHI,    "<=ANRHI",         1,
+#'   "ALT", ANRHI < AVAL & AVAL <= 3 * ANRHI, ">1-3*ANRHI",         2,
+#'   "ALT",                 3 * ANRHI < AVAL,   ">3*ANRHI",         3
+#' )
+#'
+#' adlb %>%
+#'   derive_vars_cat(
+#'     definition = definition_mcrit,
+#'     by_vars = exprs(PARAM)
+#'   )
+derive_vars_cat <- function(dataset,
+                            definition,
+                            by_vars = NULL) {
+  assert_expr_list(definition)
+  assert_vars(by_vars, optional = TRUE)
+  if (length(by_vars) > 1) {
+    cli_abort("{.arg by_vars} must contain just one variable, e.g. {.code exprs(PARAMCD)}")
+  }
+
+  assert_data_frame(dataset,
+    required_vars = c(
+      admiraldev::extract_vars(definition) %>% unique(),
+      by_vars
+    )
+  )
+
+  # transform definition to tibble
+  names(definition) <- NULL
+  definition <- tryCatch(
+    {
+      tibble::tribble(!!!definition)
+    },
+    error = function(e) {
+      # Catch the error and append your own message
+      cli_abort(
+        c(
+          paste(
+            "Failed to convert {.arg definition} to {.cls tibble}.",
+            "{.arg definition} should be specified similarly to how you would",
+            "specify a {.cls tibble} using the {.fun tibble::tribble} function so it",
+            "can be converted to {.cls tibble} using {.fun tibble::tribble}."
+          ),
+          e$message
+        )
+      )
+    }
+  )
+  assert_data_frame(definition, required_vars = c(exprs(condition), by_vars))
+  if (!is.null(by_vars)) {
+    # add condition
+    definition <- definition %>%
+      mutate(
+        condition = extend_condition(as.character(condition),
+          as.character(by_vars),
+          is = !!sym(as.character(by_vars))
+        ) %>%
+          parse_exprs()
+      ) %>%
+      select(-by_vars[[1]])
+  }
+
+  # extract new variable names and conditions
+  new_col_names <- names(definition)[!names(definition) == "condition"]
+  condition <- definition[["condition"]]
+
+  # warn if new variables already exist
+  if (any(new_col_names %in% names(dataset))) {
+    cli_warn(paste(
+      "Column(s) in {.arg definition} already exist in {.arg dataset}.",
+      "Did you forget to specify {.arg by_vars},",
+      "or are you rerunning your code?"
+    ))
+  }
+
+  # (re)apply the function for each new variable name and iteratively derive the categories
+  new_dataset <- reduce(new_col_names, function(.data, col_name) {
+    # extract conditions
+    values <- definition[[col_name]]
+
+    .data %>%
+      mutate(!!sym(col_name) := eval(rlang::call2(
+        "case_when",
+        !!!map2(condition, values, ~ expr(!!.x ~ !!.y))
+      )))
+  }, .init = dataset)
+
+  return(new_dataset)
+}
+
+#' Extend a condition string by adding a new condition based on a variable and its value
+#'
+#' This internal helper function extends a condition string by appending a new condition
+#' that checks if a variable equals a specific value.
+#'
+#' @param cond A character string representing an existing condition.
+#' @param var A character string representing the name of the variable to check.
+#' @param is A character string representing the value the variable should be equal to.
+#'
+#' @return A character string representing the extended condition.
+#' @examples
+#' # Extend an existing condition to include a check for 'AGE == "30"'
+#' admiral:::extend_condition("SEX == 'M'", "AGE", "30")
+#' @keywords internal
+extend_condition <- function(cond, var, is) {
+  paste(cond, " & ", var, " == '", is, "'", sep = "")
+}