diff --git a/NEWS.md b/NEWS.md index 83e405a93..93e689861 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,6 +12,11 @@ ## Updates of Existing Functions +- The `keep_nas` argument of `derive_param_computed()` was enhanced such that it +is now possible to specify a list of variables for which `NA`s are acceptable. +I.e., records are added even if some of the specified variables are `NA`. +(#2510) + ## Breaking Changes - The following function arguments are entering the next phase of the deprecation process: (#2487) diff --git a/R/derive_param_computed.R b/R/derive_param_computed.R index 394b89820..dcc117bb6 100644 --- a/R/derive_param_computed.R +++ b/R/derive_param_computed.R @@ -117,13 +117,18 @@ #' @param keep_nas Keep observations with `NA`s #' #' If the argument is set to `TRUE`, observations are added even if some of -#' the values contributing to the computed value are `NA`. +#' the values contributing to the computed value are `NA` (see Example 1b). +#' +#' If the argument is set to a list of variables, observations are added even +#' if some of the specified variables are `NA` (see Example 1c). #' #' @details For each group (with respect to the variables specified for the #' `by_vars` parameter) an observation is added to the output dataset if the #' filtered input dataset (`dataset`) or the additional dataset #' (`dataset_add`) contains exactly one observation for each parameter code -#' specified for `parameters`. +#' specified for `parameters` and all contributing values like `AVAL.SYSBP` +#' are not `NA`. The `keep_nas` argument can be used to specify variables for which +#' `NA`s are acceptable. See also Examples 1b and 1c. #' #' For the new observations the variables specified for `set_values_to` are #' set to the provided values. 
The values of the other variables of the input @@ -145,17 +150,18 @@ #' #' # Example 1a: Derive MAP #' advs <- tribble( -#' ~USUBJID, ~PARAMCD, ~PARAM, ~AVAL, ~AVALU, ~VISIT, -#' "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", 51, "mmHg", "BASELINE", -#' "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", 50, "mmHg", "WEEK 2", -#' "01-701-1015", "SYSBP", "Systolic Blood Pressure (mmHg)", 121, "mmHg", "BASELINE", -#' "01-701-1015", "SYSBP", "Systolic Blood Pressure (mmHg)", 121, "mmHg", "WEEK 2", -#' "01-701-1028", "DIABP", "Diastolic Blood Pressure (mmHg)", 79, "mmHg", "BASELINE", -#' "01-701-1028", "DIABP", "Diastolic Blood Pressure (mmHg)", 80, "mmHg", "WEEK 2", -#' "01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", 130, "mmHg", "BASELINE", -#' "01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", 132, "mmHg", "WEEK 2" +#' ~USUBJID, ~PARAMCD, ~PARAM, ~AVAL, ~VISIT, +#' "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", 51, "BASELINE", +#' "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", 50, "WEEK 2", +#' "01-701-1015", "SYSBP", "Systolic Blood Pressure (mmHg)", 121, "BASELINE", +#' "01-701-1015", "SYSBP", "Systolic Blood Pressure (mmHg)", 121, "WEEK 2", +#' "01-701-1028", "DIABP", "Diastolic Blood Pressure (mmHg)", 79, "BASELINE", +#' "01-701-1028", "DIABP", "Diastolic Blood Pressure (mmHg)", 80, "WEEK 2", +#' "01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", 130, "BASELINE", +#' "01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", NA, "WEEK 2" #' ) %>% #' mutate( +#' AVALU = "mmHg", #' ADT = case_when( #' VISIT == "BASELINE" ~ as.Date("2024-01-10"), #' VISIT == "WEEK 2" ~ as.Date("2024-01-24") @@ -176,8 +182,8 @@ #' ) #' ) #' -#' # Example 1b: Using option `keep_nas = TRUE` to derive MAP in the case where some/all values -#' # of a variable used in the computation are missing +#' # Example 1b: Using option `keep_nas = TRUE` to derive MAP in the case where some/all +#' # values of a variable used in the 
computation are missing #' #' derive_param_computed( #' advs, @@ -194,6 +200,24 @@ #' keep_nas = TRUE #' ) #' +#' # Example 1c: Using option `keep_nas = exprs(ADTF)` to derive MAP in the case where +#' # some/all values of a variable used in the computation are missing but ignoring ADTF +#' +#' derive_param_computed( +#' advs, +#' by_vars = exprs(USUBJID, VISIT), +#' parameters = c("SYSBP", "DIABP"), +#' set_values_to = exprs( +#' AVAL = (AVAL.SYSBP + 2 * AVAL.DIABP) / 3, +#' PARAMCD = "MAP", +#' PARAM = "Mean Arterial Pressure (mmHg)", +#' AVALU = "mmHg", +#' ADT = ADT.SYSBP, +#' ADTF = ADTF.SYSBP +#' ), +#' keep_nas = exprs(ADTF) +#' ) +#' #' # Example 2: Derive BMI where height is measured only once #' advs <- tribble( #' ~USUBJID, ~PARAMCD, ~PARAM, ~AVAL, ~AVALU, ~VISIT, @@ -303,7 +327,17 @@ derive_param_computed <- function(dataset = NULL, if (!is.null(set_values_to$PARAMCD) && !is.null(dataset)) { assert_param_does_not_exist(dataset, set_values_to$PARAMCD) } - assert_logical_scalar(keep_nas) + if (typeof(keep_nas) == "list") { + assert_vars(keep_nas) + } else { + assert_logical_scalar( + keep_nas, + message = paste( + "Argument {.arg {arg_name}} must be either {.val {TRUE}}, {.val {FALSE}},", + "or a list of {.cls symbol}, but is {.obj_type_friendly {arg}}." 
+ ) + ) + } parameters <- assert_parameters_argument(parameters) constant_parameters <- assert_parameters_argument(constant_parameters, optional = TRUE) @@ -346,17 +380,26 @@ derive_param_computed <- function(dataset = NULL, hori_data <- inner_join(hori_data, hori_const_data, by = vars2chr(constant_by_vars)) } - # add analysis value (AVAL) and parameter variables, e.g., PARAMCD - if (!keep_nas) { - # keep only observations where all analysis values are available + if (isFALSE(keep_nas) || typeof(keep_nas) == "list") { + # keep only observations where the specified analysis values are available + if (typeof(keep_nas) == "list") { + na_vars <- discard( + analysis_vars_chr, + ~ str_detect(., paste0("^(", paste(vars2chr(keep_nas), collapse = "|"), ")\\.")) + ) + } else { + na_vars <- analysis_vars_chr + } hori_data <- filter( hori_data, !!!parse_exprs(map_chr( - analysis_vars_chr, + na_vars, ~ str_c("!is.na(", .x, ")") )) ) } + + # add computed variables like AVAL and constant variables like PARAMCD hori_data <- hori_data %>% process_set_values_to(set_values_to) %>% select(-all_of(analysis_vars_chr[str_detect(analysis_vars_chr, "\\.")])) diff --git a/_pkgdown.yml b/_pkgdown.yml index 771160852..c897b5dba 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,6 +1,7 @@ url: https://pharmaverse.github.io/admiral template: + math-rendering: mathjax bootstrap: 5 params: bootswatch: flatly diff --git a/man/derive_param_computed.Rd b/man/derive_param_computed.Rd index ded4df277..be423bc11 100644 --- a/man/derive_param_computed.Rd +++ b/man/derive_param_computed.Rd @@ -128,7 +128,10 @@ parameter code \code{"HGHT"}. \item{keep_nas}{Keep observations with \code{NA}s If the argument is set to \code{TRUE}, observations are added even if some of -the values contributing to the computed value are \code{NA}.} +the values contributing to the computed value are \code{NA} (see Example 1b). 
+ +If the argument is set to a list of variables, observations are added even +if some of the specified variables are \code{NA} (see Example 1c).} } \value{ The input dataset with the new parameter added. Note, a variable will only @@ -147,7 +150,9 @@ For each group (with respect to the variables specified for the \code{by_vars} parameter) an observation is added to the output dataset if the filtered input dataset (\code{dataset}) or the additional dataset (\code{dataset_add}) contains exactly one observation for each parameter code -specified for \code{parameters}. +specified for \code{parameters} and all contributing values like \code{AVAL.SYSBP} +are not \code{NA}. The \code{keep_nas} argument can be used to specify variables for which +\code{NA}s are acceptable. See also Examples 1b and 1c. For the new observations the variables specified for \code{set_values_to} are set to the provided values. The values of the other variables of the input @@ -160,17 +165,18 @@ library(lubridate) # Example 1a: Derive MAP advs <- tribble( - ~USUBJID, ~PARAMCD, ~PARAM, ~AVAL, ~AVALU, ~VISIT, - "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", 51, "mmHg", "BASELINE", - "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", 50, "mmHg", "WEEK 2", - "01-701-1015", "SYSBP", "Systolic Blood Pressure (mmHg)", 121, "mmHg", "BASELINE", - "01-701-1015", "SYSBP", "Systolic Blood Pressure (mmHg)", 121, "mmHg", "WEEK 2", - "01-701-1028", "DIABP", "Diastolic Blood Pressure (mmHg)", 79, "mmHg", "BASELINE", - "01-701-1028", "DIABP", "Diastolic Blood Pressure (mmHg)", 80, "mmHg", "WEEK 2", - "01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", 130, "mmHg", "BASELINE", - "01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", 132, "mmHg", "WEEK 2" + ~USUBJID, ~PARAMCD, ~PARAM, ~AVAL, ~VISIT, + "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", 51, "BASELINE", + "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", 50, "WEEK 2", + "01-701-1015", "SYSBP", "Systolic Blood Pressure 
(mmHg)", 121, "BASELINE", + "01-701-1015", "SYSBP", "Systolic Blood Pressure (mmHg)", 121, "WEEK 2", + "01-701-1028", "DIABP", "Diastolic Blood Pressure (mmHg)", 79, "BASELINE", + "01-701-1028", "DIABP", "Diastolic Blood Pressure (mmHg)", 80, "WEEK 2", + "01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", 130, "BASELINE", + "01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", NA, "WEEK 2" ) \%>\% mutate( + AVALU = "mmHg", ADT = case_when( VISIT == "BASELINE" ~ as.Date("2024-01-10"), VISIT == "WEEK 2" ~ as.Date("2024-01-24") @@ -191,8 +197,8 @@ derive_param_computed( ) ) -# Example 1b: Using option `keep_nas = TRUE` to derive MAP in the case where some/all values -# of a variable used in the computation are missing +# Example 1b: Using option `keep_nas = TRUE` to derive MAP in the case where some/all +# values of a variable used in the computation are missing derive_param_computed( advs, @@ -209,6 +215,24 @@ derive_param_computed( keep_nas = TRUE ) +# Example 1c: Using option `keep_nas = exprs(ADTF)` to derive MAP in the case where +# some/all values of a variable used in the computation are missing but ignoring ADTF + +derive_param_computed( + advs, + by_vars = exprs(USUBJID, VISIT), + parameters = c("SYSBP", "DIABP"), + set_values_to = exprs( + AVAL = (AVAL.SYSBP + 2 * AVAL.DIABP) / 3, + PARAMCD = "MAP", + PARAM = "Mean Arterial Pressure (mmHg)", + AVALU = "mmHg", + ADT = ADT.SYSBP, + ADTF = ADTF.SYSBP + ), + keep_nas = exprs(ADTF) +) + # Example 2: Derive BMI where height is measured only once advs <- tribble( ~USUBJID, ~PARAMCD, ~PARAM, ~AVAL, ~AVALU, ~VISIT, diff --git a/tests/testthat/_snaps/derive_param_computed.md b/tests/testthat/_snaps/derive_param_computed.md index 5bd7adb53..c45ea2b6a 100644 --- a/tests/testthat/_snaps/derive_param_computed.md +++ b/tests/testthat/_snaps/derive_param_computed.md @@ -1,4 +1,15 @@ -# assert_parameters_argument Test 10: error if argument is of wrong type +# derive_param_computed Test 11: error if keep_nas is 
invalid + + Code + derive_param_computed(advs, by_vars = exprs(USUBJID, AVISIT), parameters = c( + "SYSBP", "DIABP"), set_values_to = exprs(AVAL = (AVAL.SYSBP + 2 * + AVAL.DIABP) / 3, PARAMCD = "MAP", PARAM = "Mean Arterial Pressure (mmHg)"), + keep_nas = 3) + Condition + Error in `derive_param_computed()`: + ! Argument `keep_nas` must be either TRUE, FALSE, or a list of <symbol>, but is a number. + +# assert_parameters_argument Test 12: error if argument is of wrong type Code assert_parameters_argument(myparameters <- c(1, 2, 3)) @@ -6,7 +17,7 @@ Error in `assert_parameters_argument()`: ! `myparameters <- c(1, 2, 3)` must be a character vector or a list of expressions but it is a double vector. -# get_hori_data Test 11: error if variables with more than one dot +# get_hori_data Test 13: error if variables with more than one dot Code get_hori_data(input, parameters = exprs(SYSBP, DIABP), by_vars = exprs(USUBJID, diff --git a/tests/testthat/test-derive_param_computed.R b/tests/testthat/test-derive_param_computed.R index 21804a964..4f3fa1cf2 100644 --- a/tests/testthat/test-derive_param_computed.R +++ b/tests/testthat/test-derive_param_computed.R @@ -399,9 +399,101 @@ test_that("derive_param_computed Test 9: compute multiple variables, keep_nas", ) }) +## Test 10: specify variables for keep_nas ---- +test_that("derive_param_computed Test 10: specify variables for keep_nas", { + advs <- tibble::tribble( + ~USUBJID, ~PARAMCD, ~PARAM, ~AVAL, ~AVISIT, + "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", 51, "BASELINE", + "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", 50, "WEEK 2", + "01-701-1015", "SYSBP", "Systolic Blood Pressure (mmHg)", 121, "BASELINE", + "01-701-1015", "SYSBP", "Systolic Blood Pressure (mmHg)", 121, "WEEK 2", + "01-701-1028", "DIABP", "Diastolic Blood Pressure (mmHg)", 79, "BASELINE", + "01-701-1028", "DIABP", "Diastolic Blood Pressure (mmHg)", 80, "WEEK 2", + "01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", 130, "BASELINE", + 
"01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", NA, "WEEK 2" + ) %>% + mutate( + ADT = case_when( + AVISIT == "BASELINE" ~ as.Date("2024-01-10"), + AVISIT == "WEEK 2" ~ as.Date("2024-01-24") + ), + ADTF = NA_character_ + ) + + expected <- bind_rows( + advs, + tibble::tribble( + ~USUBJID, ~AVISIT, ~AVAL, + "01-701-1015", "BASELINE", (121 + 2*51)/3, + "01-701-1015", "WEEK 2", (121 + 2*50)/3, + "01-701-1028", "BASELINE", (130 + 2*79)/3 + ) %>% + mutate( + PARAMCD = "MAP", + PARAM = "Mean Arterial Pressure (mmHg)", + ADT = case_when( + AVISIT == "BASELINE" ~ as.Date("2024-01-10"), + AVISIT == "WEEK 2" ~ as.Date("2024-01-24") + ), + ADTF = NA_character_ + ) + ) + + actual <- derive_param_computed( + advs, + by_vars = exprs(USUBJID, AVISIT), + parameters = c("SYSBP", "DIABP"), + set_values_to = exprs( + AVAL = (AVAL.SYSBP + 2 * AVAL.DIABP) / 3, + PARAMCD = "MAP", + PARAM = "Mean Arterial Pressure (mmHg)", + ADT = ADT.SYSBP, + ADTF = ADTF.SYSBP + ), + keep_nas = exprs(ADTF) + ) + + expect_dfs_equal( + base = expected, + compare = actual, + keys = c("USUBJID", "PARAMCD", "AVISIT") + ) +}) + +## Test 11: error if keep_nas is invalid ---- +test_that("derive_param_computed Test 11: error if keep_nas is invalid", { + advs <- tibble::tribble( + ~USUBJID, ~PARAMCD, ~PARAM, ~AVAL, ~AVISIT, + "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", 51, "BASELINE", + "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", 50, "WEEK 2", + "01-701-1015", "SYSBP", "Systolic Blood Pressure (mmHg)", 121, "BASELINE", + "01-701-1015", "SYSBP", "Systolic Blood Pressure (mmHg)", 121, "WEEK 2", + "01-701-1028", "DIABP", "Diastolic Blood Pressure (mmHg)", 79, "BASELINE", + "01-701-1028", "DIABP", "Diastolic Blood Pressure (mmHg)", 80, "WEEK 2", + "01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", 130, "BASELINE", + "01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", NA, "WEEK 2" + ) + + expect_snapshot( + derive_param_computed( + advs, + by_vars = exprs(USUBJID, 
AVISIT), + parameters = c("SYSBP", "DIABP"), + set_values_to = exprs( + AVAL = (AVAL.SYSBP + 2 * AVAL.DIABP) / 3, + PARAMCD = "MAP", + PARAM = "Mean Arterial Pressure (mmHg)" + ), + keep_nas = 3 + ), + error = TRUE + ) +} +) + # assert_parameters_argument ---- -## Test 10: error if argument is of wrong type ---- -test_that("assert_parameters_argument Test 10: error if argument is of wrong type", { +## Test 12: error if argument is of wrong type ---- +test_that("assert_parameters_argument Test 12: error if argument is of wrong type", { expect_snapshot( assert_parameters_argument(myparameters <- c(1, 2, 3)), error = TRUE @@ -409,8 +501,8 @@ test_that("assert_parameters_argument Test 10: error if argument is of wrong typ }) # get_hori_data ---- -## Test 11: error if variables with more than one dot ---- -test_that("get_hori_data Test 11: error if variables with more than one dot", { +## Test 13: error if variables with more than one dot ---- +test_that("get_hori_data Test 13: error if variables with more than one dot", { input <- tibble::tribble( ~USUBJID, ~PARAMCD, ~PARAM, ~AVAL, ~AVALU, ~VISIT, "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", 51, "mmHg", "BASELINE",