diff --git a/.github/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md index 24aa0a3cd..8c898b175 100644 --- a/.github/CODE_OF_CONDUCT.md +++ b/.github/CODE_OF_CONDUCT.md @@ -21,5 +21,5 @@ Instances of abusive, harassing, or otherwise unacceptable behavior may be repor opening an issue or contacting one or more of the project maintainers. This Code of Conduct is adapted from the Contributor Covenant -(http://contributor-covenant.org), version 1.0.0, available at -http://contributor-covenant.org/version/1/0/0/ +(http://www.contributor-covenant.org), version 1.0.0, available at +http://www.contributor-covenant.org/version/1/0/0/ diff --git a/NEWS.md b/NEWS.md index 83e405a93..93e689861 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,6 +12,11 @@ ## Updates of Existing Functions +- The `keep_nas` argument of `derive_param_computed()` was enhanced such that it +is now possible to specify a list of variables for which `NA`s are acceptable. +I.e., records are added even if some of the specified variables are `NA`. +(#2510) + ## Breaking Changes - The following function arguments are entering the next phase of the deprecation process: (#2487) diff --git a/R/derive_param_computed.R b/R/derive_param_computed.R index 394b89820..d0a0147ed 100644 --- a/R/derive_param_computed.R +++ b/R/derive_param_computed.R @@ -117,13 +117,21 @@ #' @param keep_nas Keep observations with `NA`s #' #' If the argument is set to `TRUE`, observations are added even if some of -#' the values contributing to the computed value are `NA`. +#' the values contributing to the computed value are `NA` (see Example 1b). +#' +#' If the argument is set to a list of variables, observations are added even +#' if some of specified variables are `NA` (see Example 1c). +#' +#' *Permitted Values:* `TRUE`, `FALSE`, or a list of variables created by +#' `exprs()` e.g. 
`exprs(ADTF, ATMF)` #' #' @details For each group (with respect to the variables specified for the #' `by_vars` parameter) an observation is added to the output dataset if the #' filtered input dataset (`dataset`) or the additional dataset #' (`dataset_add`) contains exactly one observation for each parameter code -#' specified for `parameters`. +#' specified for `parameters` and all contributing values like `AVAL.SYSBP` +#' are not `NA`. The `keep_nas` can be used to specify variables for which +#' `NA`s are acceptable. See also Example 1b and 1c. #' #' For the new observations the variables specified for `set_values_to` are #' set to the provided values. The values of the other variables of the input @@ -145,17 +153,18 @@ #' #' # Example 1a: Derive MAP #' advs <- tribble( -#' ~USUBJID, ~PARAMCD, ~PARAM, ~AVAL, ~AVALU, ~VISIT, -#' "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", 51, "mmHg", "BASELINE", -#' "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", 50, "mmHg", "WEEK 2", -#' "01-701-1015", "SYSBP", "Systolic Blood Pressure (mmHg)", 121, "mmHg", "BASELINE", -#' "01-701-1015", "SYSBP", "Systolic Blood Pressure (mmHg)", 121, "mmHg", "WEEK 2", -#' "01-701-1028", "DIABP", "Diastolic Blood Pressure (mmHg)", 79, "mmHg", "BASELINE", -#' "01-701-1028", "DIABP", "Diastolic Blood Pressure (mmHg)", 80, "mmHg", "WEEK 2", -#' "01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", 130, "mmHg", "BASELINE", -#' "01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", 132, "mmHg", "WEEK 2" +#' ~USUBJID, ~PARAMCD, ~PARAM, ~AVAL, ~VISIT, +#' "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", 51, "BASELINE", +#' "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", 50, "WEEK 2", +#' "01-701-1015", "SYSBP", "Systolic Blood Pressure (mmHg)", 121, "BASELINE", +#' "01-701-1015", "SYSBP", "Systolic Blood Pressure (mmHg)", 121, "WEEK 2", +#' "01-701-1028", "DIABP", "Diastolic Blood Pressure (mmHg)", 79, "BASELINE", +#' "01-701-1028", "DIABP", 
"Diastolic Blood Pressure (mmHg)", 80, "WEEK 2", +#' "01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", 130, "BASELINE", +#' "01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", NA, "WEEK 2" #' ) %>% #' mutate( +#' AVALU = "mmHg", #' ADT = case_when( #' VISIT == "BASELINE" ~ as.Date("2024-01-10"), #' VISIT == "WEEK 2" ~ as.Date("2024-01-24") @@ -176,8 +185,8 @@ #' ) #' ) #' -#' # Example 1b: Using option `keep_nas = TRUE` to derive MAP in the case where some/all values -#' # of a variable used in the computation are missing +#' # Example 1b: Using option `keep_nas = TRUE` to derive MAP in the case where some/all +#' # values of a variable used in the computation are missing #' #' derive_param_computed( #' advs, @@ -194,6 +203,24 @@ #' keep_nas = TRUE #' ) #' +#' # Example 1c: Using option `keep_nas = exprs(ADTF)` to derive MAP in the case where +#' # some/all values of a variable used in the computation are missing but ignoring ADTF +#' +#' derive_param_computed( +#' advs, +#' by_vars = exprs(USUBJID, VISIT), +#' parameters = c("SYSBP", "DIABP"), +#' set_values_to = exprs( +#' AVAL = (AVAL.SYSBP + 2 * AVAL.DIABP) / 3, +#' PARAMCD = "MAP", +#' PARAM = "Mean Arterial Pressure (mmHg)", +#' AVALU = "mmHg", +#' ADT = ADT.SYSBP, +#' ADTF = ADTF.SYSBP +#' ), +#' keep_nas = exprs(ADTF) +#' ) +#' #' # Example 2: Derive BMI where height is measured only once #' advs <- tribble( #' ~USUBJID, ~PARAMCD, ~PARAM, ~AVAL, ~AVALU, ~VISIT, @@ -303,7 +330,17 @@ derive_param_computed <- function(dataset = NULL, if (!is.null(set_values_to$PARAMCD) && !is.null(dataset)) { assert_param_does_not_exist(dataset, set_values_to$PARAMCD) } - assert_logical_scalar(keep_nas) + if (typeof(keep_nas) == "list") { + assert_vars(keep_nas) + } else { + assert_logical_scalar( + keep_nas, + message = paste( + "Argument {.arg {arg_name}} must be either {.val {TRUE}}, {.val {FALSE}},", + "or a list of {.cls symbol}, but is {.obj_type_friendly {arg}}." 
+ ) + ) + } parameters <- assert_parameters_argument(parameters) constant_parameters <- assert_parameters_argument(constant_parameters, optional = TRUE) @@ -346,17 +383,39 @@ derive_param_computed <- function(dataset = NULL, hori_data <- inner_join(hori_data, hori_const_data, by = vars2chr(constant_by_vars)) } - # add analysis value (AVAL) and parameter variables, e.g., PARAMCD - if (!keep_nas) { - # keep only observations where all analysis values are available + if (isFALSE(keep_nas) || typeof(keep_nas) == "list") { + # keep only observations where the specified analysis values are available + if (typeof(keep_nas) == "list") { + na_vars <- discard( + analysis_vars_chr, + ~ str_detect(., paste0("^(", paste(vars2chr(keep_nas), collapse = "|"), ")\\.")) + ) + } else { + na_vars <- analysis_vars_chr + } + nobs_before <- nrow(hori_data) hori_data <- filter( hori_data, !!!parse_exprs(map_chr( - analysis_vars_chr, + na_vars, ~ str_c("!is.na(", .x, ")") )) ) + if (nobs_before > 0 && nrow(hori_data) == 0) { + cli_inform(c( + paste( + "No computed records were added because for all potential computed", + "records at least one of the contributing values was {.val {NA}}." + ), + paste( + "If this is not expected, please check the input data and the value of", + "the {.arg keep_nas} argument." 
+ ) + )) + } } + + # add computed variables like AVAL and constant variables like PARAMCD hori_data <- hori_data %>% process_set_values_to(set_values_to) %>% select(-all_of(analysis_vars_chr[str_detect(analysis_vars_chr, "\\.")])) diff --git a/_pkgdown.yml b/_pkgdown.yml index 771160852..c897b5dba 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,6 +1,7 @@ url: https://pharmaverse.github.io/admiral template: + math-rendering: mathjax bootstrap: 5 params: bootswatch: flatly diff --git a/man/derive_param_computed.Rd b/man/derive_param_computed.Rd index ded4df277..df0d0cfa3 100644 --- a/man/derive_param_computed.Rd +++ b/man/derive_param_computed.Rd @@ -128,7 +128,13 @@ parameter code \code{"HGHT"}. \item{keep_nas}{Keep observations with \code{NA}s If the argument is set to \code{TRUE}, observations are added even if some of -the values contributing to the computed value are \code{NA}.} +the values contributing to the computed value are \code{NA} (see Example 1b). + +If the argument is set to a list of variables, observations are added even +if some of specified variables are \code{NA} (see Example 1c). + +\emph{Permitted Values:} \code{TRUE}, \code{FALSE}, or a list of variables created by +\code{exprs()} e.g. \code{exprs(ADTF, ATMF)}} } \value{ The input dataset with the new parameter added. Note, a variable will only @@ -147,7 +153,9 @@ For each group (with respect to the variables specified for the \code{by_vars} parameter) an observation is added to the output dataset if the filtered input dataset (\code{dataset}) or the additional dataset (\code{dataset_add}) contains exactly one observation for each parameter code -specified for \code{parameters}. +specified for \code{parameters} and all contributing values like \code{AVAL.SYSBP} +are not \code{NA}. The \code{keep_nas} can be used to specify variables for which +\code{NA}s are acceptable. See also Example 1b and 1c. 
For the new observations the variables specified for \code{set_values_to} are set to the provided values. The values of the other variables of the input @@ -160,17 +168,18 @@ library(lubridate) # Example 1a: Derive MAP advs <- tribble( - ~USUBJID, ~PARAMCD, ~PARAM, ~AVAL, ~AVALU, ~VISIT, - "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", 51, "mmHg", "BASELINE", - "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", 50, "mmHg", "WEEK 2", - "01-701-1015", "SYSBP", "Systolic Blood Pressure (mmHg)", 121, "mmHg", "BASELINE", - "01-701-1015", "SYSBP", "Systolic Blood Pressure (mmHg)", 121, "mmHg", "WEEK 2", - "01-701-1028", "DIABP", "Diastolic Blood Pressure (mmHg)", 79, "mmHg", "BASELINE", - "01-701-1028", "DIABP", "Diastolic Blood Pressure (mmHg)", 80, "mmHg", "WEEK 2", - "01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", 130, "mmHg", "BASELINE", - "01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", 132, "mmHg", "WEEK 2" + ~USUBJID, ~PARAMCD, ~PARAM, ~AVAL, ~VISIT, + "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", 51, "BASELINE", + "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", 50, "WEEK 2", + "01-701-1015", "SYSBP", "Systolic Blood Pressure (mmHg)", 121, "BASELINE", + "01-701-1015", "SYSBP", "Systolic Blood Pressure (mmHg)", 121, "WEEK 2", + "01-701-1028", "DIABP", "Diastolic Blood Pressure (mmHg)", 79, "BASELINE", + "01-701-1028", "DIABP", "Diastolic Blood Pressure (mmHg)", 80, "WEEK 2", + "01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", 130, "BASELINE", + "01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", NA, "WEEK 2" ) \%>\% mutate( + AVALU = "mmHg", ADT = case_when( VISIT == "BASELINE" ~ as.Date("2024-01-10"), VISIT == "WEEK 2" ~ as.Date("2024-01-24") @@ -191,8 +200,8 @@ derive_param_computed( ) ) -# Example 1b: Using option `keep_nas = TRUE` to derive MAP in the case where some/all values -# of a variable used in the computation are missing +# Example 1b: Using option `keep_nas = TRUE` to derive 
MAP in the case where some/all +# values of a variable used in the computation are missing derive_param_computed( advs, @@ -209,6 +218,24 @@ derive_param_computed( keep_nas = TRUE ) +# Example 1c: Using option `keep_nas = exprs(ADTF)` to derive MAP in the case where +# some/all values of a variable used in the computation are missing but ignoring ADTF + +derive_param_computed( + advs, + by_vars = exprs(USUBJID, VISIT), + parameters = c("SYSBP", "DIABP"), + set_values_to = exprs( + AVAL = (AVAL.SYSBP + 2 * AVAL.DIABP) / 3, + PARAMCD = "MAP", + PARAM = "Mean Arterial Pressure (mmHg)", + AVALU = "mmHg", + ADT = ADT.SYSBP, + ADTF = ADTF.SYSBP + ), + keep_nas = exprs(ADTF) +) + # Example 2: Derive BMI where height is measured only once advs <- tribble( ~USUBJID, ~PARAMCD, ~PARAM, ~AVAL, ~AVALU, ~VISIT, diff --git a/tests/testthat/_snaps/derive_param_computed.md b/tests/testthat/_snaps/derive_param_computed.md index 5bd7adb53..0cd3d78b4 100644 --- a/tests/testthat/_snaps/derive_param_computed.md +++ b/tests/testthat/_snaps/derive_param_computed.md @@ -1,4 +1,38 @@ -# assert_parameters_argument Test 10: error if argument is of wrong type +# derive_param_computed Test 11: error if keep_nas is invalid + + Code + derive_param_computed(advs, by_vars = exprs(USUBJID, AVISIT), parameters = c( + "SYSBP", "DIABP"), set_values_to = exprs(AVAL = (AVAL.SYSBP + 2 * + AVAL.DIABP) / 3, PARAMCD = "MAP", PARAM = "Mean Arterial Pressure (mmHg)"), + keep_nas = 3) + Condition + Error in `derive_param_computed()`: + ! Argument `keep_nas` must be either TRUE, FALSE, or a list of <symbol>, but is a number.
+ +# derive_param_computed Test 12: inform if no new records due to NAs + + Code + derive_param_computed(advs, by_vars = exprs(USUBJID, AVISIT), parameters = c( + "SYSBP", "DIABP"), set_values_to = exprs(AVAL = (AVAL.SYSBP + 2 * + AVAL.DIABP) / 3, PARAMCD = "MAP", PARAM = "Mean Arterial Pressure (mmHg)", ADT = ADT.SYSBP, + ADTF = ADTF.SYSBP)) + Message + No computed records were added because for all potential computed records at least one of the contributing values was NA. + If this is not expected, please check the input data and the value of the `keep_nas` argument. + Output + # A tibble: 8 x 7 + USUBJID PARAMCD PARAM AVAL AVISIT ADT ADTF + <chr> <chr> <chr> <dbl> <chr> <date> <chr> + 1 01-701-1015 DIABP Diastolic Blood Pressure (m~ 51 BASEL~ 2024-01-10 <NA> + 2 01-701-1015 DIABP Diastolic Blood Pressure (m~ 50 WEEK 2 2024-01-24 <NA> + 3 01-701-1015 SYSBP Systolic Blood Pressure (mm~ 121 BASEL~ 2024-01-10 <NA> + 4 01-701-1015 SYSBP Systolic Blood Pressure (mm~ 121 WEEK 2 2024-01-24 <NA> + 5 01-701-1028 DIABP Diastolic Blood Pressure (m~ 79 BASEL~ 2024-01-10 <NA> + 6 01-701-1028 DIABP Diastolic Blood Pressure (m~ 80 WEEK 2 2024-01-24 <NA> + 7 01-701-1028 SYSBP Systolic Blood Pressure (mm~ 130 BASEL~ 2024-01-10 <NA> + 8 01-701-1028 SYSBP Systolic Blood Pressure (mm~ NA WEEK 2 2024-01-24 <NA> + +# assert_parameters_argument Test 13: error if argument is of wrong type Code assert_parameters_argument(myparameters <- c(1, 2, 3)) @@ -6,7 +40,7 @@ Error in `assert_parameters_argument()`: ! `myparameters <- c(1, 2, 3)` must be a character vector or a list of expressions but it is a double vector.
-# get_hori_data Test 11: error if variables with more than one dot +# get_hori_data Test 14: error if variables with more than one dot Code get_hori_data(input, parameters = exprs(SYSBP, DIABP), by_vars = exprs(USUBJID, diff --git a/tests/testthat/test-derive_param_computed.R b/tests/testthat/test-derive_param_computed.R index 21804a964..e51276fee 100644 --- a/tests/testthat/test-derive_param_computed.R +++ b/tests/testthat/test-derive_param_computed.R @@ -399,9 +399,135 @@ test_that("derive_param_computed Test 9: compute multiple variables, keep_nas", ) }) +## Test 10: specify variables for keep_nas ---- +test_that("derive_param_computed Test 10: specify variables for keep_nas", { + advs <- tibble::tribble( + ~USUBJID, ~PARAMCD, ~PARAM, ~AVAL, ~AVISIT, + "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", 51, "BASELINE", + "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", 50, "WEEK 2", + "01-701-1015", "SYSBP", "Systolic Blood Pressure (mmHg)", 121, "BASELINE", + "01-701-1015", "SYSBP", "Systolic Blood Pressure (mmHg)", 121, "WEEK 2", + "01-701-1028", "DIABP", "Diastolic Blood Pressure (mmHg)", 79, "BASELINE", + "01-701-1028", "DIABP", "Diastolic Blood Pressure (mmHg)", 80, "WEEK 2", + "01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", 130, "BASELINE", + "01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", NA, "WEEK 2" + ) %>% + mutate( + ADT = case_when( + AVISIT == "BASELINE" ~ as.Date("2024-01-10"), + AVISIT == "WEEK 2" ~ as.Date("2024-01-24") + ), + ADTF = NA_character_ + ) + + expected <- bind_rows( + advs, + tibble::tribble( + ~USUBJID, ~AVISIT, ~AVAL, + "01-701-1015", "BASELINE", (121 + 2 * 51) / 3, + "01-701-1015", "WEEK 2", (121 + 2 * 50) / 3, + "01-701-1028", "BASELINE", (130 + 2 * 79) / 3 + ) %>% + mutate( + PARAMCD = "MAP", + PARAM = "Mean Arterial Pressure (mmHg)", + ADT = case_when( + AVISIT == "BASELINE" ~ as.Date("2024-01-10"), + AVISIT == "WEEK 2" ~ as.Date("2024-01-24") + ), + ADTF = NA_character_ + ) + ) + + actual 
<- derive_param_computed( + advs, + by_vars = exprs(USUBJID, AVISIT), + parameters = c("SYSBP", "DIABP"), + set_values_to = exprs( + AVAL = (AVAL.SYSBP + 2 * AVAL.DIABP) / 3, + PARAMCD = "MAP", + PARAM = "Mean Arterial Pressure (mmHg)", + ADT = ADT.SYSBP, + ADTF = ADTF.SYSBP + ), + keep_nas = exprs(ADTF) + ) + + expect_dfs_equal( + base = expected, + compare = actual, + keys = c("USUBJID", "PARAMCD", "AVISIT") + ) +}) + +## Test 11: error if keep_nas is invalid ---- +test_that("derive_param_computed Test 11: error if keep_nas is invalid", { + advs <- tibble::tribble( + ~USUBJID, ~PARAMCD, ~PARAM, ~AVAL, ~AVISIT, + "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", 51, "BASELINE", + "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", 50, "WEEK 2", + "01-701-1015", "SYSBP", "Systolic Blood Pressure (mmHg)", 121, "BASELINE", + "01-701-1015", "SYSBP", "Systolic Blood Pressure (mmHg)", 121, "WEEK 2", + "01-701-1028", "DIABP", "Diastolic Blood Pressure (mmHg)", 79, "BASELINE", + "01-701-1028", "DIABP", "Diastolic Blood Pressure (mmHg)", 80, "WEEK 2", + "01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", 130, "BASELINE", + "01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", NA, "WEEK 2" + ) + + expect_snapshot( + derive_param_computed( + advs, + by_vars = exprs(USUBJID, AVISIT), + parameters = c("SYSBP", "DIABP"), + set_values_to = exprs( + AVAL = (AVAL.SYSBP + 2 * AVAL.DIABP) / 3, + PARAMCD = "MAP", + PARAM = "Mean Arterial Pressure (mmHg)" + ), + keep_nas = 3 + ), + error = TRUE + ) +}) + +## Test 12: inform if no new records due to NAs ---- +test_that("derive_param_computed Test 12: inform if no new records due to NAs", { + advs <- tibble::tribble( + ~USUBJID, ~PARAMCD, ~PARAM, ~AVAL, ~AVISIT, + "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", 51, "BASELINE", + "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", 50, "WEEK 2", + "01-701-1015", "SYSBP", "Systolic Blood Pressure (mmHg)", 121, "BASELINE", + "01-701-1015", 
"SYSBP", "Systolic Blood Pressure (mmHg)", 121, "WEEK 2", + "01-701-1028", "DIABP", "Diastolic Blood Pressure (mmHg)", 79, "BASELINE", + "01-701-1028", "DIABP", "Diastolic Blood Pressure (mmHg)", 80, "WEEK 2", + "01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", 130, "BASELINE", + "01-701-1028", "SYSBP", "Systolic Blood Pressure (mmHg)", NA, "WEEK 2" + ) %>% + mutate( + ADT = case_when( + AVISIT == "BASELINE" ~ as.Date("2024-01-10"), + AVISIT == "WEEK 2" ~ as.Date("2024-01-24") + ), + ADTF = NA_character_ + ) + + expect_snapshot(derive_param_computed( + advs, + by_vars = exprs(USUBJID, AVISIT), + parameters = c("SYSBP", "DIABP"), + set_values_to = exprs( + AVAL = (AVAL.SYSBP + 2 * AVAL.DIABP) / 3, + PARAMCD = "MAP", + PARAM = "Mean Arterial Pressure (mmHg)", + ADT = ADT.SYSBP, + ADTF = ADTF.SYSBP + ) + )) +}) + # assert_parameters_argument ---- -## Test 10: error if argument is of wrong type ---- -test_that("assert_parameters_argument Test 10: error if argument is of wrong type", { +## Test 13: error if argument is of wrong type ---- +test_that("assert_parameters_argument Test 13: error if argument is of wrong type", { expect_snapshot( assert_parameters_argument(myparameters <- c(1, 2, 3)), error = TRUE @@ -409,8 +535,8 @@ test_that("assert_parameters_argument Test 10: error if argument is of wrong typ }) # get_hori_data ---- -## Test 11: error if variables with more than one dot ---- -test_that("get_hori_data Test 11: error if variables with more than one dot", { +## Test 14: error if variables with more than one dot ---- +test_that("get_hori_data Test 14: error if variables with more than one dot", { input <- tibble::tribble( ~USUBJID, ~PARAMCD, ~PARAM, ~AVAL, ~AVALU, ~VISIT, "01-701-1015", "DIABP", "Diastolic Blood Pressure (mmHg)", 51, "mmHg", "BASELINE",