diff --git a/DESCRIPTION b/DESCRIPTION index 5f9482beab..5c1291ece0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: admiral Type: Package Title: ADaM in R Asset Library -Version: 0.12.0 +Version: 0.12.1 Authors@R: c( person("Ben", "Straub", email = "ben.x.straub@gsk.com", role = c("aut", "cre")), person("Stefan", "Bundfuss", role = "aut"), diff --git a/NEWS.md b/NEWS.md index e87c1e454f..9f48559ee0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,11 @@ +# admiral 0.12.1 + +- `derive_extreme_records()` no longer fails if `dataset_add` is specified and a +variable specified for `order` is not in `dataset`. (#2113) + +- The `type` argument in `compute_duration()` changed the underlying default behavior in `derive_vars_duration()` without allowing the user to toggle between `"duration"` and `"interval"` as originally intended. This was fixed by adding the `type` argument to `derive_vars_duration()` and to its wrapper function `derive_vars_aage()`, so that it is passed through to `compute_duration()` appropriately. (#2112) + +- Template `ad_adpp.R` updated to replace `left_join()` with `derive_vars_merged()` (#2109). 
# admiral 0.12.0 diff --git a/R/derive_extreme_records.R b/R/derive_extreme_records.R index 6f46196df0..4093aab5bd 100644 --- a/R/derive_extreme_records.R +++ b/R/derive_extreme_records.R @@ -284,11 +284,25 @@ derive_extreme_records <- function(dataset = NULL, assert_expr_list(order, optional = TRUE) assert_expr_list(keep_source_vars, optional = TRUE) + if (is.null(dataset_add)) { + expected_vars <- expr_c(by_vars, extract_vars(order)) + } else { + expected_vars <- by_vars + } + assert_data_frame( dataset, - required_vars = expr_c( - by_vars, extract_vars(order) - ), + required_vars = expected_vars, + optional = TRUE + ) + assert_data_frame( + dataset_add, + required_vars = expr_c(by_vars, extract_vars(order)), + optional = TRUE + ) + assert_data_frame( + dataset_ref, + required_vars = by_vars, optional = TRUE ) mode <- assert_character_scalar( diff --git a/R/derive_vars_aage.R b/R/derive_vars_aage.R index 575fecc10b..5c7c5a1233 100644 --- a/R/derive_vars_aage.R +++ b/R/derive_vars_aage.R @@ -4,6 +4,8 @@ #' #' **Note:** This is a wrapper function for the more generic `derive_vars_duration()`. #' +#' @inheritParams derive_vars_duration +#' #' @param dataset Input dataset #' #' The columns specified by the `start_date` and the `end_date` parameter are @@ -37,12 +39,28 @@ #' #' @param unit *Deprecated*, please use `age_unit` instead. #' -#' @details The age is derived as the integer part of the duration from start to -#' end date in the specified unit. When 'years' or 'months' are specified in the `out_unit` -#' parameter, because of the underlying `lubridate::time_length()` function that is used -#' here, results are calculated based on the actual calendar length of months or years -#' rather than assuming equal days every month (30.4375 days) or every year (365.25 days). +#' @details The duration is derived as time from start to end date in the +#' specified output unit. If the end date is before the start date, the duration +#' is negative. 
The start and end date variable must be present in the specified +#' input dataset. +#' +#' The [lubridate](https://lubridate.tidyverse.org/) package calculates two +#' types of spans between two dates: duration and interval. +#' While these calculations are largely the same, when the unit of the time period +#' is month or year the result can be slightly different. +#' +#' The difference arises from the ambiguity in the length of `"1 month"` or +#' `"1 year"`. +#' Months may have 31, 30, 28, or 29 days, and years are 365 days and 366 during leap years. +#' Durations and intervals help solve the ambiguity in these measures. +#' +#' The **interval** between `2000-02-01` and `2000-03-01` is `1` (i.e. one month). +#' The **duration** between these two dates is `0.95`, which accounts for the fact +#' that the year 2000 is a leap year, February has 29 days, and the average month +#' length is `30.4375`, i.e. `29 / 30.4375 = 0.95`. #' +#' For additional details, review the +#' [lubridate time span reference page](https://lubridate.tidyverse.org/reference/timespan.html). #' #' @return The input dataset with ``AAGE`` and ``AAGEU`` added #' @@ -67,7 +85,8 @@ derive_vars_aage <- function(dataset, start_date = BRTHDT, end_date = RANDDT, unit = "years", - age_unit = "years") { + age_unit = "years", + type = "interval") { if (!missing(unit)) { deprecate_warn("0.12.0", "derive_vars_aage(unit = )", "derive_vars_aage(age_unit = )") age_unit <- unit @@ -89,7 +108,8 @@ derive_vars_aage <- function(dataset, end_date = !!end_date, out_unit = age_unit, add_one = FALSE, - trunc_out = TRUE + trunc_out = TRUE, + type = type ) } diff --git a/R/derive_vars_duration.R b/R/derive_vars_duration.R index 2c625b6b91..2b46e4b6ae 100644 --- a/R/derive_vars_duration.R +++ b/R/derive_vars_duration.R @@ -72,11 +72,35 @@ #' #' Permitted Values: `TRUE`, `FALSE` #' +#' @param type lubridate duration type. +#' +#' See below for details. 
+#' +#' Permitted Values: `"duration"`, `"interval"` +#' #' @details The duration is derived as time from start to end date in the #' specified output unit. If the end date is before the start date, the duration #' is negative. The start and end date variable must be present in the specified #' input dataset. #' +#' The [lubridate](https://lubridate.tidyverse.org/) package calculates two +#' types of spans between two dates: duration and interval. +#' While these calculations are largely the same, when the unit of the time period +#' is month or year the result can be slightly different. +#' +#' The difference arises from the ambiguity in the length of `"1 month"` or +#' `"1 year"`. +#' Months may have 31, 30, 28, or 29 days, and years are 365 days and 366 during leap years. +#' Durations and intervals help solve the ambiguity in these measures. +#' +#' The **interval** between `2000-02-01` and `2000-03-01` is `1` (i.e. one month). +#' The **duration** between these two dates is `0.95`, which accounts for the fact +#' that the year 2000 is a leap year, February has 29 days, and the average month +#' length is `30.4375`, i.e. `29 / 30.4375 = 0.95`. +#' +#' For additional details, review the +#' [lubridate time span reference page](https://lubridate.tidyverse.org/reference/timespan.html). 
+#' #' #' @return The input dataset with the duration and unit variable added #' @@ -174,7 +198,8 @@ derive_vars_duration <- function(dataset, out_unit = "days", floor_in = TRUE, add_one = TRUE, - trunc_out = FALSE) { + trunc_out = FALSE, + type = "duration") { new_var <- assert_symbol(enexpr(new_var)) new_var_unit <- assert_symbol(enexpr(new_var_unit), optional = TRUE) start_date <- assert_symbol(enexpr(start_date)) @@ -206,7 +231,8 @@ derive_vars_duration <- function(dataset, out_unit = out_unit, floor_in = floor_in, add_one = add_one, - trunc_out = trunc_out + trunc_out = trunc_out, + type = type ) ) diff --git a/README.md b/README.md index a36098122c..1ed1d6ac3b 100644 --- a/README.md +++ b/README.md @@ -33,20 +33,19 @@ remotes::install_github("pharmaverse/admiral") ## Release Schedule -[{admiral}](https://pharmaverse.github.io/admiral/cran-release/)` releases are targeted for the first Monday of the last month of each quarter. Pull Requests will be frozen the week before a release. +[{admiral}](https://pharmaverse.github.io/admiral/cran-release/) releases are targeted for the first Monday of the last month of each quarter. Pull Requests will be frozen the week before a release. The {admiral} family has several downstream and upstream dependencies and so this release shall be done in three Phases: -* Phase 1 release is for [{admiraldev}](https://pharmaverse.github.io/admiraldev/main/), [{pharmaversesdtm}](https://pharmaverse.github.io/pharmaversesdtm/main/), and [{admiral}](https://pharmaverse.github.io/admiral/cran-release/) core +* Phase 1 release is for [{admiraldev}](https://pharmaverse.github.io/admiraldev/main/), [{pharmaversesdtm}](https://pharmaverse.github.io/pharmaversesdtm/main/), and [{admiral}](https://pharmaverse.github.io/admiral/cran-release/) core * Phase 2 release is extension packages, e.g. 
[{admiralonco}](https://pharmaverse.github.io/admiralonco/main/), [{admiralophtha}](https://pharmaverse.github.io/admiralophtha/main/), [{admiralvaccine}](https://pharmaverse.github.io/admiralvaccine/main/) - -|Release Schedule | Phase 1- Date and Packages | Phase 2- Date and Packages | -|---------------- | -------------------------- | -------------------------- | -| Q4-2023 | December 4th | December 11th | -| | [{pharmaversesdtm}](https://pharmaverse.github.io/pharmaversesdtm/main/) | [{admiralonco}](https://pharmaverse.github.io/admiralonco/main/) | -| | [{admiraldev}](https://pharmaverse.github.io/admiraldev/main/) | [{admiralophtha}](https://pharmaverse.github.io/admiralophtha/main/) | -| | [{admiral}](https://pharmaverse.github.io/admiral/main/) | | +| Release Schedule | Phase 1- Date and Packages | Phase 2- Date and Packages | +| ---------------- | ------------------------------------------------------------------------ | -------------------------------------------------------------------- | +| Q4-2023 | December 4th | December 11th | +| | [{pharmaversesdtm}](https://pharmaverse.github.io/pharmaversesdtm/main/) | [{admiralonco}](https://pharmaverse.github.io/admiralonco/main/) | +| | [{admiraldev}](https://pharmaverse.github.io/admiraldev/main/) | [{admiralophtha}](https://pharmaverse.github.io/admiralophtha/main/) | +| | [{admiral}](https://pharmaverse.github.io/admiral/main/) | | The `{admiral}` Q4-2023 release will officially be `{admiral}`'s version 1.0.0 release, where we commit to increased package maturity and pivot towards focusing on maintenance rather than new content. This does not mean that there will never be any new content in `{admiral}`, rather it means we will be more mindful about introducing new functionality and/or breaking changes. The release schedule in 2024 and onward will also shift to twice-yearly, rather than quarterly, so that our users have ample time to react to any new content and changes that do make it onto `{admiral}`. 
@@ -151,11 +150,11 @@ that all our developers and contributors must follow, so that all our code has a * Please see [FAQ: R and Package Versions](https://pharmaverse.github.io/admiral/cran-release/articles/faq.html#why-do-we-use-a-certain-r-version-and-package-versions-for-development) for why we develop with certain R and package versions. ## Pharmaverse Blog + If you are interested in R and Clinical Reporting, then visit the [pharmaverse blog](https://pharmaverse.github.io/blog/). This contains regular, bite-sized posts showcasing how `{admiral}` and other packages in the pharmaverse can be used to realize the vision of full end-to-end Clinical Reporting in R. We are also always looking for keen `{admiral}` users to publish their own blog posts about how they use the package. If this could be you, feel free make an issue in the [GitHub repo](https://github.com/pharmaverse/blog) and get started! - ## Conference Presentations * [Cross Industry Package Development](https://www.youtube.com/watch?v=M4L1PPMu0pU) (recording from R in Pharma 2022) @@ -176,5 +175,5 @@ We use the following for support and communications between user and developer c ## Acknowledgments -Along with the authors and contributors, thanks to the following people for their work on the package: +Along with the authors and contributors, thanks to the following people for their work on the package: Jaxon Abercrombie, Mahdi About, Teckla Akinyi, James Black, Claudia Carlucci, Bill Denney, Kamila Duniec, Alice Ehmann, Ania Golab, Alana Harris, Declan Hodges, Anthony Howard, Shimeng Huang, Samia Kabi, James Kim, John Kirkpatrick, Leena Khatri, Robin Koeger, Konstantina Koukourikou, Pavan Kumar, Pooja Kumari, Shan Lee, Wenyi Liu, Jack McGavigan, Jordanna Morrish, Syed Mubasheer, Yohann Omnes, Barbara O'Reilly, Hamza Rahal, Nick Ramirez, Tom Ratford, Tamara Senior, Sophie Shapcott, Ondrej Slama, Andrew Smith, Daniil Stefonishin, Vignesh Thanikachalam, Michael Thorpe, Annie Yang, Ojesh Upadhyay and 
Franciszek Walkowiak. diff --git a/inst/templates/ad_adpp.R b/inst/templates/ad_adpp.R index d1acd39964..3bb2eeed5e 100644 --- a/inst/templates/ad_adpp.R +++ b/inst/templates/ad_adpp.R @@ -80,11 +80,12 @@ format_avalcat1n <- function(param, aval) { # Get list of ADSL vars required for derivations adsl_vars <- exprs(TRTSDT, TRTEDT, DTHDT, EOSDT, TRT01P, TRT01A) -adpp <- pp %>% +adpp_pp <- pp %>% # Join ADSL with PP (need TRTSDT for ADY derivation) - left_join( - select(admiral_adsl, STUDYID, USUBJID, !!!adsl_vars), - by = c("STUDYID", "USUBJID") + derive_vars_merged( + dataset_add = admiral_adsl, + new_vars = adsl_vars, + by_vars = exprs(STUDYID, USUBJID) ) %>% ## Calculate ADT, ADY ---- derive_vars_dt( @@ -93,7 +94,7 @@ adpp <- pp %>% ) %>% derive_vars_dy(reference_date = TRTSDT, source_vars = exprs(ADT)) -adpp <- adpp %>% +adpp_aval <- adpp_pp %>% ## Add PARAMCD only - add PARAM etc later ---- left_join( select(param_lookup, PPTESTCD, PARAMCD), @@ -117,7 +118,7 @@ adpp <- adpp %>% ## Get visit info ---- # See also the "Visit and Period Variables" vignette # (https://pharmaverse.github.io/admiral/cran-release/articles/visits_periods.html#visit_bds) -adpp <- adpp %>% +adpp_avisit <- adpp_aval %>% # Derive Timing mutate( VISIT = "", # /!\ To remove @@ -141,9 +142,10 @@ adpp <- adpp %>% derive_vars_merged(dataset_add = avalcat_lookup, by_vars = exprs(PARAMCD, AVALCA1N)) # Add all ADSL variables -adpp <- adpp %>% - left_join(admiral_adsl, - by = c("STUDYID", "USUBJID") +adpp <- adpp_avisit %>% + derive_vars_merged( + dataset_add = select(admiral_adsl, !!!negate_vars(adsl_vars)), + by_vars = exprs(STUDYID, USUBJID) ) # Final Steps, Select final variables and Add labels diff --git a/man/derive_vars_aage.Rd b/man/derive_vars_aage.Rd index 2b3fa6c496..189334d6ac 100644 --- a/man/derive_vars_aage.Rd +++ b/man/derive_vars_aage.Rd @@ -9,7 +9,8 @@ derive_vars_aage( start_date = BRTHDT, end_date = RANDDT, unit = "years", - age_unit = "years" + age_unit = "years", + type = 
"interval" ) } \arguments{ @@ -45,6 +46,12 @@ The age is derived in the specified unit Default: 'years' Permitted Values: 'years', 'months', 'weeks', 'days', 'hours', 'minutes', 'seconds'} + +\item{type}{lubridate duration type. + +See below for details. + +Permitted Values: \code{"duration"}, \code{"interval"}} } \value{ The input dataset with \code{AAGE} and \code{AAGEU} added @@ -55,11 +62,28 @@ Derives analysis age (\code{AAGE}) and analysis age unit (\code{AAGEU}). \strong{Note:} This is a wrapper function for the more generic \code{derive_vars_duration()}. } \details{ -The age is derived as the integer part of the duration from start to -end date in the specified unit. When 'years' or 'months' are specified in the \code{out_unit} -parameter, because of the underlying \code{lubridate::time_length()} function that is used -here, results are calculated based on the actual calendar length of months or years -rather than assuming equal days every month (30.4375 days) or every year (365.25 days). +The duration is derived as time from start to end date in the +specified output unit. If the end date is before the start date, the duration +is negative. The start and end date variable must be present in the specified +input dataset. + +The \href{https://lubridate.tidyverse.org/}{lubridate} package calculates two +types of spans between two dates: duration and interval. +While these calculations are largely the same, when the unit of the time period +is month or year the result can be slightly different. + +The difference arises from the ambiguity in the length of \code{"1 month"} or +\code{"1 year"}. +Months may have 31, 30, 28, or 29 days, and years are 365 days and 366 during leap years. +Durations and intervals help solve the ambiguity in these measures. + +The \strong{interval} between \code{2000-02-01} and \code{2000-03-01} is \code{1} (i.e. one month). 
+The \strong{duration} between these two dates is \code{0.95}, which accounts for the fact +that the year 2000 is a leap year, February has 29 days, and the average month +length is \code{30.4375}, i.e. \code{29 / 30.4375 = 0.95}. + +For additional details, review the +\href{https://lubridate.tidyverse.org/reference/timespan.html}{lubridate time span reference page}. } \examples{ library(tibble) diff --git a/man/derive_vars_duration.Rd b/man/derive_vars_duration.Rd index e7d9a1ba04..0dcee21daa 100644 --- a/man/derive_vars_duration.Rd +++ b/man/derive_vars_duration.Rd @@ -14,7 +14,8 @@ derive_vars_duration( out_unit = "days", floor_in = TRUE, add_one = TRUE, - trunc_out = FALSE + trunc_out = FALSE, + type = "duration" ) } \arguments{ @@ -86,6 +87,12 @@ integer part is returned. Default: \code{FALSE} Permitted Values: \code{TRUE}, \code{FALSE}} + +\item{type}{lubridate duration type. + +See below for details. + +Permitted Values: \code{"duration"}, \code{"interval"}} } \value{ The input dataset with the duration and unit variable added @@ -99,6 +106,24 @@ The duration is derived as time from start to end date in the specified output unit. If the end date is before the start date, the duration is negative. The start and end date variable must be present in the specified input dataset. + +The \href{https://lubridate.tidyverse.org/}{lubridate} package calculates two +types of spans between two dates: duration and interval. +While these calculations are largely the same, when the unit of the time period +is month or year the result can be slightly different. + +The difference arises from the ambiguity in the length of \code{"1 month"} or +\code{"1 year"}. +Months may have 31, 30, 28, or 29 days, and years are 365 days and 366 during leap years. +Durations and intervals help solve the ambiguity in these measures. + +The \strong{interval} between \code{2000-02-01} and \code{2000-03-01} is \code{1} (i.e. one month). 
+The \strong{duration} between these two dates is \code{0.95}, which accounts for the fact +that the year 2000 is a leap year, February has 29 days, and the average month +length is \code{30.4375}, i.e. \code{29 / 30.4375 = 0.95}. + +For additional details, review the +\href{https://lubridate.tidyverse.org/reference/timespan.html}{lubridate time span reference page}. } \examples{ library(lubridate) diff --git a/tests/testthat/test-derive_extreme_records.R b/tests/testthat/test-derive_extreme_records.R index 8f2a7b801d..e5ee12a87a 100644 --- a/tests/testthat/test-derive_extreme_records.R +++ b/tests/testthat/test-derive_extreme_records.R @@ -453,3 +453,44 @@ test_that("derive_extreme_records Test 9: keep all vars in the new records when keys = c("USUBJID", "AVISITN", "LBSEQ", "DTYPE") ) }) + +## Test 10: order vars from dataset_add ---- +test_that("derive_extreme_records Test 10: order vars from dataset_add", { + bds <- tibble::tribble( + ~USUBJID, ~PARAMCD, ~AVALC, + "1", "PARAM", "1" + ) + + xx <- tibble::tribble( + ~USUBJID, ~XXTESTCD, ~XXSEQ, + "1", "A", 1, + "1", "A", 2, + "1", "B", 3 + ) + + actual <- derive_extreme_records( + bds, + dataset_add = xx, + dataset_ref = bds, + by_vars = exprs(USUBJID), + order = exprs(XXSEQ), + mode = "first", + filter_add = XXTESTCD == "A", + exist_flag = AVALC, + set_values_to = exprs( + PARAMCD = "XXFL" + ) + ) + + expected <- tibble::tribble( + ~USUBJID, ~PARAMCD, ~AVALC, ~XXTESTCD, ~XXSEQ, + "1", "PARAM", "1", NA_character_, NA_real_, + "1", "XXFL", "Y", "A", 1 + ) + + expect_dfs_equal( + base = expected, + compare = actual, + keys = c("USUBJID", "PARAMCD") + ) +}) diff --git a/tests/testthat/test-derive_vars_duration.R b/tests/testthat/test-derive_vars_duration.R index a4a3cf0c5e..09e6794943 100644 --- a/tests/testthat/test-derive_vars_duration.R +++ b/tests/testthat/test-derive_vars_duration.R @@ -1,3 +1,5 @@ +# derive_vars_duration ---- +## Test 1: Duration and unit variable are added ---- test_that("derive_vars_duration 
Test 1: Duration and unit variable are added", { input <- tibble::tribble( ~USUBJID, ~BRTHDT, ~RANDDT, @@ -23,6 +25,7 @@ test_that("derive_vars_duration Test 1: Duration and unit variable are added", { expect_dfs_equal(actual_output, expected_output, keys = "USUBJID") }) +## Test 2: Duration and unit variable are added ---- test_that("derive_vars_duration Test 2: Duration and unit variable are added", { input <- tibble::tribble( ~USUBJID, ~ASTDT, ~AENDT, @@ -48,6 +51,7 @@ test_that("derive_vars_duration Test 2: Duration and unit variable are added", { expect_dfs_equal(actual_output, expected_output, keys = "USUBJID") }) +## Test 3: Duration and unit variable are added ---- test_that("derive_vars_duration Test 3: Duration and unit variable are added", { input <- tibble::tribble( ~USUBJID, ~ADTM, ~TRTSDTM, @@ -74,3 +78,55 @@ test_that("derive_vars_duration Test 3: Duration and unit variable are added", { expect_dfs_equal(actual_output, expected_output, keys = "USUBJID") }) + +## Test 4: type argument works for interval ---- +test_that("derive_vars_duration Test 4: type argument works for interval", { + input <- tibble::tribble( + ~USUBJID, ~TRTSDTM, ~TRTEDTM, + "P01", ymd_hms("2019-02-01T00:00:00"), ymd_hms("2019-03-01T00:00:00"), + "P02", ymd_hms("2020-02-01T00:00:00"), ymd_hms("2020-03-01T00:00:00") + ) + actual_output <- derive_vars_duration( + input, + new_var = ADURN, + new_var_unit = ADURU, + start_date = TRTSDTM, + end_date = TRTEDTM, + in_unit = "months", + out_unit = "months", + add_one = FALSE, + type = "interval" + ) + expected_output <- dplyr::mutate( + input, + ADURN = c(1, 1), + ADURU = c("MONTHS", "MONTHS") + ) + expect_dfs_equal(actual_output, expected_output, keys = "USUBJID") +}) + +## Test 5: type argument works for duration ---- +test_that("derive_vars_duration Test 5: type argument works for duration", { + input <- tibble::tribble( + ~USUBJID, ~TRTSDTM, ~TRTEDTM, + "P01", ymd_hms("2019-02-01T00:00:00"), ymd_hms("2019-03-01T00:00:00"), + "P02", 
ymd_hms("2020-02-01T00:00:00"), ymd_hms("2020-03-01T00:00:00") + ) + actual_output <- derive_vars_duration( + input, + new_var = ADURN, + new_var_unit = ADURU, + start_date = TRTSDTM, + end_date = TRTEDTM, + in_unit = "months", + out_unit = "months", + add_one = FALSE, + type = "duration" + ) + expected_output <- dplyr::mutate( + input, + ADURN = c((28 / (365.25 / 12)), (29 / (365.25 / 12))), + ADURU = c("MONTHS", "MONTHS") + ) + expect_dfs_equal(actual_output, expected_output, keys = "USUBJID") +})