From c4213d32bf029be7d311c4166dfa86989c863c94 Mon Sep 17 00:00:00 2001 From: Ben Straub Date: Fri, 28 Jul 2023 08:28:43 -0400 Subject: [PATCH 1/5] Closes #123 Add Edoardo to status check (#2031) fix: https://github.com/pharmaverse/admiralci/issues/123 add Edoardo to status check --- .github/workflows/cran-status.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cran-status.yml b/.github/workflows/cran-status.yml index c1802ff19f..504680a914 100644 --- a/.github/workflows/cran-status.yml +++ b/.github/workflows/cran-status.yml @@ -17,7 +17,7 @@ jobs: with: # Whom should the issue be assigned to if errors are encountered # in the CRAN status checks? - issue-assignees: "bundfussr,esimms999-gsk,thomas-neitmann,bms63" + issue-assignees: "bundfussr,esimms999-gsk,manciniedoardo,bms63" # Create an issue if one or more of the following # statuses are reported on the check report. statuses: "WARN,ERROR,NOTE" From 2e487b4386b83f1b375e3d35c32e1293b9ba4f2e Mon Sep 17 00:00:00 2001 From: Sophie Shapcott <90790226+sophie-gem@users.noreply.github.com> Date: Fri, 28 Jul 2023 14:31:43 +0100 Subject: [PATCH 2/5] Closes #2014 derive_var_shift change na_val@devel (#2032) * #2014 `na_val` deprecated and `missing_value` inserted into `derive_var_shift`. * #2014 - Update `NEWS.md`. * #2014 - update tests and write deprecation parameter test * #2014 - run required tasks for PR --------- Co-authored-by: Ben Straub --- .Rprofile | 4 +-- NEWS.md | 3 ++ R/derive_var_shift.R | 22 ++++++++++---- man/derive_var_shift.Rd | 9 ++++-- tests/testthat/test-derive_var_shift.R | 41 ++++++++++++++++++++++---- 5 files changed, 63 insertions(+), 16 deletions(-) diff --git a/.Rprofile b/.Rprofile index 63eee44871..f9983c698b 100644 --- a/.Rprofile +++ b/.Rprofile @@ -1,7 +1,7 @@ # Set renv profile base on R version. 
renv_profile <- paste(R.version$major, substr(R.version$minor, 1, 1), sep = ".") if (file.exists("./renv/profile")) { - message("Using renv profile from `renv/profile` file.") + message("Using renv profile from `renv/profile` file.") } else if (renv_profile %in% c("4.1", "4.2", "4.3")) { message("Set renv profile to `", renv_profile, "`") Sys.setenv("RENV_PROFILE" = renv_profile) @@ -11,6 +11,6 @@ if (file.exists("./renv/profile")) { if ((Sys.getenv("GITHUB_ACTIONS") != "") || (Sys.getenv("DOCKER_CONTAINER_CONTEXT") != "")) { options(repos = c(CRAN = "https://cran.rstudio.com")) - Sys.setenv(RENV_AUTOLOADER_ENABLED=FALSE) + Sys.setenv(RENV_AUTOLOADER_ENABLED = FALSE) } source("renv/activate.R") diff --git a/NEWS.md b/NEWS.md index ddfcac1408..a16e4f0db4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -34,6 +34,9 @@ - The `filter` argument in `derive_extreme_records()` was deprecated in favor of the `filter_add` using the next phase of the deprecation process. (#1950) +- The `na_val` argument in `derive_var_shift()` has been deprecated in favor of +`missing_value` using the first phase of the deprecation process. (#2014) + ## Documentation diff --git a/R/derive_var_shift.R b/R/derive_var_shift.R index dcbe9904e0..1305cfa218 100644 --- a/R/derive_var_shift.R +++ b/R/derive_var_shift.R @@ -14,7 +14,9 @@ #' #' @param to_var Variable containing value to shift to. #' -#' @param na_val Character string to replace missing values in `from_var` or `to_var`. +#' @param na_val *Deprecated*, please use `missing_value` instead. +#' +#' @param missing_value Character string to replace missing values in `from_var` or `to_var`. #' #' Default: "NULL" #' @@ -24,7 +26,7 @@ #' #' @details `new_var` is derived by concatenating the values of `from_var` to values of `to_var` #' (e.g. "NORMAL to HIGH"). When `from_var` or `to_var` has missing value, the -#' missing value is replaced by `na_val` (e.g. "NORMAL to NULL"). +#' missing value is replaced by `missing_value` (e.g. "NORMAL to NULL"). 
#' #' #' @return The input dataset with the character shift variable added @@ -71,20 +73,28 @@ derive_var_shift <- function(dataset, new_var, from_var, to_var, - na_val = "NULL", + na_val, + missing_value = "NULL", sep_val = " to ") { + ### BEGIN DEPRECATION + if (!missing(na_val)) { + deprecate_warn("0.12.0", "derive_var_shift(na_val = )", "derive_var_shift(missing_value = )") + missing_value <- na_val + } + ### END DEPRECATION + new_var <- assert_symbol(enexpr(new_var)) from_var <- assert_symbol(enexpr(from_var)) to_var <- assert_symbol(enexpr(to_var)) - na_val <- assert_character_scalar(na_val) + missing_value <- assert_character_scalar(missing_value) sep_val <- assert_character_scalar(sep_val) assert_data_frame(dataset, required_vars = exprs(!!from_var, !!to_var)) # Derive shift variable. If from_var or to_var has missing value then set to na_val. dataset %>% mutate( - temp_from_var = if_else(is.na(!!from_var), !!na_val, as.character(!!from_var)), - temp_to_var = if_else(is.na(!!to_var), !!na_val, as.character(!!to_var)) + temp_from_var = if_else(is.na(!!from_var), !!missing_value, as.character(!!from_var)), + temp_to_var = if_else(is.na(!!to_var), !!missing_value, as.character(!!to_var)) ) %>% mutate( !!new_var := paste(temp_from_var, temp_to_var, sep = !!sep_val) diff --git a/man/derive_var_shift.Rd b/man/derive_var_shift.Rd index 76c6a38679..ac4ff9fdfd 100644 --- a/man/derive_var_shift.Rd +++ b/man/derive_var_shift.Rd @@ -9,7 +9,8 @@ derive_var_shift( new_var, from_var, to_var, - na_val = "NULL", + na_val, + missing_value = "NULL", sep_val = " to " ) } @@ -24,7 +25,9 @@ The columns specified by \code{from_var} and the \code{to_var} parameters are ex \item{to_var}{Variable containing value to shift to.} -\item{na_val}{Character string to replace missing values in \code{from_var} or \code{to_var}. 
+\item{na_val}{\emph{Deprecated}, please use \code{missing_value} instead.} + +\item{missing_value}{Character string to replace missing values in \code{from_var} or \code{to_var}. Default: "NULL"} @@ -43,7 +46,7 @@ analysis value, shift from baseline grade to analysis grade, ... \details{ \code{new_var} is derived by concatenating the values of \code{from_var} to values of \code{to_var} (e.g. "NORMAL to HIGH"). When \code{from_var} or \code{to_var} has missing value, the -missing value is replaced by \code{na_val} (e.g. "NORMAL to NULL"). +missing value is replaced by \code{missing_value} (e.g. "NORMAL to NULL"). } \examples{ library(tibble) diff --git a/tests/testthat/test-derive_var_shift.R b/tests/testthat/test-derive_var_shift.R index e5dcdb733a..bebbabb1f4 100644 --- a/tests/testthat/test-derive_var_shift.R +++ b/tests/testthat/test-derive_var_shift.R @@ -1,4 +1,7 @@ -test_that("Shift based on character variables", { +# derive_var_shift ---- + +## Test 1: Shift based on character variables ---- +test_that("derive_var_shift Test 1: Shift based on character variables", { input <- tibble::tribble( ~USUBJID, ~PARAMCD, ~AVAL, ~ABLFL, ~BNRIND, ~ANRIND, "P01", "ALB", 33, "Y", "LOW", "LOW", @@ -28,7 +31,8 @@ test_that("Shift based on character variables", { }) -test_that("Shift based on character variables with missing values", { +## Test 2: Shift based on character variables with missing values ---- +test_that("derive_var_shift Test 2: Shift based on character variables with missing values", { input <- tibble::tribble( ~USUBJID, ~PARAMCD, ~AVAL, ~ABLFL, ~BNRIND, ~ANRIND, "P01", "ALB", 33, "Y", "LOW", "LOW", @@ -60,7 +64,8 @@ test_that("Shift based on character variables with missing values", { }) -test_that("Shift based on numeric variables with missing values", { +## Test 3: Shift based on numeric variables with missing values ---- +test_that("derive_var_shift Test 3: Shift based on numeric variables with missing values", { input <- tibble::tribble( ~USUBJID, 
~PARAMCD, ~AVAL, ~ABLFL, ~BASE, "P01", "ALB", 33.1, "Y", 33.1, @@ -91,7 +96,8 @@ test_that("Shift based on numeric variables with missing values", { ) }) -test_that("Shift with user-specified na_val and sep_val", { +## Test 4: Shift with user-specified missing_value and sep_val ---- +test_that("derive_var_shift Test 4: Shift with user-specified missing_value and sep_val", { input <- tibble::tribble( ~USUBJID, ~PARAMCD, ~AVAL, ~ABLFL, ~BNRIND, ~ANRIND, "P01", "ALB", 33, "Y", "LOW", "LOW", @@ -117,9 +123,34 @@ test_that("Shift with user-specified na_val and sep_val", { new_var = SHIFT1, from_var = BNRIND, to_var = ANRIND, - na_val = "MISSING", + missing_value = "MISSING", sep_val = " - " ), expected_output ) }) + +## Test 5: Test deprecation warning of na_val argument ---- +test_that("derive_var_shift Test 5: Test deprecation warning of na_val argument", { + input <- tibble::tribble( + ~USUBJID, ~PARAMCD, ~AVAL, ~ABLFL, ~BNRIND, ~ANRIND, + "P01", "ALB", 33, "Y", "LOW", "LOW", + "P01", "ALB", 38, NA, "LOW", "NORMAL", + "P01", "ALB", NA, NA, "LOW", NA, + "P02", "ALB", NA, "Y", NA, NA, + "P02", "ALB", 49, NA, NA, "HIGH", + "P02", "SODIUM", 147, "Y", "HIGH", "HIGH" + ) + + expect_warning( + derive_var_shift( + input, + new_var = SHIFT1, + from_var = BNRIND, + to_var = ANRIND, + na_val = "MISSING", + sep_val = " - " + ), + class = "lifecycle_warning_deprecated" + ) +}) From 01c6be7f141d2431db678c107b3f7b96e4f07c3a Mon Sep 17 00:00:00 2001 From: ynsec37 <98389771+ynsec37@users.noreply.github.com> Date: Tue, 1 Aug 2023 22:59:30 +0800 Subject: [PATCH 3/5] Closes #2012_derive_vars_dy (#2013) * derive dy_vars by argument `.names` in across (#2012) use named vector to avoid clashing with variables ending with `_temp` in input dataset * test no error for variable ending with `_temp` (#2012) * styled file * add update information for #2012 --------- Co-authored-by: Daniel Sjoberg --- NEWS.md | 3 +++ R/derive_vars_dy.R | 32 ++++++++++------------------ tests/testthat/test-derive_vars_dy.R | 32
++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 21 deletions(-) diff --git a/NEWS.md b/NEWS.md index a16e4f0db4..760168a981 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,6 +8,9 @@ - The function `derive_var_extreme_flag()` has a new function argument, `flag_all` that additionally flags all records if the first or last record is not unique. (#1979) +- The function `derive_vars_dy()` is updated to avoid potential error when the input `dataset` with columns ending with `temp`. (#2012) + + ## Breaking Changes - The following functions, which were deprecated in previous `{admiral}` versions, have been removed: (#1950) diff --git a/R/derive_vars_dy.R b/R/derive_vars_dy.R index aa085c32eb..3a1e243240 100644 --- a/R/derive_vars_dy.R +++ b/R/derive_vars_dy.R @@ -102,31 +102,21 @@ derive_vars_dy <- function(dataset, abort(err_msg) } - dy_vars <- if_else( + # named vector passed to `.names` in `across()` to derive name of dy_vars + dy_vars <- set_names(if_else( source_names == "", str_replace_all(vars2chr(source_vars), "(DT|DTM)$", "DY"), source_names - ) + ), vars2chr(source_vars)) + warn_if_vars_exist(dataset, dy_vars) - if (n_vars > 1L) { - dataset %>% - mutate( - across( - .cols = vars2chr(unname(source_vars)), - .fns = list(temp = ~ - compute_duration(start_date = !!reference_date, end_date = .)) - ) - ) %>% - rename_with( - .cols = ends_with("temp"), - .fn = ~dy_vars - ) - } else { - dataset %>% - mutate( - !!sym(dy_vars) := - compute_duration(start_date = !!reference_date, end_date = !!source_vars[[1]]) + dataset %>% + mutate( + across( + .cols = vars2chr(unname(source_vars)), + .fns = ~ compute_duration(start_date = !!reference_date, end_date = .x), + .names = "{dy_vars}" ) - } + ) } diff --git a/tests/testthat/test-derive_vars_dy.R b/tests/testthat/test-derive_vars_dy.R index e619550f76..db246d5737 100644 --- a/tests/testthat/test-derive_vars_dy.R +++ b/tests/testthat/test-derive_vars_dy.R @@ -285,3 +285,35 @@ test_that("derive_vars_dy Test 9: Single named 
--DT input when ref date is --DTM keys = c("STUDYID", "USUBJID") ) }) + +## Test 10: no error if input with variable end with `_temp` ---- +test_that("derive_vars_dy Test 10: no error if input with variable end with `_temp`", { + datain <- tibble::tribble( + ~STUDYID, ~USUBJID, ~TRTSDTM, ~ASTDT, ~test_temp, + "TEST01", "PAT01", "2014-01-17T23:59:59", "2014-01-18", "test" + ) %>% + mutate( + TRTSDTM = lubridate::as_datetime(TRTSDTM), + ASTDT = lubridate::ymd(ASTDT) + ) + + expected_output <- tibble::tribble( + ~STUDYID, ~USUBJID, ~TRTSDTM, ~ASTDT, ~test_temp, ~ASTDY, + "TEST01", "PAT01", "2014-01-17T23:59:59", "2014-01-18", "test", 2 + ) %>% + mutate( + TRTSDTM = lubridate::as_datetime(TRTSDTM), + ASTDT = lubridate::ymd(ASTDT) + ) + + actual_output <- derive_vars_dy(datain, + reference_date = TRTSDTM, + source_vars = exprs(ASTDT) + ) + + expect_dfs_equal( + expected_output, + actual_output, + keys = c("STUDYID", "USUBJID") + ) +}) From 4ed7aac63a7592f0640464317b5a0c7c7a55710b Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Tue, 1 Aug 2023 15:27:09 -0400 Subject: [PATCH 4/5] Closes #1966 address derive_vars_joined bugs (#2016) * feat: #1966 make our check_type consistent * feat: #1966 hacky solution to null new_vars .join problem * chore: #1966 inserted line in wrong place * feat: #1966 add news blurb for what was done * feat: #1966 add tests * chore: #1966 lintr * chore: #1966 swap appropriate order selection and restore check_type arg * chore: #1966 looks like that fixed it * feat: #1966 issue warning for dataset_add naming conflicts when `new_vars` is NULL * chore: #1966 clean up for readability * chore: #1966 restore original replace_values_by_names * chore: #1966 add additional test to demonstrate how order vars were fixed/selected * feat: #1966 adopt feedback for error messaging of naming conflicts --------- Co-authored-by: Zelos Zhu --- NEWS.md | 2 + R/derive_joined.R | 16 ++++- tests/testthat/test-derive_joined.R | 91 +++++++++++++++++++++++++++++ 3 files 
changed, 108 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 760168a981..db4eb86c67 100644 --- a/NEWS.md +++ b/NEWS.md @@ -47,6 +47,8 @@ the `filter_add` using the next phase of the deprecation process. (#1950) - The list of package authors/contributors has been reformatted so that those who are actively maintaining the code base are now marked as *authors*, whereas those who made a significant contribution in the past are now down as *contributors*. All other acknowledgements have been moved to README section (#1941). +- `derive_vars_joined()` had two bugs with regards to duplicates messaging and when `new_vars` was set to `NULL` that have now been addressed (#1966). + # admiral 0.11.1 - Fix bug in `derive_param_tte()`. (#1962) diff --git a/R/derive_joined.R b/R/derive_joined.R index ef6c58aa4b..1e0f5cff48 100644 --- a/R/derive_joined.R +++ b/R/derive_joined.R @@ -353,6 +353,19 @@ derive_vars_joined <- function(dataset, if (is.null(new_vars)) { new_vars <- chr2vars(colnames(dataset_add)) } + preexisting_vars <- chr2vars(colnames(dataset)) + preexisting_vars_no_by_vars <- preexisting_vars[which(!(preexisting_vars %in% by_vars))] + duplicates <- intersect(replace_values_by_names(new_vars), preexisting_vars_no_by_vars) + if (length(duplicates) > 0) { + err_msg <- sprintf( + paste( + "The following columns in `dataset_add` have naming conflicts with `dataset`,\n", + "please make the appropriate modifications to `new_vars`, with respect to:\n%s" + ), + enumerate(vars2chr(duplicates)) + ) + abort(err_msg) + } # number observations of the input dataset to get a unique key # (by_vars and tmp_obs_nr) @@ -371,7 +384,7 @@ derive_vars_joined <- function(dataset, filter_if(filter_add) %>% select( !!!by_vars, - !!!chr2vars(names(order)), + !!!replace_values_by_names(extract_vars(order)), !!!replace_values_by_names(join_vars), !!!intersect(unname(extract_vars(new_vars)), chr2vars(colnames(dataset_add))) ) @@ -410,6 +423,7 @@ derive_vars_joined <- 
function(dataset, by_vars = exprs(!!!by_vars_left, !!tmp_obs_nr), new_vars = add_suffix_to_vars(new_vars, vars = common_vars, suffix = ".join"), missing_values = missing_values, + check_type = check_type, duplicate_msg = paste( paste( "After applying `filter_join` the joined dataset contains more", diff --git a/tests/testthat/test-derive_joined.R b/tests/testthat/test-derive_joined.R index 8574f3b564..eeb1afcc23 100644 --- a/tests/testthat/test-derive_joined.R +++ b/tests/testthat/test-derive_joined.R @@ -242,3 +242,94 @@ test_that("derive_vars_joined Test 7: new_vars expressions using variables from keys = c("USUBJID", "AESEQ") ) }) + +## Test 8: error if new_vars are already in dataset ---- +test_that("derive_vars_joined Test 8: error if new_vars are already in dataset", { + myd <- data.frame(day = c(1, 2, 3), val = c(0, 17, 21)) + expect_error( + derive_vars_joined( + myd, + dataset_add = myd, + order = exprs(day), + mode = "last", + filter_join = day < day.join + ), + regexp = paste( + "The following columns in `dataset_add` have naming conflicts with `dataset`" + ) + ) +}) + +## Test 9: fixing a bug from issue 1966 ---- +test_that("derive_vars_joined Test 9: fixing a bug from issue 1966", { # nolint + adlb_ast <- tribble( + ~ADT, ~ASEQ, + "2002-01-01", 1, + "2002-02-02", 2, + "2002-02-02", 3 + ) %>% + mutate( + STUDYID = "ABC", + USUBJID = "1", + ADT = ymd(ADT), + ADTM = as_datetime(ADT) + ) + + adlb_tbili_pbl <- tribble( + ~ADT, ~ASEQ, + "2002-01-01", 4, + "2002-02-02", 5, + "2002-02-02", 6 + ) %>% + mutate( + STUDYID = "ABC", + USUBJID = "1", + ADT = ymd(ADT), + ADTM = as_datetime(ADT) + ) + + adlb_joined <- derive_vars_joined( + adlb_ast, + dataset_add = adlb_tbili_pbl, + by_vars = exprs(STUDYID, USUBJID), + order = exprs(ADTM, ASEQ), + new_vars = exprs(TBILI_ADT = ADT), + filter_join = ADT <= ADT.join, + mode = "first" + ) + + expected <- adlb_ast %>% + mutate(TBILI_ADT = as.Date(c("2002-01-01", "2002-02-02", "2002-02-02"), "%Y-%m-%d")) + + 
expect_dfs_equal( + base = expected, + compare = adlb_joined, + keys = c("ADT", "ASEQ", "STUDYID", "USUBJID", "ADTM", "TBILI_ADT") + ) +}) + +## Test 10: order vars are selected properly in function body ---- +test_that("derive_vars_joined Test 10: order vars are selected properly in function body", { + myd <- data.frame(day = c(1, 2, 3), val = c(0, 17, 21)) + actual <- derive_vars_joined( + myd, + dataset_add = myd, + new_vars = exprs(first_val = val), + join_vars = exprs(day), + order = exprs(-day), + mode = "last", + filter_join = day < day.join + ) + expected <- tribble( + ~day, ~val, ~first_val, + 1, 0, 17, + 2, 17, 21, + 3, 21, NA + ) + + expect_dfs_equal( + base = expected, + compare = actual, + keys = c("day", "val", "first_val") + ) +}) From b6f39a5551dd9634cbd7e00fb9206899ac01400c Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Wed, 2 Aug 2023 08:22:12 -0400 Subject: [PATCH 5/5] Closes #2037 deprecate `dataset_expected_obs` for `dataset_ref` in respective functions (#2039) feat: #2037 deprecate `dataset_expected_obs` for `dataset_ref` Co-authored-by: Zelos Zhu --- NEWS.md | 3 ++ R/derive_expected_records.R | 37 +++++++++++++------ R/derive_locf_records.R | 34 ++++++++++++----- man/derive_expected_records.Rd | 15 +++++--- man/derive_locf_records.Rd | 13 ++++--- tests/testthat/test-derive_expected_records.R | 8 ++-- tests/testthat/test-derive_locf_records.R | 12 +++--- 7 files changed, 79 insertions(+), 43 deletions(-) diff --git a/NEWS.md b/NEWS.md index db4eb86c67..d5103d24e4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -40,6 +40,9 @@ the `filter_add` using the next phase of the deprecation process. (#1950) - The `na_val` argument in `derive_var_shift()` has been deprecated in favor of `missing_value` using the first phase of the deprecation process. (#2014) +- The `dataset_expected_obs` argument in `derive_expected_records()` and `derive_locf_records()` +has been deprecated in favor of `dataset_ref`. 
(#2037) + ## Documentation diff --git a/R/derive_expected_records.R b/R/derive_expected_records.R index c9a753fedc..f61cf2dab9 100644 --- a/R/derive_expected_records.R +++ b/R/derive_expected_records.R @@ -5,17 +5,19 @@ #' #' @param dataset Input dataset #' -#' A data frame, the columns from `dataset_expected_obs` and specified by the +#' A data frame, the columns from `dataset_ref` and specified by the #' `by_vars` parameter are expected. #' -#' @param dataset_expected_obs Expected observations dataset +#' @param dataset_expected_obs *Deprecated*, please use `dataset_ref` instead. +#' +#' @param dataset_ref Expected observations dataset #' #' Data frame with the expected observations, e.g., all the expected #' combinations of `PARAMCD`, `PARAM`, `AVISIT`, `AVISITN`, ... #' #' @param by_vars Grouping variables #' -#' For each group defined by `by_vars` those observations from `dataset_expected_obs` +#' For each group defined by `by_vars` those observations from `dataset_ref` #' are added to the output dataset which do not have a corresponding observation #' in the input dataset. #' @@ -31,7 +33,7 @@ #' "TDOSE", PARCAT1 = "OVERALL")`. #' #' @details For each group (the variables specified in the `by_vars` parameter), -#' those records from `dataset_expected_obs` that are missing in the input +#' those records from `dataset_ref` that are missing in the input #' dataset are added to the output dataset. 
#' #' @return The input dataset with the missed expected observations added for each @@ -63,7 +65,7 @@ #' #' derive_expected_records( #' dataset = adqs, -#' dataset_expected_obs = parm_visit_ref, +#' dataset_ref = parm_visit_ref, #' by_vars = exprs(USUBJID, PARAMCD), #' set_values_to = exprs(DTYPE = "DERIVED") #' ) @@ -78,35 +80,46 @@ #' #' derive_expected_records( #' dataset = adqs, -#' dataset_expected_obs = parm_visit_ref, +#' dataset_ref = parm_visit_ref, #' by_vars = exprs(USUBJID, PARAMCD), #' set_values_to = exprs(DTYPE = "DERIVED") #' ) #' derive_expected_records <- function(dataset, dataset_expected_obs, + dataset_ref, by_vars = NULL, set_values_to = NULL) { + if (!missing(dataset_expected_obs)) { + deprecate_warn( + "0.12.0", + "derive_expected_records(dataset_expected_obs = )", + "derive_expected_records(dataset_ref = )" + ) + assert_data_frame(dataset_expected_obs) + dataset_ref <- dataset_expected_obs + } + # Check input parameters assert_vars(by_vars, optional = TRUE) - assert_data_frame(dataset_expected_obs) + assert_data_frame(dataset_ref) assert_data_frame( dataset, - required_vars = expr_c(by_vars, chr2vars(colnames(dataset_expected_obs))) + required_vars = expr_c(by_vars, chr2vars(colnames(dataset_ref))) ) assert_varval_list(set_values_to, optional = TRUE) # Derive expected records - ## ids: Variables from by_vars but not in dataset_expected_obs + ## ids: Variables from by_vars but not in dataset_ref ids <- dataset %>% - select(!!!setdiff(by_vars, chr2vars(colnames(dataset_expected_obs)))) %>% + select(!!!setdiff(by_vars, chr2vars(colnames(dataset_ref)))) %>% distinct() if (ncol(ids) > 0) { exp_obsv <- ids %>% - crossing(dataset_expected_obs) + crossing(dataset_ref) } else { - exp_obsv <- dataset_expected_obs + exp_obsv <- dataset_ref } # tmp workaround, update after using tidyr 1.2.0 exp_obs_vars <- exp_obsv %>% diff --git a/R/derive_locf_records.R b/R/derive_locf_records.R index e4a2e6ca2c..ad13927788 100644 --- a/R/derive_locf_records.R +++ 
b/R/derive_locf_records.R @@ -8,14 +8,17 @@ #' The columns specified by the `by_vars`, `analysis_var`, `order`, #' `keep_vars` parameters are expected. #' -#' @param dataset_expected_obs Expected observations dataset +#' @param dataset_expected_obs *Deprecated*, please use `dataset_ref` instead. +#' +#' @param dataset_ref Expected observations dataset #' #' Data frame with all the combinations of `PARAMCD`, `PARAM`, `AVISIT`, #' `AVISITN`, ... which are expected in the dataset is expected. #' +#' #' @param by_vars Grouping variables #' -#' For each group defined by `by_vars` those observations from `dataset_expected_obs` +#' For each group defined by `by_vars` those observations from `dataset_ref` #' are added to the output dataset which do not have a corresponding observation #' in the input dataset or for which `analysis_var` is `NA` for the corresponding observation #' in the input dataset. @@ -40,7 +43,7 @@ #' @author G Gayatri #' #' @details For each group (with respect to the variables specified for the -#' by_vars parameter) those observations from dataset_expected_obs are added to +#' by_vars parameter) those observations from `dataset_ref` are added to #' the output dataset #' - which do not have a corresponding observation in the input dataset or #' - for which `analysis_var` is NA for the corresponding observation in the input dataset. 
@@ -101,8 +104,8 @@ #' ) #' #' derive_locf_records( -#' data = advs, -#' dataset_expected_obs = advs_expected_obsv, +#' dataset = advs, +#' dataset_ref = advs_expected_obsv, #' by_vars = exprs(STUDYID, USUBJID, PARAMCD), #' order = exprs(AVISITN, AVISIT), #' keep_vars = exprs(PARAMN) @@ -110,10 +113,21 @@ #' derive_locf_records <- function(dataset, dataset_expected_obs, + dataset_ref, by_vars, analysis_var = AVAL, order, keep_vars = NULL) { + if (!missing(dataset_expected_obs)) { + deprecate_warn( + "0.12.0", + "derive_locf_records(dataset_expected_obs = )", + "derive_locf_records(dataset_ref = )" + ) + assert_data_frame(dataset_expected_obs) + dataset_ref <- dataset_expected_obs + } + #### Input Checking #### analysis_var <- assert_symbol(enexpr(analysis_var)) @@ -123,27 +137,27 @@ derive_locf_records <- function(dataset, assert_expr_list(order) # Check by_vars and order variables in input datasets - assert_data_frame(dataset_expected_obs) + assert_data_frame(dataset_ref) assert_data_frame( dataset, required_vars = expr_c( by_vars, analysis_var, extract_vars(order), keep_vars, - chr2vars(colnames(dataset_expected_obs)) + chr2vars(colnames(dataset_ref)) ) ) - #### Prepping 'dataset_expected_obs' #### + #### Prepping 'dataset_ref' #### # Get the IDs from input dataset for which the expected observations are to be added ids <- dataset %>% - select(!!!setdiff(by_vars, chr2vars(colnames(dataset_expected_obs)))) %>% + select(!!!setdiff(by_vars, chr2vars(colnames(dataset_ref)))) %>% distinct() exp_obsv <- ids %>% - crossing(dataset_expected_obs) + crossing(dataset_ref) diff --git a/man/derive_expected_records.Rd b/man/derive_expected_records.Rd index 75022c4b80..146b48ff60 100644 --- a/man/derive_expected_records.Rd +++ b/man/derive_expected_records.Rd @@ -7,6 +7,7 @@ derive_expected_records( dataset, dataset_expected_obs, + dataset_ref, by_vars = NULL, set_values_to = NULL ) @@ -14,17 +15,19 @@ derive_expected_records( \arguments{ \item{dataset}{Input dataset -A data 
frame, the columns from \code{dataset_expected_obs} and specified by the +A data frame, the columns from \code{dataset_ref} and specified by the \code{by_vars} parameter are expected.} -\item{dataset_expected_obs}{Expected observations dataset +\item{dataset_expected_obs}{\emph{Deprecated}, please use \code{dataset_ref} instead.} + +\item{dataset_ref}{Expected observations dataset Data frame with the expected observations, e.g., all the expected combinations of \code{PARAMCD}, \code{PARAM}, \code{AVISIT}, \code{AVISITN}, ...} \item{by_vars}{Grouping variables -For each group defined by \code{by_vars} those observations from \code{dataset_expected_obs} +For each group defined by \code{by_vars} those observations from \code{dataset_ref} are added to the output dataset which do not have a corresponding observation in the input dataset.} @@ -51,7 +54,7 @@ contains missing observations. } \details{ For each group (the variables specified in the \code{by_vars} parameter), -those records from \code{dataset_expected_obs} that are missing in the input +those records from \code{dataset_ref} that are missing in the input dataset are added to the output dataset. 
} \examples{ @@ -74,7 +77,7 @@ parm_visit_ref <- tribble( derive_expected_records( dataset = adqs, - dataset_expected_obs = parm_visit_ref, + dataset_ref = parm_visit_ref, by_vars = exprs(USUBJID, PARAMCD), set_values_to = exprs(DTYPE = "DERIVED") ) @@ -89,7 +92,7 @@ parm_visit_ref <- tribble( derive_expected_records( dataset = adqs, - dataset_expected_obs = parm_visit_ref, + dataset_ref = parm_visit_ref, by_vars = exprs(USUBJID, PARAMCD), set_values_to = exprs(DTYPE = "DERIVED") ) diff --git a/man/derive_locf_records.Rd b/man/derive_locf_records.Rd index 3ef93a3e51..5f500aaf0b 100644 --- a/man/derive_locf_records.Rd +++ b/man/derive_locf_records.Rd @@ -7,6 +7,7 @@ derive_locf_records( dataset, dataset_expected_obs, + dataset_ref, by_vars, analysis_var = AVAL, order, @@ -19,14 +20,16 @@ derive_locf_records( The columns specified by the \code{by_vars}, \code{analysis_var}, \code{order}, \code{keep_vars} parameters are expected.} -\item{dataset_expected_obs}{Expected observations dataset +\item{dataset_expected_obs}{\emph{Deprecated}, please use \code{dataset_ref} instead.} + +\item{dataset_ref}{Expected observations dataset Data frame with all the combinations of \code{PARAMCD}, \code{PARAM}, \code{AVISIT}, \code{AVISITN}, ... which are expected in the dataset is expected.} \item{by_vars}{Grouping variables -For each group defined by \code{by_vars} those observations from \code{dataset_expected_obs} +For each group defined by \code{by_vars} those observations from \code{dataset_ref} are added to the output dataset which do not have a corresponding observation in the input dataset or for which \code{analysis_var} is \code{NA} for the corresponding observation in the input dataset.} @@ -59,7 +62,7 @@ does not contain observations for missed visits/time points. 
} \details{ For each group (with respect to the variables specified for the -by_vars parameter) those observations from dataset_expected_obs are added to +by_vars parameter) those observations from \code{dataset_ref} are added to the output dataset \itemize{ \item which do not have a corresponding observation in the input dataset or @@ -113,8 +116,8 @@ advs_expected_obsv <- tribble( ) derive_locf_records( - data = advs, - dataset_expected_obs = advs_expected_obsv, + dataset = advs, + dataset_ref = advs_expected_obsv, by_vars = exprs(STUDYID, USUBJID, PARAMCD), order = exprs(AVISITN, AVISIT), keep_vars = exprs(PARAMN) diff --git a/tests/testthat/test-derive_expected_records.R b/tests/testthat/test-derive_expected_records.R index 05f0e003bc..0937a4a92c 100644 --- a/tests/testthat/test-derive_expected_records.R +++ b/tests/testthat/test-derive_expected_records.R @@ -30,7 +30,7 @@ test_that("derive_expected_records Test 1: missing values in `by_vars`", { actual_output <- derive_expected_records( dataset = input, - dataset_expected_obs = expected_obsv, + dataset_ref = expected_obsv, by_vars = exprs(USUBJID), set_values_to = exprs(DTYPE = "DERIVED") ) @@ -71,7 +71,7 @@ test_that("derive_expected_records Test 2: `by_vars` = NULL", { actual_output <- derive_expected_records( dataset = input, - dataset_expected_obs = expected_obsv, + dataset_ref = expected_obsv, by_vars = NULL, set_values_to = exprs(DTYPE = "DERIVED") ) @@ -114,7 +114,7 @@ test_that("derive_expected_records Test 3: visit variables are parameter indepen actual_output <- derive_expected_records( dataset = input, - dataset_expected_obs = expected_obsv, + dataset_ref = expected_obsv, by_vars = exprs(USUBJID, PARAMCD), set_values_to = exprs(DTYPE = "DERIVED") ) @@ -157,7 +157,7 @@ test_that("derive_expected_records Test 4: visit variables are parameter depende actual_output <- derive_expected_records( dataset = input, - dataset_expected_obs = expected_obsv, + dataset_ref = expected_obsv, by_vars = 
exprs(USUBJID), set_values_to = exprs(DTYPE = "DERIVED") ) diff --git a/tests/testthat/test-derive_locf_records.R b/tests/testthat/test-derive_locf_records.R index 61c9a103a6..9443e970c8 100644 --- a/tests/testthat/test-derive_locf_records.R +++ b/tests/testthat/test-derive_locf_records.R @@ -32,7 +32,7 @@ test_that("derive_locf_records Test 1: visits are missing", { actual_output <- derive_locf_records( input, - dataset_expected_obs = advs_expected_obsv, + dataset_ref = advs_expected_obsv, by_vars = exprs(STUDYID, USUBJID, PARAM, PARAMCD), order = exprs(AVISITN, AVISIT) ) @@ -82,7 +82,7 @@ test_that("derive_locf_records Test 2: some visits have missing AVAL", { actual_output <- derive_locf_records( input, - dataset_expected_obs = advs_expected_obsv, + dataset_ref = advs_expected_obsv, by_vars = exprs(STUDYID, USUBJID, PARAM, PARAMCD), order = exprs(AVISITN, AVISIT) ) @@ -142,7 +142,7 @@ test_that("derive_locf_records Test 3: visits are missing - and DTYPE already ex actual_output <- derive_locf_records( input, - dataset_expected_obs = advs_expected_obsv, + dataset_ref = advs_expected_obsv, by_vars = exprs(STUDYID, USUBJID, PARAM, PARAMCD), order = exprs(AVISITN, AVISIT) ) @@ -190,7 +190,7 @@ test_that("derive_locf_records Test 4: visit variables are parameter independent actual_output <- derive_locf_records( input, - dataset_expected_obs = advs_expected_obsv, + dataset_ref = advs_expected_obsv, by_vars = exprs(STUDYID, USUBJID, PARAM, PARAMCD), order = exprs(AVISITN, AVISIT) ) @@ -246,7 +246,7 @@ test_that("derive_locf_records Test 5: visit variables are parameter dependent", actual_output <- derive_locf_records( input, - dataset_expected_obs = advs_expected_obsv, + dataset_ref = advs_expected_obsv, by_vars = exprs(STUDYID, USUBJID, PARAM, PARAMCD), order = exprs(AVISITN, AVISIT) ) @@ -290,7 +290,7 @@ test_that("derive_locf_records Test 6: populate VISITNUM for LOCF records", { actual_output <- derive_locf_records( input, - dataset_expected_obs = 
advs_expected_obsv, + dataset_ref = advs_expected_obsv, by_vars = exprs(STUDYID, USUBJID, PARAM, PARAMCD), analysis_var = AVALC, order = exprs(AVISITN, AVISIT),