From ea25868aeae5d4e2af36f4f49a319a7222067894 Mon Sep 17 00:00:00 2001 From: Zelos Zhu Date: Tue, 1 Aug 2023 20:13:00 +0000 Subject: [PATCH] feat: #2037 deprecate `dataset_expected_obs` for `dataset_ref` --- NEWS.md | 3 ++ R/derive_expected_records.R | 37 +++++++++++++------ R/derive_locf_records.R | 34 ++++++++++++----- man/derive_expected_records.Rd | 15 +++++--- man/derive_locf_records.Rd | 13 ++++--- tests/testthat/test-derive_expected_records.R | 8 ++-- tests/testthat/test-derive_locf_records.R | 12 +++--- 7 files changed, 79 insertions(+), 43 deletions(-) diff --git a/NEWS.md b/NEWS.md index db4eb86c67..d5103d24e4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -40,6 +40,9 @@ the `filter_add` using the next phase of the deprecation process. (#1950) - The `na_val` argument in `derive_var_shift()` has been deprecated in favor of `missing_value` using the first phase of the deprecation process. (#2014) +- The `dataset_expected_obs` argument in `derive_expected_records()` and `derive_locf_records()` +has been deprecated in favor of `dataset_ref`. (#2037) + ## Documentation diff --git a/R/derive_expected_records.R b/R/derive_expected_records.R index c9a753fedc..f61cf2dab9 100644 --- a/R/derive_expected_records.R +++ b/R/derive_expected_records.R @@ -5,17 +5,19 @@ #' #' @param dataset Input dataset #' -#' A data frame, the columns from `dataset_expected_obs` and specified by the +#' A data frame, the columns from `dataset_ref` and specified by the #' `by_vars` parameter are expected. #' -#' @param dataset_expected_obs Expected observations dataset +#' @param dataset_expected_obs *Deprecated*, please use `dataset_ref` instead. +#' +#' @param dataset_ref Expected observations dataset #' #' Data frame with the expected observations, e.g., all the expected #' combinations of `PARAMCD`, `PARAM`, `AVISIT`, `AVISITN`, ... #' #' @param by_vars Grouping variables #' -#' For each group defined by `by_vars` those observations from `dataset_expected_obs` +#' For each group defined by `by_vars` those observations from `dataset_ref` #' are added to the output dataset which do not have a corresponding observation #' in the input dataset. #' @@ -31,7 +33,7 @@ #' "TDOSE", PARCAT1 = "OVERALL")`. #' #' @details For each group (the variables specified in the `by_vars` parameter), -#' those records from `dataset_expected_obs` that are missing in the input +#' those records from `dataset_ref` that are missing in the input #' dataset are added to the output dataset. #' #' @return The input dataset with the missed expected observations added for each @@ -63,7 +65,7 @@ #' #' derive_expected_records( #' dataset = adqs, -#' dataset_expected_obs = parm_visit_ref, +#' dataset_ref = parm_visit_ref, #' by_vars = exprs(USUBJID, PARAMCD), #' set_values_to = exprs(DTYPE = "DERIVED") #' ) @@ -78,35 +80,46 @@ #' #' derive_expected_records( #' dataset = adqs, -#' dataset_expected_obs = parm_visit_ref, +#' dataset_ref = parm_visit_ref, #' by_vars = exprs(USUBJID, PARAMCD), #' set_values_to = exprs(DTYPE = "DERIVED") #' ) #' derive_expected_records <- function(dataset, dataset_expected_obs, + dataset_ref, by_vars = NULL, set_values_to = NULL) { + if (!missing(dataset_expected_obs)) { + deprecate_warn( + "0.12.0", + "derive_expected_records(dataset_expected_obs = )", + "derive_expected_records(dataset_ref = )" + ) + assert_data_frame(dataset_expected_obs) + dataset_ref <- dataset_expected_obs + } + # Check input parameters assert_vars(by_vars, optional = TRUE) - assert_data_frame(dataset_expected_obs) + assert_data_frame(dataset_ref) assert_data_frame( dataset, - required_vars = expr_c(by_vars, chr2vars(colnames(dataset_expected_obs))) + required_vars = expr_c(by_vars, chr2vars(colnames(dataset_ref))) ) assert_varval_list(set_values_to, optional = TRUE) # Derive expected records - ## ids: Variables from by_vars but not in dataset_expected_obs + ## ids: Variables from by_vars but not in dataset_ref ids <- dataset %>% - select(!!!setdiff(by_vars, chr2vars(colnames(dataset_expected_obs)))) %>% + select(!!!setdiff(by_vars, chr2vars(colnames(dataset_ref)))) %>% distinct() if (ncol(ids) > 0) { exp_obsv <- ids %>% - crossing(dataset_expected_obs) + crossing(dataset_ref) } else { - exp_obsv <- dataset_expected_obs + exp_obsv <- dataset_ref } # tmp workaround, update after using tidyr 1.2.0 exp_obs_vars <- exp_obsv %>% diff --git a/R/derive_locf_records.R b/R/derive_locf_records.R index e4a2e6ca2c..ad13927788 100644 --- a/R/derive_locf_records.R +++ b/R/derive_locf_records.R @@ -8,14 +8,17 @@ #' The columns specified by the `by_vars`, `analysis_var`, `order`, #' `keep_vars` parameters are expected. #' -#' @param dataset_expected_obs Expected observations dataset +#' @param dataset_expected_obs *Deprecated*, please use `dataset_ref` instead. +#' +#' @param dataset_ref Expected observations dataset #' #' Data frame with all the combinations of `PARAMCD`, `PARAM`, `AVISIT`, #' `AVISITN`, ... which are expected in the dataset is expected. #' +#' #' @param by_vars Grouping variables #' -#' For each group defined by `by_vars` those observations from `dataset_expected_obs` +#' For each group defined by `by_vars` those observations from `dataset_ref` #' are added to the output dataset which do not have a corresponding observation #' in the input dataset or for which `analysis_var` is `NA` for the corresponding observation #' in the input dataset. @@ -40,7 +43,7 @@ #' @author G Gayatri #' #' @details For each group (with respect to the variables specified for the -#' by_vars parameter) those observations from dataset_expected_obs are added to +#' by_vars parameter) those observations from `dataset_ref` are added to #' the output dataset #' - which do not have a corresponding observation in the input dataset or #' - for which `analysis_var` is NA for the corresponding observation in the input dataset. @@ -101,8 +104,8 @@ #' ) #' #' derive_locf_records( -#' data = advs, -#' dataset_expected_obs = advs_expected_obsv, +#' dataset = advs, +#' dataset_ref = advs_expected_obsv, #' by_vars = exprs(STUDYID, USUBJID, PARAMCD), #' order = exprs(AVISITN, AVISIT), #' keep_vars = exprs(PARAMN) @@ -110,10 +113,21 @@ #' derive_locf_records <- function(dataset, dataset_expected_obs, + dataset_ref, by_vars, analysis_var = AVAL, order, keep_vars = NULL) { + if (!missing(dataset_expected_obs)) { + deprecate_warn( + "0.12.0", + "derive_locf_records(dataset_expected_obs = )", + "derive_locf_records(dataset_ref = )" + ) + assert_data_frame(dataset_expected_obs) + dataset_ref <- dataset_expected_obs + } + #### Input Checking #### analysis_var <- assert_symbol(enexpr(analysis_var)) @@ -123,27 +137,27 @@ derive_locf_records <- function(dataset, assert_expr_list(order) # Check by_vars and order variables in input datasets - assert_data_frame(dataset_expected_obs) + assert_data_frame(dataset_ref) assert_data_frame( dataset, required_vars = expr_c( by_vars, analysis_var, extract_vars(order), keep_vars, - chr2vars(colnames(dataset_expected_obs)) + chr2vars(colnames(dataset_ref)) ) ) - #### Prepping 'dataset_expected_obs' #### + #### Prepping 'dataset_ref' #### # Get the IDs from input dataset for which the expected observations are to be added ids <- dataset %>% - select(!!!setdiff(by_vars, chr2vars(colnames(dataset_expected_obs)))) %>% + select(!!!setdiff(by_vars, chr2vars(colnames(dataset_ref)))) %>% distinct() exp_obsv <- ids %>% - crossing(dataset_expected_obs) + crossing(dataset_ref) diff --git a/man/derive_expected_records.Rd b/man/derive_expected_records.Rd index 75022c4b80..146b48ff60 100644 --- a/man/derive_expected_records.Rd +++ b/man/derive_expected_records.Rd @@ -7,6 +7,7 @@ derive_expected_records( dataset, dataset_expected_obs, + dataset_ref, by_vars = NULL, set_values_to = NULL ) @@ -14,17 +15,19 @@ derive_expected_records( \arguments{ \item{dataset}{Input dataset -A data frame, the columns from \code{dataset_expected_obs} and specified by the +A data frame, the columns from \code{dataset_ref} and specified by the \code{by_vars} parameter are expected.} -\item{dataset_expected_obs}{Expected observations dataset +\item{dataset_expected_obs}{\emph{Deprecated}, please use \code{dataset_ref} instead.} + +\item{dataset_ref}{Expected observations dataset Data frame with the expected observations, e.g., all the expected combinations of \code{PARAMCD}, \code{PARAM}, \code{AVISIT}, \code{AVISITN}, ...} \item{by_vars}{Grouping variables -For each group defined by \code{by_vars} those observations from \code{dataset_expected_obs} +For each group defined by \code{by_vars} those observations from \code{dataset_ref} are added to the output dataset which do not have a corresponding observation in the input dataset.} @@ -51,7 +54,7 @@ contains missing observations. } \details{ For each group (the variables specified in the \code{by_vars} parameter), -those records from \code{dataset_expected_obs} that are missing in the input +those records from \code{dataset_ref} that are missing in the input dataset are added to the output dataset. } \examples{ @@ -74,7 +77,7 @@ parm_visit_ref <- tribble( derive_expected_records( dataset = adqs, - dataset_expected_obs = parm_visit_ref, + dataset_ref = parm_visit_ref, by_vars = exprs(USUBJID, PARAMCD), set_values_to = exprs(DTYPE = "DERIVED") ) @@ -89,7 +92,7 @@ parm_visit_ref <- tribble( derive_expected_records( dataset = adqs, - dataset_expected_obs = parm_visit_ref, + dataset_ref = parm_visit_ref, by_vars = exprs(USUBJID, PARAMCD), set_values_to = exprs(DTYPE = "DERIVED") ) diff --git a/man/derive_locf_records.Rd b/man/derive_locf_records.Rd index 3ef93a3e51..5f500aaf0b 100644 --- a/man/derive_locf_records.Rd +++ b/man/derive_locf_records.Rd @@ -7,6 +7,7 @@ derive_locf_records( dataset, dataset_expected_obs, + dataset_ref, by_vars, analysis_var = AVAL, order, @@ -19,14 +20,16 @@ derive_locf_records( The columns specified by the \code{by_vars}, \code{analysis_var}, \code{order}, \code{keep_vars} parameters are expected.} -\item{dataset_expected_obs}{Expected observations dataset +\item{dataset_expected_obs}{\emph{Deprecated}, please use \code{dataset_ref} instead.} + +\item{dataset_ref}{Expected observations dataset Data frame with all the combinations of \code{PARAMCD}, \code{PARAM}, \code{AVISIT}, \code{AVISITN}, ... which are expected in the dataset is expected.} \item{by_vars}{Grouping variables -For each group defined by \code{by_vars} those observations from \code{dataset_expected_obs} +For each group defined by \code{by_vars} those observations from \code{dataset_ref} are added to the output dataset which do not have a corresponding observation in the input dataset or for which \code{analysis_var} is \code{NA} for the corresponding observation in the input dataset.} @@ -59,7 +62,7 @@ does not contain observations for missed visits/time points. } \details{ For each group (with respect to the variables specified for the -by_vars parameter) those observations from dataset_expected_obs are added to +by_vars parameter) those observations from \code{dataset_ref} are added to the output dataset \itemize{ \item which do not have a corresponding observation in the input dataset or @@ -113,8 +116,8 @@ advs_expected_obsv <- tribble( ) derive_locf_records( - data = advs, - dataset_expected_obs = advs_expected_obsv, + dataset = advs, + dataset_ref = advs_expected_obsv, by_vars = exprs(STUDYID, USUBJID, PARAMCD), order = exprs(AVISITN, AVISIT), keep_vars = exprs(PARAMN) diff --git a/tests/testthat/test-derive_expected_records.R b/tests/testthat/test-derive_expected_records.R index 05f0e003bc..0937a4a92c 100644 --- a/tests/testthat/test-derive_expected_records.R +++ b/tests/testthat/test-derive_expected_records.R @@ -30,7 +30,7 @@ test_that("derive_expected_records Test 1: missing values in `by_vars`", { actual_output <- derive_expected_records( dataset = input, - dataset_expected_obs = expected_obsv, + dataset_ref = expected_obsv, by_vars = exprs(USUBJID), set_values_to = exprs(DTYPE = "DERIVED") ) @@ -71,7 +71,7 @@ test_that("derive_expected_records Test 2: `by_vars` = NULL", { actual_output <- derive_expected_records( dataset = input, - dataset_expected_obs = expected_obsv, + dataset_ref = expected_obsv, by_vars = NULL, set_values_to = exprs(DTYPE = "DERIVED") ) @@ -114,7 +114,7 @@ test_that("derive_expected_records Test 3: visit variables are parameter indepen actual_output <- derive_expected_records( dataset = input, - dataset_expected_obs = expected_obsv, + dataset_ref = expected_obsv, by_vars = exprs(USUBJID, PARAMCD), set_values_to = exprs(DTYPE = "DERIVED") ) @@ -157,7 +157,7 @@ test_that("derive_expected_records Test 4: visit variables are parameter depende actual_output <- derive_expected_records( dataset = input, - dataset_expected_obs = expected_obsv, + dataset_ref = expected_obsv, by_vars = exprs(USUBJID), set_values_to = exprs(DTYPE = "DERIVED") ) diff --git a/tests/testthat/test-derive_locf_records.R b/tests/testthat/test-derive_locf_records.R index 61c9a103a6..9443e970c8 100644 --- a/tests/testthat/test-derive_locf_records.R +++ b/tests/testthat/test-derive_locf_records.R @@ -32,7 +32,7 @@ test_that("derive_locf_records Test 1: visits are missing", { actual_output <- derive_locf_records( input, - dataset_expected_obs = advs_expected_obsv, + dataset_ref = advs_expected_obsv, by_vars = exprs(STUDYID, USUBJID, PARAM, PARAMCD), order = exprs(AVISITN, AVISIT) ) @@ -82,7 +82,7 @@ test_that("derive_locf_records Test 2: some visits have missing AVAL", { actual_output <- derive_locf_records( input, - dataset_expected_obs = advs_expected_obsv, + dataset_ref = advs_expected_obsv, by_vars = exprs(STUDYID, USUBJID, PARAM, PARAMCD), order = exprs(AVISITN, AVISIT) ) @@ -142,7 +142,7 @@ test_that("derive_locf_records Test 3: visits are missing - and DTYPE already ex actual_output <- derive_locf_records( input, - dataset_expected_obs = advs_expected_obsv, + dataset_ref = advs_expected_obsv, by_vars = exprs(STUDYID, USUBJID, PARAM, PARAMCD), order = exprs(AVISITN, AVISIT) ) @@ -190,7 +190,7 @@ test_that("derive_locf_records Test 4: visit variables are parameter independent actual_output <- derive_locf_records( input, - dataset_expected_obs = advs_expected_obsv, + dataset_ref = advs_expected_obsv, by_vars = exprs(STUDYID, USUBJID, PARAM, PARAMCD), order = exprs(AVISITN, AVISIT) ) @@ -246,7 +246,7 @@ test_that("derive_locf_records Test 5: visit variables are parameter dependent", actual_output <- derive_locf_records( input, - dataset_expected_obs = advs_expected_obsv, + dataset_ref = advs_expected_obsv, by_vars = exprs(STUDYID, USUBJID, PARAM, PARAMCD), order = exprs(AVISITN, AVISIT) ) @@ -290,7 +290,7 @@ test_that("derive_locf_records Test 6: populate VISITNUM for LOCF records", { actual_output <- derive_locf_records( input, - dataset_expected_obs = advs_expected_obsv, + dataset_ref = advs_expected_obsv, by_vars = exprs(STUDYID, USUBJID, PARAM, PARAMCD), analysis_var = AVALC, order = exprs(AVISITN, AVISIT),