Skip to content

Commit

Permalink
Closes #2037 deprecate dataset_expected_obs for dataset_ref in re…
Browse files Browse the repository at this point in the history
…spective functions (#2039)

feat: #2037 deprecate `dataset_expected_obs` for `dataset_ref`

Co-authored-by: Zelos Zhu <[email protected]>
  • Loading branch information
zdz2101 and Zelos Zhu authored Aug 2, 2023
1 parent 4ed7aac commit b6f39a5
Show file tree
Hide file tree
Showing 7 changed files with 79 additions and 43 deletions.
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ the `filter_add` using the next phase of the deprecation process. (#1950)
- The `na_val` argument in `derive_var_shift()` has been deprecated in favor of
`missing_value` using the first phase of the deprecation process. (#2014)

- The `dataset_expected_obs` argument in `derive_expected_records()` and `derive_locf_records()`
has been deprecated in favor of `dataset_ref`. (#2037)

## Documentation


Expand Down
37 changes: 25 additions & 12 deletions R/derive_expected_records.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,19 @@
#'
#' @param dataset Input dataset
#'
#' A data frame, the columns from `dataset_expected_obs` and specified by the
#' A data frame; the columns from `dataset_ref` and those specified by the
#' `by_vars` parameter are expected.
#'
#' @param dataset_expected_obs Expected observations dataset
#' @param dataset_expected_obs *Deprecated*, please use `dataset_ref` instead.
#'
#' @param dataset_ref Expected observations dataset
#'
#' Data frame with the expected observations, e.g., all the expected
#' combinations of `PARAMCD`, `PARAM`, `AVISIT`, `AVISITN`, ...
#'
#' @param by_vars Grouping variables
#'
#' For each group defined by `by_vars` those observations from `dataset_expected_obs`
#' For each group defined by `by_vars` those observations from `dataset_ref`
#' are added to the output dataset which do not have a corresponding observation
#' in the input dataset.
#'
Expand All @@ -31,7 +33,7 @@
#' "TDOSE", PARCAT1 = "OVERALL")`.
#'
#' @details For each group (the variables specified in the `by_vars` parameter),
#' those records from `dataset_expected_obs` that are missing in the input
#' those records from `dataset_ref` that are missing in the input
#' dataset are added to the output dataset.
#'
#' @return The input dataset with the missing expected observations added for each
Expand Down Expand Up @@ -63,7 +65,7 @@
#'
#' derive_expected_records(
#' dataset = adqs,
#' dataset_expected_obs = parm_visit_ref,
#' dataset_ref = parm_visit_ref,
#' by_vars = exprs(USUBJID, PARAMCD),
#' set_values_to = exprs(DTYPE = "DERIVED")
#' )
Expand All @@ -78,35 +80,46 @@
#'
#' derive_expected_records(
#' dataset = adqs,
#' dataset_expected_obs = parm_visit_ref,
#' dataset_ref = parm_visit_ref,
#' by_vars = exprs(USUBJID, PARAMCD),
#' set_values_to = exprs(DTYPE = "DERIVED")
#' )
#'
derive_expected_records <- function(dataset,
dataset_expected_obs,
dataset_ref,
by_vars = NULL,
set_values_to = NULL) {
if (!missing(dataset_expected_obs)) {
deprecate_warn(
"0.12.0",
"derive_expected_records(dataset_expected_obs = )",
"derive_expected_records(dataset_ref = )"
)
assert_data_frame(dataset_expected_obs)
dataset_ref <- dataset_expected_obs
}

# Check input parameters
assert_vars(by_vars, optional = TRUE)
assert_data_frame(dataset_expected_obs)
assert_data_frame(dataset_ref)
assert_data_frame(
dataset,
required_vars = expr_c(by_vars, chr2vars(colnames(dataset_expected_obs)))
required_vars = expr_c(by_vars, chr2vars(colnames(dataset_ref)))
)
assert_varval_list(set_values_to, optional = TRUE)

# Derive expected records
## ids: Variables from by_vars but not in dataset_expected_obs
## ids: Variables from by_vars but not in dataset_ref
ids <- dataset %>%
select(!!!setdiff(by_vars, chr2vars(colnames(dataset_expected_obs)))) %>%
select(!!!setdiff(by_vars, chr2vars(colnames(dataset_ref)))) %>%
distinct()

if (ncol(ids) > 0) {
exp_obsv <- ids %>%
crossing(dataset_expected_obs)
crossing(dataset_ref)
} else {
exp_obsv <- dataset_expected_obs
exp_obsv <- dataset_ref
} # tmp workaround, update after using tidyr 1.2.0

exp_obs_vars <- exp_obsv %>%
Expand Down
34 changes: 24 additions & 10 deletions R/derive_locf_records.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,17 @@
#' The columns specified by the `by_vars`, `analysis_var`, `order`,
#' `keep_vars` parameters are expected.
#'
#' @param dataset_expected_obs Expected observations dataset
#' @param dataset_expected_obs *Deprecated*, please use `dataset_ref` instead.
#'
#' @param dataset_ref Expected observations dataset
#'
#' Data frame with all the combinations of `PARAMCD`, `PARAM`, `AVISIT`,
#' `AVISITN`, ... which are expected in the dataset.
#'
#'
#' @param by_vars Grouping variables
#'
#' For each group defined by `by_vars` those observations from `dataset_expected_obs`
#' For each group defined by `by_vars` those observations from `dataset_ref`
#' are added to the output dataset which do not have a corresponding observation
#' in the input dataset or for which `analysis_var` is `NA` for the corresponding observation
#' in the input dataset.
Expand All @@ -40,7 +43,7 @@
#' @author G Gayatri
#'
#' @details For each group (with respect to the variables specified for the
#' by_vars parameter) those observations from dataset_expected_obs are added to
#' `by_vars` parameter) those observations from `dataset_ref` are added to
#' the output dataset
#' - which do not have a corresponding observation in the input dataset or
#' - for which `analysis_var` is NA for the corresponding observation in the input dataset.
Expand Down Expand Up @@ -101,19 +104,30 @@
#' )
#'
#' derive_locf_records(
#' data = advs,
#' dataset_expected_obs = advs_expected_obsv,
#' dataset = advs,
#' dataset_ref = advs_expected_obsv,
#' by_vars = exprs(STUDYID, USUBJID, PARAMCD),
#' order = exprs(AVISITN, AVISIT),
#' keep_vars = exprs(PARAMN)
#' )
#'
derive_locf_records <- function(dataset,
dataset_expected_obs,
dataset_ref,
by_vars,
analysis_var = AVAL,
order,
keep_vars = NULL) {
if (!missing(dataset_expected_obs)) {
deprecate_warn(
"0.12.0",
"derive_locf_records(dataset_expected_obs = )",
"derive_locf_records(dataset_ref = )"
)
assert_data_frame(dataset_expected_obs)
dataset_ref <- dataset_expected_obs
}

#### Input Checking ####
analysis_var <- assert_symbol(enexpr(analysis_var))

Expand All @@ -123,27 +137,27 @@ derive_locf_records <- function(dataset,
assert_expr_list(order)

# Check by_vars and order variables in input datasets
assert_data_frame(dataset_expected_obs)
assert_data_frame(dataset_ref)
assert_data_frame(
dataset,
required_vars = expr_c(
by_vars, analysis_var, extract_vars(order), keep_vars,
chr2vars(colnames(dataset_expected_obs))
chr2vars(colnames(dataset_ref))
)
)


#### Prepping 'dataset_expected_obs' ####
#### Prepping 'dataset_ref' ####


# Get the IDs from input dataset for which the expected observations are to be added

ids <- dataset %>%
select(!!!setdiff(by_vars, chr2vars(colnames(dataset_expected_obs)))) %>%
select(!!!setdiff(by_vars, chr2vars(colnames(dataset_ref)))) %>%
distinct()

exp_obsv <- ids %>%
crossing(dataset_expected_obs)
crossing(dataset_ref)



Expand Down
15 changes: 9 additions & 6 deletions man/derive_expected_records.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 8 additions & 5 deletions man/derive_locf_records.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions tests/testthat/test-derive_expected_records.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ test_that("derive_expected_records Test 1: missing values in `by_vars`", {

actual_output <- derive_expected_records(
dataset = input,
dataset_expected_obs = expected_obsv,
dataset_ref = expected_obsv,
by_vars = exprs(USUBJID),
set_values_to = exprs(DTYPE = "DERIVED")
)
Expand Down Expand Up @@ -71,7 +71,7 @@ test_that("derive_expected_records Test 2: `by_vars` = NULL", {

actual_output <- derive_expected_records(
dataset = input,
dataset_expected_obs = expected_obsv,
dataset_ref = expected_obsv,
by_vars = NULL,
set_values_to = exprs(DTYPE = "DERIVED")
)
Expand Down Expand Up @@ -114,7 +114,7 @@ test_that("derive_expected_records Test 3: visit variables are parameter indepen

actual_output <- derive_expected_records(
dataset = input,
dataset_expected_obs = expected_obsv,
dataset_ref = expected_obsv,
by_vars = exprs(USUBJID, PARAMCD),
set_values_to = exprs(DTYPE = "DERIVED")
)
Expand Down Expand Up @@ -157,7 +157,7 @@ test_that("derive_expected_records Test 4: visit variables are parameter depende

actual_output <- derive_expected_records(
dataset = input,
dataset_expected_obs = expected_obsv,
dataset_ref = expected_obsv,
by_vars = exprs(USUBJID),
set_values_to = exprs(DTYPE = "DERIVED")
)
Expand Down
12 changes: 6 additions & 6 deletions tests/testthat/test-derive_locf_records.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ test_that("derive_locf_records Test 1: visits are missing", {

actual_output <- derive_locf_records(
input,
dataset_expected_obs = advs_expected_obsv,
dataset_ref = advs_expected_obsv,
by_vars = exprs(STUDYID, USUBJID, PARAM, PARAMCD),
order = exprs(AVISITN, AVISIT)
)
Expand Down Expand Up @@ -82,7 +82,7 @@ test_that("derive_locf_records Test 2: some visits have missing AVAL", {

actual_output <- derive_locf_records(
input,
dataset_expected_obs = advs_expected_obsv,
dataset_ref = advs_expected_obsv,
by_vars = exprs(STUDYID, USUBJID, PARAM, PARAMCD),
order = exprs(AVISITN, AVISIT)
)
Expand Down Expand Up @@ -142,7 +142,7 @@ test_that("derive_locf_records Test 3: visits are missing - and DTYPE already ex

actual_output <- derive_locf_records(
input,
dataset_expected_obs = advs_expected_obsv,
dataset_ref = advs_expected_obsv,
by_vars = exprs(STUDYID, USUBJID, PARAM, PARAMCD),
order = exprs(AVISITN, AVISIT)
)
Expand Down Expand Up @@ -190,7 +190,7 @@ test_that("derive_locf_records Test 4: visit variables are parameter independent

actual_output <- derive_locf_records(
input,
dataset_expected_obs = advs_expected_obsv,
dataset_ref = advs_expected_obsv,
by_vars = exprs(STUDYID, USUBJID, PARAM, PARAMCD),
order = exprs(AVISITN, AVISIT)
)
Expand Down Expand Up @@ -246,7 +246,7 @@ test_that("derive_locf_records Test 5: visit variables are parameter dependent",

actual_output <- derive_locf_records(
input,
dataset_expected_obs = advs_expected_obsv,
dataset_ref = advs_expected_obsv,
by_vars = exprs(STUDYID, USUBJID, PARAM, PARAMCD),
order = exprs(AVISITN, AVISIT)
)
Expand Down Expand Up @@ -290,7 +290,7 @@ test_that("derive_locf_records Test 6: populate VISITNUM for LOCF records", {

actual_output <- derive_locf_records(
input,
dataset_expected_obs = advs_expected_obsv,
dataset_ref = advs_expected_obsv,
by_vars = exprs(STUDYID, USUBJID, PARAM, PARAMCD),
analysis_var = AVALC,
order = exprs(AVISITN, AVISIT),
Expand Down

0 comments on commit b6f39a5

Please sign in to comment.