Skip to content

Commit

Permalink
Closes #1966 address derive_vars_joined bugs (#2016)
Browse files Browse the repository at this point in the history
* feat: #1966 make our check_type consistent

* feat: #1966 hacky solution to null new_vars .join problem

* chore: #1966 inserted line in wrong place

* feat: #1966 add news blurb for what was done

* feat: #1966 add tests

* chore: #1966 lintr

* chore: #1966 swap appropriate order selection and restore check_type arg

* chore: #1966 looks like that fixed it

* feat: #1966 issue warning for dataset_add naming conflicts when `new_vars` is NULL

* chore: #1966 clean up for readability

* chore: #1966 restore original replace_values_by_names

* chore: #1966 add additional test to demonstrate how order vars were fixed/selected

* feat: #1966 adopt feedback for error messaging of naming conflicts

---------

Co-authored-by: Zelos Zhu <[email protected]>
  • Loading branch information
zdz2101 and Zelos Zhu authored Aug 1, 2023
1 parent 01c6be7 commit 4ed7aac
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 1 deletion.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ the `filter_add` using the next phase of the deprecation process. (#1950)

- The list of package authors/contributors has been reformatted so that those who are actively maintaining the code base are now marked as *authors*, whereas those who made a significant contribution in the past are now down as *contributors*. All other acknowledgements have been moved to README section (#1941).

- `derive_vars_joined()` had two bugs with regards to duplicates messaging and when `new_vars` was set to `NULL` that have now been addressed (#1966).

# admiral 0.11.1

- Fix bug in `derive_param_tte()`. (#1962)
Expand Down
16 changes: 15 additions & 1 deletion R/derive_joined.R
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,19 @@ derive_vars_joined <- function(dataset,
if (is.null(new_vars)) {
new_vars <- chr2vars(colnames(dataset_add))
}
preexisting_vars <- chr2vars(colnames(dataset))
preexisting_vars_no_by_vars <- preexisting_vars[which(!(preexisting_vars %in% by_vars))]
duplicates <- intersect(replace_values_by_names(new_vars), preexisting_vars_no_by_vars)
if (length(duplicates) > 0) {
err_msg <- sprintf(
paste(
"The following columns in `dataset_add` have naming conflicts with `dataset`,\n",
"please make the appropriate modifications to `new_vars`, with respect to:\n%s"
),
enumerate(vars2chr(duplicates))
)
abort(err_msg)
}

# number observations of the input dataset to get a unique key
# (by_vars and tmp_obs_nr)
Expand All @@ -371,7 +384,7 @@ derive_vars_joined <- function(dataset,
filter_if(filter_add) %>%
select(
!!!by_vars,
!!!chr2vars(names(order)),
!!!replace_values_by_names(extract_vars(order)),
!!!replace_values_by_names(join_vars),
!!!intersect(unname(extract_vars(new_vars)), chr2vars(colnames(dataset_add)))
)
Expand Down Expand Up @@ -410,6 +423,7 @@ derive_vars_joined <- function(dataset,
by_vars = exprs(!!!by_vars_left, !!tmp_obs_nr),
new_vars = add_suffix_to_vars(new_vars, vars = common_vars, suffix = ".join"),
missing_values = missing_values,
check_type = check_type,
duplicate_msg = paste(
paste(
"After applying `filter_join` the joined dataset contains more",
Expand Down
91 changes: 91 additions & 0 deletions tests/testthat/test-derive_joined.R
Original file line number Diff line number Diff line change
Expand Up @@ -242,3 +242,94 @@ test_that("derive_vars_joined Test 7: new_vars expressions using variables from
keys = c("USUBJID", "AESEQ")
)
})

## Test 8: error if new_vars are already in dataset ----
test_that("derive_vars_joined Test 8: error if new_vars are already in dataset", {
myd <- data.frame(day = c(1, 2, 3), val = c(0, 17, 21))
expect_error(
derive_vars_joined(
myd,
dataset_add = myd,
order = exprs(day),
mode = "last",
filter_join = day < day.join
),
regexp = paste(
"The following columns in `dataset_add` have naming conflicts with `dataset`"
)
)
})

## Test 9: fixing a bug from issue 1966 ----
test_that("derive_vars_joined Test 9: fixing a bug from issue 1966", { # nolint
adlb_ast <- tribble(
~ADT, ~ASEQ,
"2002-01-01", 1,
"2002-02-02", 2,
"2002-02-02", 3
) %>%
mutate(
STUDYID = "ABC",
USUBJID = "1",
ADT = ymd(ADT),
ADTM = as_datetime(ADT)
)

adlb_tbili_pbl <- tribble(
~ADT, ~ASEQ,
"2002-01-01", 4,
"2002-02-02", 5,
"2002-02-02", 6
) %>%
mutate(
STUDYID = "ABC",
USUBJID = "1",
ADT = ymd(ADT),
ADTM = as_datetime(ADT)
)

adlb_joined <- derive_vars_joined(
adlb_ast,
dataset_add = adlb_tbili_pbl,
by_vars = exprs(STUDYID, USUBJID),
order = exprs(ADTM, ASEQ),
new_vars = exprs(TBILI_ADT = ADT),
filter_join = ADT <= ADT.join,
mode = "first"
)

expected <- adlb_ast %>%
mutate(TBILI_ADT = as.Date(c("2002-01-01", "2002-02-02", "2002-02-02"), "%Y-%m-%d"))

expect_dfs_equal(
base = expected,
compare = adlb_joined,
keys = c("ADT", "ASEQ", "STUDYID", "USUBJID", "ADTM", "TBILI_ADT")
)
})

## Test 10: order vars are selected properly in function body ----
test_that("derive_vars_joined Test 10: order vars are selected properly in function body", {
myd <- data.frame(day = c(1, 2, 3), val = c(0, 17, 21))
actual <- derive_vars_joined(
myd,
dataset_add = myd,
new_vars = exprs(first_val = val),
join_vars = exprs(day),
order = exprs(-day),
mode = "last",
filter_join = day < day.join
)
expected <- tribble(
~day, ~val, ~first_val,
1, 0, 17,
2, 17, 21,
3, 21, NA
)

expect_dfs_equal(
base = expected,
compare = actual,
keys = c("day", "val", "first_val")
)
})

0 comments on commit 4ed7aac

Please sign in to comment.