Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closes #1966 address derive_vars_joined bugs #2016

Merged
merged 17 commits into from
Aug 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ the `filter_add` using the next phase of the deprecation process. (#1950)

- The list of package authors/contributors has been reformatted so that those who are actively maintaining the code base are now marked as *authors*, whereas those who made a significant contribution in the past are now down as *contributors*. All other acknowledgements have been moved to README section (#1941).

- `derive_vars_joined()` had two bugs with regards to duplicates messaging and when `new_vars` was set to `NULL` that have now been addressed (#1966).

# admiral 0.11.1

- Fix bug in `derive_param_tte()`. (#1962)
Expand Down
16 changes: 15 additions & 1 deletion R/derive_joined.R
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,19 @@ derive_vars_joined <- function(dataset,
if (is.null(new_vars)) {
new_vars <- chr2vars(colnames(dataset_add))
}
preexisting_vars <- chr2vars(colnames(dataset))
preexisting_vars_no_by_vars <- preexisting_vars[which(!(preexisting_vars %in% by_vars))]
duplicates <- intersect(replace_values_by_names(new_vars), preexisting_vars_no_by_vars)
if (length(duplicates) > 0) {
err_msg <- sprintf(
paste(
"The following columns in `dataset_add` have naming conflicts with `dataset`,\n",
"please make the appropriate modifications to `new_vars`, with respect to:\n%s"
),
enumerate(vars2chr(duplicates))
)
abort(err_msg)
}

# number observations of the input dataset to get a unique key
# (by_vars and tmp_obs_nr)
Expand All @@ -371,7 +384,7 @@ derive_vars_joined <- function(dataset,
filter_if(filter_add) %>%
select(
!!!by_vars,
!!!chr2vars(names(order)),
!!!replace_values_by_names(extract_vars(order)),
!!!replace_values_by_names(join_vars),
!!!intersect(unname(extract_vars(new_vars)), chr2vars(colnames(dataset_add)))
)
Expand Down Expand Up @@ -410,6 +423,7 @@ derive_vars_joined <- function(dataset,
by_vars = exprs(!!!by_vars_left, !!tmp_obs_nr),
new_vars = add_suffix_to_vars(new_vars, vars = common_vars, suffix = ".join"),
missing_values = missing_values,
check_type = check_type,
duplicate_msg = paste(
paste(
"After applying `filter_join` the joined dataset contains more",
Expand Down
91 changes: 91 additions & 0 deletions tests/testthat/test-derive_joined.R
Original file line number Diff line number Diff line change
Expand Up @@ -242,3 +242,94 @@ test_that("derive_vars_joined Test 7: new_vars expressions using variables from
keys = c("USUBJID", "AESEQ")
)
})

## Test 8: error if new_vars are already in dataset ----
test_that("derive_vars_joined Test 8: error if new_vars are already in dataset", {
myd <- data.frame(day = c(1, 2, 3), val = c(0, 17, 21))
expect_error(
derive_vars_joined(
myd,
dataset_add = myd,
order = exprs(day),
mode = "last",
filter_join = day < day.join
),
regexp = paste(
"The following columns in `dataset_add` have naming conflicts with `dataset`"
)
)
})

## Test 9: fixing a bug from issue 1966 ----
test_that("derive_vars_joined Test 9: fixing a bug from issue 1966", { # nolint
adlb_ast <- tribble(
~ADT, ~ASEQ,
"2002-01-01", 1,
"2002-02-02", 2,
"2002-02-02", 3
) %>%
mutate(
STUDYID = "ABC",
USUBJID = "1",
ADT = ymd(ADT),
ADTM = as_datetime(ADT)
)

adlb_tbili_pbl <- tribble(
~ADT, ~ASEQ,
"2002-01-01", 4,
"2002-02-02", 5,
"2002-02-02", 6
) %>%
mutate(
STUDYID = "ABC",
USUBJID = "1",
ADT = ymd(ADT),
ADTM = as_datetime(ADT)
)

adlb_joined <- derive_vars_joined(
adlb_ast,
dataset_add = adlb_tbili_pbl,
by_vars = exprs(STUDYID, USUBJID),
order = exprs(ADTM, ASEQ),
new_vars = exprs(TBILI_ADT = ADT),
filter_join = ADT <= ADT.join,
mode = "first"
)

expected <- adlb_ast %>%
mutate(TBILI_ADT = as.Date(c("2002-01-01", "2002-02-02", "2002-02-02"), "%Y-%m-%d"))

expect_dfs_equal(
base = expected,
compare = adlb_joined,
keys = c("ADT", "ASEQ", "STUDYID", "USUBJID", "ADTM", "TBILI_ADT")
)
})

## Test 10: order vars are selected properly in function body ----
test_that("derive_vars_joined Test 10: order vars are selected properly in function body", {
myd <- data.frame(day = c(1, 2, 3), val = c(0, 17, 21))
actual <- derive_vars_joined(
myd,
dataset_add = myd,
new_vars = exprs(first_val = val),
join_vars = exprs(day),
order = exprs(-day),
mode = "last",
filter_join = day < day.join
)
expected <- tribble(
~day, ~val, ~first_val,
1, 0, 17,
2, 17, 21,
3, 21, NA
)

expect_dfs_equal(
base = expected,
compare = actual,
keys = c("day", "val", "first_val")
)
})
Loading