Skip to content

Commit

Permalink
Merge pull request #35 from pharmaverse/fix-supp_com_fp
Browse files Browse the repository at this point in the history
Correcting issue #31 and #33 in combine_supp
  • Loading branch information
statasaurus authored Jun 21, 2022
2 parents 0920b6b + 494264d commit 0c5c356
Show file tree
Hide file tree
Showing 5 changed files with 122 additions and 39 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ export(sort_by_key)
importFrom(dplyr,"%>%")
importFrom(dplyr,across)
importFrom(dplyr,all_of)
importFrom(dplyr,anti_join)
importFrom(dplyr,any_of)
importFrom(dplyr,arrange)
importFrom(dplyr,as_tibble)
Expand Down Expand Up @@ -69,3 +70,4 @@ importFrom(stringr,str_remove_all)
importFrom(stringr,str_to_lower)
importFrom(stringr,str_to_upper)
importFrom(tidyr,pivot_wider)
importFrom(utils,capture.output)
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# metatools 0.1.2
* correct bug with `combine_supp()` when the data and the supp have different classes for the IDVARVAL
* add error to `combine_supp()` to report when not all the rows in the supp have merged
* add `floating_pt_correction` argument to `combine_supp()` used for when there are floating point errors with `IDVARVAL`


# metatools 0.1.1
Expand Down
123 changes: 85 additions & 38 deletions R/supp.R
Original file line number Diff line number Diff line change
Expand Up @@ -119,20 +119,24 @@ make_supp_qual <- function(dataset, metacore, dataset_name = NULL){
#'
#' @param dataset Domain dataset
#' @param supp Supplemental Qualifier dataset
#' @param floating_pt_correction By default this is `FALSE`, but can be set to
#' `TRUE` if the IDVAR is a double and `combine_supp` is not merging correctly
#' due to floating point.
#'
#' @return a dataset with the supp variables added to it
#' @export
#'
#' @importFrom purrr discard map reduce
#' @importFrom dplyr if_else select group_by group_split pull rename left_join any_of
#' @importFrom dplyr if_else select group_by group_split pull rename left_join
#' any_of
#' @importFrom tidyr pivot_wider
#' @importFrom rlang sym
#'
#' @examples
#' library(safetyData)
#' library(tibble)
#' combine_supp(sdtm_ae, sdtm_suppae) %>% as_tibble()
combine_supp <- function(dataset, supp){
combine_supp <- function(dataset, supp, floating_pt_correction = FALSE){
if(!is.data.frame(dataset) | !is.data.frame(supp)){
stop("You must supply a domain and supplemental dataset", call. = FALSE)
}
Expand Down Expand Up @@ -163,43 +167,86 @@ combine_supp <- function(dataset, supp){
rename(DOMAIN = .data$RDOMAIN) %>%
group_by(.data$IDVAR) %>% #For when there are multiple IDs
group_split() %>%
map(function(x) {
# Get the IDVAR value to allow for renaming of IDVARVAL
id_var <- x %>%
pull(.data$IDVAR) %>%
unique()

wide_x <- x %>%
pivot_wider(
names_from = .data$QNAM,
values_from = .data$QVAL) %>%
select(-.data$IDVAR)


if(!is.na(id_var) && id_var != ""){
# the type the new variable needs to be
type_convert <- dataset %>%
pull(all_of(id_var)) %>%
mode() %>%
paste0("as.", .) %>%
match.fun()
wide_x <- wide_x %>%
mutate(IDVARVAL = type_convert(.data$IDVARVAL)) %>%
rename(!!sym(id_var) := .data$IDVARVAL) #Given there is only one ID per df we can just rename

by <- c("STUDYID", "DOMAIN", "USUBJID", id_var)

out <- left_join(dataset, wide_x,
by = by)
} else {
wide_x <- wide_x %>%
select(-.data$IDVARVAL)
out <- left_join(dataset, wide_x,
by = c("STUDYID", "DOMAIN", "USUBJID"))
}
out
}) %>%
map(~combine_supp_by_idvar(dataset, ., floating_pt_correction)) %>%
reduce(full_join, by= by)
}


#' Handles the combining of datasets and supps for a single IDVAR
#'
#' Pivots `supp` so that every `QNAM` becomes its own column holding the
#' matching `QVAL`, then left-joins the result onto `dataset`. When `supp` has
#' an IDVAR, the join uses `STUDYID`, `DOMAIN`, `USUBJID` and the variable
#' named by IDVAR; otherwise it uses `STUDYID`, `DOMAIN` and `USUBJID` only.
#'
#' @param dataset Domain dataset
#' @param supp Supplemental Qualifier dataset with a single IDVAR
#' @param floating_pt_correction By default this is `FALSE`, but can be set to
#'   `TRUE` if the IDVAR is a double and `combine_supp` is not merging
#'   correctly due to floating point. When `TRUE`, the join is done on the
#'   character representation of the IDVAR values on both sides.
#'
#' @return `dataset` with the supplemental qualifier columns joined onto it.
#'   An error is raised when `supp` has an IDVAR and some of its rows have no
#'   matching row in `dataset`.
#' @noRd
#' @importFrom dplyr anti_join
#' @importFrom utils capture.output
combine_supp_by_idvar <- function(dataset, supp, floating_pt_correction) {
  # Get the IDVAR value to allow for renaming of IDVARVAL
  id_var <- supp %>%
    pull(.data$IDVAR) %>%
    unique()

  # The scalar tests below are only meaningful for exactly one IDVAR
  if (length(id_var) != 1) {
    stop("The supp must contain exactly one distinct IDVAR value",
      call. = FALSE
    )
  }

  wide_x <- supp %>%
    pivot_wider(
      names_from = .data$QNAM,
      values_from = .data$QVAL
    ) %>%
    select(-.data$IDVAR)

  # No IDVAR: the qualifiers are joined on the subject-level keys only
  if (is.na(id_var) || id_var == "") {
    wide_x <- wide_x %>%
      select(-.data$IDVARVAL)
    return(
      left_join(dataset, wide_x, by = c("STUDYID", "DOMAIN", "USUBJID"))
    )
  }

  id_var_sym <- sym(id_var)

  if (floating_pt_correction) {
    # Join on the character form of the id values so that doubles which only
    # differ by representation error can still line up
    by <- c("STUDYID", "DOMAIN", "USUBJID", "IDVARVAL")
    wide_x <- wide_x %>%
      mutate(IDVARVAL = as.character(.data$IDVARVAL))
    dataset_chr <- dataset %>%
      mutate(IDVARVAL = as.character(!!id_var_sym))

    out <- left_join(dataset_chr, wide_x, by = by) %>%
      select(-.data$IDVARVAL)
    # Rename so the report below finds the id column under the same name as
    # in the non-corrected branch
    unmatched <- anti_join(wide_x, dataset_chr, by = by) %>%
      rename(!!id_var_sym := .data$IDVARVAL)
  } else {
    # IDVARVAL has to be converted to the mode of the dataset's id variable
    # before the two can be joined
    id_mode <- mode(pull(dataset, all_of(id_var)))
    type_convert <- match.fun(paste0("as.", id_mode))

    by <- c("STUDYID", "DOMAIN", "USUBJID", id_var)
    # Given there is only one ID per df we can just rename
    wide_x <- wide_x %>%
      mutate(IDVARVAL = type_convert(.data$IDVARVAL)) %>%
      rename(!!id_var_sym := .data$IDVARVAL)

    out <- left_join(dataset, wide_x, by = by)
    unmatched <- anti_join(wide_x, dataset, by = by)
  }

  # Report the rows in the supp that didn't get merged
  if (nrow(unmatched) > 0) {
    unmatched_keys <- select(unmatched, all_of(c("USUBJID", id_var)))
    missing_txt <- paste0(
      capture.output(print(unmatched_keys)),
      collapse = "\n"
    )
    stop(
      paste0(
        "Not all rows of the Supp were merged. The following rows are missing:\n",
        missing_txt
      ),
      call. = FALSE
    )
  }

  out
}

6 changes: 5 additions & 1 deletion man/combine_supp.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions tests/testthat/test-supp.R
Original file line number Diff line number Diff line change
Expand Up @@ -201,3 +201,31 @@ test_that("combine_supp works with different IDVARVAL classes", {
# Checks that combining a domain with a supp runs without any output,
# messages or warnings for the admiral TR data.
test_that("combine_supp works without QEVAL", {
  expect_silent(combine_supp(admiral_tr, admiral_supptr))
})

test_that("supp data that does not match the main data will raise an error", {
  # Point the first supp record at an id value that is not expected to exist
  # in the domain data, so that row cannot be merged
  sdtm_suppae_extra <- sdtm_suppae
  sdtm_suppae_extra$IDVARVAL[1] <- 99
  # Match on the message so that an unrelated failure does not pass the test
  expect_error(
    combine_supp(sdtm_ae, sdtm_suppae_extra),
    regexp = "Not all rows of the Supp were merged"
  )
})

test_that("Floating point correction works", {
  # Built by repeated addition rather than written as a literal; the intent is
  # a double that carries accumulated rounding error
  accumulated <- 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1
  ae_with_fp_error <- mutate(
    sdtm_ae,
    AESEQ = case_when(
      AESEQ == 1 ~ accumulated,
      TRUE ~ as.double(AESEQ)
    )
  )

  # Make sure a FP error is induced
  expect_error(combine_supp(ae_with_fp_error, sdtm_suppae))

  # With the correction switched on the merge should go through
  merged <- combine_supp(
    ae_with_fp_error,
    sdtm_suppae,
    floating_pt_correction = TRUE
  )
  observed <- merged %>%
    select(USUBJID, AESEQ, AETRTEM) %>%
    distinct() %>%
    arrange(USUBJID, AESEQ)

  # The same three columns taken straight from the supp
  expected <- sdtm_suppae %>%
    select(USUBJID, AESEQ = IDVARVAL, AETRTEM = QVAL) %>%
    arrange(USUBJID, AESEQ)

  expect_equal(observed, expected)
})


0 comments on commit 0c5c356

Please sign in to comment.