diff --git a/NEWS.md b/NEWS.md index 719160632c..9f89acb292 100644 --- a/NEWS.md +++ b/NEWS.md @@ -17,6 +17,22 @@ character vector (`'--DTC'`), was imputed. (#2146) were enhanced such that more than one summary variable can be derived, e.g., `AVAL` as the sum and `ADT` as the maximum of the contributing records. (#1792) +- The "joined" functions (`derive_vars_joined()`, `derive_var_joined_exist_flag()`, +`filter_joined()`, and `event_joined()`) were unified: (#2126) + - The `dataset_add` and `filter_add` arguments were added to + `derive_var_joined_exist_flag()` and `filter_joined()`. + - The `filter` argument was renamed to `filter_join` in + `derive_var_joined_exist_flag()` and `filter_joined()`. + - The `tmp_obs_nr_var`, the `join_type`, the `first_cond_lower`, and the + `first_cond_upper` arguments were added to `derive_vars_joined()`. + - In `derive_var_joined_exist_flag()`, `filter_joined()`, and `event_joined()` + the `first_cond` argument was renamed to `first_cond_upper` and the + `first_cond_lower` argument was added. + - In all "joined" functions the `filter_add` argument is applied to the + additional dataset grouped by `by_vars` and the `filter_join` argument is + applied to the joined dataset grouped by the observations from the input + dataset. I.e., summary functions like `all()` or `any()` can be used. + - The `tmp_event_nr_var` argument was added to `derive_extreme_records()` to allow more control of the selection of records. It creates a temporary variable for the event number, which can be used in `order`. (#2140) @@ -38,6 +54,13 @@ for the event number, which can be used in `order`. (#2140) - The default value for the `false_value` argument in `derive_extreme_records()` was changed to `NA_character_` (#2125) +- In `filter_joined()` and `derive_var_joined_exist_flag()` (#2126) + - the `first_cond` argument was deprecated in favor of `first_cond_upper` and + - the `filter` argument was deprecated in favor of `filter_join`. 
+ +- In `event_joined()` the `first_cond` argument was deprecated in favor of +`first_cond_upper`. (#2126) + - The `ignore_event_order` argument in `derive_extreme_event()` was deprecated and the selection of the records was changed to allow more control. Before, the records were selected first by event and then by `order`. Now they are selected diff --git a/R/derive_extreme_event.R b/R/derive_extreme_event.R index 5537176244..f76febb25a 100644 --- a/R/derive_extreme_event.R +++ b/R/derive_extreme_event.R @@ -39,8 +39,8 @@ #' @param mode Selection mode (first or last) #' #' If a particular event from `events` has more than one observation, -#' "first"/"last" is to select the first/last record of this type of events -#' sorting by `order`. +#' `"first"`/`"last"` is used to select the first/last record of this type of +#' event sorting by `order`. #' #' *Permitted Values:* `"first"`, `"last"` #' @@ -276,7 +276,7 @@ #' ), #' join_vars = exprs(AVALC, ADT), #' join_type = "after", -#' first_cond = AVALC.join == "CR" & +#' first_cond_upper = AVALC.join == "CR" & #' ADT.join >= ADT + 28, #' condition = AVALC == "CR" & #' all(AVALC.join %in% c("CR", "NE")) & @@ -292,7 +292,7 @@ #' ), #' join_vars = exprs(AVALC, ADT), #' join_type = "after", -#' first_cond = AVALC.join %in% c("CR", "PR") & +#' first_cond_upper = AVALC.join %in% c("CR", "PR") & #' ADT.join >= ADT + 28, #' condition = AVALC == "PR" & #' all(AVALC.join %in% c("CR", "PR", "NE")) & @@ -447,12 +447,14 @@ derive_extreme_event <- function(dataset, } else { data_events <- filter_joined( data_source, + dataset_add = data_source, by_vars = by_vars, join_vars = event$join_vars, join_type = event$join_type, - first_cond = !!event$first_cond, + first_cond_lower = !!event$first_cond_lower, + first_cond_upper = !!event$first_cond_upper, order = event_order, - filter = !!event$condition + filter_join = !!event$condition ) } if (is.null(event$keep_source_vars)) { @@ -495,6 +497,8 @@ derive_extreme_event <- function(dataset, 
#' `derive_extreme_event()`. If the argument is not specified, the input #' dataset (`dataset`) of `derive_extreme_event()` is used. #' +#' *Permitted Values*: a character scalar +#' #' @param condition An unquoted condition for selecting the observations, which #' will contribute to the extreme event. If the condition contains summary #' functions like `all()`, they are evaluated for each by group separately. @@ -517,6 +521,8 @@ derive_extreme_event <- function(dataset, #' PARAM = "Worst Sleeping Problems")`. The values can be a symbol, a #' character string, a numeric value, `NA` or an expression. #' +#' *Permitted Values*: a named list of expressions, e.g., created by `exprs()` +#' #' @param keep_source_vars Variables to keep from the source dataset #' #' The specified variables are kept for the selected observations. The @@ -532,6 +538,8 @@ derive_extreme_event <- function(dataset, #' The description does not affect the derivations where the event is used. It #' is intended for documentation only. #' +#' *Permitted Values*: a character scalar +#' #' @keywords source_specifications #' @family source_specifications #' @@ -587,9 +595,22 @@ event <- function(dataset_name = NULL, #' `derive_extreme_event()`. If the argument is not specified, the input #' dataset (`dataset`) of `derive_extreme_event()` is used. #' +#' *Permitted Values*: a character scalar +#' #' @param condition An unquoted condition for selecting the observations, which #' will contribute to the extreme event. #' +#' The condition is applied to the joined dataset for selecting the confirmed +#' observations. The condition can include summary functions like `all()` or +#' `any()`. The joined dataset is grouped by the original observations. I.e., +#' the summary functions are applied to all observations up to the confirmation +#' observation. 
For example in the oncology setting when using this function +#' for confirmed best overall response, `condition = AVALC == "CR" & +#' all(AVALC.join %in% c("CR", "NE")) & count_vals(var = AVALC.join, val = +#' "NE") <= 1` selects observations with response "CR" and for all +#' observations up to the confirmation observation the response is "CR" or +#' "NE" and there is at most one "NE". +#' #' *Permitted Values*: an unquoted condition #' #' @param join_vars Variables to keep from joined dataset @@ -598,11 +619,13 @@ event <- function(dataset_name = NULL, #' this parameter. The specified variables are added to the joined dataset #' with suffix ".join". For example to select all observations with `AVALC == #' "Y"` and `AVALC == "Y"` for at least one subsequent visit `join_vars = -#' exprs(AVALC, AVISITN)` and `filter = AVALC == "Y" & AVALC.join == "Y" & +#' exprs(AVALC, AVISITN)` and `condition = AVALC == "Y" & AVALC.join == "Y" & #' AVISITN < AVISITN.join` could be specified. #' #' The `*.join` variables are not included in the output dataset. #' +#' *Permitted Values*: a named list of expressions, e.g., created by `exprs()` +#' #' @param join_type Observations to keep after joining #' #' The argument determines which of the joined observations are kept with @@ -614,11 +637,45 @@ event <- function(dataset_name = NULL, #' #' @param first_cond Condition for selecting range of data #' +#' `r lifecycle::badge("deprecated")` +#' +#' This argument is *deprecated*, please use `first_cond_upper` instead. +#' #' If this argument is specified, the other observations are restricted up to #' the first observation where the specified condition is fulfilled. If the #' condition is not fulfilled for any of the subsequent observations, all #' observations are removed. 
#' +#' *Permitted Values*: an unquoted condition +#' +#' @param first_cond_lower Condition for selecting range of data (before) +#' +#' If this argument is specified, the other observations are restricted from +#' the first observation before the current observation where the specified +#' condition is fulfilled up to the current observation. If the condition is +#' not fulfilled for any of the other observations, no observations are +#' considered, i.e., the observation is not flagged. +#' +#' This parameter should be specified if `condition` contains summary +#' functions which should not apply to all observations but only from a +#' certain observation before the current observation up to the current +#' observation. +#' +#' *Permitted Values*: an unquoted condition +#' +#' @param first_cond_upper Condition for selecting range of data (after) +#' +#' If this argument is specified, the other observations are restricted up to +#' the first observation where the specified condition is fulfilled. If the +#' condition is not fulfilled for any of the other observations, no +#' observations are considered, i.e., the observation is not flagged. +#' +#' This parameter should be specified if `condition` contains summary +#' functions which should not apply to all observations but only up to the +#' confirmation assessment. +#' +#' *Permitted Values*: an unquoted condition +#' #' @param order If specified, the specified variables or expressions are used to #' select the first observation. 
#' @@ -627,6 +684,8 @@ event <- function(dataset_name = NULL, #' #' @inheritParams event #' +#' @return An object of class `event_joined` +#' #' @keywords source_specifications #' @family source_specifications #' @@ -634,16 +693,162 @@ event <- function(dataset_name = NULL, #' #' @export #' -#' @return An object of class `event_joined` +#' @examples +#' library(tibble) +#' library(dplyr) +#' library(lubridate) +#' # Derive confirmed best overall response (using event_joined()) +#' # CR - complete response, PR - partial response, SD - stable disease +#' # NE - not evaluable, PD - progressive disease +#' adsl <- tribble( +#' ~USUBJID, ~TRTSDTC, +#' "1", "2020-01-01", +#' "2", "2019-12-12", +#' "3", "2019-11-11", +#' "4", "2019-12-30", +#' "5", "2020-01-01", +#' "6", "2020-02-02", +#' "7", "2020-02-02", +#' "8", "2020-02-01" +#' ) %>% +#' mutate(TRTSDT = ymd(TRTSDTC)) +#' +#' adrs <- tribble( +#' ~USUBJID, ~ADTC, ~AVALC, +#' "1", "2020-01-01", "PR", +#' "1", "2020-02-01", "CR", +#' "1", "2020-02-16", "NE", +#' "1", "2020-03-01", "CR", +#' "1", "2020-04-01", "SD", +#' "2", "2020-01-01", "SD", +#' "2", "2020-02-01", "PR", +#' "2", "2020-03-01", "SD", +#' "2", "2020-03-13", "CR", +#' "4", "2020-01-01", "PR", +#' "4", "2020-03-01", "NE", +#' "4", "2020-04-01", "NE", +#' "4", "2020-05-01", "PR", +#' "5", "2020-01-01", "PR", +#' "5", "2020-01-10", "PR", +#' "5", "2020-01-20", "PR", +#' "6", "2020-02-06", "PR", +#' "6", "2020-02-16", "CR", +#' "6", "2020-03-30", "PR", +#' "7", "2020-02-06", "PR", +#' "7", "2020-02-16", "CR", +#' "7", "2020-04-01", "NE", +#' "8", "2020-02-16", "PD" +#' ) %>% +#' mutate( +#' ADT = ymd(ADTC), +#' PARAMCD = "OVR", +#' PARAM = "Overall Response by Investigator" +#' ) %>% +#' derive_vars_merged( +#' dataset_add = adsl, +#' by_vars = exprs(USUBJID), +#' new_vars = exprs(TRTSDT) +#' ) +#' +#' derive_extreme_event( +#' adrs, +#' by_vars = exprs(USUBJID), +#' order = exprs(ADT), +#' mode = "first", +#' source_datasets = list(adsl = adsl), +#' events = 
list( +#' event_joined( +#' description = paste( +#' "CR needs to be confirmed by a second CR at least 28 days later", +#' "at most one NE is acceptable between the two assessments" +#' ), +#' join_vars = exprs(AVALC, ADT), +#' join_type = "after", +#' first_cond_upper = AVALC.join == "CR" & +#' ADT.join >= ADT + 28, +#' condition = AVALC == "CR" & +#' all(AVALC.join %in% c("CR", "NE")) & +#' count_vals(var = AVALC.join, val = "NE") <= 1, +#' set_values_to = exprs( +#' AVALC = "CR" +#' ) +#' ), +#' event_joined( +#' description = paste( +#' "PR needs to be confirmed by a second CR or PR at least 28 days later,", +#' "at most one NE is acceptable between the two assessments" +#' ), +#' join_vars = exprs(AVALC, ADT), +#' join_type = "after", +#' first_cond_upper = AVALC.join %in% c("CR", "PR") & +#' ADT.join >= ADT + 28, +#' condition = AVALC == "PR" & +#' all(AVALC.join %in% c("CR", "PR", "NE")) & +#' count_vals(var = AVALC.join, val = "NE") <= 1, +#' set_values_to = exprs( +#' AVALC = "PR" +#' ) +#' ), +#' event( +#' description = paste( +#' "CR, PR, or SD are considered as SD if occurring at least 28", +#' "after treatment start" +#' ), +#' condition = AVALC %in% c("CR", "PR", "SD") & ADT >= TRTSDT + 28, +#' set_values_to = exprs( +#' AVALC = "SD" +#' ) +#' ), +#' event( +#' condition = AVALC == "PD", +#' set_values_to = exprs( +#' AVALC = "PD" +#' ) +#' ), +#' event( +#' condition = AVALC %in% c("CR", "PR", "SD", "NE"), +#' set_values_to = exprs( +#' AVALC = "NE" +#' ) +#' ), +#' event( +#' description = "set response to MISSING for patients without records in ADRS", +#' dataset_name = "adsl", +#' condition = TRUE, +#' set_values_to = exprs( +#' AVALC = "MISSING" +#' ), +#' keep_source_vars = exprs(TRTSDT) +#' ) +#' ), +#' set_values_to = exprs( +#' PARAMCD = "CBOR", +#' PARAM = "Best Confirmed Overall Response by Investigator" +#' ) +#' ) %>% +#' filter(PARAMCD == "CBOR") event_joined <- function(dataset_name = NULL, condition, order = NULL, join_vars, 
join_type, first_cond = NULL, + first_cond_lower = NULL, + first_cond_upper = NULL, set_values_to = NULL, keep_source_vars = NULL, description = NULL) { + if (!missing(first_cond)) { + deprecate_warn( + "1.0.0", + "event_joined(first_cond=)", + "event_joined(first_cond_upper=)" + ) + first_cond_upper <- assert_filter_cond(enexpr(first_cond), optional = TRUE) + } else { + first_cond_upper <- assert_filter_cond(enexpr(first_cond_upper), optional = TRUE) + } + out <- list( description = assert_character_scalar(description, optional = TRUE), dataset_name = assert_character_scalar(dataset_name, optional = TRUE), @@ -655,7 +860,8 @@ event_joined <- function(dataset_name = NULL, values = c("before", "after", "all"), case_sensitive = FALSE ), - first_cond = assert_filter_cond(enexpr(first_cond), optional = TRUE), + first_cond_lower = assert_filter_cond(enexpr(first_cond_lower), optional = TRUE), + first_cond_upper = first_cond_upper, set_values_to = assert_expr_list( set_values_to, named = TRUE, diff --git a/R/derive_joined.R b/R/derive_joined.R index f166e05031..9b38acf24b 100644 --- a/R/derive_joined.R +++ b/R/derive_joined.R @@ -63,6 +63,18 @@ #' #' *Permitted Values*: list of variables or named expressions created by `exprs()` #' +#' @param tmp_obs_nr_var Temporary observation number +#' +#' The specified variable is added to the input dataset (`dataset`) and the +#' additional dataset (`dataset_add`). It is set to the observation number +#' with respect to `order`. For each by group (`by_vars`) the observation +#' number starts with `1`. The variable can be used in the conditions +#' (`filter_join`, `first_cond_upper`, `first_cond_lower`). It can also be +#' used to select consecutive observations or the last observation. +#' +#' The variable is not included in the output dataset. To include it specify +#' it for `new_vars`. 
+#' #' @param join_vars Variables to use from additional dataset #' #' Any extra variables required from the additional dataset for `filter_join` @@ -81,16 +93,29 @@ #' #' *Permitted Values*: list of variables or named expressions created by `exprs()` #' -#' @param filter_add Filter for additional dataset (`dataset_add`) +#' @param first_cond_lower Condition for selecting range of data (before) #' -#' Only observations from `dataset_add` fulfilling the specified condition are -#' joined to the input dataset. If the argument is not specified, all -#' observations are joined. +#' If this argument is specified, the other observations are restricted from +#' the first observation before the current observation where the specified +#' condition is fulfilled up to the current observation. If the condition is +#' not fulfilled for any of the other observations, no observations are +#' considered. #' -#' Variables created by `order` or `new_vars` arguments can be used in the -#' condition. +#' This argument should be specified if `filter_join` contains summary +#' functions which should not apply to all observations but only from a +#' certain observation before the current observation up to the current +#' observation. For an example see the last example below. #' -#' *Permitted Values*: a condition +#' @param first_cond_upper Condition for selecting range of data (after) +#' +#' If this argument is specified, the other observations are restricted up to +#' the first observation where the specified condition is fulfilled. If the +#' condition is not fulfilled for any of the other observations, no +#' observations are considered. +#' +#' This argument should be specified if `filter_join` contains summary +#' functions which should not apply to all observations but only up to the +#' confirmation assessment. For an example see the last example below. 
#' #' @param filter_join Filter for the joined dataset #' @@ -100,6 +125,9 @@ #' Variables created by `order` or `new_vars` arguments can be used in the #' condition. #' +#' The condition can include summary functions like `all()` or `any()`. The +#' joined dataset is grouped by the original observations. +#' #' *Permitted Values*: a condition #' #' @param mode Selection mode @@ -124,6 +152,8 @@ #' #' *Permitted Values*: `"none"`, `"warning"`, `"error"` #' +#' @inheritParams get_joined_data +#' @inheritParams derive_vars_merged #' #' @details #' @@ -140,6 +170,21 @@ #' by the grouping variables (`by_vars`). If no grouping variables are #' specified, a full join is performed. #' +#' 1. If `first_cond_lower` is specified, for each observation of the input +#' dataset the joined dataset is restricted to observations from the first +#' observation where `first_cond_lower` is fulfilled (the observation fulfilling +#' the condition is included) up to the observation of the input dataset. If for +#' an observation of the input dataset the condition is not fulfilled, the +#' observation is removed. +#' +#' If `first_cond_upper` is specified, for each observation of the input +#' dataset the joined dataset is restricted to observations up to the first +#' observation where `first_cond_upper` is fulfilled (the observation +#' fulfilling the condition is included). If for an observation of the input +#' dataset the condition is not fulfilled, the observation is removed. +#' +#' For an example see the last example in the "Examples" section. +#' #' 1. The joined dataset is restricted by the `filter_join` condition. #' #' 1. If `order` is specified, for each observation of the input dataset the @@ -153,12 +198,12 @@ #' Observations in the additional dataset which have no matching observation in #' the input dataset are ignored. 
#' -#' @inheritParams derive_vars_merged -#' #' @return The output dataset contains all observations and variables of the #' input dataset and additionally the variables specified for `new_vars` from #' the additional dataset (`dataset_add`). #' +#' @seealso [derive_var_joined_exist_flag()], [filter_joined()] +#' #' @keywords der_gen #' @family der_gen #' @@ -192,6 +237,7 @@ #' derive_vars_joined( #' adbds, #' dataset_add = windows, +#' join_type = "all", #' filter_join = AWLO <= ADY & ADY <= AWHI #' ) #' @@ -214,6 +260,7 @@ #' order = exprs(AVAL), #' new_vars = exprs(NADIR = AVAL), #' join_vars = exprs(ADY), +#' join_type = "all", #' filter_add = ADY > 0, #' filter_join = ADY.join < ADY, #' mode = "first", @@ -247,6 +294,7 @@ #' by_vars = exprs(USUBJID), #' order = exprs(AVAL, desc(ADY)), #' new_vars = exprs(HGB_MAX = AVAL, HGB_DY = ADY), +#' join_type = "all", #' filter_add = PARAMCD == "HGB", #' filter_join = ASTDY - 14 <= ADY & ADY <= ASTDY, #' mode = "last" @@ -287,6 +335,7 @@ #' dataset_add = period_ref, #' by_vars = exprs(STUDYID, USUBJID), #' join_vars = exprs(APERSDT, APEREDT), +#' join_type = "all", #' filter_join = APERSDT <= ASTDT & ASTDT <= APEREDT #' ) #' @@ -313,6 +362,7 @@ #' dataset_add = ex, #' by_vars = exprs(USUBJID), #' order = exprs(EXSDT = convert_dtc_to_dt(EXSDTC)), +#' join_type = "all", #' new_vars = exprs(LDRELD = compute_duration( #' start_date = EXSDT, end_date = ASTDT #' )), @@ -320,13 +370,62 @@ #' filter_join = EXSDT <= ASTDT, #' mode = "last" #' ) +#' +#' # first_cond_lower and first_cond_upper argument +#' myd <- tribble( +#' ~subj, ~day, ~val, +#' "1", 1, "++", +#' "1", 2, "-", +#' "1", 3, "0", +#' "1", 4, "+", +#' "1", 5, "++", +#' "1", 6, "-", +#' "2", 1, "-", +#' "2", 2, "++", +#' "2", 3, "+", +#' "2", 4, "0", +#' "2", 5, "-", +#' "2", 6, "++" +#' ) +#' +#' # derive last "++" day before "0" where all results in between are "+" or "++" +#' derive_vars_joined( +#' myd, +#' dataset_add = myd, +#' by_vars = exprs(subj), +#' order = 
exprs(day), +#' mode = "first", +#' new_vars = exprs(prev_plus_day = day), +#' join_vars = exprs(val), +#' join_type = "before", +#' first_cond_lower = val.join == "++", +#' filter_join = val == "0" & all(val.join %in% c("+", "++")) +#' ) +#' +#' # derive first "++" day after "0" where all results in between are "+" or "++" +#' derive_vars_joined( +#' myd, +#' dataset_add = myd, +#' by_vars = exprs(subj), +#' order = exprs(day), +#' mode = "last", +#' new_vars = exprs(next_plus_day = day), +#' join_vars = exprs(val), +#' join_type = "after", +#' first_cond_upper = val.join == "++", +#' filter_join = val == "0" & all(val.join %in% c("+", "++")) +#' ) derive_vars_joined <- function(dataset, dataset_add, by_vars = NULL, order = NULL, new_vars = NULL, + tmp_obs_nr_var = NULL, join_vars = NULL, + join_type, filter_add = NULL, + first_cond_lower = NULL, + first_cond_upper = NULL, filter_join = NULL, mode = NULL, exist_flag = NULL, @@ -349,8 +448,12 @@ derive_vars_joined <- function(dataset, ) ) + tmp_obs_nr_var <- assert_symbol(enexpr(tmp_obs_nr_var), optional = TRUE) filter_add <- assert_filter_cond(enexpr(filter_add), optional = TRUE) + first_cond_lower <- assert_filter_cond(enexpr(first_cond_lower), optional = TRUE) + first_cond_upper <- assert_filter_cond(enexpr(first_cond_upper), optional = TRUE) filter_join <- assert_filter_cond(enexpr(filter_join), optional = TRUE) + exist_flag <- assert_symbol(enexpr(exist_flag), optional = TRUE) if (is.null(new_vars)) { new_vars <- chr2vars(colnames(dataset_add)) @@ -379,34 +482,29 @@ derive_vars_joined <- function(dataset, check_type = "none" ) - # prepare right side of the join, - # by_vars are renamed here, new_vars will be renamed at the end - data_right <- dataset_add %>% - mutate(!!!order, !!!join_vars) %>% - filter_if(filter_add) %>% - select( - !!!by_vars, - !!!replace_values_by_names(extract_vars(order)), - !!!replace_values_by_names(join_vars), - !!!intersect(unname(extract_vars(new_vars)), 
chr2vars(colnames(dataset_add))) - ) - - # join dataset (if no by variable, a full join is performed) - data_joined <- left_join( + data_joined <- get_joined_data( data, - data_right, - by = vars2chr(by_vars_left), - suffix = c("", ".join") + dataset_add = dataset_add, + by_vars = by_vars, + join_vars = expr_c( + join_vars, + intersect(unname(extract_vars(new_vars)), chr2vars(colnames(dataset_add))) + ), + join_type = join_type, + first_cond_lower = !!first_cond_lower, + first_cond_upper = !!first_cond_upper, + order = order, + tmp_obs_nr_var = !!tmp_obs_nr_var, + filter_add = !!filter_add, + filter_join = !!filter_join, + check_type = check_type ) - # select observations for the new variables - data_return <- filter_if(data_joined, filter_join) - common_vars <- - chr2vars(setdiff(intersect(colnames(data), colnames(data_right)), vars2chr(by_vars))) + chr2vars(setdiff(intersect(colnames(data), colnames(dataset_add)), vars2chr(by_vars))) if (!is.null(order)) { - data_return <- filter_extreme( - data_return, + data_joined <- filter_extreme( + data_joined, by_vars = expr_c(by_vars_left, tmp_obs_nr), order = add_suffix_to_vars( replace_values_by_names(order), @@ -421,12 +519,12 @@ derive_vars_joined <- function(dataset, # merge new variables to the input dataset and rename them data %>% derive_vars_merged( - dataset_add = data_return, + dataset_add = data_joined, by_vars = exprs(!!!by_vars_left, !!tmp_obs_nr), new_vars = add_suffix_to_vars(new_vars, vars = common_vars, suffix = ".join"), missing_values = missing_values, check_type = check_type, - exist_flag = !!enexpr(exist_flag), + exist_flag = !!exist_flag, true_value = true_value, false_value = false_value, duplicate_msg = paste( @@ -443,3 +541,363 @@ derive_vars_joined <- function(dataset, ) %>% remove_tmp_vars() } + +#' Join Data for "joined" functions +#' +#' The helper function joins the data for the "joined" functions. All `.join` +#' variables are included in the output dataset. 
+#' +#' @param dataset +#' `r roxygen_param_dataset(expected_vars = c("by_vars"))` +#' +#' @param dataset_add Additional dataset +#' +#' The variables specified by the `by_vars`, the `new_vars`, the `join_vars`, +#' and the `order` argument are expected. +#' +#' @param by_vars Grouping variables +#' +#' The two datasets are joined by the specified variables. Variables from the +#' additional dataset can be renamed by naming the element, i.e., `by_vars = +#' exprs(<name in input dataset> = <name in additional dataset>)`. +#' +#' *Permitted Values*: list of variables created by `exprs()` +#' +#' @param order Sort order +#' +#' If the argument is set to a non-null value, for each observation of the +#' input dataset the first or last observation from the joined dataset is +#' selected with respect to the specified order. The specified variables are +#' expected in the additional dataset (`dataset_add`). If a variable is +#' available in both `dataset` and `dataset_add`, the one from `dataset_add` +#' is used for the sorting. +#' +#' If an expression is named, e.g., `exprs(EXSTDT = +#' convert_dtc_to_dt(EXSTDTC), EXSEQ)`, a corresponding variable (`EXSTDT`) is +#' added to the additional dataset and can be used in the filter conditions +#' (`filter_add`, `filter_join`) and for `join_vars` and `new_vars`. The +#' variable is not included in the output dataset. +#' +#' *Permitted Values*: list of expressions created by `exprs()`, e.g., +#' `exprs(ADT, desc(AVAL))` or `NULL` +#' +#' @param new_vars Variables to add +#' +#' The specified variables from the additional dataset are added to the output +#' dataset. Variables can be renamed by naming the element, i.e., `new_vars = +#' exprs(<new name> = <old name>)`. +#' +#' For example `new_vars = exprs(var1, var2)` adds variables `var1` and `var2` +#' from `dataset_add` to the input dataset. +#' +#' And `new_vars = exprs(var1, new_var2 = old_var2)` takes `var1` and +#' `old_var2` from `dataset_add` and adds them to the input dataset renaming +#' `old_var2` to `new_var2`. 
+#' +#' Values of the added variables can be modified by specifying an expression. +#' For example, `new_vars = exprs(LASTRSP = str_to_upper(AVALC))` adds the +#' variable `LASTRSP` to the dataset and sets it to the upper case value of +#' `AVALC`. +#' +#' If the argument is not specified or set to `NULL`, all variables from the +#' additional dataset (`dataset_add`) are added. +#' +#' *Permitted Values*: list of variables or named expressions created by `exprs()` +#' +#' @param join_vars Variables to use from additional dataset +#' +#' Any extra variables required from the additional dataset for `filter_join` +#' should be specified for this argument. Variables specified for `new_vars` +#' do not need to be repeated for `join_vars`. If a specified variable exists +#' in both the input dataset and the additional dataset, the suffix ".join" is +#' added to the variable from the additional dataset. +#' +#' If an expression is named, e.g., `exprs(EXTDT = +#' convert_dtc_to_dt(EXSTDTC))`, a corresponding variable is added to the +#' additional dataset and can be used in the filter conditions (`filter_add`, +#' `filter_join`) and for `new_vars`. The variable is not included in the +#' output dataset. +#' +#' The variables are not included in the output dataset. +#' +#' *Permitted Values*: list of variables or named expressions created by `exprs()` +#' +#' @param join_type Observations to keep after joining +#' +#' The argument determines which of the joined observations are kept with +#' respect to the original observation. For example, if `join_type = "after"` +#' is specified all observations after the original observations are kept. +#' +#' For example for confirmed response or BOR in the oncology setting or +#' confirmed deterioration in questionnaires the confirmatory assessment must +#' be after the assessment. Thus `join_type = "after"` could be used. +#' +#' Whereas, sometimes you might allow for confirmatory observations to occur +#' prior to the observation. 
For example, to identify AEs occurring on or +#' after seven days before a COVID AE. Thus `join_type = "all"` could be used. +#' +#' *Permitted Values:* `"before"`, `"after"`, `"all"` +#' +#' @param tmp_obs_nr_var Temporary observation number +#' +#' The specified variable is added to the input dataset (`dataset`) and the +#' additional dataset (`dataset_add`). It is set to the observation number +#' with respect to `order`. For each by group (`by_vars`) the observation +#' number starts with `1`. The variable can be used in the conditions +#' (`filter_join`, `first_cond_upper`, `first_cond_lower`). It can also be +#' used to select consecutive observations or the last observation. +#' +#' @param filter_add Filter for additional dataset (`dataset_add`) +#' +#' Only observations from `dataset_add` fulfilling the specified condition are +#' joined to the input dataset. If the argument is not specified, all +#' observations are joined. +#' +#' Variables created by `order` or `new_vars` arguments can be used in the +#' condition. +#' +#' The condition can include summary functions like `all()` or `any()`. The +#' additional dataset is grouped by the by variables (`by_vars`). +#' +#' *Permitted Values*: a condition +#' +#' @param first_cond_lower Condition for selecting range of data (before) +#' +#' If this argument is specified, the other observations are restricted from +#' the first observation before the current observation where the specified +#' condition is fulfilled up to the current observation. If the condition is +#' not fulfilled for any of the other observations, no observations are +#' considered, i.e., the observation is not flagged. +#' +#' This argument should be specified if `filter_join` contains summary +#' functions which should not apply to all observations but only from a +#' certain observation before the current observation up to the current +#' observation. 
+#' +#' @param first_cond_upper Condition for selecting range of data (after) +#' +#' If this argument is specified, the other observations are restricted up to +#' the first observation where the specified condition is fulfilled. If the +#' condition is not fulfilled for any of the other observations, no +#' observations are considered, i.e., the observation is not flagged. +#' +#' This argument should be specified if `filter_join` contains summary +#' functions which should not apply to all observations but only up to the +#' confirmation assessment. +#' +#' @param filter_join Filter for the joined dataset +#' +#' The specified condition is applied to the joined dataset. Therefore +#' variables from both datasets `dataset` and `dataset_add` can be used. +#' +#' Variables created by `order` or `new_vars` arguments can be used in the +#' condition. +#' +#' The condition can include summary functions like `all()` or `any()`. The +#' joined dataset is grouped by the original observations. +#' +#' *Permitted Values*: a condition +#' +#' @param mode Selection mode +#' +#' Determines if the first or last observation is selected. If the `order` +#' argument is specified, `mode` must be non-null. +#' +#' If the `order` argument is not specified, the `mode` argument is ignored. +#' +#' *Permitted Values*: `"first"`, `"last"`, `NULL` +#' +#' @param check_type Check uniqueness? +#' +#' If `"warning"` or `"error"` is specified, the specified message is issued +#' if the observations of the (restricted) joined dataset are not unique with +#' respect to the by variables and the order. +#' +#' This argument is ignored if `order` is not specified. In this case an error +#' is issued independent of `check_type` if the restricted joined dataset +#' contains more than one observation for any of the observations of the input +#' dataset. +#' +#' *Permitted Values*: `"none"`, `"warning"`, `"error"` +#' +#' +#' @details +#' +#' 1. 
#'    The variables specified by `order` are added to the additional dataset
#'    (`dataset_add`).
#'
#' 1. The variables specified by `join_vars` are added to the additional dataset
#' (`dataset_add`).
#'
#' 1. If observation numbers are required (`join_type` is not `"all"`,
#' `first_cond_lower`, `first_cond_upper`, or `tmp_obs_nr_var` is specified),
#' the observations of the input dataset and of the additional dataset are
#' numbered within the by groups with respect to `order`.
#'
#' 1. The records from the additional dataset (`dataset_add`) are restricted to
#' those matching the `filter_add` condition.
#'
#' 1. The input dataset and the (restricted) additional dataset are left joined
#' by the grouping variables (`by_vars`). If no grouping variables are
#' specified, a full join is performed.
#'
#' 1. If `join_type` is `"before"` or `"after"`, only those joined observations
#' are kept which are before or after the observation of the input dataset
#' with respect to the observation number.
#'
#' 1. If `first_cond_upper` or `first_cond_lower` is specified, the joined
#' observations are restricted to the range delimited by the first observation
#' fulfilling the respective condition.
#'
#' 1. The joined dataset is restricted by the `filter_join` condition.
#'
#' @keywords internal
get_joined_data <- function(dataset,
                            dataset_add,
                            by_vars = NULL,
                            join_vars = NULL,
                            join_type,
                            first_cond_lower = NULL,
                            first_cond_upper = NULL,
                            order = NULL,
                            tmp_obs_nr_var = NULL,
                            filter_add = NULL,
                            filter_join = NULL,
                            check_type = "warning") {
  # Check input arguments
  assert_vars(by_vars, optional = TRUE)
  # names of the by variables as they appear in the input dataset
  by_vars_left <- replace_values_by_names(by_vars)
  assert_expr_list(join_vars, optional = TRUE)
  join_type <-
    assert_character_scalar(
      join_type,
      values = c("before", "after", "all"),
      case_sensitive = FALSE
    )
  first_cond_lower <- assert_filter_cond(enexpr(first_cond_lower), optional = TRUE)
  first_cond_upper <- assert_filter_cond(enexpr(first_cond_upper), optional = TRUE)
  assert_expr_list(order, optional = TRUE)
  tmp_obs_nr_var <- assert_symbol(enexpr(tmp_obs_nr_var), optional = TRUE)
  filter_add <- assert_filter_cond(enexpr(filter_add), optional = TRUE)
  filter_join <- assert_filter_cond(enexpr(filter_join), optional = TRUE)
  check_type <-
    assert_character_scalar(
      check_type,
      values = c("none", "warning", "error"),
      case_sensitive = FALSE
    )

  # the order variables are required in the input dataset only if the relative
  # position of the observations matters
  if (join_type != "all" || !is.null(first_cond_lower) || !is.null(first_cond_upper)) {
    dataset_order_vars <- extract_vars(order)
  } else {
    dataset_order_vars <- NULL
  }

  assert_data_frame(
    dataset,
    required_vars = expr_c(by_vars_left, dataset_order_vars)
  )

  assert_data_frame(
    dataset_add,
    required_vars = expr_c(
      by_vars,
      extract_vars(order),
      setdiff(extract_vars(join_vars), replace_values_by_names(order))
    )
  )

  # number observations of the input dataset to get a unique key
  # (by_vars and tmp_obs_nr_left), it is used later to apply filter_join
  tmp_obs_nr_left <- get_new_tmp_var(dataset, prefix = "tmp_obs_nr_left")
  data <- dataset %>%
    derive_var_obs_number(
      new_var = !!tmp_obs_nr_left,
      by_vars = by_vars_left,
      check_type = "none"
    )

  # add the variables defined by order and join_vars to the additional dataset;
  # filter_add is applied further below, once the observations are numbered
  data_add <- dataset_add %>%
    group_by(!!!by_vars) %>%
    mutate(!!!order, !!!join_vars) %>%
    ungroup()

  # number observations of the input dataset and the additional dataset for
  # relation of records, e.g., join_type = before|after, first_cond_lower,
  # first_cond_upper
  tmp_obs_nr_var_join <- NULL
  if (join_type != "all" || !is.null(first_cond_lower) || !is.null(first_cond_upper) ||
    !is.null(tmp_obs_nr_var)) {
    if (is.null(tmp_obs_nr_var)) {
      # the caller did not request the variable, thus a temporary one is
      # created and its ".join" counterpart is dropped at the end
      tmp_obs_nr_var <- get_new_tmp_var(dataset, prefix = "tmp_obs_nr")
      tmp_obs_nr_var_join <- paste0(as_name(tmp_obs_nr_var), ".join")
    }
    # number the prepared additional dataset (not the raw dataset_add),
    # otherwise the variables added above and the filter_add restriction
    # applied below would be discarded
    data_add <- derive_var_obs_number(
      data_add,
      new_var = !!tmp_obs_nr_var,
      by_vars = by_vars,
      order = order,
      check_type = check_type
    )

    # the input dataset holds the by variables under their left hand side
    # names, thus by_vars_left is used here
    data <- data %>%
      mutate(!!!order) %>%
      derive_var_obs_number(
        new_var = !!tmp_obs_nr_var,
        by_vars = by_vars_left,
        order = order,
        check_type = check_type
      )
  }

  # restrict the additional dataset; this is done after the numbering such that
  # the observation numbers still refer to all observations of dataset_add
  data_add <- data_add %>%
    group_by(!!!by_vars) %>%
    filter_if(filter_add) %>%
    ungroup()

  # join the (restricted) additional dataset to the input dataset such that to
  # each observation of the input dataset all observations of the additional
  # dataset from the same by group are joined
  data_joined <-
    left_join(
      data,
      select(
        data_add,
        !!!by_vars,
        !!!replace_values_by_names(extract_vars(order)),
        !!!replace_values_by_names(join_vars),
        !!tmp_obs_nr_var
      ),
      by = vars2chr(by_vars_left),
      suffix = c("", ".join")
    )

  if (join_type != "all") {
    # keep only the joined observations before/after the current observation
    operator <- c(before = "<", after = ">")

    data_joined <- filter(
      data_joined,
      !!parse_expr(paste0(
        as_name(tmp_obs_nr_var), ".join",
        operator[join_type],
        as_name(tmp_obs_nr_var)
      ))
    )
  }

  if (!is.null(first_cond_upper)) {
    # select all observations up to the first observation (in ascending order)
    # fulfilling first_cond_upper, e.g., the confirmation observation
    data_joined <- filter_relative(
      data_joined,
      by_vars = expr_c(by_vars_left, tmp_obs_nr_var),
      condition = !!first_cond_upper,
      order = exprs(!!parse_expr(paste0(as_name(tmp_obs_nr_var), ".join"))),
      mode = "first",
      selection = "before",
      inclusive = TRUE,
      keep_no_ref_groups = FALSE
    )
  }

  if (!is.null(first_cond_lower)) {
    # select all observations back to the first observation fulfilling
    # first_cond_lower; the joined observations are processed in descending
    # order, i.e., starting with the one closest to the current observation
    data_joined <- filter_relative(
      data_joined,
      by_vars = expr_c(by_vars_left, tmp_obs_nr_var),
      condition = !!first_cond_lower,
      order = exprs(!!parse_expr(paste0("desc(", as_name(tmp_obs_nr_var), ".join)"))),
      mode = "first",
      selection = "before",
      inclusive = TRUE,
      keep_no_ref_groups = FALSE
    )
  }

  # apply confirmation condition, which may include summary functions; the
  # joined dataset is grouped by the observations of the input dataset
  data_joined %>%
    group_by(!!!by_vars_left, !!tmp_obs_nr_left) %>%
    filter_if(filter_join) %>%
    ungroup() %>%
    remove_tmp_vars() %>%
    select(-!!tmp_obs_nr_var_join)
}
#' #' @param order Order #' @@ -28,12 +33,13 @@ #' #' @param tmp_obs_nr_var Temporary observation number #' -#' The specified variable is added to the input dataset and set to the -#' observation number with respect to `order`. For each by group (`by_vars`) -#' the observation number starts with `1`. The variable can be used in the -#' conditions (`filter`, `first_cond`). It is not included in the output -#' dataset. It can be used to flag consecutive observations or the last -#' observation (see last example below). +#' The specified variable is added to the input dataset (`dataset`) and the +#' additional dataset (`dataset_add`). It is set to the observation number +#' with respect to `order`. For each by group (`by_vars`) the observation +#' number starts with `1`. The variable can be used in the conditions +#' (`filter_join`, `first_cond_upper`, `first_cond_lower`). It is not included +#' in the output dataset. It can also be used to flag consecutive observations +#' or the last observation (see last example below). #' #' @param join_vars Variables to keep from joined dataset #' @@ -41,42 +47,68 @@ #' for this parameter. The specified variables are added to the joined dataset #' with suffix ".join". For example to flag all observations with `AVALC == #' "Y"` and `AVALC == "Y"` for at least one subsequent visit `join_vars = -#' exprs(AVALC, AVISITN)` and `filter = AVALC == "Y" & AVALC.join == "Y" & +#' exprs(AVALC, AVISITN)` and `filter_join = AVALC == "Y" & AVALC.join == "Y" & #' AVISITN < AVISITN.join` could be specified. #' #' The `*.join` variables are not included in the output dataset. #' -#' @param join_type Observations to keep after joining +#' @param first_cond Condition for selecting range of data #' -#' The argument determines which of the joined observations are kept with -#' respect to the original observation. For example, if `join_type = "after"` -#' is specified all observations after the original observations are kept. 
+#' `r lifecycle::badge("deprecated")` #' -#' For example for confirmed response or BOR in the oncology setting or -#' confirmed deterioration in questionnaires the confirmatory assessment must -#' be after the assessment to be flagged. Thus `join_type = "after"` could be -#' used. +#' This argument is *deprecated*, please use `first_cond_upper` instead. #' -#' Whereas, sometimes you might allow for confirmatory observations to occur -#' prior to the observation to be flagged. For example, to flag AEs occurring -#' on or after seven days before a COVID AE. Thus `join_type = "all"` could be -#' used. +#' If this argument is specified, the other observations are restricted up to +#' the first observation where the specified condition is fulfilled. If the +#' condition is not fulfilled for any of the other observations, no +#' observations are considered, i.e., the observation is not flagged. #' -#' *Permitted Values:* `"before"`, `"after"`, `"all"` +#' This parameter should be specified if `filter_join` contains summary +#' functions which should not apply to all observations but only up to the +#' confirmation assessment. For an example see the third example below. #' -#' @param first_cond Condition for selecting range of data +#' @param first_cond_lower Condition for selecting range of data (before) +#' +#' If this argument is specified, the other observations are restricted from +#' the first observation before the current observation where the specified +#' condition is fulfilled up to the current observation. If the condition is +#' not fulfilled for any of the other observations, no observations are +#' considered, i.e., the observation is not flagged. +#' +#' This parameter should be specified if `filter_join` contains summary +#' functions which should not apply to all observations but only from a +#' certain observation before the current observation up to the current +#' observation. For an example see the last example below. 
+#' +#' @param first_cond_upper Condition for selecting range of data (after) #' #' If this argument is specified, the other observations are restricted up to #' the first observation where the specified condition is fulfilled. If the #' condition is not fulfilled for any of the other observations, no #' observations are considered, i.e., the observation is not flagged. #' -#' This parameter should be specified if `filter` contains summary functions -#' which should not apply to all observations but only up to the confirmation -#' assessment. For an example see the third example below. +#' This parameter should be specified if `filter_join` contains summary +#' functions which should not apply to all observations but only up to the +#' confirmation assessment. For an example see the third example below. +#' +#' @param filter_join Condition for selecting observations +#' +#' The filter is applied to the joined dataset for flagging the confirmed +#' observations. The condition can include summary functions like `all()` or +#' `any()`. The joined dataset is grouped by the original observations. I.e., +#' the summary functions are applied to all observations up to the confirmation +#' observation. For example, `filter_join = AVALC == "CR" & all(AVALC.join +#' %in% c("CR", "NE")) & count_vals(var = AVALC.join, val = "NE") <= 1` +#' selects observations with response "CR" and for all observations up to the +#' confirmation observation the response is "CR" or "NE" and there is at most +#' one "NE". #' #' @param filter Condition for selecting observations #' +#' `r lifecycle::badge("deprecated")` +#' +#' This argument is *deprecated*, please use `filter_join` instead. +#' #' The filter is applied to the joined dataset for flagging the confirmed #' observations. The condition can include summary functions. The joined #' dataset is grouped by the original observations.
I.e., the summary function @@ -92,29 +124,35 @@ #' if the observations of the input dataset are not unique with respect to the #' by variables and the order. #' -#' *Default:* `"warning"` -#' #' *Permitted Values:* `"none"`, `"warning"`, `"error"` #' #' @param true_value Value of `new_var` for flagged observations #' -#' *Default*: `"Y"` -#' #' @param false_value Value of `new_var` for observations not flagged #' -#' *Default*: `NA_character_` +#' @inheritParams get_joined_data #' #' @details #' The following steps are performed to produce the output dataset. #' #' ## Step 1 #' -#' The input dataset is joined with itself by the variables specified for -#' `by_vars`. From the right hand side of the join only the variables -#' specified for `join_vars` are kept. The suffix ".join" is added to these -#' variables. +#' - The variables specified by `order` are added to the additional dataset +#' (`dataset_add`). +#' +#' - The variables specified by `join_vars` are added to the additional dataset +#' (`dataset_add`). +#' +#' - The records from the additional dataset (`dataset_add`) are restricted to +#' those matching the `filter_add` condition. #' -#' For example, for `by_vars = USUBJID`, `join_vars = exprs(AVISITN, AVALC)` and input dataset +#' The input dataset (`dataset`) is joined with the restricted additional +#' dataset by the variables specified for `by_vars`. From the additional +#' dataset only the variables specified for `join_vars` are kept. The suffix +#' ".join" is added to those variables which also exist in the input dataset. 
+#' +#' For example, for `by_vars = USUBJID`, `join_vars = exprs(AVISITN, AVALC)` +#' and input dataset and additional dataset #' #' ```{r eval=FALSE} #' # A tibble: 2 x 4 @@ -153,17 +191,26 @@ #' #' ## Step 3 #' -#' If `first_cond` is specified, for each observation of the input dataset the -#' joined dataset is restricted to observations up to the first observation -#' where `first_cond` is fulfilled (the observation fulfilling the condition -#' is included). If for an observation of the input dataset the condition is -#' not fulfilled, the observation is removed. +#' If `first_cond_lower` is specified, for each observation of the input +#' dataset the joined dataset is restricted to observations from the first +#' observation where `first_cond_lower` is fulfilled (the observation +#' fulfilling the condition is included) up to the observation of the input +#' dataset. If for an observation of the input dataset the condition is not +#' fulfilled, the observation is removed. +#' +#' If `first_cond_upper` is specified, for each observation of the input +#' dataset the joined dataset is restricted to observations up to the first +#' observation where `first_cond_upper` is fulfilled (the observation +#' fulfilling the condition is included). If for an observation of the input +#' dataset the condition is not fulfilled, the observation is removed. +#' +#' For an example see the last example in the "Examples" section. #' #' ## Step 4 #' #' The joined dataset is grouped by the observations from the input dataset #' and restricted to the observations fulfilling the condition specified by -#' `filter`. +#' `filter_join`. 
#' #' ## Step 5 #' @@ -181,13 +228,12 @@ #' @keywords der_gen #' @family der_gen #' -#' @seealso [filter_joined()] +#' @seealso [filter_joined()], [derive_vars_joined()] #' #' @export #' #' @examples #' library(tibble) -#' library(admiral) #' #' # flag observations with a duration longer than 30 and #' # at, after, or up to 7 days before a COVID AE (ACOVFL == "Y") @@ -207,12 +253,13 @@ #' #' derive_var_joined_exist_flag( #' adae, +#' dataset_add = adae, #' new_var = ALCOVFL, #' by_vars = exprs(USUBJID), #' join_vars = exprs(ACOVFL, ADY), #' join_type = "all", #' order = exprs(ADY), -#' filter = ADURN > 30 & ACOVFL.join == "Y" & ADY >= ADY.join - 7 +#' filter_join = ADURN > 30 & ACOVFL.join == "Y" & ADY >= ADY.join - 7 #' ) #' #' # flag observations with AVALC == "Y" and AVALC == "Y" at one subsequent visit @@ -231,12 +278,13 @@ #' #' derive_var_joined_exist_flag( #' data, +#' dataset_add = data, #' by_vars = exprs(USUBJID), #' new_var = CONFFL, #' join_vars = exprs(AVALC, AVISITN), #' join_type = "after", #' order = exprs(AVISITN), -#' filter = AVALC == "Y" & AVALC.join == "Y" & AVISITN < AVISITN.join +#' filter_join = AVALC == "Y" & AVALC.join == "Y" & AVISITN < AVISITN.join #' ) #' #' # select observations with AVALC == "CR", AVALC == "CR" at a subsequent visit, @@ -261,13 +309,14 @@ #' #' derive_var_joined_exist_flag( #' data, +#' dataset_add = data, #' by_vars = exprs(USUBJID), #' join_vars = exprs(AVALC), #' join_type = "after", #' order = exprs(AVISITN), #' new_var = CONFFL, -#' first_cond = AVALC.join == "CR", -#' filter = AVALC == "CR" & all(AVALC.join %in% c("CR", "NE")) & +#' first_cond_upper = AVALC.join == "CR", +#' filter_join = AVALC == "CR" & all(AVALC.join %in% c("CR", "NE")) & #' count_vals(var = AVALC.join, val = "NE") <= 1 #' ) #' @@ -294,13 +343,14 @@ #' #' derive_var_joined_exist_flag( #' data, +#' dataset_add = data, #' by_vars = exprs(USUBJID), #' join_vars = exprs(AVALC, ADY), #' join_type = "after", #' order = exprs(ADY), #' new_var = 
CONFFL, -#' first_cond = AVALC.join %in% c("CR", "PR") & ADY.join - ADY >= 20, -#' filter = AVALC == "PR" & +#' first_cond_upper = AVALC.join %in% c("CR", "PR") & ADY.join - ADY >= 20, +#' filter_join = AVALC == "PR" & #' all(AVALC.join %in% c("CR", "PR", "NE")) & #' count_vals(var = AVALC.join, val = "NE") <= 1 & #' ( @@ -327,17 +377,63 @@ #' #' derive_var_joined_exist_flag( #' data, +#' dataset_add = data, #' by_vars = exprs(USUBJID), #' new_var = CONFFL, #' tmp_obs_nr_var = tmp_obs_nr, #' join_vars = exprs(CRIT1FL), #' join_type = "all", #' order = exprs(AVISITN), -#' filter = CRIT1FL == "Y" & CRIT1FL.join == "Y" & +#' filter_join = CRIT1FL == "Y" & CRIT1FL.join == "Y" & #' (tmp_obs_nr + 1 == tmp_obs_nr.join | tmp_obs_nr == max(tmp_obs_nr.join)) #' ) #' +#' # first_cond_lower and first_cond_upper argument +#' myd <- tribble( +#' ~subj, ~day, ~val, +#' "1", 1, "++", +#' "1", 2, "-", +#' "1", 3, "0", +#' "1", 4, "+", +#' "1", 5, "++", +#' "1", 6, "-", +#' "2", 1, "-", +#' "2", 2, "++", +#' "2", 3, "+", +#' "2", 4, "0", +#' "2", 5, "-", +#' "2", 6, "++" +#' ) +#' +#' # flag "0" where all results from the first "++" before the "0" up to the "0" +#' # (excluding the "0") are "+" or "++" +#' derive_var_joined_exist_flag( +#' myd, +#' dataset_add = myd, +#' by_vars = exprs(subj), +#' order = exprs(day), +#' new_var = flag, +#' join_vars = exprs(val), +#' join_type = "before", +#' first_cond_lower = val.join == "++", +#' filter_join = val == "0" & all(val.join %in% c("+", "++")) +#' ) +#' +#' # flag "0" where all results from the "0" (excluding the "0") up to the first +#' # "++" after the "0" are "+" or "++" +#' derive_var_joined_exist_flag( +#' myd, +#' dataset_add = myd, +#' by_vars = exprs(subj), +#' order = exprs(day), +#' new_var = flag, +#' join_vars = exprs(val), +#' join_type = "after", +#' first_cond_upper = val.join == "++", +#' filter_join = val == "0" & all(val.join %in% c("+", "++")) +#' ) derive_var_joined_exist_flag <- function(dataset, + dataset_add, 
by_vars, order, new_var, @@ -345,14 +441,36 @@ derive_var_joined_exist_flag <- function(dataset, join_vars, join_type, first_cond = NULL, - filter, + first_cond_lower = NULL, + first_cond_upper = NULL, + filter = NULL, + filter_add = NULL, + filter_join, true_value = "Y", false_value = NA_character_, check_type = "warning") { new_var <- assert_symbol(enexpr(new_var)) tmp_obs_nr_var <- assert_symbol(enexpr(tmp_obs_nr_var), optional = TRUE) - first_cond <- assert_filter_cond(enexpr(first_cond), optional = TRUE) - filter <- assert_filter_cond(enexpr(filter)) + first_cond_lower <- assert_filter_cond(enexpr(first_cond_lower), optional = TRUE) + first_cond_upper <- assert_filter_cond(enexpr(first_cond_upper), optional = TRUE) + if (!missing(first_cond)) { + deprecate_warn( + "1.0.0", + "derive_var_joined_exist_flag(first_cond=)", + "derive_var_joined_exist_flag(first_cond_upper=)" + ) + first_cond_upper <- assert_filter_cond(enexpr(first_cond), optional = TRUE) + } + filter_add <- assert_filter_cond(enexpr(filter_add), optional = TRUE) + filter_join <- assert_filter_cond(enexpr(filter_join)) + if (!missing(filter)) { + deprecate_warn( + "1.0.0", + "derive_var_joined_exist_flag(filter=)", + "derive_var_joined_exist_flag(filter_join=)" + ) + filter_join <- assert_filter_cond(enexpr(filter)) + } assert_data_frame(dataset) tmp_obs_nr <- get_new_tmp_var(dataset, prefix = "tmp_obs_nr_") @@ -364,13 +482,15 @@ derive_var_joined_exist_flag <- function(dataset, data_filtered <- filter_joined( data, + dataset_add = dataset_add, by_vars = by_vars, order = order, tmp_obs_nr_var = !!tmp_obs_nr_var, join_vars = join_vars, join_type = join_type, - first_cond = !!first_cond, - filter = !!filter, + first_cond_lower = !!first_cond_lower, + first_cond_upper = !!first_cond_upper, + filter_join = !!filter_join, check_type = check_type ) diff --git a/R/filter_joined.R b/R/filter_joined.R index d39e4d1ceb..89f31374ab 100644 --- a/R/filter_joined.R +++ b/R/filter_joined.R @@ -16,7 +16,12 @@ #' 
response value can be confirmed by a subsequent assessment. This is commonly #' used in endpoints such as best overall response. #' -#' @param dataset `r roxygen_param_dataset(expected_vars = c("by_vars", "order", "join_vars"))` +#' @param dataset `r roxygen_param_dataset(expected_vars = c("by_vars", "order"))` +#' +#' @param dataset_add Additional dataset +#' +#' The variables specified for `by_vars`, `join_vars`, and `order` are +#' expected. #' #' @param by_vars By variables #' @@ -29,27 +34,46 @@ #' this parameter. The specified variables are added to the joined dataset #' with suffix ".join". For example to select all observations with `AVALC == #' "Y"` and `AVALC == "Y"` for at least one subsequent visit `join_vars = -#' exprs(AVALC, AVISITN)` and `filter = AVALC == "Y" & AVALC.join == "Y" & -#' AVISITN < AVISITN.join` could be specified. +#' exprs(AVALC, AVISITN)` and `filter_join = AVALC == "Y" & AVALC.join == "Y" +#' & AVISITN < AVISITN.join` could be specified. #' #' The `*.join` variables are not included in the output dataset. #' -#' @param join_type Observations to keep after joining +#' @param first_cond Condition for selecting range of data #' -#' The argument determines which of the joined observations are kept with -#' respect to the original observation. For example, if `join_type = -#' "after"` is specified all observations after the original observations are -#' kept. +#' `r lifecycle::badge("deprecated")` #' -#' *Permitted Values:* `"before"`, `"after"`, `"all"` -#' -#' @param first_cond Condition for selecting range of data +#' This argument is *deprecated*, please use `first_cond_upper` instead. #' #' If this argument is specified, the other observations are restricted up to #' the first observation where the specified condition is fulfilled. If the #' condition is not fulfilled for any of the subsequent observations, all #' observations are removed. 
#' +#' @param first_cond_lower Condition for selecting range of data (before) +#' +#' If this argument is specified, the other observations are restricted from +#' the first observation before the current observation where the specified +#' condition is fulfilled up to the current observation. If the condition is +#' not fulfilled for any of the other observations, no observations are +#' considered, i.e., the observation is not flagged. +#' +#' This parameter should be specified if `filter_join` contains summary +#' functions which should not apply to all observations but only from a +#' certain observation before the current observation up to the current +#' observation. For an example see the last example below. +#' +#' @param first_cond_upper Condition for selecting range of data (after) +#' +#' If this argument is specified, the other observations are restricted up to +#' the first observation where the specified condition is fulfilled. If the +#' condition is not fulfilled for any of the other observations, no +#' observations are considered, i.e., the observation is not flagged. +#' +#' This parameter should be specified if `filter_join` contains summary +#' functions which should not apply to all observations but only up to the +#' confirmation assessment. For an example see the last example below. +#' #' @param order Order #' #' The observations are ordered by the specified order. @@ -59,15 +83,31 @@ #' #' @param tmp_obs_nr_var Temporary observation number #' -#' The specified variable is added to the input dataset and set to the -#' observation number with respect to `order`. For each by group (`by_vars`) -#' the observation number starts with `1`. The variable can be used in the -#' conditions (`filter`, `first_cond`). It is not included in the output -#' dataset. It can be used to select consecutive observations or the last -#' observation (see last example below). 
+#' The specified variable is added to the input dataset (`dataset`) and the +#' additional dataset (`dataset_add`). It is set to the observation number +#' with respect to `order`. For each by group (`by_vars`) the observation +#' number starts with `1`. The variable can be used in the conditions +#' (`filter_join`, `first_cond_upper`, `first_cond_lower`). It is not included +#' in the output dataset. It can also be used to select consecutive +#' observations or the last observation (see example below). +#' +#' @param filter_add Filter for additional dataset (`dataset_add`) +#' +#' Only observations from `dataset_add` fulfilling the specified condition are +#' joined to the input dataset. If the argument is not specified, all +#' observations are joined. +#' +#' Variables created by the `order` argument can be used in the condition. +#' +#' The condition can include summary functions. The additional dataset is +#' grouped by the by variables (`by_vars`). #' #' @param filter Condition for selecting observations #' +#' `r lifecycle::badge("deprecated")` +#' +#' This argument is *deprecated*, please use `filter_join` instead. +#' #' The filter is applied to the joined dataset for selecting the confirmed #' observations. The condition can include summary functions. The joined #' dataset is grouped by the original observations. I.e., the summary function @@ -79,28 +119,52 @@ #' confirmation observation the response is "CR" or "NE" and there is at most #' one "NE". #' +#' @param filter_join Condition for selecting observations +#' +#' The filter is applied to the joined dataset for selecting the confirmed +#' observations. The condition can include summary functions like `all()` or +#' `any()`. The joined dataset is grouped by the original observations. I.e., +#' the summary functions are applied to all observations up to the confirmation +#' observation.
For example in the oncology setting when using this function +#' for confirmed best overall response, `filter_join = AVALC == "CR" & +#' all(AVALC.join %in% c("CR", "NE")) & count_vals(var = AVALC.join, val = +#' "NE") <= 1` selects observations with response "CR" and for all +#' observations up to the confirmation observation the response is "CR" or +#' "NE" and there is at most one "NE". +#' #' @param check_type Check uniqueness? #' #' If `"warning"` or `"error"` is specified, the specified message is issued #' if the observations of the input dataset are not unique with respect to the #' by variables and the order. #' -#' *Default:* `"none"` -#' #' *Permitted Values:* `"none"`, `"warning"`, `"error"` #' +#' @inheritParams get_joined_data +#' #' @details #' #' The following steps are performed to produce the output dataset. #' #' ## Step 1 #' -#' The input dataset is joined with itself by the variables specified for -#' `by_vars`. From the right hand side of the join only the variables -#' specified for `join_vars` are kept. The suffix ".join" is added to these -#' variables. +#' - The variables specified by `order` are added to the additional dataset +#' (`dataset_add`). +#' +#' - The variables specified by `join_vars` are added to the additional dataset +#' (`dataset_add`). #' -#' For example, for `by_vars = USUBJID`, `join_vars = exprs(AVISITN, AVALC)` and input dataset +#' - The records from the additional dataset (`dataset_add`) are restricted to +#' those matching the `filter_add` condition. +#' +#' Then the input dataset (`dataset`) is joined with the restricted +#' additional dataset by the variables specified for `by_vars`. From the +#' additional dataset only the variables specified for `join_vars` are kept. +#' The suffix ".join" is added to those variables which are also present in +#' the input dataset. 
+#' +#' For example, for `by_vars = USUBJID`, `join_vars = exprs(AVISITN, AVALC)` +#' and input dataset and additional dataset #' #' ```{r eval=FALSE} #' # A tibble: 2 x 4 @@ -139,17 +203,26 @@ #' #' ## Step 3 #' -#' If `first_cond` is specified, for each observation of the input dataset the -#' joined dataset is restricted to observations up to the first observation -#' where `first_cond` is fulfilled (the observation fulfilling the condition -#' is included). If for an observation of the input dataset the condition is -#' not fulfilled, the observation is removed. +#' If `first_cond_lower` is specified, for each observation of the input +#' dataset the joined dataset is restricted to observations from the first +#' observation where `first_cond_lower` is fulfilled (the observation +#' fulfilling the condition is included) up to the observation of the input +#' dataset. If for an observation of the input dataset the condition is not +#' fulfilled, the observation is removed. +#' +#' If `first_cond_upper` is specified, for each observation of the input +#' dataset the joined dataset is restricted to observations up to the first +#' observation where `first_cond_upper` is fulfilled (the observation +#' fulfilling the condition is included). If for an observation of the input +#' dataset the condition is not fulfilled, the observation is removed. +#' +#' For an example see the last example in the "Examples" section. #' #' ## Step 4 #' #' The joined dataset is grouped by the observations from the input dataset #' and restricted to the observations fulfilling the condition specified by -#' `filter`. +#' `filter_join`. 
#' #' ## Step 5 #' @@ -190,11 +263,12 @@ #' #' filter_joined( #' adae, +#' dataset_add = adae, #' by_vars = exprs(USUBJID), #' join_vars = exprs(ACOVFL, ADY), #' join_type = "all", #' order = exprs(ADY), -#' filter = ADURN > 30 & ACOVFL.join == "Y" & ADY >= ADY.join - 7 +#' filter_join = ADURN > 30 & ACOVFL.join == "Y" & ADY >= ADY.join - 7 #' ) #' #' # filter observations with AVALC == "Y" and AVALC == "Y" at a subsequent visit @@ -213,11 +287,12 @@ #' #' filter_joined( #' data, +#' dataset_add = data, #' by_vars = exprs(USUBJID), #' join_vars = exprs(AVALC, AVISITN), #' join_type = "after", #' order = exprs(AVISITN), -#' filter = AVALC == "Y" & AVALC.join == "Y" & AVISITN < AVISITN.join +#' filter_join = AVALC == "Y" & AVALC.join == "Y" & AVISITN < AVISITN.join #' ) #' #' # select observations with AVALC == "CR", AVALC == "CR" at a subsequent visit, @@ -242,12 +317,13 @@ #' #' filter_joined( #' data, +#' dataset_add = data, #' by_vars = exprs(USUBJID), #' join_vars = exprs(AVALC), #' join_type = "after", #' order = exprs(AVISITN), -#' first_cond = AVALC.join == "CR", -#' filter = AVALC == "CR" & all(AVALC.join %in% c("CR", "NE")) & +#' first_cond_upper = AVALC.join == "CR", +#' filter_join = AVALC == "CR" & all(AVALC.join %in% c("CR", "NE")) & #' count_vals(var = AVALC.join, val = "NE") <= 1 #' ) #' @@ -274,12 +350,13 @@ #' #' filter_joined( #' data, +#' dataset_add = data, #' by_vars = exprs(USUBJID), #' join_vars = exprs(AVALC, ADY), #' join_type = "after", #' order = exprs(ADY), -#' first_cond = AVALC.join %in% c("CR", "PR") & ADY.join - ADY >= 20, -#' filter = AVALC == "PR" & +#' first_cond_upper = AVALC.join %in% c("CR", "PR") & ADY.join - ADY >= 20, +#' filter_join = AVALC == "PR" & #' all(AVALC.join %in% c("CR", "PR", "NE")) & #' count_vals(var = AVALC.join, val = "NE") <= 1 & #' ( @@ -306,23 +383,71 @@ #' #' filter_joined( #' data, +#' dataset_add = data, #' by_vars = exprs(USUBJID), #' tmp_obs_nr_var = tmp_obs_nr, #' join_vars = exprs(CRIT1FL), #' 
join_type = "all", #' order = exprs(AVISITN), -#' filter = CRIT1FL == "Y" & CRIT1FL.join == "Y" & +#' filter_join = CRIT1FL == "Y" & CRIT1FL.join == "Y" & #' (tmp_obs_nr + 1 == tmp_obs_nr.join | tmp_obs_nr == max(tmp_obs_nr.join)) #' ) #' +#' # first_cond_lower and first_cond_upper argument +#' myd <- tribble( +#' ~subj, ~day, ~val, +#' "1", 1, "++", +#' "1", 2, "-", +#' "1", 3, "0", +#' "1", 4, "+", +#' "1", 5, "++", +#' "1", 6, "-", +#' "2", 1, "-", +#' "2", 2, "++", +#' "2", 3, "+", +#' "2", 4, "0", +#' "2", 5, "-", +#' "2", 6, "++" +#' ) +#' +#' # select "0" where all results from the first "++" before the "0" up to the "0" +#' # (excluding the "0") are "+" or "++" +#' filter_joined( +#' myd, +#' dataset_add = myd, +#' by_vars = exprs(subj), +#' order = exprs(day), +#' join_vars = exprs(val), +#' join_type = "before", +#' first_cond_lower = val.join == "++", +#' filter_join = val == "0" & all(val.join %in% c("+", "++")) +#' ) +#' +#' # select "0" where all results from the "0" (excluding the "0") up to the first +#' # "++" after the "0" are "+" or "++" +#' filter_joined( +#' myd, +#' dataset_add = myd, +#' by_vars = exprs(subj), +#' order = exprs(day), +#' join_vars = exprs(val), +#' join_type = "after", +#' first_cond_upper = val.join == "++", +#' filter_join = val == "0" & all(val.join %in% c("+", "++")) +#' ) filter_joined <- function(dataset, + dataset_add, by_vars, join_vars, join_type, first_cond = NULL, + first_cond_lower = NULL, + first_cond_upper = NULL, order, tmp_obs_nr_var = NULL, - filter, + filter_add = NULL, + filter_join, + filter = NULL, check_type = "warning") { # Check input parameters assert_vars(by_vars) @@ -333,10 +458,24 @@ filter_joined <- function(dataset, values = c("before", "after", "all"), case_sensitive = FALSE ) - first_cond <- assert_filter_cond(enexpr(first_cond), optional = TRUE) + first_cond_lower <- assert_filter_cond(enexpr(first_cond_lower), optional = TRUE) + first_cond_upper <- assert_filter_cond(enexpr(first_cond_upper), 
optional = TRUE) + if (!missing(first_cond)) { + deprecate_warn( + "1.0.0", + "filter_joined(first_cond=)", + "filter_joined(first_cond_upper=)" + ) + first_cond_upper <- assert_filter_cond(enexpr(first_cond), optional = TRUE) + } assert_expr_list(order) tmp_obs_nr_var <- assert_symbol(enexpr(tmp_obs_nr_var), optional = TRUE) - filter <- assert_filter_cond(enexpr(filter)) + filter_add <- assert_filter_cond(enexpr(filter_add), optional = TRUE) + filter_join <- assert_filter_cond(enexpr(filter_join)) + if (!missing(filter)) { + deprecate_warn("1.0.0", "filter_joined(filter=)", "filter_joined(filter_join=)") + filter_join <- assert_filter_cond(enexpr(filter)) + } check_type <- assert_character_scalar( check_type, @@ -345,63 +484,34 @@ filter_joined <- function(dataset, ) assert_data_frame( dataset, + required_vars = expr_c(by_vars, extract_vars(order)) + ) + + assert_data_frame( + dataset_add, required_vars = expr_c(by_vars, join_vars, extract_vars(order)) ) - # number observations of the input dataset to get a unique key - # (by_vars and tmp_obs_nr_var) if (is.null(tmp_obs_nr_var)) { tmp_obs_nr_var <- get_new_tmp_var(dataset, prefix = "tmp_obs_nr_") } - data <- dataset %>% - derive_var_obs_number( - new_var = !!tmp_obs_nr_var, - by_vars = by_vars, - order = order, - check_type = check_type - ) - # join the input dataset with itself such that to each observation of the - # input dataset all following observations are joined - data_joined <- - left_join( - data, - select(data, !!!by_vars, !!!join_vars, !!tmp_obs_nr_var), - by = vars2chr(by_vars), - suffix = c("", ".join") - ) - if (join_type != "all") { - operator <- c(before = "<", after = ">") - - data_joined <- filter( - data_joined, - !!parse_expr(paste0( - as_name(tmp_obs_nr_var), ".join", - operator[join_type], - as_name(tmp_obs_nr_var) - )) - ) - } - - if (!is.null(first_cond)) { - # select all observations up to the first confirmation observation - data_joined <- filter_relative( - data_joined, - by_vars = 
expr_c(by_vars, tmp_obs_nr_var), - condition = !!first_cond, - order = exprs(!!parse_expr(paste0(as_name(tmp_obs_nr_var), ".join"))), - mode = "first", - selection = "before", - inclusive = TRUE, - keep_no_ref_groups = FALSE - ) - } - - # apply confirmation condition, which may include summary functions - data_joined %>% - group_by(!!!by_vars, !!tmp_obs_nr_var) %>% - filter(!!filter) %>% + get_joined_data( + dataset, + dataset_add = dataset_add, + by_vars = by_vars, + join_vars = join_vars, + join_type = join_type, + first_cond_lower = !!first_cond_lower, + first_cond_upper = !!first_cond_upper, + order = order, + tmp_obs_nr_var = !!tmp_obs_nr_var, + filter_add = !!filter_add, + filter_join = !!filter_join, + check_type = check_type + ) %>% # select one observation of each group, as the joined variables are removed # it doesn't matter which one, so we take just the first one + group_by(!!!by_vars, !!tmp_obs_nr_var) %>% slice(1L) %>% ungroup() %>% select(colnames(dataset)) diff --git a/R/globals.R b/R/globals.R index 7a4855e8d2..01b3deeb0c 100644 --- a/R/globals.R +++ b/R/globals.R @@ -67,13 +67,7 @@ globalVariables(c( "ex", "idtc__", "Source", - "temp_age", - "temp_dummy", - "temp_event", - "temp_obs_nr", - "temp_source_nr", "temp_slicenr", - "temp_date", "time_differential_dt", "tmp_obs_nr_filter_relative", "tmp_obs_nr_match_filter_relative", @@ -93,27 +87,15 @@ globalVariables(c( "SCN", "value", "ASTDTM", - "temp_start", "AENDTM", - "temp_end", "key", "order1", "order2", "order3", "_unit", - "temp_DT", "temp_from_var", "temp_to_var", - "temp_dose_freq", - "temp_new_dose_no", - "temp_num_of_doses", - "temp_dose_multiplier", - "temp_day_difference", - "tmp_analysis_date", - "tmp_dose_date", - "tmp_seq_var", "EXDOSFRQ", - "tmp_ldose_dt", "CDISC_VALUE", "DOSE_WINDOW", "DOSE_COUNT", @@ -122,10 +104,6 @@ globalVariables(c( "grpseq", "time_differential", "temp_flag", - "tmp_obs_nr_filter_joined", - "tmp_obs_nr_filter_joined.join", - "tmp_obs_nr_var_conf_flag", - 
"tmp_obs_nr_var", "AGE", "SEX", "SMOKEFL", diff --git a/inst/templates/ad_adae.R b/inst/templates/ad_adae.R index bc11351dbb..4f286a4771 100644 --- a/inst/templates/ad_adae.R +++ b/inst/templates/ad_adae.R @@ -91,6 +91,7 @@ adae <- adae %>% by_vars = exprs(STUDYID, USUBJID), new_vars = exprs(LDOSEDTM = EXSTDTM), join_vars = exprs(EXSTDTM), + join_type = "all", order = exprs(EXSTDTM), filter_add = (EXDOSE > 0 | (EXDOSE == 0 & grepl("PLACEBO", EXTRT))) & !is.na(EXSTDTM), filter_join = EXSTDTM <= ASTDTM, diff --git a/inst/templates/ad_adlbhy.R b/inst/templates/ad_adlbhy.R index 383bfae302..f833ffec3f 100644 --- a/inst/templates/ad_adlbhy.R +++ b/inst/templates/ad_adlbhy.R @@ -56,8 +56,9 @@ bili_records <- adlb_annotated %>% hylaw_records <- derive_vars_joined( dataset = altast_records, dataset_add = bili_records, - by_vars = exprs(STUDYID, USUBJID, ADY), + by_vars = exprs(STUDYID, USUBJID), order = exprs(ADY), + join_type = "all", filter_join = ADT.join - ADT <= 14 & CRIT1FL == "Y" & CRIT1FL.join == "Y", new_vars = exprs(BILI_LBSEQ = LBSEQ, BILI_DT = ADT, BILI_CRITFL = CRIT1FL), mode = "first" diff --git a/inst/templates/ad_adpc.R b/inst/templates/ad_adpc.R index 59ef735015..bcdb11cdaa 100644 --- a/inst/templates/ad_adpc.R +++ b/inst/templates/ad_adpc.R @@ -206,6 +206,7 @@ adpc_prev <- adpc_first_dose %>% AENDTM_prev = AENDTM ), join_vars = exprs(ADTM), + join_type = "all", filter_add = NULL, filter_join = ADTM > ADTM.join, mode = "last", @@ -224,6 +225,7 @@ adpc_next <- adpc_prev %>% AENDTM_next = AENDTM ), join_vars = exprs(ADTM), + join_type = "all", filter_add = NULL, filter_join = ADTM <= ADTM.join, mode = "first", @@ -239,6 +241,7 @@ adpc_nom_prev <- adpc_next %>% order = exprs(NFRLT), new_vars = exprs(NFRLT_prev = NFRLT), join_vars = exprs(NFRLT), + join_type = "all", filter_add = NULL, filter_join = NFRLT > NFRLT.join, mode = "last", @@ -254,6 +257,7 @@ adpc_nom_next <- adpc_nom_prev %>% order = exprs(NFRLT), new_vars = exprs(NFRLT_next = NFRLT), join_vars = 
exprs(NFRLT), + join_type = "all", filter_add = NULL, filter_join = NFRLT <= NFRLT.join, mode = "first", diff --git a/inst/templates/ad_adppk.R b/inst/templates/ad_adppk.R index 9407959337..9dada3456e 100644 --- a/inst/templates/ad_adppk.R +++ b/inst/templates/ad_adppk.R @@ -182,6 +182,7 @@ adppk_prev <- adppk_first_dose %>% AENDTM_prev = AENDTM ), join_vars = exprs(ADTM), + join_type = "all", filter_add = NULL, filter_join = ADTM > ADTM.join, mode = "last", @@ -197,6 +198,7 @@ adppk_nom_prev <- adppk_prev %>% order = exprs(NFRLT), new_vars = exprs(NFRLT_prev = NFRLT), join_vars = exprs(NFRLT), + join_type = "all", filter_add = NULL, filter_join = NFRLT > NFRLT.join, mode = "last", diff --git a/man/derive_extreme_event.Rd b/man/derive_extreme_event.Rd index 13311b72b9..17204edfad 100644 --- a/man/derive_extreme_event.Rd +++ b/man/derive_extreme_event.Rd @@ -61,8 +61,8 @@ the event and by group, the records are ordered by the specified order. \item{mode}{Selection mode (first or last) If a particular event from \code{events} has more than one observation, -"first"/"last" is to select the first/last record of this type of events -sorting by \code{order}. +\code{"first"}/\code{"last"} is used to select the first/last record of this type of +event sorting by \code{order}. 
\emph{Permitted Values:} \code{"first"}, \code{"last"}} @@ -329,7 +329,7 @@ derive_extreme_event( ), join_vars = exprs(AVALC, ADT), join_type = "after", - first_cond = AVALC.join == "CR" & + first_cond_upper = AVALC.join == "CR" & ADT.join >= ADT + 28, condition = AVALC == "CR" & all(AVALC.join \%in\% c("CR", "NE")) & @@ -345,7 +345,7 @@ derive_extreme_event( ), join_vars = exprs(AVALC, ADT), join_type = "after", - first_cond = AVALC.join \%in\% c("CR", "PR") & + first_cond_upper = AVALC.join \%in\% c("CR", "PR") & ADT.join >= ADT + 28, condition = AVALC == "PR" & all(AVALC.join \%in\% c("CR", "PR", "NE")) & diff --git a/man/derive_var_joined_exist_flag.Rd b/man/derive_var_joined_exist_flag.Rd index b1f6f4455d..acd6905c0f 100644 --- a/man/derive_var_joined_exist_flag.Rd +++ b/man/derive_var_joined_exist_flag.Rd @@ -6,6 +6,7 @@ \usage{ derive_var_joined_exist_flag( dataset, + dataset_add, by_vars, order, new_var, @@ -13,7 +14,11 @@ derive_var_joined_exist_flag( join_vars, join_type, first_cond = NULL, - filter, + first_cond_lower = NULL, + first_cond_upper = NULL, + filter = NULL, + filter_add = NULL, + filter_join, true_value = "Y", false_value = NA_character_, check_type = "warning" @@ -24,10 +29,15 @@ derive_var_joined_exist_flag( The variables specified by the \code{by_vars} and \code{join_vars} argument(s) to be expected.} +\item{dataset_add}{Additional dataset + +The variables specified for \code{by_vars}, \code{join_vars}, and \code{order} are +expected.} + \item{by_vars}{By variables The specified variables are used as by variables for joining the input -dataset with itself.} +dataset (\code{dataset}) with the additional dataset (\code{dataset_add}).} \item{order}{Order @@ -39,18 +49,19 @@ The specified variable is added to the input dataset.} \item{tmp_obs_nr_var}{Temporary observation number -The specified variable is added to the input dataset and set to the -observation number with respect to \code{order}. 
For each by group (\code{by_vars}) -the observation number starts with \code{1}. The variable can be used in the -conditions (\code{filter}, \code{first_cond}). It is not included in the output -dataset. It can be used to flag consecutive observations or the last -observation (see last example below).} +The specified variable is added to the input dataset (\code{dataset}) and the +additional dataset (\code{dataset_add}). It is set to the observation number +with respect to \code{order}. For each by group (\code{by_vars}) the observation +number starts with \code{1}. The variable can be used in the conditions +(\code{filter_join}, \code{first_cond_upper}, \code{first_cond_lower}). It is not included +in the output dataset. It can also be used to flag consecutive observations +or the last observation (see last example below).} \item{join_vars}{Variables to keep from joined dataset The variables needed from the other observations should be specified for this parameter. The specified variables are added to the joined dataset -with suffix ".join". For example to flag all observations with \code{AVALC == "Y"} and \code{AVALC == "Y"} for at least one subsequent visit \code{join_vars = exprs(AVALC, AVISITN)} and \code{filter = AVALC == "Y" & AVALC.join == "Y" & AVISITN < AVISITN.join} could be specified. +with suffix ".join". For example to flag all observations with \code{AVALC == "Y"} and \code{AVALC == "Y"} for at least one subsequent visit \code{join_vars = exprs(AVALC, AVISITN)} and \code{filter_join = AVALC == "Y" & AVALC.join == "Y" & AVISITN < AVISITN.join} could be specified. The \verb{*.join} variables are not included in the output dataset.} @@ -62,29 +73,59 @@ is specified all observations after the original observations are kept. For example for confirmed response or BOR in the oncology setting or confirmed deterioration in questionnaires the confirmatory assessment must -be after the assessment to be flagged. Thus \code{join_type = "after"} could be -used. 
+be after the assessment. Thus \code{join_type = "after"} could be used. Whereas, sometimes you might allow for confirmatory observations to occur -prior to the observation to be flagged. For example, to flag AEs occurring -on or after seven days before a COVID AE. Thus \code{join_type = "all"} could be -used. +prior to the observation. For example, to identify AEs occurring on or +after seven days before a COVID AE. Thus \code{join_type = "all"} could be used. \emph{Permitted Values:} \code{"before"}, \code{"after"}, \code{"all"}} \item{first_cond}{Condition for selecting range of data +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} + +This argument is \emph{deprecated}, please use \code{first_cond_upper} instead. + If this argument is specified, the other observations are restricted up to the first observation where the specified condition is fulfilled. If the condition is not fulfilled for any of the other observations, no observations are considered, i.e., the observation is not flagged. -This parameter should be specified if \code{filter} contains summary functions -which should not apply to all observations but only up to the confirmation -assessment. For an example see the third example below.} +This parameter should be specified if \code{filter_join} contains summary +functions which should not apply to all observations but only up to the +confirmation assessment. For an example see the third example below.} + +\item{first_cond_lower}{Condition for selecting range of data (before) + +If this argument is specified, the other observations are restricted from +the first observation before the current observation where the specified +condition is fulfilled up to the current observation. If the condition is +not fulfilled for any of the other observations, no observations are +considered, i.e., the observation is not flagged. 
+ +This parameter should be specified if \code{filter_join} contains summary +functions which should not apply to all observations but only from a +certain observation before the current observation up to the current +observation. For an example see the last example below.} + +\item{first_cond_upper}{Condition for selecting range of data (after) + +If this argument is specified, the other observations are restricted up to +the first observation where the specified condition is fulfilled. If the +condition is not fulfilled for any of the other observations, no +observations are considered, i.e., the observation is not flagged. + +This parameter should be specified if \code{filter_join} contains summary +functions which should not apply to all observations but only up to the +confirmation assessment. For an example see the third example below.} \item{filter}{Condition for selecting observations +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} + +This argument is \emph{deprecated}, please use \code{filter_join} instead. + The filter is applied to the joined dataset for flagging the confirmed observations. The condition can include summary functions. The joined dataset is grouped by the original observations. I.e., the summary function @@ -93,13 +134,34 @@ example, \code{filter = AVALC == "CR" & all(AVALC.join \%in\% c("CR", "NE")) & c response "CR" and for all observations up to the confirmation observation the response is "CR" or "NE" and there is at most one "NE".} -\item{true_value}{Value of \code{new_var} for flagged observations +\item{filter_add}{Filter for additional dataset (\code{dataset_add}) + +Only observations from \code{dataset_add} fulfilling the specified condition are +joined to the input dataset. If the argument is not specified, all +observations are joined. 
-\emph{Default}: \code{"Y"}} +Variables created by \code{order} or \code{new_vars} arguments can be used in the +condition. -\item{false_value}{Value of \code{new_var} for observations not flagged +The condition can include summary functions like \code{all()} or \code{any()}. The +additional dataset is grouped by the by variables (\code{by_vars}). -\emph{Default}: \code{NA_character_}} +\emph{Permitted Values}: a condition} + +\item{filter_join}{Condition for selecting observations + +The filter is applied to the joined dataset for flagging the confirmed +observations. The condition can include summary functions like \code{all()} or +\code{any()}. The joined dataset is grouped by the original observations. I.e., +the summary function are applied to all observations up to the confirmation +observation. For example, \code{filter_join = AVALC == "CR" & all(AVALC.join \%in\% c("CR", "NE")) & count_vals(var = AVALC.join, val = "NE") <= 1} +selects observations with response "CR" and for all observations up to the +confirmation observation the response is "CR" or "NE" and there is at most +one "NE".} + +\item{true_value}{Value of \code{new_var} for flagged observations} + +\item{false_value}{Value of \code{new_var} for observations not flagged} \item{check_type}{Check uniqueness? @@ -107,8 +169,6 @@ If \code{"warning"} or \code{"error"} is specified, the specified message is iss if the observations of the input dataset are not unique with respect to the by variables and the order. -\emph{Default:} \code{"warning"} - \emph{Permitted Values:} \code{"none"}, \code{"warning"}, \code{"error"}} } \value{ @@ -128,13 +188,22 @@ used in endpoints such as best overall response. The following steps are performed to produce the output dataset. \subsection{Step 1}{ +\itemize{ +\item The variables specified by \code{order} are added to the additional dataset +(\code{dataset_add}). 
+\item The variables specified by \code{join_vars} are added to the additional dataset +(\code{dataset_add}). +\item The records from the additional dataset (\code{dataset_add}) are restricted to +those matching the \code{filter_add} condition. +} -The input dataset is joined with itself by the variables specified for -\code{by_vars}. From the right hand side of the join only the variables -specified for \code{join_vars} are kept. The suffix ".join" is added to these -variables. +The input dataset (\code{dataset}) is joined with the restricted additional +dataset by the variables specified for \code{by_vars}. From the additional +dataset only the variables specified for \code{join_vars} are kept. The suffix +".join" is added to those variables which also exist in the input dataset. -For example, for \code{by_vars = USUBJID}, \code{join_vars = exprs(AVISITN, AVALC)} and input dataset +For example, for \code{by_vars = USUBJID}, \code{join_vars = exprs(AVISITN, AVALC)} +and input dataset and additional dataset \if{html}{\out{
}}\preformatted{# A tibble: 2 x 4 USUBJID AVISITN AVALC AVAL @@ -171,18 +240,27 @@ USUBJID AVISITN AVALC AVAL AVISITN.join AVALC.join \subsection{Step 3}{ -If \code{first_cond} is specified, for each observation of the input dataset the -joined dataset is restricted to observations up to the first observation -where \code{first_cond} is fulfilled (the observation fulfilling the condition -is included). If for an observation of the input dataset the condition is -not fulfilled, the observation is removed. +If \code{first_cond_lower} is specified, for each observation of the input +dataset the joined dataset is restricted to observations from the first +observation where \code{first_cond_lower} is fulfilled (the observation +fulfilling the condition is included) up to the observation of the input +dataset. If for an observation of the input dataset the condition is not +fulfilled, the observation is removed. + +If \code{first_cond_upper} is specified, for each observation of the input +dataset the joined dataset is restricted to observations up to the first +observation where \code{first_cond_upper} is fulfilled (the observation +fulfilling the condition is included). If for an observation of the input +dataset the condition is not fulfilled, the observation is removed. + +For an example see the last example in the "Examples" section. } \subsection{Step 4}{ The joined dataset is grouped by the observations from the input dataset and restricted to the observations fulfilling the condition specified by -\code{filter}. +\code{filter_join}. } \subsection{Step 5}{ @@ -199,7 +277,6 @@ previous step. For the other observations it is set to \code{false_value}. 
} \examples{ library(tibble) -library(admiral) # flag observations with a duration longer than 30 and # at, after, or up to 7 days before a COVID AE (ACOVFL == "Y") @@ -219,12 +296,13 @@ adae <- tribble( derive_var_joined_exist_flag( adae, + dataset_add = adae, new_var = ALCOVFL, by_vars = exprs(USUBJID), join_vars = exprs(ACOVFL, ADY), join_type = "all", order = exprs(ADY), - filter = ADURN > 30 & ACOVFL.join == "Y" & ADY >= ADY.join - 7 + filter_join = ADURN > 30 & ACOVFL.join == "Y" & ADY >= ADY.join - 7 ) # flag observations with AVALC == "Y" and AVALC == "Y" at one subsequent visit @@ -243,12 +321,13 @@ data <- tribble( derive_var_joined_exist_flag( data, + dataset_add = data, by_vars = exprs(USUBJID), new_var = CONFFL, join_vars = exprs(AVALC, AVISITN), join_type = "after", order = exprs(AVISITN), - filter = AVALC == "Y" & AVALC.join == "Y" & AVISITN < AVISITN.join + filter_join = AVALC == "Y" & AVALC.join == "Y" & AVISITN < AVISITN.join ) # select observations with AVALC == "CR", AVALC == "CR" at a subsequent visit, @@ -273,13 +352,14 @@ data <- tribble( derive_var_joined_exist_flag( data, + dataset_add = data, by_vars = exprs(USUBJID), join_vars = exprs(AVALC), join_type = "after", order = exprs(AVISITN), new_var = CONFFL, - first_cond = AVALC.join == "CR", - filter = AVALC == "CR" & all(AVALC.join \%in\% c("CR", "NE")) & + first_cond_upper = AVALC.join == "CR", + filter_join = AVALC == "CR" & all(AVALC.join \%in\% c("CR", "NE")) & count_vals(var = AVALC.join, val = "NE") <= 1 ) @@ -306,13 +386,14 @@ data <- tribble( derive_var_joined_exist_flag( data, + dataset_add = data, by_vars = exprs(USUBJID), join_vars = exprs(AVALC, ADY), join_type = "after", order = exprs(ADY), new_var = CONFFL, - first_cond = AVALC.join \%in\% c("CR", "PR") & ADY.join - ADY >= 20, - filter = AVALC == "PR" & + first_cond_upper = AVALC.join \%in\% c("CR", "PR") & ADY.join - ADY >= 20, + filter_join = AVALC == "PR" & all(AVALC.join \%in\% c("CR", "PR", "NE")) & count_vals(var = 
AVALC.join, val = "NE") <= 1 & ( @@ -339,19 +420,64 @@ data <- tribble( derive_var_joined_exist_flag( data, + dataset_add = data, by_vars = exprs(USUBJID), new_var = CONFFL, tmp_obs_nr_var = tmp_obs_nr, join_vars = exprs(CRIT1FL), join_type = "all", order = exprs(AVISITN), - filter = CRIT1FL == "Y" & CRIT1FL.join == "Y" & + filter_join = CRIT1FL == "Y" & CRIT1FL.join == "Y" & (tmp_obs_nr + 1 == tmp_obs_nr.join | tmp_obs_nr == max(tmp_obs_nr.join)) ) +# first_cond_lower and first_cond_upper argument +myd <- tribble( + ~subj, ~day, ~val, + "1", 1, "++", + "1", 2, "-", + "1", 3, "0", + "1", 4, "+", + "1", 5, "++", + "1", 6, "-", + "2", 1, "-", + "2", 2, "++", + "2", 3, "+", + "2", 4, "0", + "2", 5, "-", + "2", 6, "++" +) + +# flag "0" where all results from the first "++" before the "0" up to the "0" +# (excluding the "0") are "+" or "++" +derive_var_joined_exist_flag( + myd, + dataset_add = myd, + by_vars = exprs(subj), + order = exprs(day), + new_var = flag, + join_vars = exprs(val), + join_type = "before", + first_cond_lower = val.join == "++", + filter_join = val == "0" & all(val.join \%in\% c("+", "++")) +) + +# flag "0" where all results from the "0" (excluding the "0") up to the first +# "++" after the "0" are "+" or "++" +derive_var_joined_exist_flag( + myd, + dataset_add = myd, + by_vars = exprs(subj), + order = exprs(day), + new_var = flag, + join_vars = exprs(val), + join_type = "after", + first_cond_upper = val.join == "++", + filter_join = val == "0" & all(val.join \%in\% c("+", "++")) +) } \seealso{ -\code{\link[=filter_joined]{filter_joined()}} +\code{\link[=filter_joined]{filter_joined()}}, \code{\link[=derive_vars_joined]{derive_vars_joined()}} General Derivation Functions for all ADaMs that returns variable appended to dataset: \code{\link{derive_var_extreme_flag}()}, diff --git a/man/derive_vars_joined.Rd b/man/derive_vars_joined.Rd index 5092e89ab8..63ccc0a4d7 100644 --- a/man/derive_vars_joined.Rd +++ b/man/derive_vars_joined.Rd @@ -11,8 +11,12 @@ 
derive_vars_joined( by_vars = NULL, order = NULL, new_vars = NULL, + tmp_obs_nr_var = NULL, join_vars = NULL, + join_type, filter_add = NULL, + first_cond_lower = NULL, + first_cond_upper = NULL, filter_join = NULL, mode = NULL, exist_flag = NULL, @@ -78,6 +82,18 @@ additional dataset (\code{dataset_add}) are added. \emph{Permitted Values}: list of variables or named expressions created by \code{exprs()}} +\item{tmp_obs_nr_var}{Temporary observation number + +The specified variable is added to the input dataset (\code{dataset}) and the +additional dataset (\code{dataset_add}). It is set to the observation number +with respect to \code{order}. For each by group (\code{by_vars}) the observation +number starts with \code{1}. The variable can be used in the conditions +(\code{filter_join}, \code{first_cond_upper}, \code{first_cond_lower}). It can also be +used to select consecutive observations or the last observation. + +The variable is not included in the output dataset. To include it specify +it for \code{new_vars}.} + \item{join_vars}{Variables to use from additional dataset Any extra variables required from the additional dataset for \code{filter_join} @@ -95,6 +111,22 @@ The variables are not included in the output dataset. \emph{Permitted Values}: list of variables or named expressions created by \code{exprs()}} +\item{join_type}{Observations to keep after joining + +The argument determines which of the joined observations are kept with +respect to the original observation. For example, if \code{join_type = "after"} +is specified all observations after the original observations are kept. + +For example for confirmed response or BOR in the oncology setting or +confirmed deterioration in questionnaires the confirmatory assessment must +be after the assessment. Thus \code{join_type = "after"} could be used. + +Whereas, sometimes you might allow for confirmatory observations to occur +prior to the observation. 
For example, to identify AEs occurring on or +after seven days before a COVID AE. Thus \code{join_type = "all"} could be used. + +\emph{Permitted Values:} \code{"before"}, \code{"after"}, \code{"all"}} + \item{filter_add}{Filter for additional dataset (\code{dataset_add}) Only observations from \code{dataset_add} fulfilling the specified condition are @@ -104,8 +136,35 @@ observations are joined. Variables created by \code{order} or \code{new_vars} arguments can be used in the condition. +The condition can include summary functions like \code{all()} or \code{any()}. The +additional dataset is grouped by the by variables (\code{by_vars}). + \emph{Permitted Values}: a condition} +\item{first_cond_lower}{Condition for selecting range of data (before) + +If this argument is specified, the other observations are restricted from +the first observation before the current observation where the specified +condition is fulfilled up to the current observation. If the condition is +not fulfilled for any of the other observations, no observations are +considered. + +This argument should be specified if \code{filter_join} contains summary +functions which should not apply to all observations but only from a +certain observation before the current observation up to the current +observation. For an example see the last example below.} + +\item{first_cond_upper}{Condition for selecting range of data (after) + +If this argument is specified, the other observations are restricted up to +the first observation where the specified condition is fulfilled. If the +condition is not fulfilled for any of the other observations, no +observations are considered. + +This argument should be specified if \code{filter_join} contains summary +functions which should not apply to all observations but only up to the +confirmation assessment. For an example see the last example below.} + \item{filter_join}{Filter for the joined dataset The specified condition is applied to the joined dataset. 
Therefore @@ -114,6 +173,9 @@ variables from both datasets \code{dataset} and \code{dataset_add} can be used. Variables created by \code{order} or \code{new_vars} arguments can be used in the condition. +The condition can include summary functions like \code{all()} or \code{any()}. The +joined dataset is grouped by the original observations. + \emph{Permitted Values}: a condition} \item{mode}{Selection mode @@ -193,6 +255,20 @@ those matching the \code{filter_add} condition. \item The input dataset and the (restricted) additional dataset are left joined by the grouping variables (\code{by_vars}). If no grouping variables are specified, a full join is performed. +\item If \code{first_cond_lower} is specified, for each observation of the input +dataset the joined dataset is restricted to observations from the first +observation where \code{first_cond_lower} is fulfilled (the observation fulfilling +the condition is included) up to the observation of the input dataset. If for +an observation of the input dataset the condition is not fulfilled, the +observation is removed. + +If \code{first_cond_upper} is specified, for each observation of the input +dataset the joined dataset is restricted to observations up to the first +observation where \code{first_cond_upper} is fulfilled (the observation +fulfilling the condition is included). If for an observation of the input +dataset the condition is not fulfilled, the observation is removed. + +For an example see the last example in the "Examples" section. \item The joined dataset is restricted by the \code{filter_join} condition. \item If \code{order} is specified, for each observation of the input dataset the first or last observation (depending on \code{mode}) is selected. 
@@ -233,6 +309,7 @@ windows <- tribble( derive_vars_joined( adbds, dataset_add = windows, + join_type = "all", filter_join = AWLO <= ADY & ADY <= AWHI ) @@ -255,6 +332,7 @@ derive_vars_joined( order = exprs(AVAL), new_vars = exprs(NADIR = AVAL), join_vars = exprs(ADY), + join_type = "all", filter_add = ADY > 0, filter_join = ADY.join < ADY, mode = "first", @@ -288,6 +366,7 @@ derive_vars_joined( by_vars = exprs(USUBJID), order = exprs(AVAL, desc(ADY)), new_vars = exprs(HGB_MAX = AVAL, HGB_DY = ADY), + join_type = "all", filter_add = PARAMCD == "HGB", filter_join = ASTDY - 14 <= ADY & ADY <= ASTDY, mode = "last" @@ -328,6 +407,7 @@ derive_vars_joined( dataset_add = period_ref, by_vars = exprs(STUDYID, USUBJID), join_vars = exprs(APERSDT, APEREDT), + join_type = "all", filter_join = APERSDT <= ASTDT & ASTDT <= APEREDT ) @@ -354,6 +434,7 @@ derive_vars_joined( dataset_add = ex, by_vars = exprs(USUBJID), order = exprs(EXSDT = convert_dtc_to_dt(EXSDTC)), + join_type = "all", new_vars = exprs(LDRELD = compute_duration( start_date = EXSDT, end_date = ASTDT )), @@ -361,8 +442,55 @@ derive_vars_joined( filter_join = EXSDT <= ASTDT, mode = "last" ) + +# first_cond_lower and first_cond_upper argument +myd <- tribble( + ~subj, ~day, ~val, + "1", 1, "++", + "1", 2, "-", + "1", 3, "0", + "1", 4, "+", + "1", 5, "++", + "1", 6, "-", + "2", 1, "-", + "2", 2, "++", + "2", 3, "+", + "2", 4, "0", + "2", 5, "-", + "2", 6, "++" +) + +# derive last "++" day before "0" where all results in between are "+" or "++" +derive_vars_joined( + myd, + dataset_add = myd, + by_vars = exprs(subj), + order = exprs(day), + mode = "first", + new_vars = exprs(prev_plus_day = day), + join_vars = exprs(val), + join_type = "before", + first_cond_lower = val.join == "++", + filter_join = val == "0" & all(val.join \%in\% c("+", "++")) +) + +# derive first "++" day after "0" where all results in between are "+" or "++" +derive_vars_joined( + myd, + dataset_add = myd, + by_vars = exprs(subj), + order = 
exprs(day), + mode = "last", + new_vars = exprs(next_plus_day = day), + join_vars = exprs(val), + join_type = "after", + first_cond_upper = val.join == "++", + filter_join = val == "0" & all(val.join \%in\% c("+", "++")) +) } \seealso{ +\code{\link[=derive_var_joined_exist_flag]{derive_var_joined_exist_flag()}}, \code{\link[=filter_joined]{filter_joined()}} + General Derivation Functions for all ADaMs that returns variable appended to dataset: \code{\link{derive_var_extreme_flag}()}, \code{\link{derive_var_joined_exist_flag}()}, diff --git a/man/event.Rd b/man/event.Rd index aeabb110d1..6c85a96645 100644 --- a/man/event.Rd +++ b/man/event.Rd @@ -18,7 +18,9 @@ event( \item{dataset_name}{Dataset name of the dataset to be used as input for the event. The name refers to the dataset specified for \code{source_datasets} in \code{derive_extreme_event()}. If the argument is not specified, the input -dataset (\code{dataset}) of \code{derive_extreme_event()} is used.} +dataset (\code{dataset}) of \code{derive_extreme_event()} is used. + +\emph{Permitted Values}: a character scalar} \item{condition}{An unquoted condition for selecting the observations, which will contribute to the extreme event. If the condition contains summary @@ -39,7 +41,9 @@ first or last observation if \code{mode} is specified. \item{set_values_to}{A named list returned by \code{exprs()} defining the variables to be set for the event, e.g. \code{exprs(PARAMCD = "WSP", PARAM = "Worst Sleeping Problems")}. The values can be a symbol, a -character string, a numeric value, \code{NA} or an expression.} +character string, a numeric value, \code{NA} or an expression. 
+ +\emph{Permitted Values}: a named list of expressions, e.g., created by \code{exprs()}} \item{keep_source_vars}{Variables to keep from the source dataset @@ -53,7 +57,9 @@ a symbol or a tidyselect expression, e.g., \code{exprs(VISIT, VISITNUM, starts_w \item{description}{Description of the event The description does not affect the derivations where the event is used. It -is intended for documentation only.} +is intended for documentation only. + +\emph{Permitted Values}: a character scalar} } \value{ An object of class \code{event} diff --git a/man/event_joined.Rd b/man/event_joined.Rd index 0778b14ae8..55cdbedffc 100644 --- a/man/event_joined.Rd +++ b/man/event_joined.Rd @@ -11,6 +11,8 @@ event_joined( join_vars, join_type, first_cond = NULL, + first_cond_lower = NULL, + first_cond_upper = NULL, set_values_to = NULL, keep_source_vars = NULL, description = NULL @@ -20,11 +22,22 @@ event_joined( \item{dataset_name}{Dataset name of the dataset to be used as input for the event. The name refers to the dataset specified for \code{source_datasets} in \code{derive_extreme_event()}. If the argument is not specified, the input -dataset (\code{dataset}) of \code{derive_extreme_event()} is used.} +dataset (\code{dataset}) of \code{derive_extreme_event()} is used. + +\emph{Permitted Values}: a character scalar} \item{condition}{An unquoted condition for selecting the observations, which will contribute to the extreme event. +The condition is applied to the joined dataset for selecting the confirmed +observations. The condition can include summary functions like \code{all()} or +\code{any()}. The joined dataset is grouped by the original observations. I.e., +the summary functions are applied to all observations up to the confirmation +observation.
For example in the oncology setting when using this function +for confirmed best overall response, \code{condition = AVALC == "CR" & all(AVALC.join \%in\% c("CR", "NE")) & count_vals(var = AVALC.join, val = "NE") <= 1} selects observations with response "CR" and for all +observations up to the confirmation observation the response is "CR" or +"NE" and there is at most one "NE". + \emph{Permitted Values}: an unquoted condition} \item{order}{If specified, the specified variables or expressions are used to @@ -37,9 +50,11 @@ select the first observation. The variables needed from the other observations should be specified for this parameter. The specified variables are added to the joined dataset -with suffix ".join". For example to select all observations with \code{AVALC == "Y"} and \code{AVALC == "Y"} for at least one subsequent visit \code{join_vars = exprs(AVALC, AVISITN)} and \code{filter = AVALC == "Y" & AVALC.join == "Y" & AVISITN < AVISITN.join} could be specified. +with suffix ".join". For example to select all observations with \code{AVALC == "Y"} and \code{AVALC == "Y"} for at least one subsequent visit \code{join_vars = exprs(AVALC, AVISITN)} and \code{condition = AVALC == "Y" & AVALC.join == "Y" & AVISITN < AVISITN.join} could be specified. + +The \verb{*.join} variables are not included in the output dataset. -The \verb{*.join} variables are not included in the output dataset.} +\emph{Permitted Values}: a named list of expressions, e.g., created by \code{exprs()}} \item{join_type}{Observations to keep after joining @@ -51,14 +66,50 @@ kept. \item{first_cond}{Condition for selecting range of data +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} + +This argument is \emph{deprecated}, please use \code{first_cond_upper} instead. 
+ If this argument is specified, the other observations are restricted up to the first observation where the specified condition is fulfilled. If the condition is not fulfilled for any of the subsequent observations, all -observations are removed.} +observations are removed. + +\emph{Permitted Values}: an unquoted condition} + +\item{first_cond_lower}{Condition for selecting range of data (before) + +If this argument is specified, the other observations are restricted from +the first observation before the current observation where the specified +condition is fulfilled up to the current observation. If the condition is +not fulfilled for any of the other observations, no observations are +considered, i.e., the observation is not flagged. + +This parameter should be specified if \code{condition} contains summary +functions which should not apply to all observations but only from a +certain observation before the current observation up to the current +observation. + +\emph{Permitted Values}: an unquoted condition} + +\item{first_cond_upper}{Condition for selecting range of data (after) + +If this argument is specified, the other observations are restricted up to +the first observation where the specified condition is fulfilled. If the +condition is not fulfilled for any of the other observations, no +observations are considered, i.e., the observation is not flagged. + +This parameter should be specified if \code{condition} contains summary +functions which should not apply to all observations but only up to the +confirmation assessment. + +\emph{Permitted Values}: an unquoted condition} \item{set_values_to}{A named list returned by \code{exprs()} defining the variables to be set for the event, e.g. \code{exprs(PARAMCD = "WSP", PARAM = "Worst Sleeping Problems")}. The values can be a symbol, a -character string, a numeric value, \code{NA} or an expression.} +character string, a numeric value, \code{NA} or an expression. 
+ +\emph{Permitted Values}: a named list of expressions, e.g., created by \code{exprs()}} \item{keep_source_vars}{Variables to keep from the source dataset @@ -72,7 +123,9 @@ a symbol or a tidyselect expression, e.g., \code{exprs(VISIT, VISITNUM, starts_w \item{description}{Description of the event The description does not affect the derivations where the event is used. It -is intended for documentation only.} +is intended for documentation only. + +\emph{Permitted Values}: a character scalar} } \value{ An object of class \code{event_joined} @@ -87,6 +140,141 @@ observation of the source dataset. The events are selected by calling \code{filter_joined()}. See its documentation for more details. } +\examples{ +library(tibble) +library(dplyr) +library(lubridate) +# Derive confirmed best overall response (using event_joined()) +# CR - complete response, PR - partial response, SD - stable disease +# NE - not evaluable, PD - progressive disease +adsl <- tribble( + ~USUBJID, ~TRTSDTC, + "1", "2020-01-01", + "2", "2019-12-12", + "3", "2019-11-11", + "4", "2019-12-30", + "5", "2020-01-01", + "6", "2020-02-02", + "7", "2020-02-02", + "8", "2020-02-01" +) \%>\% + mutate(TRTSDT = ymd(TRTSDTC)) + +adrs <- tribble( + ~USUBJID, ~ADTC, ~AVALC, + "1", "2020-01-01", "PR", + "1", "2020-02-01", "CR", + "1", "2020-02-16", "NE", + "1", "2020-03-01", "CR", + "1", "2020-04-01", "SD", + "2", "2020-01-01", "SD", + "2", "2020-02-01", "PR", + "2", "2020-03-01", "SD", + "2", "2020-03-13", "CR", + "4", "2020-01-01", "PR", + "4", "2020-03-01", "NE", + "4", "2020-04-01", "NE", + "4", "2020-05-01", "PR", + "5", "2020-01-01", "PR", + "5", "2020-01-10", "PR", + "5", "2020-01-20", "PR", + "6", "2020-02-06", "PR", + "6", "2020-02-16", "CR", + "6", "2020-03-30", "PR", + "7", "2020-02-06", "PR", + "7", "2020-02-16", "CR", + "7", "2020-04-01", "NE", + "8", "2020-02-16", "PD" +) \%>\% + mutate( + ADT = ymd(ADTC), + PARAMCD = "OVR", + PARAM = "Overall Response by Investigator" + ) \%>\% + 
derive_vars_merged( + dataset_add = adsl, + by_vars = exprs(USUBJID), + new_vars = exprs(TRTSDT) + ) + +derive_extreme_event( + adrs, + by_vars = exprs(USUBJID), + order = exprs(ADT), + mode = "first", + source_datasets = list(adsl = adsl), + events = list( + event_joined( + description = paste( + "CR needs to be confirmed by a second CR at least 28 days later", + "at most one NE is acceptable between the two assessments" + ), + join_vars = exprs(AVALC, ADT), + join_type = "after", + first_cond_upper = AVALC.join == "CR" & + ADT.join >= ADT + 28, + condition = AVALC == "CR" & + all(AVALC.join \%in\% c("CR", "NE")) & + count_vals(var = AVALC.join, val = "NE") <= 1, + set_values_to = exprs( + AVALC = "CR" + ) + ), + event_joined( + description = paste( + "PR needs to be confirmed by a second CR or PR at least 28 days later,", + "at most one NE is acceptable between the two assessments" + ), + join_vars = exprs(AVALC, ADT), + join_type = "after", + first_cond_upper = AVALC.join \%in\% c("CR", "PR") & + ADT.join >= ADT + 28, + condition = AVALC == "PR" & + all(AVALC.join \%in\% c("CR", "PR", "NE")) & + count_vals(var = AVALC.join, val = "NE") <= 1, + set_values_to = exprs( + AVALC = "PR" + ) + ), + event( + description = paste( + "CR, PR, or SD are considered as SD if occurring at least 28", + "after treatment start" + ), + condition = AVALC \%in\% c("CR", "PR", "SD") & ADT >= TRTSDT + 28, + set_values_to = exprs( + AVALC = "SD" + ) + ), + event( + condition = AVALC == "PD", + set_values_to = exprs( + AVALC = "PD" + ) + ), + event( + condition = AVALC \%in\% c("CR", "PR", "SD", "NE"), + set_values_to = exprs( + AVALC = "NE" + ) + ), + event( + description = "set response to MISSING for patients without records in ADRS", + dataset_name = "adsl", + condition = TRUE, + set_values_to = exprs( + AVALC = "MISSING" + ), + keep_source_vars = exprs(TRTSDT) + ) + ), + set_values_to = exprs( + PARAMCD = "CBOR", + PARAM = "Best Confirmed Overall Response by Investigator" + ) +) 
\%>\% + filter(PARAMCD == "CBOR") +} \seealso{ \code{\link[=derive_extreme_event]{derive_extreme_event()}}, \code{\link[=event]{event()}} diff --git a/man/filter_joined.Rd b/man/filter_joined.Rd index 09c6b5f660..26641168f8 100644 --- a/man/filter_joined.Rd +++ b/man/filter_joined.Rd @@ -6,20 +6,30 @@ \usage{ filter_joined( dataset, + dataset_add, by_vars, join_vars, join_type, first_cond = NULL, + first_cond_lower = NULL, + first_cond_upper = NULL, order, tmp_obs_nr_var = NULL, - filter, + filter_add = NULL, + filter_join, + filter = NULL, check_type = "warning" ) } \arguments{ \item{dataset}{Input dataset -The variables specified by the \code{by_vars}, \code{order} and \code{join_vars} argument(s) to be expected.} +The variables specified by the \code{by_vars} and \code{order} argument(s) to be expected.} + +\item{dataset_add}{Additional dataset + +The variables specified for \code{by_vars}, \code{join_vars}, and \code{order} are +expected.} \item{by_vars}{By variables @@ -30,25 +40,61 @@ dataset with itself.} The variables needed from the other observations should be specified for this parameter. The specified variables are added to the joined dataset -with suffix ".join". For example to select all observations with \code{AVALC == "Y"} and \code{AVALC == "Y"} for at least one subsequent visit \code{join_vars = exprs(AVALC, AVISITN)} and \code{filter = AVALC == "Y" & AVALC.join == "Y" & AVISITN < AVISITN.join} could be specified. +with suffix ".join". For example to select all observations with \code{AVALC == "Y"} and \code{AVALC == "Y"} for at least one subsequent visit \code{join_vars = exprs(AVALC, AVISITN)} and \code{filter_join = AVALC == "Y" & AVALC.join == "Y" & AVISITN < AVISITN.join} could be specified. The \verb{*.join} variables are not included in the output dataset.} \item{join_type}{Observations to keep after joining The argument determines which of the joined observations are kept with -respect to the original observation. 
For example, if \code{join_type = "after"} is specified all observations after the original observations are -kept. +respect to the original observation. For example, if \code{join_type = "after"} +is specified all observations after the original observations are kept. + +For example for confirmed response or BOR in the oncology setting or +confirmed deterioration in questionnaires the confirmatory assessment must +be after the assessment. Thus \code{join_type = "after"} could be used. + +Whereas, sometimes you might allow for confirmatory observations to occur +prior to the observation. For example, to identify AEs occurring on or +after seven days before a COVID AE. Thus \code{join_type = "all"} could be used. \emph{Permitted Values:} \code{"before"}, \code{"after"}, \code{"all"}} \item{first_cond}{Condition for selecting range of data +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} + +This argument is \emph{deprecated}, please use \code{first_cond_upper} instead. + If this argument is specified, the other observations are restricted up to the first observation where the specified condition is fulfilled. If the condition is not fulfilled for any of the subsequent observations, all observations are removed.} +\item{first_cond_lower}{Condition for selecting range of data (before) + +If this argument is specified, the other observations are restricted from +the first observation before the current observation where the specified +condition is fulfilled up to the current observation. If the condition is +not fulfilled for any of the other observations, no observations are +considered, i.e., the observation is not flagged. 
+ +This parameter should be specified if \code{filter_join} contains summary +functions which should not apply to all observations but only from a +certain observation before the current observation up to the current +observation. For an example see the last example below.} + +\item{first_cond_upper}{Condition for selecting range of data (after) + +If this argument is specified, the other observations are restricted up to +the first observation where the specified condition is fulfilled. If the +condition is not fulfilled for any of the other observations, no +observations are considered, i.e., the observation is not flagged. + +This parameter should be specified if \code{filter_join} contains summary +functions which should not apply to all observations but only up to the +confirmation assessment. For an example see the last example below.} + \item{order}{Order The observations are ordered by the specified order. @@ -58,15 +104,42 @@ The observations are ordered by the specified order. \item{tmp_obs_nr_var}{Temporary observation number -The specified variable is added to the input dataset and set to the -observation number with respect to \code{order}. For each by group (\code{by_vars}) -the observation number starts with \code{1}. The variable can be used in the -conditions (\code{filter}, \code{first_cond}). It is not included in the output -dataset. It can be used to select consecutive observations or the last -observation (see last example below).} +The specified variable is added to the input dataset (\code{dataset}) and the +additional dataset (\code{dataset_add}). It is set to the observation number +with respect to \code{order}. For each by group (\code{by_vars}) the observation +number starts with \code{1}. The variable can be used in the conditions +(\code{filter_join}, \code{first_cond_upper}, \code{first_cond_lower}). It is not included +in the output dataset. 
It can also be used to select consecutive +observations or the last observation (see example below).} + +\item{filter_add}{Filter for additional dataset (\code{dataset_add}) + +Only observations from \code{dataset_add} fulfilling the specified condition are +joined to the input dataset. If the argument is not specified, all +observations are joined. + +Variables created by the \code{order} argument can be used in the condition. + +The condition can include summary functions. The additional dataset is +grouped by the by variables (\code{by_vars}).} + +\item{filter_join}{Condition for selecting observations + +The filter is applied to the joined dataset for selecting the confirmed +observations. The condition can include summary functions like \code{all()} or +\code{any()}. The joined dataset is grouped by the original observations. I.e., +the summary functions are applied to all observations up to the confirmation +observation. For example in the oncology setting when using this function +for confirmed best overall response, \code{filter_join = AVALC == "CR" & all(AVALC.join \%in\% c("CR", "NE")) & count_vals(var = AVALC.join, val = "NE") <= 1} selects observations with response "CR" and for all +observations up to the confirmation observation the response is "CR" or +"NE" and there is at most one "NE".} \item{filter}{Condition for selecting observations +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} + +This argument is \emph{deprecated}, please use \code{filter_join} instead. + The filter is applied to the joined dataset for selecting the confirmed observations. The condition can include summary functions. The joined dataset is grouped by the original observations.
I.e., the summary function @@ -83,8 +156,6 @@ If \code{"warning"} or \code{"error"} is specified, the specified message is iss if the observations of the input dataset are not unique with respect to the by variables and the order. -\emph{Default:} \code{"none"} - \emph{Permitted Values:} \code{"none"}, \code{"warning"}, \code{"error"}} } \value{ @@ -108,13 +179,23 @@ used in endpoints such as best overall response. \details{ The following steps are performed to produce the output dataset. \subsection{Step 1}{ +\itemize{ +\item The variables specified by \code{order} are added to the additional dataset +(\code{dataset_add}). +\item The variables specified by \code{join_vars} are added to the additional dataset +(\code{dataset_add}). +\item The records from the additional dataset (\code{dataset_add}) are restricted to +those matching the \code{filter_add} condition. +} -The input dataset is joined with itself by the variables specified for -\code{by_vars}. From the right hand side of the join only the variables -specified for \code{join_vars} are kept. The suffix ".join" is added to these -variables. +Then the input dataset (\code{dataset}) is joined with the restricted +additional dataset by the variables specified for \code{by_vars}. From the +additional dataset only the variables specified for \code{join_vars} are kept. +The suffix ".join" is added to those variables which are also present in +the input dataset. -For example, for \code{by_vars = USUBJID}, \code{join_vars = exprs(AVISITN, AVALC)} and input dataset +For example, for \code{by_vars = USUBJID}, \code{join_vars = exprs(AVISITN, AVALC)} +and input dataset and additional dataset \if{html}{\out{
}}\preformatted{# A tibble: 2 x 4 USUBJID AVISITN AVALC AVAL @@ -151,18 +232,27 @@ USUBJID AVISITN AVALC AVAL AVISITN.join AVALC.join \subsection{Step 3}{ -If \code{first_cond} is specified, for each observation of the input dataset the -joined dataset is restricted to observations up to the first observation -where \code{first_cond} is fulfilled (the observation fulfilling the condition -is included). If for an observation of the input dataset the condition is -not fulfilled, the observation is removed. +If \code{first_cond_lower} is specified, for each observation of the input +dataset the joined dataset is restricted to observations from the first +observation where \code{first_cond_lower} is fulfilled (the observation +fulfilling the condition is included) up to the observation of the input +dataset. If for an observation of the input dataset the condition is not +fulfilled, the observation is removed. + +If \code{first_cond_upper} is specified, for each observation of the input +dataset the joined dataset is restricted to observations up to the first +observation where \code{first_cond_upper} is fulfilled (the observation +fulfilling the condition is included). If for an observation of the input +dataset the condition is not fulfilled, the observation is removed. + +For an example see the last example in the "Examples" section. } \subsection{Step 4}{ The joined dataset is grouped by the observations from the input dataset and restricted to the observations fulfilling the condition specified by -\code{filter}. +\code{filter_join}. 
} \subsection{Step 5}{ @@ -194,11 +284,12 @@ adae <- tribble( filter_joined( adae, + dataset_add = adae, by_vars = exprs(USUBJID), join_vars = exprs(ACOVFL, ADY), join_type = "all", order = exprs(ADY), - filter = ADURN > 30 & ACOVFL.join == "Y" & ADY >= ADY.join - 7 + filter_join = ADURN > 30 & ACOVFL.join == "Y" & ADY >= ADY.join - 7 ) # filter observations with AVALC == "Y" and AVALC == "Y" at a subsequent visit @@ -217,11 +308,12 @@ data <- tribble( filter_joined( data, + dataset_add = data, by_vars = exprs(USUBJID), join_vars = exprs(AVALC, AVISITN), join_type = "after", order = exprs(AVISITN), - filter = AVALC == "Y" & AVALC.join == "Y" & AVISITN < AVISITN.join + filter_join = AVALC == "Y" & AVALC.join == "Y" & AVISITN < AVISITN.join ) # select observations with AVALC == "CR", AVALC == "CR" at a subsequent visit, @@ -246,12 +338,13 @@ data <- tribble( filter_joined( data, + dataset_add = data, by_vars = exprs(USUBJID), join_vars = exprs(AVALC), join_type = "after", order = exprs(AVISITN), - first_cond = AVALC.join == "CR", - filter = AVALC == "CR" & all(AVALC.join \%in\% c("CR", "NE")) & + first_cond_upper = AVALC.join == "CR", + filter_join = AVALC == "CR" & all(AVALC.join \%in\% c("CR", "NE")) & count_vals(var = AVALC.join, val = "NE") <= 1 ) @@ -278,12 +371,13 @@ data <- tribble( filter_joined( data, + dataset_add = data, by_vars = exprs(USUBJID), join_vars = exprs(AVALC, ADY), join_type = "after", order = exprs(ADY), - first_cond = AVALC.join \%in\% c("CR", "PR") & ADY.join - ADY >= 20, - filter = AVALC == "PR" & + first_cond_upper = AVALC.join \%in\% c("CR", "PR") & ADY.join - ADY >= 20, + filter_join = AVALC == "PR" & all(AVALC.join \%in\% c("CR", "PR", "NE")) & count_vals(var = AVALC.join, val = "NE") <= 1 & ( @@ -310,15 +404,58 @@ data <- tribble( filter_joined( data, + dataset_add = data, by_vars = exprs(USUBJID), tmp_obs_nr_var = tmp_obs_nr, join_vars = exprs(CRIT1FL), join_type = "all", order = exprs(AVISITN), - filter = CRIT1FL == "Y" & 
CRIT1FL.join == "Y" & + filter_join = CRIT1FL == "Y" & CRIT1FL.join == "Y" & (tmp_obs_nr + 1 == tmp_obs_nr.join | tmp_obs_nr == max(tmp_obs_nr.join)) ) +# first_cond_lower and first_cond_upper argument +myd <- tribble( + ~subj, ~day, ~val, + "1", 1, "++", + "1", 2, "-", + "1", 3, "0", + "1", 4, "+", + "1", 5, "++", + "1", 6, "-", + "2", 1, "-", + "2", 2, "++", + "2", 3, "+", + "2", 4, "0", + "2", 5, "-", + "2", 6, "++" +) + +# select "0" where all results from the first "++" before the "0" up to the "0" +# (excluding the "0") are "+" or "++" +filter_joined( + myd, + dataset_add = myd, + by_vars = exprs(subj), + order = exprs(day), + join_vars = exprs(val), + join_type = "before", + first_cond_lower = val.join == "++", + filter_join = val == "0" & all(val.join \%in\% c("+", "++")) +) + +# select "0" where all results from the "0" (excluding the "0") up to the first +# "++" after the "0" are "+" or "++" +filter_joined( + myd, + dataset_add = myd, + by_vars = exprs(subj), + order = exprs(day), + join_vars = exprs(val), + join_type = "after", + first_cond_upper = val.join == "++", + filter_join = val == "0" & all(val.join \%in\% c("+", "++")) +) } \seealso{ \code{\link[=count_vals]{count_vals()}}, \code{\link[=min_cond]{min_cond()}}, \code{\link[=max_cond]{max_cond()}} diff --git a/man/get_joined_data.Rd b/man/get_joined_data.Rd new file mode 100644 index 0000000000..b82c96a808 --- /dev/null +++ b/man/get_joined_data.Rd @@ -0,0 +1,211 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/derive_joined.R +\name{get_joined_data} +\alias{get_joined_data} +\title{Join Data for "joined" functions} +\usage{ +get_joined_data( + dataset, + dataset_add, + by_vars = NULL, + join_vars = NULL, + join_type, + first_cond_lower = NULL, + first_cond_upper = NULL, + order = NULL, + tmp_obs_nr_var = NULL, + filter_add = NULL, + filter_join = NULL, + check_type = "warning" +) +} +\arguments{ +\item{dataset}{Input dataset + +The variables specified by the 
\code{by_vars} argument(s) are expected.} + +\item{dataset_add}{Additional dataset + +The variables specified by the \code{by_vars}, the \code{new_vars}, the \code{join_vars}, +and the \code{order} argument are expected.} + +\item{by_vars}{Grouping variables + +The two datasets are joined by the specified variables. Variables from the +additional dataset can be renamed by naming the element, i.e., \verb{by_vars = exprs(<name in input dataset> = <name in additional dataset>)}. + +\emph{Permitted Values}: list of variables created by \code{exprs()}} + +\item{join_vars}{Variables to use from additional dataset + +Any extra variables required from the additional dataset for \code{filter_join} +should be specified for this argument. Variables specified for \code{new_vars} +do not need to be repeated for \code{join_vars}. If a specified variable exists +in both the input dataset and the additional dataset, the suffix ".join" is +added to the variable from the additional dataset. + +If an expression is named, e.g., \code{exprs(EXTDT = convert_dtc_to_dt(EXSTDTC))}, a corresponding variable is added to the +additional dataset and can be used in the filter conditions (\code{filter_add}, +\code{filter_join}) and for \code{new_vars}. The variable is not included in the +output dataset. + +The variables are not included in the output dataset. + +\emph{Permitted Values}: list of variables or named expressions created by \code{exprs()}} + +\item{join_type}{Observations to keep after joining + +The argument determines which of the joined observations are kept with +respect to the original observation. For example, if \code{join_type = "after"} +is specified all observations after the original observations are kept. + +For example for confirmed response or BOR in the oncology setting or +confirmed deterioration in questionnaires the confirmatory assessment must +be after the assessment. Thus \code{join_type = "after"} could be used.
+ +Whereas, sometimes you might allow for confirmatory observations to occur +prior to the observation. For example, to identify AEs occurring on or +after seven days before a COVID AE. Thus \code{join_type = "all"} could be used. + +\emph{Permitted Values:} \code{"before"}, \code{"after"}, \code{"all"}} + +\item{first_cond_lower}{Condition for selecting range of data (before) + +If this argument is specified, the other observations are restricted from +the first observation before the current observation where the specified +condition is fulfilled up to the current observation. If the condition is +not fulfilled for any of the other observations, no observations are +considered, i.e., the observation is not flagged. + +This argument should be specified if \code{filter_join} contains summary +functions which should not apply to all observations but only from a +certain observation before the current observation up to the current +observation.} + +\item{first_cond_upper}{Condition for selecting range of data (after) + +If this argument is specified, the other observations are restricted up to +the first observation where the specified condition is fulfilled. If the +condition is not fulfilled for any of the other observations, no +observations are considered, i.e., the observation is not flagged. + +This argument should be specified if \code{filter_join} contains summary +functions which should not apply to all observations but only up to the +confirmation assessment.} + +\item{order}{Sort order + +If the argument is set to a non-null value, for each observation of the +input dataset the first or last observation from the joined dataset is +selected with respect to the specified order. The specified variables are +expected in the additional dataset (\code{dataset_add}). If a variable is +available in both \code{dataset} and \code{dataset_add}, the one from \code{dataset_add} +is used for the sorting. 
+ +If an expression is named, e.g., \code{exprs(EXSTDT = convert_dtc_to_dt(EXSTDTC), EXSEQ)}, a corresponding variable (\code{EXSTDT}) is +added to the additional dataset and can be used in the filter conditions +(\code{filter_add}, \code{filter_join}) and for \code{join_vars} and \code{new_vars}. The +variable is not included in the output dataset. + +\emph{Permitted Values}: list of expressions created by \code{exprs()}, e.g., +\code{exprs(ADT, desc(AVAL))} or \code{NULL}} + +\item{tmp_obs_nr_var}{Temporary observation number + +The specified variable is added to the input dataset (\code{dataset}) and the +additional dataset (\code{dataset_add}). It is set to the observation number +with respect to \code{order}. For each by group (\code{by_vars}) the observation +number starts with \code{1}. The variable can be used in the conditions +(\code{filter_join}, \code{first_cond_upper}, \code{first_cond_lower}). It can also be +used to select consecutive observations or the last observation.} + +\item{filter_add}{Filter for additional dataset (\code{dataset_add}) + +Only observations from \code{dataset_add} fulfilling the specified condition are +joined to the input dataset. If the argument is not specified, all +observations are joined. + +Variables created by \code{order} or \code{new_vars} arguments can be used in the +condition. + +The condition can include summary functions like \code{all()} or \code{any()}. The +additional dataset is grouped by the by variables (\code{by_vars}). + +\emph{Permitted Values}: a condition} + +\item{filter_join}{Filter for the joined dataset + +The specified condition is applied to the joined dataset. Therefore +variables from both datasets \code{dataset} and \code{dataset_add} can be used. + +Variables created by \code{order} or \code{new_vars} arguments can be used in the +condition. + +The condition can include summary functions like \code{all()} or \code{any()}. The +joined dataset is grouped by the original observations. 
+ +\emph{Permitted Values}: a condition} + +\item{check_type}{Check uniqueness? + +If \code{"warning"} or \code{"error"} is specified, the specified message is issued +if the observations of the (restricted) joined dataset are not unique with +respect to the by variables and the order. + +This argument is ignored if \code{order} is not specified. In this case an error +is issued independent of \code{check_type} if the restricted joined dataset +contains more than one observation for any of the observations of the input +dataset. + +\emph{Permitted Values}: \code{"none"}, \code{"warning"}, \code{"error"}} + +\item{new_vars}{Variables to add + +The specified variables from the additional dataset are added to the output +dataset. Variables can be renamed by naming the element, i.e., \verb{new_vars = exprs(<new name> = <old name>)}. + +For example \code{new_vars = exprs(var1, var2)} adds variables \code{var1} and \code{var2} +from \code{dataset_add} to the input dataset. + +And \code{new_vars = exprs(var1, new_var2 = old_var2)} takes \code{var1} and +\code{old_var2} from \code{dataset_add} and adds them to the input dataset renaming +\code{old_var2} to \code{new_var2}. + +Values of the added variables can be modified by specifying an expression. +For example, \code{new_vars = exprs(LASTRSP = str_to_upper(AVALC))} adds the +variable \code{LASTRSP} to the dataset and sets it to the upper case value of +\code{AVALC}. + +If the argument is not specified or set to \code{NULL}, all variables from the +additional dataset (\code{dataset_add}) are added. + +\emph{Permitted Values}: list of variables or named expressions created by \code{exprs()}} + +\item{mode}{Selection mode + +Determines if the first or last observation is selected. If the \code{order} +argument is specified, \code{mode} must be non-null. + +If the \code{order} argument is not specified, the \code{mode} argument is ignored.
+ +\emph{Permitted Values}: \code{"first"}, \code{"last"}, \code{NULL}} +} +\description{ +The helper function joins the data for the "joined" functions. All \code{.join} +variables are included in the output dataset. +} +\details{ +\enumerate{ +\item The variables specified by \code{order} are added to the additional dataset +(\code{dataset_add}). +\item The variables specified by \code{join_vars} are added to the additional dataset +(\code{dataset_add}). +\item The records from the additional dataset (\code{dataset_add}) are restricted to +those matching the \code{filter_add} condition. +\item The input dataset and the (restricted) additional dataset are left joined +by the grouping variables (\code{by_vars}). If no grouping variables are +specified, a full join is performed. +\item The joined dataset is restricted by the \code{filter_join} condition. +} +} +\keyword{internal} diff --git a/tests/testthat/test-derive_extreme_event.R b/tests/testthat/test-derive_extreme_event.R index 23711ceb8f..58b6e896b5 100644 --- a/tests/testthat/test-derive_extreme_event.R +++ b/tests/testthat/test-derive_extreme_event.R @@ -1,6 +1,6 @@ -# derive_extreme_records ---- +# derive_extreme_event ---- ## Test 1: `mode` = first ---- -test_that("derive_extreme_records Test 1: `mode` = first", { +test_that("derive_extreme_event Test 1: `mode` = first", { input <- tibble::tribble( ~USUBJID, ~PARAMCD, ~AVALC, ~ADY, "1", "NO SLEEP", "N", 1, @@ -67,7 +67,7 @@ test_that("derive_extreme_records Test 1: `mode` = first", { }) ## Test 2: `mode` = last ---- -test_that("derive_extreme_records Test 2: `mode` = last", { +test_that("derive_extreme_event Test 2: `mode` = last", { input <- tibble::tribble( ~USUBJID, ~PARAMCD, ~AVALC, ~ADY, "1", "NO SLEEP", "N", 1, @@ -134,7 +134,7 @@ test_that("derive_extreme_records Test 2: `mode` = last", { }) ## Test 3: `source_datasets` works ---- -test_that("derive_extreme_records Test 3: `source_datasets` works", { +test_that("derive_extreme_event Test 3: 
`source_datasets` works", { adsl <- tibble::tribble( ~USUBJID, ~TRTSDTC, "1", "2020-01-01", @@ -282,7 +282,7 @@ test_that("derive_extreme_records Test 3: `source_datasets` works", { }) ## Test 4: event-specific mode ---- -test_that("derive_extreme_records Test 4: event-specific mode", { +test_that("derive_extreme_event Test 4: event-specific mode", { adhy <- tibble::tribble( ~USUBJID, ~AVISITN, ~CRIT1FL, "1", 1, "Y", @@ -342,7 +342,7 @@ test_that("derive_extreme_records Test 4: event-specific mode", { }) ## Test 5: event_joined() is handled correctly ---- -test_that("derive_extreme_records Test 5: event_joined() is handled correctly", { +test_that("derive_extreme_event Test 5: event_joined() is handled correctly", { adsl <- tibble::tribble( ~USUBJID, ~TRTSDTC, "1", "2020-01-01", @@ -414,7 +414,7 @@ test_that("derive_extreme_records Test 5: event_joined() is handled correctly", event_joined( join_vars = exprs(AVALC, ADT), join_type = "after", - first_cond = AVALC.join == "CR" & + first_cond_upper = AVALC.join == "CR" & ADT.join >= ADT + 28, condition = AVALC == "CR" & all(AVALC.join %in% c("CR", "NE")) & @@ -426,7 +426,7 @@ test_that("derive_extreme_records Test 5: event_joined() is handled correctly", event_joined( join_vars = exprs(AVALC, ADT), join_type = "after", - first_cond = AVALC.join %in% c("CR", "PR") & + first_cond_upper = AVALC.join %in% c("CR", "PR") & ADT.join >= ADT + 28, condition = AVALC == "PR" & all(AVALC.join %in% c("CR", "PR", "NE")) & @@ -517,7 +517,7 @@ test_that("derive_extreme_records Test 5: event_joined() is handled correctly", }) ## Test 6: no tmp_event_nr_var ---- -test_that("derive_extreme_records Test 6: no tmp_event_nr_var", { +test_that("derive_extreme_event Test 6: no tmp_event_nr_var", { adrs <- tibble::tribble( ~USUBJID, ~AVISITN, ~AVALC, "1", 1, "PR", @@ -535,14 +535,14 @@ test_that("derive_extreme_records Test 6: no tmp_event_nr_var", { event_joined( join_vars = exprs(AVALC), join_type = "after", - first_cond = AVALC.join == 
"CR", + first_cond_upper = AVALC.join == "CR", condition = AVALC == "CR", set_values_to = exprs(AVALC = "Y") ), event_joined( join_vars = exprs(AVALC), join_type = "after", - first_cond = AVALC.join %in% c("CR", "PR"), + first_cond_upper = AVALC.join %in% c("CR", "PR"), condition = AVALC == "PR", set_values_to = exprs(AVALC = "Y") ) @@ -566,8 +566,9 @@ test_that("derive_extreme_records Test 6: no tmp_event_nr_var", { keys = c("USUBJID", "PARAMCD", "AVISITN") ) }) + ## Test 7: deprecation of ignore_event_order ---- -test_that("derive_extreme_records Test 7: deprecation of ignore_event_order", { +test_that("derive_extreme_event Test 7: deprecation of ignore_event_order", { adrs <- tibble::tribble( ~USUBJID, ~AVISITN, ~AVALC, "1", 1, "PR", @@ -586,14 +587,14 @@ test_that("derive_extreme_records Test 7: deprecation of ignore_event_order", { event_joined( join_vars = exprs(AVALC), join_type = "after", - first_cond = AVALC.join == "CR", + first_cond_upper = AVALC.join == "CR", condition = AVALC == "CR", set_values_to = exprs(AVALC = "Y") ), event_joined( join_vars = exprs(AVALC), join_type = "after", - first_cond = AVALC.join %in% c("CR", "PR"), + first_cond_upper = AVALC.join %in% c("CR", "PR"), condition = AVALC == "PR", set_values_to = exprs(AVALC = "Y") ) @@ -605,7 +606,6 @@ test_that("derive_extreme_records Test 7: deprecation of ignore_event_order", { ), class = "lifecycle_warning_deprecated" ) - expected <- bind_rows( adrs, tibble::tribble( @@ -620,3 +620,41 @@ test_that("derive_extreme_records Test 7: deprecation of ignore_event_order", { keys = c("USUBJID", "PARAMCD", "AVISITN") ) }) + +# event_joined ---- +## Test 8: deprecation of `first_cond` ---- +test_that("event_joined Test 8: deprecation of `first_cond`", { + new_event <- event_joined( + join_vars = exprs(AVALC, ADT), + join_type = "after", + first_cond_upper = AVALC.join == "CR" & + ADT.join >= ADT + 28, + condition = AVALC == "CR" & + all(AVALC.join %in% c("CR", "NE")) & + count_vals(var = AVALC.join, 
val = "NE") <= 1, + set_values_to = exprs( + AVALC = "CR" + ) + ) + + expect_warning( + old_event <- event_joined( + join_vars = exprs(AVALC, ADT), + join_type = "after", + first_cond = AVALC.join == "CR" & + ADT.join >= ADT + 28, + condition = AVALC == "CR" & + all(AVALC.join %in% c("CR", "NE")) & + count_vals(var = AVALC.join, val = "NE") <= 1, + set_values_to = exprs( + AVALC = "CR" + ) + ), + class = "lifecycle_warning_deprecated" + ) + + expect_equal( + old_event, + expected = new_event + ) +}) diff --git a/tests/testthat/test-derive_joined.R b/tests/testthat/test-derive_joined.R index f52587cef7..9f5db9bc82 100644 --- a/tests/testthat/test-derive_joined.R +++ b/tests/testthat/test-derive_joined.R @@ -24,6 +24,7 @@ test_that("derive_vars_joined Test 1: no by_vars, no order, no new_vars", { select(expected, USUBJID, ADY), dataset_add = windows, join_vars = exprs(AWHI, AWLO), + join_type = "all", filter_join = AWLO <= ADY & ADY <= AWHI ), keys = c("USUBJID", "ADY") @@ -54,6 +55,7 @@ test_that("derive_vars_joined Test 2: new_vars with rename", { order = exprs(AVAL), new_vars = exprs(NADIR = AVAL), join_vars = exprs(ADY), + join_type = "all", filter_add = ADY > 0, filter_join = ADY.join < ADY, mode = "first", @@ -91,6 +93,7 @@ test_that("derive_vars_joined Test 3: by_vars with rename", { order = exprs(FADT), new_vars = exprs(ATOXGR_pre = FAORRES), join_vars = exprs(FADT), + join_type = "all", filter_join = FADT < TRTSDTM, mode = "last" ), @@ -125,6 +128,7 @@ test_that("derive_vars_joined Test 4: order with expression", { order = exprs(FADT = convert_dtc_to_dt(FADTC)), new_vars = exprs(ATOXGR_pre = FAORRES), join_vars = exprs(FADT), + join_type = "all", filter_join = FADT < TRTSDTM, mode = "last" ), @@ -166,6 +170,7 @@ test_that("derive_vars_joined Test 5: join_vars with expression", { order = exprs(TRSTRESN), new_vars = exprs(AVAL = TRSTRESN), join_vars = exprs(TRDT = convert_dtc_to_dt(TRDTC)), + join_type = "all", filter_join = TRDT <= ADT, mode = "first", 
check_type = "none" @@ -201,6 +206,7 @@ test_that("derive_vars_joined Test 6: no join_vars, no filter_join", { dataset_add = faae, by_vars = exprs(AEGRPID = FAGRPID), order = exprs(FAORRES), + join_type = "all", new_vars = exprs(ATOXGR_pre = FAORRES), mode = "first" ), @@ -232,6 +238,7 @@ test_that("derive_vars_joined Test 7: new_vars expressions using variables from dataset_add = ex, by_vars = exprs(USUBJID), order = exprs(EXSDT = convert_dtc_to_dt(EXSDTC)), + join_type = "all", new_vars = exprs(LSTDSDUR = compute_duration( start_date = EXSDT, end_date = ASTDT )), @@ -251,6 +258,7 @@ test_that("derive_vars_joined Test 8: error if new_vars are already in dataset", myd, dataset_add = myd, order = exprs(day), + join_type = "all", mode = "last", filter_join = day < day.join ), @@ -293,6 +301,7 @@ test_that("derive_vars_joined Test 9: fixing a bug from issue 1966", { # nolint dataset_add = adlb_tbili_pbl, by_vars = exprs(STUDYID, USUBJID), order = exprs(ADTM, ASEQ), + join_type = "all", new_vars = exprs(TBILI_ADT = ADT), filter_join = ADT <= ADT.join, mode = "first" @@ -316,6 +325,7 @@ test_that("derive_vars_joined Test 10: order vars are selected properly in funct dataset_add = myd, new_vars = exprs(first_val = val), join_vars = exprs(day), + join_type = "all", order = exprs(-day), mode = "last", filter_join = day < day.join @@ -334,6 +344,7 @@ test_that("derive_vars_joined Test 10: order vars are selected properly in funct ) }) + ## Test 11: Ensure exist_flag, true/false value arguments work ---- test_that("derive_vars_joined Test 11: Ensure exist_flag, true/false value arguments work", { expected <- tibble::tribble( @@ -359,6 +370,7 @@ test_that("derive_vars_joined Test 11: Ensure exist_flag, true/false value argum select(expected, USUBJID, ADY), dataset_add = windows, join_vars = exprs(AWHI, AWLO), + join_type = "all", filter_join = AWLO <= ADY & ADY <= AWHI, exist_flag = flag, true_value = "Yes", @@ -367,3 +379,49 @@ test_that("derive_vars_joined Test 11: Ensure 
exist_flag, true/false value argum keys = c("USUBJID", "ADY") ) }) + +# get_joined_data ---- +## Test 12: `first_cond_lower` works ---- +test_that("get_joined_data Test 12: `first_cond_lower` works", { + data <- tribble( + ~subj, ~day, ~val, + "1", 1, "++", + "1", 2, "-", + "1", 3, "0", + "1", 4, "+", + "1", 5, "++", + "1", 6, "-", + "2", 1, "-", + "2", 2, "++", + "2", 3, "+", + "2", 4, "0", + "2", 5, "-", + "2", 6, "++" + ) + + expected <- tibble::tribble( + ~day.join, ~val.join, + 2, "++", + 3, "+" + ) %>% + mutate( + subj = "2", + day = 4, + val = "0" + ) + + expect_dfs_equal( + base = expected, + compare = get_joined_data( + data, + dataset_add = data, + by_vars = exprs(subj), + order = exprs(day), + join_vars = exprs(val), + join_type = "before", + first_cond_lower = val.join == "++", + filter_join = val == "0" & all(val.join %in% c("+", "++")) + ), + keys = c("subj", "day.join") + ) +}) diff --git a/tests/testthat/test-derive_var_joined_exist_flag.R b/tests/testthat/test-derive_var_joined_exist_flag.R index 284bae38da..9b0d593947 100644 --- a/tests/testthat/test-derive_var_joined_exist_flag.R +++ b/tests/testthat/test-derive_var_joined_exist_flag.R @@ -24,12 +24,13 @@ test_that("derive_var_joined_exist_flag Test 1: filter without first_cond", { actual <- derive_var_joined_exist_flag( data, + dataset_add = data, new_var = CONFFL, by_vars = exprs(USUBJID), join_vars = exprs(AVALC), join_type = "after", order = exprs(AVISITN), - filter = AVALC == "PR" & AVALC.join %in% c("CR", "PR") + filter_join = AVALC == "PR" & AVALC.join %in% c("CR", "PR") ) expected <- tibble::tribble( @@ -58,7 +59,7 @@ test_that("derive_var_joined_exist_flag Test 1: filter without first_cond", { }) ## Flagging any patient CR value that is followed by a CR -## Test 2 : filter with first_cond --- +## Test 2: filter with first_cond ---- test_that("derive_var_joined_exist_flag Test 2: filter with first_cond", { data <- tibble::tribble( ~USUBJID, ~AVISITN, ~AVALC, @@ -79,14 +80,15 @@ 
test_that("derive_var_joined_exist_flag Test 2: filter with first_cond", { actual <- derive_var_joined_exist_flag( data, + dataset_add = data, new_var = CONFFL, by_vars = exprs(USUBJID), join_vars = exprs(AVALC), join_type = "after", - first_cond = AVALC == "CR" & + first_cond_upper = AVALC == "CR" & AVALC.join == "CR", order = exprs(AVISITN), - filter = TRUE + filter_join = TRUE ) expected <- tibble::tribble( @@ -115,20 +117,20 @@ test_that("derive_var_joined_exist_flag Test 2: filter with first_cond", { ## Flagging any patient PR value that is followed by a CR or PR ## and at most one SD in between -## Test 3:filter with first_cond and summary function --- - +## Test 3: filter with first_cond and summary function ---- test_that("derive_var_joined_exist_flag Test 3: filter with first_cond and summary function", { actual <- derive_var_joined_exist_flag( data, + dataset_add = data, new_var = CONFFL, by_vars = exprs(USUBJID), join_vars = exprs(AVALC), join_type = "after", - first_cond = AVALC == "PR" & + first_cond_upper = AVALC == "PR" & AVALC.join %in% c("CR", "PR"), order = exprs(AVISITN), - filter = count_vals(AVALC.join, "SD") <= 1, + filter_join = count_vals(AVALC.join, "SD") <= 1, false_value = "N" ) @@ -159,9 +161,8 @@ test_that("derive_var_joined_exist_flag Test 3: filter with first_cond and summa ## Flagging observations with a duration longer than 30 and ## on or after 7 days of a COVID AE (ACOVFL == "Y") -## Test 4: join_type = 'all' --- - -test_that("derive_var_joined_exist_flag, Test 4: join_type = 'all'", { +## Test 4: join_type = 'all' ---- +test_that("derive_var_joined_exist_flag Test 4: join_type = 'all'", { adae <- tibble::tribble( ~USUBJID, ~ADY, ~ACOVFL, ~ADURN, "1", 10, "N", 1, @@ -179,12 +180,13 @@ test_that("derive_var_joined_exist_flag, Test 4: join_type = 'all'", { actual <- derive_var_joined_exist_flag( adae, + dataset_add = adae, by_vars = exprs(USUBJID), new_var = ALCOVFL, join_vars = exprs(ACOVFL, ADY), join_type = "all", order = 
exprs(ADY), - filter = ADURN > 30 & ACOVFL.join == "Y" & ADY >= ADY.join - 7 + filter_join = ADURN > 30 & ACOVFL.join == "Y" & ADY >= ADY.join - 7 ) expected <- tibble::tribble( @@ -210,8 +212,8 @@ test_that("derive_var_joined_exist_flag, Test 4: join_type = 'all'", { }) ## Flagging observations with AVALC = Y and an observation with CRIT1FL = Y before -## Test 5: join_type = 'before' --- -test_that("derive_var_joined_exist_flag, Test 5: join_type = 'before'", { +## Test 5: join_type = 'before' ---- +test_that("derive_var_joined_exist_flag Test 5: join_type = 'before'", { data <- tibble::tribble( ~USUBJID, ~ASEQ, ~AVALC, ~CRIT1FL, "1", 1, "Y", "Y", @@ -223,12 +225,13 @@ test_that("derive_var_joined_exist_flag, Test 5: join_type = 'before'", { actual <- derive_var_joined_exist_flag( data, + dataset_add = data, by_vars = exprs(USUBJID), order = exprs(ASEQ), new_var = CONFFL, join_vars = exprs(CRIT1FL), join_type = "before", - filter = AVALC == "Y" & CRIT1FL.join == "Y", + filter_join = AVALC == "Y" & CRIT1FL.join == "Y", false_value = "N" ) @@ -248,8 +251,8 @@ test_that("derive_var_joined_exist_flag, Test 5: join_type = 'before'", { ) }) -## Test 6: tmp_obs_nr_var argument works ---- +## Test 6: tmp_obs_nr_var argument works ---- test_that("derive_var_joined_exist_flag Test 6: tmp_obs_nr_var argument works", { expected <- tibble::tribble( ~USUBJID, ~AVISITN, ~CRIT1FL, ~CONFFL, @@ -265,21 +268,124 @@ test_that("derive_var_joined_exist_flag Test 6: tmp_obs_nr_var argument works", "4", 2, "N", "N" ) - + input <- select(expected, -CONFFL) expect_dfs_equal( base = expected, compare = derive_var_joined_exist_flag( - select(expected, -CONFFL), + input, + dataset_add = input, by_vars = exprs(USUBJID), new_var = CONFFL, tmp_obs_nr_var = tmp_obs_nr, join_vars = exprs(CRIT1FL), join_type = "all", order = exprs(AVISITN), - filter = CRIT1FL == "Y" & CRIT1FL.join == "Y" & + filter_join = CRIT1FL == "Y" & CRIT1FL.join == "Y" & (tmp_obs_nr + 1 == tmp_obs_nr.join | tmp_obs_nr == 
max(tmp_obs_nr.join)), false_value = "N" ), keys = c("USUBJID", "AVISITN") ) }) + +## Test 7: deprecation of `filter` ---- +test_that("derive_var_joined_exist_flag Test 7: deprecation of `filter`", { + expect_warning( + actual <- + derive_var_joined_exist_flag( + data, + dataset_add = data, + new_var = CONFFL, + by_vars = exprs(USUBJID), + join_vars = exprs(AVALC), + join_type = "after", + order = exprs(AVISITN), + filter = AVALC == "PR" & AVALC.join %in% c("CR", "PR") + ), + class = "lifecycle_warning_deprecated" + ) + + expected <- tibble::tribble( + ~USUBJID, ~AVISITN, ~AVALC, ~CONFFL, + "1", 1, "PR", "Y", + "1", 2, "CR", NA_character_, + "1", 3, "CR", NA_character_, + "1", 4, "SD", NA_character_, + "1", 5, "NE", NA_character_, + "2", 1, "SD", NA_character_, + "2", 2, "PR", NA_character_, + "2", 3, "PD", NA_character_, + "3", 1, "SD", NA_character_, + "4", 1, "PR", "Y", + "4", 2, "PD", NA_character_, + "4", 3, "SD", NA_character_, + "4", 4, "SD", NA_character_, + "4", 5, "PR", NA_character_ + ) + + expect_dfs_equal( + base = expected, + compare = actual, + keys = c("USUBJID", "AVISITN") + ) +}) + +## Test 8: deprecation of `first_cond` ---- +test_that("derive_var_joined_exist_flag Test 8: deprecation of `first_cond`", { + data <- tibble::tribble( + ~USUBJID, ~AVISITN, ~AVALC, + "1", 1, "PR", + "1", 2, "CR", + "1", 3, "CR", + "1", 4, "SD", + "1", 5, "NE", + "2", 1, "SD", + "2", 2, "PR", + "2", 3, "PD", + "3", 1, "CR", + "4", 1, "CR", + "4", 2, "SD", + "4", 3, "CR", + "4", 4, "CR" + ) + + expect_warning( + actual <- + derive_var_joined_exist_flag( + data, + dataset_add = data, + new_var = CONFFL, + by_vars = exprs(USUBJID), + join_vars = exprs(AVALC), + join_type = "after", + first_cond = AVALC == "CR" & + AVALC.join == "CR", + order = exprs(AVISITN), + filter_join = TRUE + ), + class = "lifecycle_warning_deprecated" + ) + + expected <- tibble::tribble( + ~USUBJID, ~AVISITN, ~AVALC, ~CONFFL, + "1", 1, "PR", NA_character_, + "1", 2, "CR", "Y", + "1", 3, "CR", 
NA_character_, + "1", 4, "SD", NA_character_, + "1", 5, "NE", NA_character_, + "2", 1, "SD", NA_character_, + "2", 2, "PR", NA_character_, + "2", 3, "PD", NA_character_, + "3", 1, "CR", NA_character_, + "4", 1, "CR", "Y", + "4", 2, "SD", NA_character_, + "4", 3, "CR", "Y", + "4", 4, "CR", NA_character_ + ) + + expect_dfs_equal( + base = expected, + compare = actual, + keys = c("USUBJID", "AVISITN") + ) +}) diff --git a/tests/testthat/test-filter_joined.R b/tests/testthat/test-filter_joined.R index 98e37fdc81..45d8080cc2 100644 --- a/tests/testthat/test-filter_joined.R +++ b/tests/testthat/test-filter_joined.R @@ -17,16 +17,17 @@ data <- tibble::tribble( ) # filter_joined ---- -## Test 1: filter without first_cond ---- -test_that("filter_joined Test 1: filter without first_cond", { +## Test 1: filter without first_cond_upper ---- +test_that("filter_joined Test 1: filter without first_cond_upper", { actual <- filter_joined( data, + dataset_add = data, by_vars = exprs(USUBJID), join_vars = exprs(AVISITN, AVALC), join_type = "after", order = exprs(AVISITN), - filter = AVALC == "PR" & AVALC.join %in% c("CR", "PR") & + filter_join = AVALC == "PR" & AVALC.join %in% c("CR", "PR") & AVISITN < AVISITN.join ) @@ -48,13 +49,14 @@ test_that("filter_joined Test 2: filter with first_cond", { actual <- filter_joined( data, + dataset_add = data, by_vars = exprs(USUBJID), join_vars = exprs(AVALC), join_type = "after", - first_cond = AVALC == "CR" & + first_cond_upper = AVALC == "CR" & AVALC.join == "CR", order = exprs(AVISITN), - filter = TRUE + filter_join = TRUE ) expected <- tibble::tribble( @@ -74,13 +76,14 @@ test_that("filter_joined Test 3: filter with first_cond and summary function", { actual <- filter_joined( data, + dataset_add = data, by_vars = exprs(USUBJID), join_vars = exprs(AVALC), join_type = "after", - first_cond = AVALC == "PR" & + first_cond_upper = AVALC == "PR" & AVALC.join %in% c("CR", "PR"), order = exprs(AVISITN), - filter = count_vals(AVALC.join, "SD") <= 1 
+ filter_join = count_vals(AVALC.join, "SD") <= 1 ) expected <- tibble::tribble( @@ -95,7 +98,7 @@ test_that("filter_joined Test 3: filter with first_cond and summary function", { ) }) -## Test 4: join_type = "all" ---- +## Test 4: join_type = 'all' ---- test_that("filter_joined Test 4: join_type = 'all'", { adae <- tibble::tribble( ~USUBJID, ~ADY, ~ACOVFL, ~ADURN, @@ -113,11 +116,12 @@ test_that("filter_joined Test 4: join_type = 'all'", { actual <- filter_joined( adae, + dataset_add = adae, by_vars = exprs(USUBJID), join_vars = exprs(ACOVFL, ADY), join_type = "all", order = exprs(ADY), - filter = ADURN > 30 & ACOVFL.join == "Y" & ADY >= ADY.join - 7 + filter_join = ADURN > 30 & ACOVFL.join == "Y" & ADY >= ADY.join - 7 ) expected <- tibble::tribble( @@ -133,9 +137,69 @@ test_that("filter_joined Test 4: join_type = 'all'", { ) }) +## Test 5: deprecation of `filter` ---- +test_that("filter_joined Test 5: deprecation of `filter`", { + expect_warning( + actual <- + filter_joined( + data, + dataset_add = data, + by_vars = exprs(USUBJID), + join_vars = exprs(AVISITN, AVALC), + join_type = "after", + order = exprs(AVISITN), + filter = AVALC == "PR" & AVALC.join %in% c("CR", "PR") & + AVISITN < AVISITN.join + ), + class = "lifecycle_warning_deprecated" + ) + + expected <- tibble::tribble( + ~USUBJID, ~AVISITN, ~AVALC, + "1", 1, "PR", + "4", 1, "PR" + ) + + expect_dfs_equal( + base = expected, + compare = actual, + keys = c("USUBJID", "AVISITN") + ) +}) + +## Test 6: deprecation of `first_cond` ---- +test_that("filter_joined Test 6: deprecation of `first_cond`", { + expect_warning( + actual <- + filter_joined( + data, + dataset_add = data, + by_vars = exprs(USUBJID), + join_vars = exprs(AVALC), + join_type = "after", + first_cond = AVALC == "CR" & + AVALC.join == "CR", + order = exprs(AVISITN), + filter_join = TRUE + ), + class = "lifecycle_warning_deprecated" + ) + + expected <- tibble::tribble( + ~USUBJID, ~AVISITN, ~AVALC, + "1", 2, "CR" + ) + + expect_dfs_equal( + base 
= expected, + compare = actual, + keys = c("USUBJID", "AVISITN") + ) +}) + # min_cond ---- -## Test 1: test it ---- -test_that("min_cond, Test 1: test it", { +## Test 7: minimum is derived correctly ---- +test_that("min_cond Test 7: minimum is derived correctly", { data <- tibble::tribble( ~USUBJID, ~AVISITN, ~AVALC, "1", 1, "PR", @@ -172,8 +236,8 @@ test_that("min_cond, Test 1: test it", { }) # max_cond ---- -## Test 1: test it ---- -test_that("max_cond, Test 1: test it", { +## Test 8: maximum is derived correctly ---- +test_that("max_cond Test 8: maximum is derived correctly", { data <- tibble::tribble( ~USUBJID, ~AVISITN, ~AVALC, "1", 1, "PR", diff --git a/vignettes/generic.Rmd b/vignettes/generic.Rmd index 61d6f43d06..f591e69a7a 100644 --- a/vignettes/generic.Rmd +++ b/vignettes/generic.Rmd @@ -250,6 +250,7 @@ adsl_05 <- adsl_04 %>% filter_add = DSDECOD == "RANDOMIZED", by_vars = exprs(STUDYID, USUBJID), new_vars = exprs(RAND30DT = DSSTDT), + join_type = "all", filter_join = DSSTDT >= TRTSDT - 30 ) ``` @@ -286,6 +287,7 @@ ae_01 <- ae %>% by_vars = exprs(USUBJID), new_vars = exprs(DCUTFL), join_vars = exprs(DCUTDY), + join_type = "all", filter_join = AESTDY <= DCUTDY ) ``` @@ -316,6 +318,7 @@ ae_02 <- ae_01 %>% order = exprs(as.integer(factor(AESEV, levels = c("SEVERE", "MODERATE", "MILD")))), new_vars = exprs(AENADSEV = AESEV), join_vars = exprs(AESTDY), + join_type = "all", filter_join = AESTDY.join < AESTDY, mode = "first", check_type = "none" diff --git a/vignettes/hys_law.Rmd b/vignettes/hys_law.Rmd index db34b07a5b..ec22159247 100644 --- a/vignettes/hys_law.Rmd +++ b/vignettes/hys_law.Rmd @@ -115,6 +115,7 @@ hylaw_records <- derive_vars_joined( dataset_add = bili_records, by_vars = exprs(STUDYID, USUBJID), order = exprs(ADY), + join_type = "all", filter_join = 0 <= ADT.join - ADT & ADT.join - ADT <= 14 & CRIT1FL == "Y" & CRIT1FL.join == "Y", new_vars = exprs(BILI_DT = ADT, BILI_CRITFL = CRIT1FL), mode = "first" diff --git a/vignettes/occds.Rmd 
b/vignettes/occds.Rmd index 335610b27d..517ac3cf0d 100644 --- a/vignettes/occds.Rmd +++ b/vignettes/occds.Rmd @@ -242,6 +242,7 @@ adae <- derive_vars_joined( by_vars = exprs(STUDYID, USUBJID), new_vars = exprs(LDOSEDTM = EXSTDTM), join_vars = exprs(EXSTDTM), + join_type = "all", order = exprs(EXSTDTM), filter_add = (EXDOSE > 0 | (EXDOSE == 0 & grepl("PLACEBO", EXTRT))) & !is.na(EXSTDTM), filter_join = EXSTDTM <= ASTDTM, diff --git a/vignettes/pk_adnca.Rmd b/vignettes/pk_adnca.Rmd index 1cabe5c7de..c518aded18 100644 --- a/vignettes/pk_adnca.Rmd +++ b/vignettes/pk_adnca.Rmd @@ -388,6 +388,7 @@ adpc_prev <- adpc_first_dose %>% AENDTM_prev = AENDTM ), join_vars = exprs(ADTM), + join_type = "all", filter_add = NULL, filter_join = ADTM > ADTM.join, mode = "last", @@ -422,6 +423,7 @@ adpc_next <- adpc_prev %>% AENDTM_next = AENDTM ), join_vars = exprs(ADTM), + join_type = "all", filter_add = NULL, filter_join = ADTM <= ADTM.join, mode = "first", @@ -457,6 +459,7 @@ adpc_nom_prev <- adpc_next %>% order = exprs(NFRLT), new_vars = exprs(NFRLT_prev = NFRLT), join_vars = exprs(NFRLT), + join_type = "all", filter_add = NULL, filter_join = NFRLT > NFRLT.join, mode = "last", @@ -472,6 +475,7 @@ adpc_nom_next <- adpc_nom_prev %>% order = exprs(NFRLT), new_vars = exprs(NFRLT_next = NFRLT), join_vars = exprs(NFRLT), + join_type = "all", filter_add = NULL, filter_join = NFRLT <= NFRLT.join, mode = "first", @@ -1025,6 +1029,7 @@ adppk_prev <- adppk_first_dose %>% AENDTM_prev = AENDTM ), join_vars = exprs(ADTM), + join_type = "all", filter_add = NULL, filter_join = ADTM > ADTM.join, mode = "last", @@ -1040,6 +1045,7 @@ adppk_nom_prev <- adppk_prev %>% order = exprs(NFRLT), new_vars = exprs(NFRLT_prev = NFRLT), join_vars = exprs(NFRLT), + join_type = "all", filter_add = NULL, filter_join = NFRLT > NFRLT.join, mode = "last", diff --git a/vignettes/questionnaires.Rmd b/vignettes/questionnaires.Rmd index 5a1b353e70..d6e0761ea4 100644 --- a/vignettes/questionnaires.Rmd +++ 
b/vignettes/questionnaires.Rmd @@ -360,12 +360,13 @@ later: ```{r} adgdssf <- adgdssf %>% derive_var_joined_exist_flag( + dataset_add = adgdssf, by_vars = exprs(USUBJID, PARAMCD), order = exprs(ADT), new_var = CDETFL, join_vars = exprs(CHGCAT1, ADY), join_type = "after", - filter = CHGCAT1 == "WORSENED" & + filter_join = CHGCAT1 == "WORSENED" & CHGCAT1.join == "WORSENED" & ADY.join >= ADY + 7 ) @@ -385,13 +386,14 @@ argument is helpful: # Flagging deterioration at two consecutive assessments adgdssf <- adgdssf %>% derive_var_joined_exist_flag( + dataset_add = adgdssf, by_vars = exprs(USUBJID, PARAMCD), order = exprs(ADT), new_var = CONDETFL, join_vars = exprs(CHGCAT1), join_type = "after", tmp_obs_nr_var = tmp_obs_nr, - filter = CHGCAT1 == "WORSENED" & + filter_join = CHGCAT1 == "WORSENED" & CHGCAT1.join == "WORSENED" & tmp_obs_nr.join == tmp_obs_nr + 1 ) %>% @@ -399,13 +401,15 @@ adgdssf <- adgdssf %>% # - a second deterioration at least 7 days later or # - deterioration at the last assessment and death due to progression derive_var_joined_exist_flag( + ., + dataset_add = ., by_vars = exprs(USUBJID, PARAMCD), order = exprs(ADT), new_var = CDTDTHFL, join_vars = exprs(CHGCAT1, ADY), join_type = "all", tmp_obs_nr_var = tmp_obs_nr, - filter = CHGCAT1 == "WORSENED" & ( + filter_join = CHGCAT1 == "WORSENED" & ( CHGCAT1.join == "WORSENED" & ADY.join >= ADY + 7 | tmp_obs_nr == max(tmp_obs_nr.join) & DTHCAUS == "PROGRESSIVE DISEASE") ) @@ -424,12 +428,13 @@ parameter summary functions like `all()` can be used in the filter condition: ```{r} adgdssf <- adgdssf %>% derive_var_joined_exist_flag( + dataset_add = adgdssf, by_vars = exprs(USUBJID, PARAMCD), order = exprs(ADT), new_var = DEFDETFL, join_vars = exprs(CHGCAT1), join_type = "after", - filter = CHGCAT1 == "WORSENED" & all(CHGCAT1.join == "WORSENED") + filter_join = CHGCAT1 == "WORSENED" & all(CHGCAT1.join == "WORSENED") ) ``` diff --git a/vignettes/visits_periods.Rmd b/vignettes/visits_periods.Rmd index 
f4d5baf4ed..1f9daaeab9 100644 --- a/vignettes/visits_periods.Rmd +++ b/vignettes/visits_periods.Rmd @@ -85,7 +85,8 @@ adbds <- tribble( derive_vars_joined( adbds, dataset_add = windows, - filter_join = AWLO <= ADY & ADY <= AWHI + filter_join = AWLO <= ADY & ADY <= AWHI, + join_type = "all" ) ``` @@ -167,13 +168,18 @@ adae <- tribble( "1", "2021-04-05", "2", "2021-02-15", ) %>% + mutate( + STUDYID = "xyz", + .before = USUBJID + ) %>% mutate(ASTDT = ymd(ASTDT)) derive_vars_joined( adae, dataset_add = phase_ref, - by_vars = exprs(USUBJID), - filter_join = PHSDT <= ASTDT & ASTDT <= PHEDT + by_vars = exprs(STUDYID, USUBJID), + filter_join = PHSDT <= ASTDT & ASTDT <= PHEDT, + join_type = "all" ) ``` @@ -268,6 +274,7 @@ derive_vars_joined( by_vars = exprs(STUDYID, USUBJID), new_vars = exprs(APERIOD, TRTA), join_vars = exprs(APERSDT, APEREDT), + join_type = "all", filter_join = APERSDT <= ASTDT & ASTDT <= APEREDT ) ```