From b6852de3a1fcaedfc7413daafbff1ebee0a04ab9 Mon Sep 17 00:00:00 2001 From: Lore Zumeta Olaskoaga Date: Fri, 4 Nov 2022 00:44:20 +0100 Subject: [PATCH 01/24] edit warn_about_new_time_points.pamm() --- R/warnings.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/warnings.R b/R/warnings.R index 35ec3e69..6003eed0 100644 --- a/R/warnings.R +++ b/R/warnings.R @@ -38,7 +38,7 @@ warn_about_new_time_points.glm <- function(object, newdata, time_var, ...) { warn_about_new_time_points.pamm <- function(object, newdata, ...) { if (inherits(object, "pamm")) { - int_original <- int_info(object) + int_original <- int_info(object)$interval if ("interval" %in% colnames(newdata)) { int_new <- unique(newdata[["interval"]]) if(!all(int_new %in% int_original)) { From 1c8132c2d98b1333ae083e688b2d453cca39b678 Mon Sep 17 00:00:00 2001 From: Lore Zumeta Olaskoaga Date: Fri, 4 Nov 2022 00:46:09 +0100 Subject: [PATCH 02/24] last tidyselect lifecycle changes included --- R/model-evaluation.R | 4 ++-- R/sim-pexp.R | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/model-evaluation.R b/R/model-evaluation.R index b356e146..2a76840c 100644 --- a/R/model-evaluation.R +++ b/R/model-evaluation.R @@ -16,7 +16,7 @@ as.data.frame.crps <- function(x, row.names = NULL, optional = FALSE, ...) { m$method <- attr(x, "dimnames")[[1]] m <- m %>% - pivot_longer(cols = -.data$method, values_to = "IBS") %>% - dplyr::rename(time = .data$name) + pivot_longer(cols = -"method", values_to = "IBS") %>% + dplyr::rename(time = "name") } diff --git a/R/sim-pexp.R b/R/sim-pexp.R index 4b98c74f..bda71b8b 100644 --- a/R/sim-pexp.R +++ b/R/sim-pexp.R @@ -157,7 +157,7 @@ sim_pexp <- function(formula, data, cut) { suppressMessages( sim_df <- sim_df %>% - left_join(select(data, -.data$time, -.data$status)) + left_join(select(data, -"time", -"status")) ) attr(sim_df, "id_var") <- "id" @@ -166,7 +166,7 @@ sim_pexp <- function(formula, data, cut) { attr(sim_df, "tz_var") <- tz_vars attr(sim_df, "cens_value") <- 0 attr(sim_df, "breaks") <- cut - attr(sim_df, "tz") <- imap(tz_vars, ~select(sim_df, .x) %>% + attr(sim_df, "tz") <- imap(tz_vars, ~select(sim_df, all_of(.x)) %>% pull(.x) %>% unique()) %>% flatten() if (exists("ll_funs")) attr(sim_df, "ll_funs") <- ll_funs if (exists("cumu_funs")) attr(sim_df, "cumu_funs") <- cumu_funs From acd278eac8b5220c4fce63794e5cb78898c4c2a3 Mon Sep 17 00:00:00 2001 From: Lore Zumeta Olaskoaga Date: Fri, 4 Nov 2022 13:18:12 +0100 Subject: [PATCH 03/24] minor edit --- R/ggplot-extensions.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ggplot-extensions.R b/R/ggplot-extensions.R index a3e7854a..3fbe3a4a 100644 --- a/R/ggplot-extensions.R +++ b/R/ggplot-extensions.R @@ -11,8 +11,8 @@ #' @seealso #' \code{\link[ggplot2]{geom_ribbon}} \code{geom_stepribbon} #' inherits from \code{geom_ribbon}. -#' @inheritParams ggplot2:::geom_ribbon -#' @inheritParams ggplot2:::geom_step +#' @inheritParams ggplot2::geom_ribbon +#' @inheritParams ggplot2::geom_step #' @examples #' library(ggplot2) #' huron <- data.frame(year = 1875:1972, level = as.vector(LakeHuron)) From 149e0a608616335095cc9637fdcc17f82a5f5c1e Mon Sep 17 00:00:00 2001 From: Lore Zumeta Olaskoaga Date: Fri, 4 Nov 2022 13:18:52 +0100 Subject: [PATCH 04/24] minor edits after document() --- DESCRIPTION | 2 +- man/dplyr_verbs.Rd | 4 ---- man/get_term.Rd | 7 ------- 3 files changed, 1 insertion(+), 12 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f4ba5c91..7d5695df 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -39,6 +39,6 @@ License: MIT + file LICENSE LazyData: true URL: https://adibender.github.io/pammtools/ BugReports: https://github.com/adibender/pammtools/issues -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.1 Encoding: UTF-8 Roxygen: list(markdown = TRUE) diff --git a/man/dplyr_verbs.Rd b/man/dplyr_verbs.Rd index 714d169e..07cce70d 100644 --- a/man/dplyr_verbs.Rd +++ b/man/dplyr_verbs.Rd @@ -97,10 +97,6 @@ the input. Weights are automatically standardised to sum to 1.} \item{.env}{DEPRECATED.} -\item{y}{A pair of data frames, data frame extensions (e.g. a tibble), or -lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for -more details.} - \item{by}{A character vector of variables to join by. If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all diff --git a/man/get_term.Rd b/man/get_term.Rd index 1eff8c94..f78f8d0a 100644 --- a/man/get_term.Rd +++ b/man/get_term.Rd @@ -14,13 +14,6 @@ first row will be used.} \item{term}{The (non-linear) model term of interest.} -\item{n}{Specify the output sequence either by supplying the -length of the sequence with \code{n}, or the spacing between value -with \code{by}. Specifying both is an error. - -I recommend that you name these arguments in order to make it clear to -the reader.} - \item{...}{Further arguments passed to \code{\link{seq_range}}.} } \description{ From 1832dd6982d6d55d40e026a56a66a7a070b15967 Mon Sep 17 00:00:00 2001 From: Lore Zumeta Olaskoaga Date: Fri, 4 Nov 2022 15:32:06 +0100 Subject: [PATCH 05/24] as_ped() recurrent events + tdc data --- R/as-ped.R | 214 +++++++++++++++++++++++++++++------------------------ 1 file changed, 116 insertions(+), 98 deletions(-) diff --git a/R/as-ped.R b/R/as-ped.R index ae57f6ad..d1456144 100644 --- a/R/as-ped.R +++ b/R/as-ped.R @@ -53,57 +53,57 @@ as_ped <- function(data, ...) { #' @rdname as_ped #' @export as_ped.data.frame <- function( - data, - formula, - cut = NULL, - max_time = NULL, - tdc_specials = c("concurrent", "cumulative"), - censor_code = 0L, - transition = character(), - timescale = c("gap", "calendar"), - min_events = 1L, - ...) { - + data, + formula, + cut = NULL, + max_time = NULL, + tdc_specials = c("concurrent", "cumulative"), + censor_code = 0L, + transition = character(), + timescale = c("gap", "calendar"), + min_events = 1L, + ...) { + status_error(data, formula) assert_subset(tdc_specials, c("concurrent", "cumulative")) - + if (test_character(transition, min.chars = 1L, min.len = 1L)) { ped <- as_ped_recurrent(data = data, formula = formula, cut = cut, - max_time = max_time, tdc_specials = tdc_specials, censor_code = censor_code, - transition = transition, timescale = timescale, min_events = min_events, ... ) + max_time = max_time, tdc_specials = tdc_specials, censor_code = censor_code, + transition = transition, timescale = timescale, min_events = min_events, ... ) return(ped) } - + event_types <- get_event_types(data, formula, censor_code) if (length(event_types) > 1) { - + ped <- as_ped_cr(data = data, formula = formula, cut = cut, max_time = max_time, - tdc_specials = tdc_specials, censor_code = censor_code, ...) - + tdc_specials = tdc_specials, censor_code = censor_code, ...) + } else { - + dots <- list(...) dots$data <- data dots$formula <- get_ped_form(formula, data = data, tdc_specials = tdc_specials) dots$cut <- cut dots$max_time <- max_time - + ped <- do.call(split_data, dots) attr(ped, "time_var") <- get_lhs_vars(dots$formula)[1] attr(ped, "status_var") <- get_lhs_vars(dots$formula)[2] - + } - + ped - + } #' @rdname as_ped #' @export as_ped.nested_fdf <- function(data, formula, ...) { - + status_error(data, formula) - + dots <- list(...) # update interval break points (if necessary) cut <- dots$cut @@ -112,80 +112,96 @@ as_ped.nested_fdf <- function(data, formula, ...) { } ccr_breaks <- attr(data, "ccr_breaks") cut <- union(cut, ccr_breaks[ccr_breaks <= max(cut)]) %>% sort() - + ped <- data %>% select_if(is.atomic) %>% as.data.frame() %>% as_ped( - formula = formula, - id = dots$id, - cut = cut, - max_time = dots$max_time) - + formula = formula, + id = dots$id, + cut = cut, + max_time = dots$max_time, + transition = dots$transition, + timescale = dots$timescale) ## NEW ---- + # replace updated attributes attr(data, "breaks") <- attr(ped, "breaks") - attr(data, "id_n") <- ped %>% group_by(!!sym(attr(data, "id_var"))) %>% - summarize(id_n = n()) %>% pull("id_n") %>% as_vector() - attr(data, "id_tseq") <- ped %>% group_by(!!sym(attr(data, "id_var"))) %>% - transmute(id_tseq = row_number()) %>% pull("id_tseq") %>% as_vector() - attr(data, "id_tz_seq") <- rep(seq_len(nrow(data)), - times = attr(data, "id_n")) - + if (test_character(dots$transition, min.chars = 1L, min.len = 1L)) { + attr(data, "id_n") <- ped %>% group_by(!!sym(attr(data, "id_var")), + .data[[dots$transition]]) %>% + summarize(id_n = n()) %>% pull("id_n") %>% as_vector() + attr(data, "id_tseq") <- ped %>% group_by(!!sym(attr(data, "id_var")), + .data[[dots$transition]]) %>% + transmute(id_tseq = row_number()) %>% pull("id_tseq") %>% as_vector() + attr(data, "id_tz_seq") <- rep(seq_len(nrow(data)), + times = attr(data, "id_n")) + } else { + attr(data, "id_n") <- ped %>% group_by(!!sym(attr(data, "id_var"))) %>% + summarize(id_n = n()) %>% pull("id_n") %>% as_vector() + attr(data, "id_tseq") <- ped %>% group_by(!!sym(attr(data, "id_var"))) %>% + transmute(id_tseq = row_number()) %>% pull("id_tseq") %>% as_vector() + attr(data, "id_tz_seq") <- rep(seq_len(nrow(data)), + times = attr(data, "id_n")) + } + ## NEW ---- + ## + + if (has_special(formula, "concurrent")) { ped <- ped %>% add_concurrent(data = data, id_var = dots$id) } - + if (has_special(formula, "cumulative")) { ped <- add_cumulative(ped, data = data, formula = formula) attr(ped, "ll_weights") <- imap(attr(ped, "tz"), - ~bind_cols(!!.y := .x, ll_weight = c(mean(abs(diff(.x))), abs(diff(.x))))) + ~bind_cols(!!.y := .x, ll_weight = c(mean(abs(diff(.x))), abs(diff(.x))))) class(ped) <- c("fped", class(ped)) } attr(ped, "time_var") <- get_lhs_vars(formula)[1] attr(ped, "func_mat_names") <- make_mat_names( - attr(ped, "func"), - attr(ped, "time_var")) - + attr(ped, "func"), + attr(ped, "time_var")) + ped - + } #' @rdname as_ped #' @export as_ped.list <- function( - data, - formula, - tdc_specials = c("concurrent", "cumulative"), - ...) { - + data, + formula, + tdc_specials = c("concurrent", "cumulative"), + ...) { + assert_class(data, "list") assert_class(formula, "formula") - + status_error(data[[1]], formula) - + nl <- length(data) # form <- Formula(formula) has_tdc <- has_tdc_form(formula, tdc_specials = tdc_specials) - + if (nl == 1 & !has_tdc) { ped <- data[[1]] %>% as_ped(formula = formula, tdc_specials = tdc_specials, ...) } else { if (nl == 2 & !has_tdc) { - stop("Two data sets provided in 'data' but no specification of + stop("Two data sets provided in 'data' but no specification of time-dependent covariate effects in 'formula'") } else { - + nested_fdf <- nest_tdc(data, formula, ...) ped <- as_ped(nested_fdf, formula, ...) - + } } lhs_vars <- get_lhs_vars(formula) attr(ped, "time_var") <- lhs_vars[1] attr(ped, "trafo_args")$formula <- formula - + ped - + } #' @rdname as_ped @@ -199,15 +215,15 @@ is.ped <- function(x) inherits(x, "ped") #' variables that were used to create the PED object (code{data}). #' @export as_ped.ped <- function(data, newdata, ...) { - + if (is.ped(newdata)) { stop("newdata already in ped format.") } - + trafo_args <- attr(data, "trafo_args") trafo_args[["data"]] <- newdata do.call(as_ped, trafo_args) - + } @@ -215,14 +231,14 @@ as_ped.ped <- function(data, newdata, ...) { #' @rdname as_ped #' @export as_ped.pamm <- function(data, newdata, ...) { - + if (is.ped(newdata)) { stop("newdata already in ped format.") } trafo_args <- data[["trafo_args"]] trafo_args$data <- newdata do.call(split_data, trafo_args) - + } @@ -232,18 +248,18 @@ as_ped.pamm <- function(data, newdata, ...) { #' #' @keywords internal as_ped_cr <- function( - data, - formula, - cut = NULL, - max_time = NULL, - tdc_specials = c("concurrent", "cumulative"), - censor_code = 0L, - combine = TRUE, - ...) { - + data, + formula, + cut = NULL, + max_time = NULL, + tdc_specials = c("concurrent", "cumulative"), + censor_code = 0L, + combine = TRUE, + ...) { + lhs_vars <- get_lhs_vars(formula) event_types <- get_event_types(data, formula, censor_code) - + cut <- map2( event_types, if(is.list(cut)) cut else list(cut), @@ -254,7 +270,7 @@ as_ped_cr <- function( if(length(cut) > 1 & combine) { cut <- list(reduce(cut, union)) } - + ped <- map2( event_types, cut, @@ -270,7 +286,7 @@ as_ped_cr <- function( ped_i$cause <- .event ped_i }) - + if (combine) { ped <- do.call(rbind, ped) class(ped) <- c("ped_cr_union", "ped_cr", class(ped)) @@ -282,14 +298,14 @@ as_ped_cr <- function( attributes(ped)$trafo_args$id <- attributes(ped[[1]])$trafo_args$id attributes(ped)$trafo_args$formula <- formula } - + attr(ped, "trafo_args")[["cut"]] <- if (length(cut) ==1) unlist(cut) else cut attr(ped, "trafo_args")[["combine"]] <- combine attr(ped, "trafo_args")[["censor_code"]] <- censor_code attr(ped, "risks") <- event_types - + ped - + } #' Exctract event types @@ -302,11 +318,11 @@ as_ped_cr <- function( #' #' @keywords internal get_event_types <- function(data, formula, censor_code) { - + lhs_vars <- get_lhs_vars(formula) status_values <- unique(data[[lhs_vars[length(lhs_vars)]]]) %>% sort() status_values[status_values != censor_code] - + } @@ -328,42 +344,44 @@ get_event_types <- function(data, formula, censor_code) { #' @export #' @keywords internal as_ped_recurrent <- function( - data, - formula, - cut = NULL, - max_time = NULL, - tdc_specials = c("concurrent", "cumulative"), - censor_code = 0L, - transition = character(), - timescale = c("gap", "calendar"), - min_events = 1L, - ... + data, + formula, + cut = NULL, + max_time = NULL, + tdc_specials = c("concurrent", "cumulative"), + censor_code = 0L, + transition = character(), + timescale = c("gap", "calendar"), + min_events = 1L, + ... ) { - + assert_character(transition, min.chars = 1L, min.len = 1L, any.missing = FALSE, - len = 1L) + len = 1L) assert_integer(min_events, lower = 1L, len = 1L) - + status_error(data, formula) assert_subset(tdc_specials, c("concurrent", "cumulative")) - + rhs_vars <- get_rhs_vars(formula) if (!(transition %in% rhs_vars)) { + formula <- get_ped_form(formula, data = data, tdc_specials = tdc_specials) ## NEW---- formula <- add_to_rhs(formula, transition) } - + dots <- list(...) dots$data <- data - dots$formula <- get_ped_form(formula, data = data, tdc_specials = tdc_specials) + # dots$formula <- get_ped_form(formula, data = data, tdc_specials = tdc_specials) + dots$formula <- formula dots$cut <- cut dots$max_time <- max_time dots$transition <- transition dots$min_events <- min_events dots$timescale <- timescale - + ped <- do.call(split_data_recurrent, dots) attr(ped, "time_var") <- get_lhs_vars(dots$formula)[1] - + return(ped) - + } From 16d45a58b4f907d5f377d8ce7350c2bfbd7e385e Mon Sep 17 00:00:00 2001 From: Lore Zumeta Olaskoaga Date: Tue, 15 Nov 2022 17:08:06 +0100 Subject: [PATCH 06/24] minor change to avoid "tidy evaluation" warning --- R/viz-elra.R | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/R/viz-elra.R b/R/viz-elra.R index f3494a61..38bd2f09 100644 --- a/R/viz-elra.R +++ b/R/viz-elra.R @@ -32,7 +32,7 @@ gg_partial <- function(data, model, term, ..., reference = NULL, ci = TRUE) { # ndf <- ndf %>% mutate_at(vars[-1], ~as.factor(.x)) n_vars <- length(vars) - gg_base <- ggplot(ndf, aes_string(x = vars[1])) + xlab(vars[1]) + gg_base <- ggplot(ndf, aes(x = .data[[vars[1]]])) + xlab(vars[1]) if (n_vars == 1) { gg_out <- gg_base + geom_ribbon(aes_string(ymin = "ci_lower", ymax = "ci_upper"), @@ -40,7 +40,7 @@ gg_partial <- function(data, model, term, ..., reference = NULL, ci = TRUE) { geom_line(aes_string(y = "fit")) } else { # if (n_vars == 2) { - gg_out <- gg_base + aes_string(y = vars[2], z = "fit") + + gg_out <- gg_base + aes(y = .data[[vars[2]]], z = "fit") + geom_tile(aes_string(fill = "fit")) + geom_contour(col = "grey30") + scale_y_continuous(expand = c(0, 0)) + @@ -94,7 +94,7 @@ gg_partial_ll <- function( levels = c("ci_lower", "fit", "ci_upper"))) } - gg_base <- ggplot(ll_df, aes_string(x = "intmid", y = tz_var)) + + gg_base <- ggplot(ll_df, aes(x = .data[["intmid"]], y = tz_var)) + geom_tile(aes_string(fill = "fit"), colour = "grey30") + scale_fill_gradient2(high = "firebrick2", low = "steelblue", na.value = "grey30") + @@ -142,7 +142,7 @@ gg_slice <- function(data, model, term, ..., reference = NULL, ci = TRUE) { ndf <- ndf %>% mutate_at(vars[-1], ~as.factor(.x)) n_vars <- length(vars) - gg_out <- ggplot(ndf, aes_string(x = vars[1], y = "fit")) + gg_out <- ggplot(ndf, aes(x = .data[[vars[1]]], y = "fit")) if (ci) { gg_out <- gg_out + geom_ribbon(aes_string(ymin = "ci_lower", ymax = "ci_upper"), alpha = 0.3) @@ -150,11 +150,11 @@ gg_slice <- function(data, model, term, ..., reference = NULL, ci = TRUE) { gg_out <- gg_out + geom_line() if (n_vars > 1) { if(ci) { - gg_out <- gg_out + aes_string(group = vars[2], fill = vars[2]) + - geom_line(aes_string(col = vars[2])) + gg_out <- gg_out + aes(group = .data[[vars[2]]], fill = .data[[vars[2]]]) + + geom_line(aes(col = .data[[vars[2]]])) } else { - gg_out <- gg_out + aes_string(group = vars[2]) + - geom_line(aes_string(col = vars[2])) + gg_out <- gg_out + aes(group = .data[[vars[2]]]) + + geom_line(aes(col = .data[[vars[2]]])) } if (n_vars > 2) { form <- as.formula(paste0("~", vars[-1:-2], collapse = "+")) @@ -176,10 +176,10 @@ gg_cumu_eff <- function(data, model, term, z1, z2=NULL, se_mult = 2, ci = TRUE) cumu_eff_df <- get_cumu_eff(data, model, term, z1, z2, se_mult) - gg_out <- ggplot(cumu_eff_df, aes_string(x = "tend", y = "cumu_eff")) + gg_out <- ggplot(cumu_eff_df, aes(x = .data[["tend"]], y = .data[["cumu_eff"]])) if (ci) { gg_out <- gg_out + - geom_ribbon(aes_string(ymin = "cumu_eff_lower", ymax = "cumu_eff_upper"), + geom_ribbon(aes(ymin = .data[["cumu_eff_lower"]], ymax = .data[["cumu_eff_upper"]]), alpha = 0.3) } From 2a30cfcc8e61c4f19a63917c29664c2b2a8e002a Mon Sep 17 00:00:00 2001 From: Lore Zumeta Olaskoaga Date: Tue, 15 Nov 2022 17:14:12 +0100 Subject: [PATCH 07/24] concurrent() editted --- R/formula-specials.R | 214 ++++++++++++++++++++++++------------------- 1 file changed, 122 insertions(+), 92 deletions(-) diff --git a/R/formula-specials.R b/R/formula-specials.R index 3b04b71a..9b6b7d23 100644 --- a/R/formula-specials.R +++ b/R/formula-specials.R @@ -31,14 +31,14 @@ #' @export #' @keywords internal cumulative <- function(..., - tz_var, - ll_fun = function(t, tz) t >= tz, - suffix = NULL) { - + tz_var, + ll_fun = function(t, tz) t >= tz, + suffix = NULL) { + vars <- as.list(substitute(list(...)))[-1] vars_chr <- vars %>% map(~as.character(.)) lgl_latency <- map_lgl(vars_chr, ~any(. %in% "latency")) - + if (any(lgl_latency)) { latency_var <- unlist(vars_chr)[unlist(vars_chr) != "latency"][lgl_latency] col_vars <- unlist(vars_chr)[unlist(vars_chr) != "latency"] @@ -46,14 +46,14 @@ cumulative <- function(..., latency_var <- "" col_vars <- unlist(vars_chr) } - + list( col_vars = col_vars, latency_var = latency_var, tz_var = tz_var, suffix = suffix, ll_fun = ll_fun) - + } @@ -61,23 +61,23 @@ cumulative <- function(..., #' @inherit cumulative #' @keywords internal concurrent <- function(..., - tz_var, - lag = 0, - suffix = NULL) { - + tz_var, + lag = 0, + suffix = NULL) { + assert_number(lag, lower = 0) - ll_fun = function(t, tz) {t > tz + lag} + ll_fun = function(t, tz) {t >= tz + lag} vars <- as.list(substitute(list(...)))[-1] vars_chr <- vars %>% map(~as.character(.)) %>% unlist() - - + + list( col_vars = vars_chr, tz_var = tz_var, suffix = suffix, ll_fun = ll_fun, lag = lag) - + } @@ -97,35 +97,35 @@ concurrent <- function(..., #' @importFrom stats terms #' @keywords internal get_cumulative <- function(data, formula) { - + stopifnot(has_tdc_form(formula)) - + func_list <- eval_special(get_tdc_form(formula, data = data), data = data) - + n_func <- length(func_list) ll_funs <- map(func_list, ~.x[["ll_fun"]]) tz_vars <- map(func_list, ~.x[["tz_var"]]) tz <- map(tz_vars, ~pull(data, .x) %>% unlist() %>% unique() %>% sort()) - + names(tz) <- names(tz_vars) <- names(ll_funs) <- tz_vars - + ## create matrices func_mats <- map(func_list, - ~ expand_cumulative(data = data, ., n_func = n_func)) %>% + ~ expand_cumulative(data = data, ., n_func = n_func)) %>% flatten() - + list( func_list = func_list, func_mats = func_mats, ll_funs = ll_funs, tz_vars = tz_vars, tz = tz) - + } #' @keywords internal eval_special <- function(formula, special="cumulative", data = NULL) { - + tf <- terms(formula, specials = special, data = data) ind_special <- attr(tf, "specials")[[special]] # extract components @@ -135,7 +135,7 @@ eval_special <- function(formula, special="cumulative", data = NULL) { } else { NULL } - + } @@ -145,9 +145,9 @@ eval_special <- function(formula, special="cumulative", data = NULL) { #' \code{formula} should be checked #' @keywords internal has_special <- function(formula, special = "cumulative") { - + has_tdc_form(formula, tdc_specials = special) - + } #' @rdname get_cumulative @@ -156,7 +156,7 @@ has_special <- function(formula, special = "cumulative") { #' @importFrom purrr map invoke_map #' @keywords internal expand_cumulative <- function(data, func, n_func) { - + col_vars <- func$col_vars tz_var <- func$tz_var tz <- pull(data, tz_var) %>% unlist() %>% unique() %>% sort() @@ -175,7 +175,7 @@ expand_cumulative <- function(data, func, n_func) { } else { nz <- ncols_vars[1] } - + # create list of matrices for covariates/time matrices provided in func hist_mats <- list() for (i in seq_along(col_vars)) { @@ -187,21 +187,21 @@ expand_cumulative <- function(data, func, n_func) { make_z_mat(data, col_vars[i], nz) } } - + if (any(c(time_var, tz_var) %in% col_vars)) { hist_mats <- c(hist_mats, list(make_lag_lead_mat(data, tz, func$ll_fun))) names(hist_mats) <- make_mat_names(c(col_vars, "LL"), func$latency_var, - tz_var, func$suffix, n_func) + tz_var, func$suffix, n_func) time_mat_ind <- grepl(time_var, names(hist_mats)) names(hist_mats)[time_mat_ind] <- paste0(names(hist_mats)[time_mat_ind], - "_mat") + "_mat") } else { names(hist_mats) <- make_mat_names(col_vars, func$latency_var, tz_var, - func$suffix, n_func) + func$suffix, n_func) } - + hist_mats - + } #' Extract information on concurrent effects @@ -218,9 +218,9 @@ prep_concurrent <- function(x, formula, ...) { #' @inherit prep_concurrent #' @keywords internal prep_concurrent.list <- function(x, formula, ...) { - + lgl_concurrent <- has_special(formula, "concurrent") - + if (lgl_concurrent) { ccr_list <- eval_special(formula, special = "concurrent", x[[2]]) ccr_tz_vars <- map_chr(ccr_list, ~.x[["tz_var"]]) %>% unique() @@ -236,11 +236,11 @@ prep_concurrent.list <- function(x, formula, ...) { # should just start modeling the hazard at t = lag?!? reduce(union) %>% sort() } - + list( ccr_list = ccr_list, ccr_time = ccr_time) - + } @@ -256,40 +256,70 @@ get_tz <- function(data, tz_var) { #' @keywords internal #' @importFrom purrr map2 -add_concurrent <- function(ped, data, id_var) { - +add_concurrent <- function(ped, data, id_var, ...) { + ccr <- attr(data, "ccr") - - ped_split <- split(ped$tend, f = ped[[id_var]]) - + + dots <- list(...) + if (any(dots$transition %in% names(ped))) { + if(dots$timescale == "gap") { + ## create an auxiliary 'tend_aux' column, the not reset 'tend' + ped <- ped %>% + group_by(.data[[id_var]]) %>% + mutate(tend_aux = lag(tend, default = 0)) %>% + ungroup() %>% + group_by(.data[[id_var]], .data[[dots$transition]]) %>% + mutate(tend_aux = first(tend_aux), + tend_aux = tend + tend_aux) %>% + ungroup() + } else { + ped <- mutate(ped, tend_aux = tend) + } + ped_split <- split(ped$tend_aux, f = list(ped[[id_var]], ped[[transition]]), + drop = TRUE) + } else { + ped_split <- split(ped$tend, f = ped[[id_var]]) + } + for (ccr_i in ccr[["ccr_list"]]) { tdc_vars_i <- ccr_i[["col_vars"]] tz_var_i <- ccr_i[["tz_var"]] ccr_vars_i <- c(tz_var_i, tdc_vars_i) - ccr_i_df <- data %>% - select(one_of(c(id_var, ccr_vars_i))) - ccr_i_df <- ccr_i_df %>% unnest(cols = -one_of(id_var)) - - li <- map2(ped_split, split(ccr_i_df, f = ccr_i_df[[id_var]]), - function(.x, .y) { - ll_ind <- rowSums(outer(.x, .y[[tz_var_i]], ccr_i$ll_fun)) - .y[ll_ind, tdc_vars_i] - }) %>% bind_rows() %>% as.data.frame() - + if (any(dots$transition %in% names(ped))) { + ccr_i_df <- data %>% + select(one_of(c(id_var, transition, ccr_vars_i))) %>% + unnest(cols = -one_of(id_var)) + ccr_i_df_split <- split(ccr_i_df, + f = list(ccr_i_df[[id_var]], + ccr_i_df[[transition]]), drop = TRUE) + } else { + ccr_i_df <- data %>% + select(one_of(c(id_var, ccr_vars_i))) %>% + unnest(cols = -one_of(id_var)) + + ccr_i_df_split <- split(ccr_i_df, f = ccr_i_df[[id_var]]) + } + + li <- map2(ped_split, ccr_i_df_split, + function(.x, .y) { + ll_ind <- rowSums(outer(.x, .y[[tz_var_i]], ccr_i$ll_fun)) + .y[ll_ind, tdc_vars_i] + }) %>% bind_rows() %>% as.data.frame() + ped <- ped %>% bind_cols(li) - + if (any(dots$transition %in% names(ped))) ped$tend_aux <- NULL } - + attr(ped, "ccr") <- ccr - + ped - - + + } #' @keywords internal add_cumulative <- function(ped, data, formula) { - + func_components <- get_cumulative(data, formula) func_matrices <- func_components$func_mats for (i in seq_along(func_matrices)) { @@ -299,9 +329,9 @@ add_cumulative <- function(ped, data, formula) { attr(ped, "ll_funs") <- func_components$ll_funs attr(ped, "tz") <- func_components$tz attr(ped, "tz_vars") <- func_components$tz_vars - + ped - + } make_mat_names <- function(x, ...) { @@ -310,12 +340,12 @@ make_mat_names <- function(x, ...) { #' @keywords internal make_mat_names.default <- function( - col_vars, - latency_var = NULL, - tz_var = NULL, - suffix = NULL, - nfunc = 1) { - + col_vars, + latency_var = NULL, + tz_var = NULL, + suffix = NULL, + nfunc = 1) { + if (!is.null(suffix)) { return(paste(col_vars, suffix, sep = "_")) } else { @@ -326,28 +356,28 @@ make_mat_names.default <- function( if (!is.null(latency_var)) { latency_ind <- col_vars == latency_var col_vars[latency_ind] <- paste(col_vars[latency_ind], "latency", - sep = "_") + sep = "_") } } - + return(col_vars) - + } #' @keywords internal make_mat_names.list <- function(func_list, time_var) { hist_names <- map(func_list, ~ make_mat_names(c(.x[["col_vars"]], "LL"), - .x[["latency_var"]], .x[["tz_var"]], .x[["suffix"]], - nfunc = length(func_list))) - + .x[["latency_var"]], .x[["tz_var"]], .x[["suffix"]], + nfunc = length(func_list))) + time_mat_ind <- map(hist_names, ~grepl(time_var, .)) for (i in seq_along(time_mat_ind)) { hist_names[[i]][time_mat_ind[[i]]] <- paste0(hist_names[[i]][time_mat_ind[[i]]], "_mat") } - + hist_names - + } #' Create matrix components for cumulative effects @@ -360,42 +390,42 @@ make_mat_names.list <- function(func_list, time_var) { #' #' @keywords internal make_time_mat <- function(data, nz) { - + brks <- attr(data, "breaks") id_tseq <- attr(data, "id_tseq") Tmat <- matrix(brks[id_tseq], nrow = length(id_tseq), ncol = nz) Tmat - + } #' @rdname elra_matrix #' @inherit make_time_mat #' @keywords internal make_latency_mat <- function(data, tz) { - + time <- attr(data, "breaks") id_tseq <- attr(data, "id_tseq") Latency_mat <- outer(time, tz, FUN = "-") Latency_mat[Latency_mat < 0] <- 0 Latency_mat[id_tseq, , drop = FALSE] - + } #' @rdname elra_matrix #' @inherit make_time_mat #' @keywords internal make_lag_lead_mat <- function( - data, - tz, - ll_fun = function(t, tz) t >= tz) { - + data, + tz, + ll_fun = function(t, tz) t >= tz) { + LL <- outer(attr(data, "breaks"), tz, FUN = ll_fun) * 1L delta <- abs(diff(tz)) IW <- matrix(c(mean(delta), delta), ncol = length(tz), nrow = nrow(LL), - byrow = TRUE) + byrow = TRUE) LL <- LL * IW LL[attr(data, "id_tseq"), , drop = FALSE] - + } #' @rdname elra_matrix @@ -406,20 +436,20 @@ make_lag_lead_mat <- function( #' @importFrom dplyr pull #' @keywords internal make_z_mat <- function(data, z_var, nz, ...) { - + tz_ind <- seq_len(nz) Z <- map(data[[z_var]], .f = ~ unlist(.x)[tz_ind]) Z <- do.call(rbind, Z) colnames(Z) <- paste0(z_var, tz_ind) Z[is.na(Z)] <- 0 Z[attr(data, "id_tz_seq"), , drop = FALSE] - - } + +} get_ncols <- function(data, col_vars) { - + map(col_vars, ~pull(data, .x) %>% map_int(function(z) ifelse(is.atomic(z), length(z), nrow(z)))) %>% - map_int(max) - + map_int(max) + } From 34b20d96bff2b74ce0f0752c54c9023343affd45 Mon Sep 17 00:00:00 2001 From: Lore Zumeta Olaskoaga Date: Tue, 15 Nov 2022 17:16:51 +0100 Subject: [PATCH 08/24] concurrent() editted --- R/formula-specials.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/formula-specials.R b/R/formula-specials.R index 9b6b7d23..dfa3e04d 100644 --- a/R/formula-specials.R +++ b/R/formula-specials.R @@ -66,7 +66,7 @@ concurrent <- function(..., suffix = NULL) { assert_number(lag, lower = 0) - ll_fun = function(t, tz) {t >= tz + lag} + ll_fun = function(t, tz) {t > tz + lag} ## Question: t >= tz + lag? vars <- as.list(substitute(list(...)))[-1] vars_chr <- vars %>% map(~as.character(.)) %>% unlist() From 2525b0aeb0c168e7aa27c09909ee81572fa83925 Mon Sep 17 00:00:00 2001 From: Lore Zumeta Olaskoaga Date: Tue, 15 Nov 2022 17:20:58 +0100 Subject: [PATCH 09/24] minor change to avoid "tidy evaluation" warning --- R/viz-elra.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/viz-elra.R b/R/viz-elra.R index 38bd2f09..d7dc3927 100644 --- a/R/viz-elra.R +++ b/R/viz-elra.R @@ -35,13 +35,13 @@ gg_partial <- function(data, model, term, ..., reference = NULL, ci = TRUE) { gg_base <- ggplot(ndf, aes(x = .data[[vars[1]]])) + xlab(vars[1]) if (n_vars == 1) { gg_out <- gg_base + - geom_ribbon(aes_string(ymin = "ci_lower", ymax = "ci_upper"), + geom_ribbon(aes(ymin = .data[["ci_lower"]], ymax = .data[["ci_upper"]]), alpha = 0.3) + - geom_line(aes_string(y = "fit")) + geom_line(aes(y = .data[["fit"]])) } else { # if (n_vars == 2) { gg_out <- gg_base + aes(y = .data[[vars[2]]], z = "fit") + - geom_tile(aes_string(fill = "fit")) + + geom_tile(aes(fill = .data[["fit"]])) + geom_contour(col = "grey30") + scale_y_continuous(expand = c(0, 0)) + scale_x_continuous(expand = c(0, 0)) + @@ -95,7 +95,7 @@ gg_partial_ll <- function( } gg_base <- ggplot(ll_df, aes(x = .data[["intmid"]], y = tz_var)) + - geom_tile(aes_string(fill = "fit"), colour = "grey30") + + geom_tile(aes(fill = .data[["fit"]]), colour = "grey30") + scale_fill_gradient2(high = "firebrick2", low = "steelblue", na.value = "grey30") + scale_x_continuous(expand = c(0, 0)) + @@ -145,7 +145,7 @@ gg_slice <- function(data, model, term, ..., reference = NULL, ci = TRUE) { gg_out <- ggplot(ndf, aes(x = .data[[vars[1]]], y = "fit")) if (ci) { gg_out <- gg_out + - geom_ribbon(aes_string(ymin = "ci_lower", ymax = "ci_upper"), alpha = 0.3) + geom_ribbon(aes(ymin = .data[["ci_lower"]], ymax = .data[["ci_upper"]]), alpha = 0.3) } gg_out <- gg_out + geom_line() if (n_vars > 1) { From 334a5edd90503387706485c6315c59d46c0fa919 Mon Sep 17 00:00:00 2001 From: Lore Zumeta Olaskoaga Date: Tue, 15 Nov 2022 17:21:33 +0100 Subject: [PATCH 10/24] no change (indentation) --- man/geom_hazard.Rd | 18 +++++++++++------- man/geom_stepribbon.Rd | 18 +++++++++++------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/man/geom_hazard.Rd b/man/geom_hazard.Rd index eed37d9b..0fc1f85b 100644 --- a/man/geom_hazard.Rd +++ b/man/geom_hazard.Rd @@ -45,10 +45,10 @@ geom_surv( ) } \arguments{ -\item{mapping}{Set of aesthetic mappings created by \code{\link[ggplot2:aes]{aes()}} or -\code{\link[ggplot2:aes_]{aes_()}}. If specified and \code{inherit.aes = TRUE} (the -default), it is combined with the default mapping at the top level of the -plot. You must supply \code{mapping} if there is no plot mapping.} +\item{mapping}{Set of aesthetic mappings created by \code{\link[ggplot2:aes]{aes()}}. If specified and +\code{inherit.aes = TRUE} (the default), it is combined with the default mapping +at the top level of the plot. You must supply \code{mapping} if there is no plot +mapping.} \item{data}{The data to be displayed in this layer. There are three options: @@ -66,10 +66,14 @@ will be used as the layer data. A \code{function} can be created from a \code{formula} (e.g. \code{~ head(.x, 10)}).} \item{stat}{The statistical transformation to use on the data for this -layer, as a string.} +layer, either as a \code{ggproto} \code{Geom} subclass or as a string naming the +stat stripped of the \code{stat_} prefix (e.g. \code{"count"} rather than +\code{"stat_count"})} -\item{position}{Position adjustment, either as a string, or the result of -a call to a position adjustment function.} +\item{position}{Position adjustment, either as a string naming the adjustment +(e.g. \code{"jitter"} to use \code{position_jitter}), or the result of a call to a +position adjustment function. Use the latter if you need to change the +settings of the adjustment.} \item{na.rm}{If \code{FALSE}, the default, missing values are removed with a warning. If \code{TRUE}, missing values are silently removed.} diff --git a/man/geom_stepribbon.Rd b/man/geom_stepribbon.Rd index c9e8737e..f13322d7 100644 --- a/man/geom_stepribbon.Rd +++ b/man/geom_stepribbon.Rd @@ -19,10 +19,10 @@ geom_stepribbon( ) } \arguments{ -\item{mapping}{Set of aesthetic mappings created by \code{\link[ggplot2:aes]{aes()}} or -\code{\link[ggplot2:aes_]{aes_()}}. If specified and \code{inherit.aes = TRUE} (the -default), it is combined with the default mapping at the top level of the -plot. You must supply \code{mapping} if there is no plot mapping.} +\item{mapping}{Set of aesthetic mappings created by \code{\link[ggplot2:aes]{aes()}}. If specified and +\code{inherit.aes = TRUE} (the default), it is combined with the default mapping +at the top level of the plot. You must supply \code{mapping} if there is no plot +mapping.} \item{data}{The data to be displayed in this layer. There are three options: @@ -40,10 +40,14 @@ will be used as the layer data. A \code{function} can be created from a \code{formula} (e.g. \code{~ head(.x, 10)}).} \item{stat}{The statistical transformation to use on the data for this -layer, as a string.} +layer, either as a \code{ggproto} \code{Geom} subclass or as a string naming the +stat stripped of the \code{stat_} prefix (e.g. \code{"count"} rather than +\code{"stat_count"})} -\item{position}{Position adjustment, either as a string, or the result of -a call to a position adjustment function.} +\item{position}{Position adjustment, either as a string naming the adjustment +(e.g. \code{"jitter"} to use \code{position_jitter}), or the result of a call to a +position adjustment function. Use the latter if you need to change the +settings of the adjustment.} \item{direction}{direction of stairs: 'vh' for vertical then horizontal, 'hv' for horizontal then vertical, or 'mid' for step half-way between From 98569a7a2913cfc65efe5f10c6d25ab265a5d7a3 Mon Sep 17 00:00:00 2001 From: Lore Zumeta Olaskoaga Date: Tue, 15 Nov 2022 17:48:38 +0100 Subject: [PATCH 11/24] minor change to avoid "tidy evaluation" warning --- R/viz-elra.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/viz-elra.R b/R/viz-elra.R index d7dc3927..81a73a97 100644 --- a/R/viz-elra.R +++ b/R/viz-elra.R @@ -142,7 +142,7 @@ gg_slice <- function(data, model, term, ..., reference = NULL, ci = TRUE) { ndf <- ndf %>% mutate_at(vars[-1], ~as.factor(.x)) n_vars <- length(vars) - gg_out <- ggplot(ndf, aes(x = .data[[vars[1]]], y = "fit")) + gg_out <- ggplot(ndf, aes(x = .data[[vars[1]]], y = .data[["fit"]])) if (ci) { gg_out <- gg_out + geom_ribbon(aes(ymin = .data[["ci_lower"]], ymax = .data[["ci_upper"]]), alpha = 0.3) From 0e529671613efe0419c6befc96c897ab3b4e0eee Mon Sep 17 00:00:00 2001 From: Lore Zumeta Olaskoaga Date: Tue, 15 Nov 2022 17:49:36 +0100 Subject: [PATCH 12/24] edits in add_concurrent() --- R/formula-specials.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/R/formula-specials.R b/R/formula-specials.R index dfa3e04d..572c41c1 100644 --- a/R/formula-specials.R +++ b/R/formula-specials.R @@ -266,16 +266,16 @@ add_concurrent <- function(ped, data, id_var, ...) { ## create an auxiliary 'tend_aux' column, the not reset 'tend' ped <- ped %>% group_by(.data[[id_var]]) %>% - mutate(tend_aux = lag(tend, default = 0)) %>% + mutate(tend_aux = lag(.data$tend, default = 0)) %>% ungroup() %>% group_by(.data[[id_var]], .data[[dots$transition]]) %>% - mutate(tend_aux = first(tend_aux), - tend_aux = tend + tend_aux) %>% + mutate(tend_aux = first(.data$tend_aux), + tend_aux = .data$tend + .data$tend_aux) %>% ungroup() } else { - ped <- mutate(ped, tend_aux = tend) + ped <- mutate(ped, tend_aux = .data$tend) } - ped_split <- split(ped$tend_aux, f = list(ped[[id_var]], ped[[transition]]), + ped_split <- split(ped$tend_aux, f = list(ped[[id_var]], ped[[dots$transition]]), drop = TRUE) } else { ped_split <- split(ped$tend, f = ped[[id_var]]) @@ -287,11 +287,11 @@ add_concurrent <- function(ped, data, id_var, ...) { ccr_vars_i <- c(tz_var_i, tdc_vars_i) if (any(dots$transition %in% names(ped))) { ccr_i_df <- data %>% - select(one_of(c(id_var, transition, ccr_vars_i))) %>% + select(one_of(c(id_var, dots$transition, ccr_vars_i))) %>% unnest(cols = -one_of(id_var)) ccr_i_df_split <- split(ccr_i_df, f = list(ccr_i_df[[id_var]], - ccr_i_df[[transition]]), drop = TRUE) + ccr_i_df[[dots$transition]]), drop = TRUE) } else { ccr_i_df <- data %>% select(one_of(c(id_var, ccr_vars_i))) %>% From 143ac1a4018ebadd48616240278bf06a71a0483d Mon Sep 17 00:00:00 2001 From: Lore Zumeta Olaskoaga Date: Tue, 15 Nov 2022 17:50:34 +0100 Subject: [PATCH 13/24] as_ped() edited to handle: recurrent events + tdc data --- R/as-ped.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/as-ped.R b/R/as-ped.R index d1456144..34454023 100644 --- a/R/as-ped.R +++ b/R/as-ped.R @@ -130,8 +130,7 @@ as_ped.nested_fdf <- function(data, formula, ...) { attr(data, "id_n") <- ped %>% group_by(!!sym(attr(data, "id_var")), .data[[dots$transition]]) %>% summarize(id_n = n()) %>% pull("id_n") %>% as_vector() - attr(data, "id_tseq") <- ped %>% group_by(!!sym(attr(data, "id_var")), - .data[[dots$transition]]) %>% + attr(data, "id_tseq") <- ped %>% group_by(!!sym(attr(data, "id_var"))) %>% ## Do not group_by() by .data[[dots$transition]] transmute(id_tseq = row_number()) %>% pull("id_tseq") %>% as_vector() attr(data, "id_tz_seq") <- rep(seq_len(nrow(data)), times = attr(data, "id_n")) @@ -148,7 +147,7 @@ as_ped.nested_fdf <- function(data, formula, ...) { if (has_special(formula, "concurrent")) { - ped <- ped %>% add_concurrent(data = data, id_var = dots$id) + ped <- ped %>% add_concurrent(data = data, id_var = dots$id, ...) ## NEW ---- } if (has_special(formula, "cumulative")) { @@ -380,6 +379,7 @@ as_ped_recurrent <- function( dots$timescale <- timescale ped <- do.call(split_data_recurrent, dots) + # ped <- arrange(ped, .data[[dots$id]], .data[[transition]]) ## I find more intuitive arranging the data this way.. then some unit tests should be changed.. (test-as-ped.R:103) (test-as-ped.R:107) (test-as-ped.R:121) (test-as-ped.R:124) (test-as-ped.R:127) (test-as-ped.R:131) attr(ped, "time_var") <- get_lhs_vars(dots$formula)[1] return(ped) From f607d534f8532ea5265a46fed1c537af892ec36b Mon Sep 17 00:00:00 2001 From: Lore Zumeta Olaskoaga Date: Tue, 15 Nov 2022 20:20:04 +0100 Subject: [PATCH 14/24] edits in add_concurrent() --- R/formula-specials.R | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/R/formula-specials.R b/R/formula-specials.R index 572c41c1..9b8ce875 100644 --- a/R/formula-specials.R +++ b/R/formula-specials.R @@ -264,14 +264,30 @@ add_concurrent <- function(ped, data, id_var, ...) { if (any(dots$transition %in% names(ped))) { if(dots$timescale == "gap") { ## create an auxiliary 'tend_aux' column, the not reset 'tend' - ped <- ped %>% + tend_aux <- ped %>% group_by(.data[[id_var]]) %>% mutate(tend_aux = lag(.data$tend, default = 0)) %>% - ungroup() %>% + ungroup() %>% group_by(.data[[id_var]], .data[[dots$transition]]) %>% - mutate(tend_aux = first(.data$tend_aux), - tend_aux = .data$tend + .data$tend_aux) %>% - ungroup() + mutate(tend_aux = first(.datatend_aux)) %>% + ungroup() %>% + select(id_var, "tend_aux") %>% unique() %>% + group_by(.data[[id_var]]) %>% + mutate(tend_aux = cumsum(.data$tend_aux)) %>% + ungroup() %>% + pull("tend_aux") %>% + rep(., attr(data, "id_n_i")) + + ped <- mutate(ped, tend_aux = .data$tend + .data$tend_aux) + + # ped <- ped %>% + # group_by(.data[[id_var]]) %>% + # mutate(tend_aux = lag(.data$tend, default = 0)) %>% + # ungroup() %>% + # group_by(.data[[id_var]], .data[[dots$transition]]) %>% + # mutate(tend_aux = first(.data$tend_aux), + # tend_aux = .data$tend + .data$tend_aux) %>% + # ungroup() } else { ped <- mutate(ped, tend_aux = .data$tend) } From 96ad9ce9c55ee4819bf2ebf8a2c48dbf36597b0b Mon Sep 17 00:00:00 2001 From: Lore Zumeta Olaskoaga Date: Thu, 17 Nov 2022 11:04:39 +0100 Subject: [PATCH 15/24] tidyselect lifecycle change --- tests/testthat/test-simple-transform.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-simple-transform.R b/tests/testthat/test-simple-transform.R index 82385d32..ea7c2765 100644 --- a/tests/testthat/test-simple-transform.R +++ b/tests/testthat/test-simple-transform.R @@ -64,7 +64,7 @@ test_that("Error on wrong input", { expect_error(as_ped(tumor, x ~ y, cut = c(0:5, 10, 40))) expect_error(as_ped(tumor, Surv(days2, status) ~., cut = c(0:5, 10, 40))) expect_error(as_ped( - data = rename(tumor, ped_time = time), + data = rename(tumor, ped_time = "time"), formula = Surv(ped_time, status) ~.)) # already in data set ped_time From ce0a92df7b5801962aacdd22d67db006250ba9ef Mon Sep 17 00:00:00 2001 From: Lore Zumeta Olaskoaga Date: Thu, 17 Nov 2022 13:42:23 +0100 Subject: [PATCH 16/24] correct last edit in add_concurrent() --- R/formula-specials.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/R/formula-specials.R b/R/formula-specials.R index 9b8ce875..82a091c2 100644 --- a/R/formula-specials.R +++ b/R/formula-specials.R @@ -269,16 +269,19 @@ add_concurrent <- function(ped, data, id_var, ...) { mutate(tend_aux = lag(.data$tend, default = 0)) %>% ungroup() %>% group_by(.data[[id_var]], .data[[dots$transition]]) %>% - mutate(tend_aux = first(.datatend_aux)) %>% + mutate(tend_aux = first(.data$tend_aux)) %>% ungroup() %>% select(id_var, "tend_aux") %>% unique() %>% group_by(.data[[id_var]]) %>% mutate(tend_aux = cumsum(.data$tend_aux)) %>% ungroup() %>% + arrange(.data[[id_var]]) %>% ## I suggest arranging this way in a previous step (in as_ped_recurrent() function) ---- pull("tend_aux") %>% - rep(., attr(data, "id_n_i")) + rep(., attr(data, "id_n")) - ped <- mutate(ped, tend_aux = .data$tend + .data$tend_aux) + ped <- arrange(ped, .data[[id_var]], .data[[dots$transition]]) %>% ## arrange + mutate(tend_aux = .data$tend + tend_aux) %>% + arrange(.data[[dots$transition]]) ## unmake previous arrangement (to hold the attributes order..) --- # ped <- ped %>% # group_by(.data[[id_var]]) %>% @@ -323,7 +326,7 @@ add_concurrent <- function(ped, data, id_var, ...) { }) %>% bind_rows() %>% as.data.frame() ped <- ped %>% bind_cols(li) - if (any(dots$transition %in% names(ped))) ped$tend_aux <- NULL + # if (any(dots$transition %in% names(ped))) ped$tend_aux <- NULL } attr(ped, "ccr") <- ccr From e383e0e83b4bc14f2fbf8b120386dad7b4bc2a61 Mon Sep 17 00:00:00 2001 From: Lore Zumeta Olaskoaga Date: Thu, 17 Nov 2022 13:43:12 +0100 Subject: [PATCH 17/24] edit in as_ped_recurrent() --- R/as-ped.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/as-ped.R b/R/as-ped.R index 34454023..b7ecaa2e 100644 --- a/R/as-ped.R +++ b/R/as-ped.R @@ -366,11 +366,12 @@ as_ped_recurrent <- function( if (!(transition %in% rhs_vars)) { formula <- get_ped_form(formula, data = data, tdc_specials = tdc_specials) ## NEW---- formula <- add_to_rhs(formula, transition) + } else { + formula <- get_ped_form(formula, data = data, tdc_specials = tdc_specials) } dots <- list(...) dots$data <- data - # dots$formula <- get_ped_form(formula, data = data, tdc_specials = tdc_specials) dots$formula <- formula dots$cut <- cut dots$max_time <- max_time From b14fa875aa7fe565a3a25ff6d88fd9d7b6fe0db0 Mon Sep 17 00:00:00 2001 From: Lore Zumeta Olaskoaga Date: Fri, 18 Nov 2022 00:48:43 +0100 Subject: [PATCH 18/24] minor edit --- R/formula-specials.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/formula-specials.R b/R/formula-specials.R index 82a091c2..a77db628 100644 --- a/R/formula-specials.R +++ b/R/formula-specials.R @@ -271,7 +271,7 @@ add_concurrent <- function(ped, data, id_var, ...) { group_by(.data[[id_var]], .data[[dots$transition]]) %>% mutate(tend_aux = first(.data$tend_aux)) %>% ungroup() %>% - select(id_var, "tend_aux") %>% unique() %>% + select(id_var, dots$transition, "tend_aux") %>% unique() %>% group_by(.data[[id_var]]) %>% mutate(tend_aux = cumsum(.data$tend_aux)) %>% ungroup() %>% From 35cdf2f6493063bd16ab77c75270c0e3ec3d2c7f Mon Sep 17 00:00:00 2001 From: Lore Zumeta Olaskoaga Date: Fri, 18 Nov 2022 00:49:08 +0100 Subject: [PATCH 19/24] minor edit --- R/as-ped.R | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/R/as-ped.R b/R/as-ped.R index b7ecaa2e..46e86737 100644 --- a/R/as-ped.R +++ b/R/as-ped.R @@ -130,8 +130,16 @@ as_ped.nested_fdf <- function(data, formula, ...) { attr(data, "id_n") <- ped %>% group_by(!!sym(attr(data, "id_var")), .data[[dots$transition]]) %>% summarize(id_n = n()) %>% pull("id_n") %>% as_vector() - attr(data, "id_tseq") <- ped %>% group_by(!!sym(attr(data, "id_var"))) %>% ## Do not group_by() by .data[[dots$transition]] - transmute(id_tseq = row_number()) %>% pull("id_tseq") %>% as_vector() + ## Question: group_by() by .data[[dots$transition]]? + if (dots$timescale == "gap") { + attr(data, "id_tseq") <- ped %>% group_by(!!sym(attr(data, "id_var")), + .data[[dots$transition]]) %>% + transmute(id_tseq = row_number()) %>% pull("id_tseq") %>% as_vector() + } else { ## if calendar + attr(data, "id_tseq") <- ped %>% group_by(!!sym(attr(data, "id_var"))) %>% + transmute(id_tseq = row_number()) %>% pull("id_tseq") %>% as_vector() + } + attr(data, "id_tz_seq") <- rep(seq_len(nrow(data)), times = attr(data, "id_n")) } else { From eaf609908643c9dbcd8dfb82ba73b64917a340c8 Mon Sep 17 00:00:00 2001 From: Lore Zumeta Olaskoaga Date: Tue, 10 Jan 2023 12:24:03 +0100 Subject: [PATCH 20/24] tidyup and add some checks --- R/as-ped.R | 32 ++++++++++++-------- R/formula-specials.R | 72 ++++++++++++++++++++++++++++++-------------- 2 files changed, 69 insertions(+), 35 deletions(-) diff --git a/R/as-ped.R b/R/as-ped.R index 46e86737..4f8cf673 100644 --- a/R/as-ped.R +++ b/R/as-ped.R @@ -122,27 +122,26 @@ as_ped.nested_fdf <- function(data, formula, ...) { cut = cut, max_time = dots$max_time, transition = dots$transition, - timescale = dots$timescale) ## NEW ---- + timescale = dots$timescale) # replace updated attributes attr(data, "breaks") <- attr(ped, "breaks") - if (test_character(dots$transition, min.chars = 1L, min.len = 1L)) { + if (length(dots$transition) != 0) { ## if there are recurrent events (i.e. transition != character()) attr(data, "id_n") <- ped %>% group_by(!!sym(attr(data, "id_var")), .data[[dots$transition]]) %>% summarize(id_n = n()) %>% pull("id_n") %>% as_vector() - ## Question: group_by() by .data[[dots$transition]]? - if (dots$timescale == "gap") { + ## id_tseq attribute depends on timescale + if (dots$timescale == "gap") { ## if gap, then group_by() by "transition" attr(data, "id_tseq") <- ped %>% group_by(!!sym(attr(data, "id_var")), .data[[dots$transition]]) %>% transmute(id_tseq = row_number()) %>% pull("id_tseq") %>% as_vector() - } else { ## if calendar + } else { ## if calendar attr(data, "id_tseq") <- ped %>% group_by(!!sym(attr(data, "id_var"))) %>% transmute(id_tseq = row_number()) %>% pull("id_tseq") %>% as_vector() } - attr(data, "id_tz_seq") <- rep(seq_len(nrow(data)), times = attr(data, "id_n")) - } else { + } else { ## if no recurrent events attr(data, "id_n") <- ped %>% group_by(!!sym(attr(data, "id_var"))) %>% summarize(id_n = n()) %>% pull("id_n") %>% as_vector() attr(data, "id_tseq") <- ped %>% group_by(!!sym(attr(data, "id_var"))) %>% @@ -150,12 +149,9 @@ as_ped.nested_fdf <- function(data, formula, ...) { attr(data, "id_tz_seq") <- rep(seq_len(nrow(data)), times = attr(data, "id_n")) } - ## NEW ---- - ## - if (has_special(formula, "concurrent")) { - ped <- ped %>% add_concurrent(data = data, id_var = dots$id, ...) ## NEW ---- + ped <- ped %>% add_concurrent(data = data, id_var = dots$id, ...) } if (has_special(formula, "cumulative")) { @@ -370,9 +366,20 @@ as_ped_recurrent <- function( status_error(data, formula) assert_subset(tdc_specials, c("concurrent", "cumulative")) + ## check there is an event in the last transition + last_spell <- data %>% + filter(.data[[transition]] == max(.data[[transition]])) + lhs_vars <- get_lhs_vars(formula) + status_var <- lhs_vars[[length(lhs_vars)]] + if (has_tdc_form(formula) & all(last_spell[[status_var]] == 0)) { + stop("All observations in the last transition are censored. + Please, filter those rows in order to properly transform the data + or specify a proper 'cut' argument.") + } + rhs_vars <- get_rhs_vars(formula) if (!(transition %in% rhs_vars)) { - formula <- get_ped_form(formula, data = data, tdc_specials = tdc_specials) ## NEW---- + formula <- get_ped_form(formula, data = data, tdc_specials = tdc_specials) formula <- add_to_rhs(formula, transition) } else { formula <- get_ped_form(formula, data = data, tdc_specials = tdc_specials) @@ -388,7 +395,6 @@ as_ped_recurrent <- function( dots$timescale <- timescale ped <- do.call(split_data_recurrent, dots) - # ped <- arrange(ped, .data[[dots$id]], .data[[transition]]) ## I find more intuitive arranging the data this way.. then some unit tests should be changed.. (test-as-ped.R:103) (test-as-ped.R:107) (test-as-ped.R:121) (test-as-ped.R:124) (test-as-ped.R:127) (test-as-ped.R:131) attr(ped, "time_var") <- get_lhs_vars(dots$formula)[1] return(ped) diff --git a/R/formula-specials.R b/R/formula-specials.R index a77db628..c235b878 100644 --- a/R/formula-specials.R +++ b/R/formula-specials.R @@ -256,42 +256,33 @@ get_tz <- function(data, tz_var) { #' @keywords internal #' @importFrom purrr map2 +#' @importFrom tidyselect all_of add_concurrent <- function(ped, data, id_var, ...) { ccr <- attr(data, "ccr") dots <- list(...) if (any(dots$transition %in% names(ped))) { + if(dots$timescale == "gap") { - ## create an auxiliary 'tend_aux' column, the not reset 'tend' - tend_aux <- ped %>% - group_by(.data[[id_var]]) %>% - mutate(tend_aux = lag(.data$tend, default = 0)) %>% - ungroup() %>% + ## create an auxiliary 'tend_aux' column, a not resetted copy of 'tend' + tend_aux_df <- ped %>% + group_by(.data[[id_var]]) %>% + mutate(tend_aux = lag(.data$tend, default = 0)) %>% + ungroup() %>% group_by(.data[[id_var]], .data[[dots$transition]]) %>% mutate(tend_aux = first(.data$tend_aux)) %>% ungroup() %>% - select(id_var, dots$transition, "tend_aux") %>% unique() %>% + select(all_of(c(id_var, dots$transition)), "tend_aux") %>% + unique() %>% group_by(.data[[id_var]]) %>% mutate(tend_aux = cumsum(.data$tend_aux)) %>% - ungroup() %>% - arrange(.data[[id_var]]) %>% ## I suggest arranging this way in a previous step (in as_ped_recurrent() function) ---- - pull("tend_aux") %>% - rep(., attr(data, "id_n")) + ungroup() - ped <- arrange(ped, .data[[id_var]], .data[[dots$transition]]) %>% ## arrange - mutate(tend_aux = .data$tend + tend_aux) %>% - arrange(.data[[dots$transition]]) ## unmake previous arrangement (to hold the attributes order..) --- + ped <- left_join(ped, tend_aux_df, by = c(id_var, dots$transition)) %>% + mutate(tend_aux = .data$tend + .data$tend_aux) - # ped <- ped %>% - # group_by(.data[[id_var]]) %>% - # mutate(tend_aux = lag(.data$tend, default = 0)) %>% - # ungroup() %>% - # group_by(.data[[id_var]], .data[[dots$transition]]) %>% - # mutate(tend_aux = first(.data$tend_aux), - # tend_aux = .data$tend + .data$tend_aux) %>% - # ungroup() - } else { + } else { ## if calendar ped <- mutate(ped, tend_aux = .data$tend) } ped_split <- split(ped$tend_aux, f = list(ped[[id_var]], ped[[dots$transition]]), @@ -325,6 +316,9 @@ add_concurrent <- function(ped, data, id_var, ...) { .y[ll_ind, tdc_vars_i] }) %>% bind_rows() %>% as.data.frame() + ## check that data contains baseline value + if(nrow(ped) != nrow(li)) stop("Please, add baseline values to your data (i.e. TDC value at t = 0)") + ped <- ped %>% bind_cols(li) # if (any(dots$transition %in% names(ped))) ped$tend_aux <- NULL } @@ -341,6 +335,17 @@ add_cumulative <- function(ped, data, formula) { func_components <- get_cumulative(data, formula) func_matrices <- func_components$func_mats + + ## check that all individuals share same tz pattern + tz_vars <- func_components$tz_vars ## a list + id_var <- attr(ped, "id_var") + has_common_tzset <- sapply(tz_vars, function(tz_var) + eval_common_tzset(data, id_var, tz_var)) + if (!all(has_common_tzset)) { + stop("TDC values should be recorded at same tz times for all individuals") + ## if this error not fixed there will be problems with matrix dimensions + } + for (i in seq_along(func_matrices)) { ped[[names(func_matrices)[i]]] <- func_matrices[[i]] } @@ -353,6 +358,29 @@ add_cumulative <- function(ped, data, formula) { } +#' @keywords internal +#' @importFrom purrr map +#' @importFrom tibble is_tibble +eval_common_tzset <- function(data, id_var, tz_var) { + tz_vectors <- data %>% + split(data[[id_var]]) + if (is_tibble(tz_vectors[[1]][[tz_var]][[1]])) { + tz_vectors <- sapply(tz_vectors, function(elem) as.vector(elem[[tz_var]][[1]])) + } else { + tz_vectors <- lapply(tz_vectors, function(elem) elem[[tz_var]][[1]]) + } + + idx <- which.max(lapply(tz_vectors, length)) + largest_tz_vector <- tz_vectors[[idx]] + + has_common_tzset <- map(.x = tz_vectors, + .f = function(x) all(x == largest_tz_vector[seq_along(x)])) %>% + reduce(c) %>% + all() + + return(has_common_tzset) +} + make_mat_names <- function(x, ...) { UseMethod("make_mat_names", x) } From 2c8f8c5d2576aca9e72b43c9ba048d98c70fbcf6 Mon Sep 17 00:00:00 2001 From: Lore Zumeta Olaskoaga Date: Tue, 10 Jan 2023 12:24:21 +0100 Subject: [PATCH 21/24] add immported packages --- DESCRIPTION | 3 ++- NAMESPACE | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7d5695df..683989dc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,7 +32,8 @@ Imports: Formula, mvtnorm, pec, - vctrs (>= 0.3.0) + vctrs (>= 0.3.0), + tidyselect Suggests: testthat License: MIT + file LICENSE diff --git a/NAMESPACE b/NAMESPACE index 4e176179..10bc8279 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -194,6 +194,7 @@ importFrom(stats,terms) importFrom(stats,update) importFrom(stats,vcov) importFrom(tibble,as_tibble) +importFrom(tibble,is_tibble) importFrom(tidyr,complete) importFrom(tidyr,crossing) importFrom(tidyr,gather) @@ -201,4 +202,5 @@ importFrom(tidyr,nest) importFrom(tidyr,pivot_longer) importFrom(tidyr,replace_na) importFrom(tidyr,unnest) +importFrom(tidyselect,all_of) importFrom(vctrs,vec_c) From d67d561e88aeff1c302c7395e1ecdce916802d6c Mon Sep 17 00:00:00 2001 From: Lore Zumeta Olaskoaga Date: Tue, 10 Jan 2023 12:25:15 +0100 Subject: [PATCH 22/24] add tests to test as_ped() for recurrent events and TDCs --- tests/testthat/test-as-ped.R | 223 +++++++++++++++++++++++++--- tests/testthat/test-tdc-transform.R | 74 +++++++-- 2 files changed, 268 insertions(+), 29 deletions(-) diff --git a/tests/testthat/test-as-ped.R b/tests/testthat/test-as-ped.R index b64f94f8..284a2d1d 100644 --- a/tests/testthat/test-as-ped.R +++ b/tests/testthat/test-as-ped.R @@ -12,26 +12,26 @@ test_that("Trafo works and attributes are appended", { expect_data_frame(ped, nrow = 12L, ncols = 8L) expect_is(ped, "ped") expect_subset(c("ped_status", "tstart", "tend", "interval", "offset"), - names(ped)) + names(ped)) expect_is(attr(ped, "breaks"), "numeric") expect_is(attr(ped, "intvars"), "character") expect_is(attr(ped, "id_var"), "character") expect_equal(attr(ped, "id_var"), "id") expect_equal(is.ped(ped), TRUE) - + ped <- as_ped( data = tumor, formula = Surv(days, status)~ complications + age) expect_data_frame(ped, nrows = 11L, ncols = 8L) - - + + }) test_that("Trafo works for list objects (with TDCs)", { data("patient") event_df <- filter(patient, CombinedID %in% c(1110, 1116)) ped <- as_ped(data = list(event_df), formula = Surv(survhosp, PatientDied)~ ., - cut = 0:30, id = "CombinedID") + cut = 0:30, id = "CombinedID") expect_data_frame(ped, nrows = 40, ncols = 15) tdc_df <- filter(daily, CombinedID %in% c(1110, 1116)) ## check nesting @@ -44,7 +44,7 @@ test_that("Trafo works for list objects (with TDCs)", { data = list(event_df, tdc_df), formula = Surv(survhosp, PatientDied) ~ . + cumulative(survhosp, Study_Day, caloriesPercentage, tz_var = "Study_Day") + - cumulative(proteinGproKG, tz_var = "Study_Day"), + cumulative(proteinGproKG, tz_var = "Study_Day"), cut = 0:30, id = "CombinedID") expect_subset("survhosp_Study_Day_mat", colnames(ped)) @@ -52,18 +52,18 @@ test_that("Trafo works for list objects (with TDCs)", { expect_identical(any(is.na(ped$caloriesPercentage_Study_Day)), FALSE) expect_identical(colnames(ped$Study_Day), paste0("Study_Day", 1:12)) ped <- as_ped( - data = list(event_df, tdc_df), - formula = Surv(survhosp, PatientDied) ~ . + - cumulative(Study_Day, caloriesPercentage, tz_var = "Study_Day") + - cumulative(proteinGproKG, tz_var = "Study_Day"), - id = "CombinedID") + data = list(event_df, tdc_df), + formula = Surv(survhosp, PatientDied) ~ . + + cumulative(Study_Day, caloriesPercentage, tz_var = "Study_Day") + + cumulative(proteinGproKG, tz_var = "Study_Day"), + id = "CombinedID") expect_data_frame(ped, nrows = 2L, ncols = 19L) - + }) test_that("Trafo works for left truncated data", { - + mort2 <- mort %>% group_by(id) %>% slice(1) %>% filter(id %in% c(1:3)) mort_ped <- as_ped(Surv(tstart, exit, event) ~ ses, data = mort2) expect_data_frame(mort_ped, nrows = 8L, ncols = 7L) @@ -72,12 +72,12 @@ test_that("Trafo works for left truncated data", { expect_identical(round(mort_ped$offset, 2), c(1.25, 2.30, 1.41, 0.89, 2.30, 1.41, 1.25, 2.30)) expect_identical(mort_ped$ped_status, c(rep(0, 5), 1, 0, 0)) expect_identical(mort_ped$ses, factor(rep(c("upper", "lower", "upper"), times = c(4,2,2)))) - + }) test_that("Trafo works for recurrent events data", { - + test_df <- data.frame( id = c(1,1, 2,2,2), tstart = c(0, .5, 0, .8, 1.2), @@ -92,7 +92,7 @@ test_that("Trafo works for recurrent events data", { transition = "enum", id = "id", timescale = "gap") - + expect_data_frame(gap_df, nrows = 9L, ncols = 8L) expect_identical( round(gap_df$tstart, 1), @@ -108,7 +108,7 @@ test_that("Trafo works for recurrent events data", { gap_df$enum, rep(c(1, 2), times = c(5, 4)) ) - + ## CALENDAR timescale cal_df <- as_ped( data = test_df, @@ -116,7 +116,7 @@ test_that("Trafo works for recurrent events data", { id = "id", transition = "enum", timescale = "calendar") - + expect_data_frame(cal_df, nrows = 6L, ncols = 8L) expect_identical( round(cal_df$tstart, 1), @@ -132,5 +132,192 @@ test_that("Trafo works for recurrent events data", { cal_df$enum, rep(c(1, 2), each = 3) ) + +}) + +test_that("Trafo works for recurrent events data and concurrent effects of TDCs", { + + test_event_df <- data.frame( + id = c(1,1,1, 2,2), + tstart = c(0, 100, 250, 0, 300), + tstop = c(100, 250, 600, 300, 750), + status = c(1, 1, 1, 1, 0), + enum = c(1, 2, 3, 1, 2)) + + test_tdc_df <- data.frame(id = rep(c(1, 2), times = c(7, 8)), + tz = c(0, seq(100, 600, by = 100), + 0, seq(100, 700, by = 100)), + ztz = c(0, 5, 4, 6, 3, 8, 7, + 0, 3, 4, 4.5, 5, 6, 3, 4)) + + test_df <- list(test_event_df, test_tdc_df) + + # GAP timescale + gap_df <- as_ped( + data = test_df, + formula = Surv(tstart, tstop, status) ~ enum + concurrent(ztz, tz_var = "tz"), + transition = "enum", + id = "id", + timescale = "gap") + + expect_data_frame(gap_df, nrows = 17L, ncols = 9L) + expect_identical( + round(gap_df$tstart, 1), + c(0, 0, 100, 200, 250, 0, 100, 0, 100, 200, 250, + 300, 0, 100, 200, 250, 300)) + expect_identical( + round(gap_df$tend, 1), + c(100, 100, 200, 250, 300, 100, 200, 100, 200, 250, + 300, 400, 100, 200, 250, 300, 400)) + expect_identical( + gap_df$ped_status, + c(1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1) + ) + expect_identical( + gap_df$enum, + rep(c(1, 2, 3), times = c(5, 7, 5)) + ) + expect_identical( + gap_df$tend_aux, + c(100, 100, 200, 250, 300, 200, 300, 400, 500, 550, + 600, 700, 400, 500, 550, 600, 700) + ) + + ## CALENDAR timescale + cal_df <- as_ped( + data = test_df, + formula = Surv(tstart, tstop, status) ~ enum + concurrent(ztz, tz_var = "tz"), + id = "id", + transition = "enum", + timescale = "calendar") + + expect_data_frame(cal_df, nrows = 14L, ncols = 9L) + expect_identical( + round(cal_df$tstart, 1), + c(0, 0, 100, 200, 250, 100, 200, 300, 400, 500, + 250, 300, 400, 500)) + expect_identical( + round(cal_df$tend, 1), + c(100, 100, 200, 250, 300, 200, 250, 400, 500, + 600, 300, 400, 500, 600)) + expect_identical( + cal_df$ped_status, + c(1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1) + ) + expect_identical( + cal_df$enum, + rep(c(1, 2, 3), times = c(5, 5, 4)) + ) + expect_identical( + cal_df$tend_aux, + c(100, 100, 200, 250, 300, 200, 250, 400, 500, + 600, 300, 400, 500, 600) + ) + +}) +test_that("Trafo works for recurrent events data and cumulative effects of TDCs", { + + test_event_df <- data.frame( + id = c(1,1,1, 2,2), + tstart = c(0, 100, 250, 0, 300), + tstop = c(100, 250, 600, 300, 750), + status = c(1, 1, 1, 1, 0), + enum = c(1, 2, 3, 1, 2)) + + test_tdc_df <- data.frame(id = rep(c(1, 2), times = c(7, 8)), + tz = c(0, seq(100, 600, by = 100), + 0, seq(100, 700, by = 100)), + ztz = c(0, 5, 4, 6, 3, 8, 7, + 0, 3, 4, 4.5, 5, 6, 3, 4)) + + test_df <- list(test_event_df, test_tdc_df) + test_formula <- as.formula( + Surv(tstart, tstop, status) ~ enum + cumulative(latency(tz), ztz, tz_var = "tz", + ll_fun = function(t, tz) t >= tz) + ) + + # GAP timescale + gap_df <- as_ped( + data = test_df, + formula = test_formula, + transition = "enum", + id = "id", + timescale = "gap") + + expect_data_frame(gap_df, nrows = 14L, ncols = 10L) + expect_identical( + round(gap_df$tstart, 1), + c(0, 0, 100, 250, 0, 100, 0, 100, 250, 300, 0, 100, 250, 300)) + expect_identical( + round(gap_df$tend, 1), + c(100, 100, 250, 300, 100, 250, 100, 250, 300, 600, 100, 250, 300, 600)) + expect_identical( + gap_df$ped_status, + c(1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1) + ) + expect_identical( + gap_df$enum, + rep(c(1, 2, 3), times = c(4, 6, 4)) + ) + + expect_subset("ztz", colnames(gap_df)) + expect_identical(colnames(gap_df$ztz), paste0("ztz", 1:8)) + expect_matrix(gap_df$ztz, nrows = 14, ncols = 8) + expect_matrix(gap_df$tz_latency, nrows = 14, ncols = 8) + expect_matrix(gap_df$LL, nrows = 14, ncols = 8) + + ## CALENDAR timescale + cal_df <- as_ped( + data = test_df, + formula = test_formula, + id = "id", + transition = "enum", + timescale = "calendar") + + expect_data_frame(cal_df, nrows = 8L, ncols = 10L) + expect_identical( + round(cal_df$tstart, 1), + c(0, 0, 100, 250, 100, 300, 250, 300)) + expect_identical( + round(cal_df$tend, 1), + c(100, 100, 250, 300, 250, 600, 300, 600)) + expect_identical( + cal_df$ped_status, + c(1, 0, 0, 1, 1, 0, 0, 1) + ) + expect_identical( + cal_df$enum, + rep(c(1, 2, 3), times = c(4, 2, 2)) + ) + + expect_subset("ztz", colnames(cal_df)) + expect_identical(colnames(cal_df$ztz), paste0("ztz", 1:8)) + expect_matrix(cal_df$ztz, nrows = 8, ncols = 8) + expect_matrix(cal_df$tz_latency, nrows = 8, ncols = 8) + expect_matrix(cal_df$LL, nrows = 8, ncols = 8) + }) + +test_that("Trafo for recurrent events aborts when all obs. in the last spell are censored", { + + test_event_df <- data.frame( + id = c(1,1,1,1, 2,2), + tstart = c(0, 100, 250, 300, 0, 300), + tstop = c(100, 250, 300, 600, 300, 750), + status = c(1, 1, 1, 0, 1, 0), + enum = c(1, 2, 3, 4, 1, 2)) + test_tdc_df <- data.frame(id = rep(c(1, 2), times = c(7, 8)), + tz = c(0, seq(100, 600, by = 100), + 0, seq(100, 700, by = 100)), + ztz = c(0, 5, 4, 6, 3, 8, 7, + 0, 3, 4, 4.5, 5, 6, 3, 4)) + test_df <- list(test_event_df, test_tdc_df) + + expect_error(as_ped( + data = test_df, + formula = Surv(tstart, tstop, status) ~ enum + concurrent(ztz, tz_var = "tz"), + transition = "enum", + id = "id", + timescale = "gap"), "are censored") +}) \ No newline at end of file diff --git a/tests/testthat/test-tdc-transform.R b/tests/testthat/test-tdc-transform.R index def15983..3defe1ec 100644 --- a/tests/testthat/test-tdc-transform.R +++ b/tests/testthat/test-tdc-transform.R @@ -21,8 +21,8 @@ test_that("Concurrent TDC are transformed correctly", { concurrent(bili, protime, tz_var = "day"), id = "id") expect_equal(unique(ped$tend), c(176, 182, 192, 364, 365, 400, 743, 768, 1012)) expect_equal(ped$bili, - c(rep(14.5, 3), rep(21.3, 3), rep(1.1, 2), rep(0.8, 3), rep(1, 3), - 1.9, 1.4, rep(1.1, 3), rep(1.5, 3), rep(1.8, 2))) + c(rep(14.5, 3), rep(21.3, 3), rep(1.1, 2), rep(0.8, 3), rep(1, 3), + 1.9, 1.4, rep(1.1, 3), rep(1.5, 3), rep(1.8, 2))) # lag != 0 ped <- as_ped( data = list(event_df, tdc_df), @@ -33,8 +33,8 @@ test_that("Concurrent TDC are transformed correctly", { unique(ped$tend), sort(c(time, tz + 10))) expect_equal(ped$bili, - c(rep(14.5, 3), rep(21.3, 3), rep(1.1, 2), rep(0.8, 3), rep(1, 3), - 1.9, 1.4, rep(1.1, 3), rep(1.5, 3), rep(1.8, 2))) + c(rep(14.5, 3), rep(21.3, 3), rep(1.1, 2), rep(0.8, 3), rep(1, 3), + 1.9, 1.4, rep(1.1, 3), rep(1.5, 3), rep(1.8, 2))) # unequal lags ped <- as_ped( data = list(event_df, tdc_df), @@ -51,7 +51,7 @@ test_that("Concurrent TDC are transformed correctly", { expect_equal(ped$protime, c(rep(12.2, 4), rep(11.2, 6), rep(10.6, 2), rep(11, 5), rep(11.6, 6), rep(10.6, 2), rep(12, 11), rep(13.3, 4))) -# when maxtime is set + # when maxtime is set ped <- as_ped( data = list(event_df, tdc_df), formula = Surv(time, status)~. + concurrent(bili, protime, tz_var = "day"), @@ -59,8 +59,8 @@ test_that("Concurrent TDC are transformed correctly", { max_time = 1400) expect_equal(unique(ped$tend), sort(c(time, tz, 1400))) expect_equal(ped$bili, - c(rep(14.5, 3), rep(21.3, 3), rep(1.1, 2), rep(0.8, 3), rep(1.0, 3), rep(1.9, 2), - 1.4, rep(1.1, 3), rep(1.5, 3), rep(1.8, 2))) + c(rep(14.5, 3), rep(21.3, 3), rep(1.1, 2), rep(0.8, 3), rep(1.0, 3), rep(1.9, 2), + 1.4, rep(1.1, 3), rep(1.5, 3), rep(1.8, 2))) }) test_that("Covariate matrices are created correctly", { @@ -91,10 +91,62 @@ test_that("Covariate matrices are created correctly", { expect_equal(LLmat[3, ], c(rep(0, 2), rep(1, 6), rep(0, 3))) expect_equal(max(Ltmat * LLmat), 5) ped <- as_ped(data, - Surv(time, status) ~ . + - cumulative(z.tz2, latency(tz2), tz_var = "tz2", - ll_fun = function(t, tz) (t - tz) >= 0 & (t - tz) <= 5), - cut = 0:2) + Surv(time, status) ~ . + + cumulative(z.tz2, latency(tz2), tz_var = "tz2", + ll_fun = function(t, tz) (t - tz) >= 0 & (t - tz) <= 5), + cut = 0:2) expect_equal(max(ped$tz2_latency * ped$LL), 5) + +}) + +test_that("Concurrent TDC (for recurrent events) aborts when data has no baseline value", { + + test_event_df <- data.frame( + id = c(1,1,1, 2,2), + tstart = c(0, 100, 250, 0, 300), + tstop = c(100, 250, 600, 300, 750), + status = c(1, 1, 1, 1, 0), + enum = c(1, 2, 3, 1, 2)) + test_tdc_df <- data.frame(id = rep(c(1, 2), times = c(6, 7)), + tz = c(seq(100, 600, by = 100), + seq(100, 700, by = 100)), + ztz = c(5, 4, 6, 3, 8, 7, + 3, 4, 4.5, 5, 6, 3, 4)) + test_df <- list(test_event_df, test_tdc_df) + + expect_error(as_ped( + data = test_df, + formula = Surv(tstart, tstop, status) ~ enum + concurrent(ztz, tz_var = "tz"), + transition = "enum", + id = "id", + timescale = "gap"), "add baseline") }) + +test_that("Cumulative TDC (for recurrent events) aborts when TDCs are recorded + at different tz times for each individual", { + + test_event_df <- data.frame( + id = c(1,1,1, 2,2), + tstart = c(0, 100, 250, 0, 300), + tstop = c(100, 250, 600, 300, 750), + status = c(1, 1, 1, 1, 0), + enum = c(1, 2, 3, 1, 2)) + test_tdc_df <- data.frame(id = rep(c(1, 2), times = c(6, 7)), + tz = c(seq(100, 600, by = 100), + seq(100, 400, by = 100), 450, 500, + 600), + ztz = c(5, 4, 6, 3, 8, 7, + 3, 4, 4.5, 5, 6, 3, 4)) + test_df <- list(test_event_df, test_tdc_df) + test_formula <- Surv(tstart, tstop, status) ~ + enum + cumulative(latency(tz), ztz, tz_var = "tz", + ll_fun = function(t, tz) t >= tz) + expect_error(as_ped( + data = test_df, + formula = test_formula, + transition = "enum", + id = "id", + timescale = "gap"), "at same tz times") + } +) From ebdc467fd25a12f08ecb82fe4f39c0abad7e46c5 Mon Sep 17 00:00:00 2001 From: Lore Zumeta Olaskoaga Date: Tue, 8 Aug 2023 13:03:34 +0200 Subject: [PATCH 23/24] last edits --- R/as-ped.R | 8 +++--- R/formula-specials.R | 59 ++++++++++++++++++++++++++++++++++---------- R/viz-elra.R | 4 ++- 3 files changed, 54 insertions(+), 17 deletions(-) diff --git a/R/as-ped.R b/R/as-ped.R index 4f8cf673..3ecce6ee 100644 --- a/R/as-ped.R +++ b/R/as-ped.R @@ -132,8 +132,10 @@ as_ped.nested_fdf <- function(data, formula, ...) { summarize(id_n = n()) %>% pull("id_n") %>% as_vector() ## id_tseq attribute depends on timescale if (dots$timescale == "gap") { ## if gap, then group_by() by "transition" - attr(data, "id_tseq") <- ped %>% group_by(!!sym(attr(data, "id_var")), - .data[[dots$transition]]) %>% + # attr(data, "id_tseq") <- ped %>% group_by(!!sym(attr(data, "id_var")), + # .data[[dots$transition]]) %>% + # transmute(id_tseq = row_number()) %>% pull("id_tseq") %>% as_vector() + attr(data, "id_tseq") <- ped %>% group_by(!!sym(attr(data, "id_var"))) %>% transmute(id_tseq = row_number()) %>% pull("id_tseq") %>% as_vector() } else { ## if calendar attr(data, "id_tseq") <- ped %>% group_by(!!sym(attr(data, "id_var"))) %>% @@ -155,7 +157,7 @@ as_ped.nested_fdf <- function(data, formula, ...) { } if (has_special(formula, "cumulative")) { - ped <- add_cumulative(ped, data = data, formula = formula) + ped <- add_cumulative(ped, data = data, formula = formula, timescale = dots$timescale) attr(ped, "ll_weights") <- imap(attr(ped, "tz"), ~bind_cols(!!.y := .x, ll_weight = c(mean(abs(diff(.x))), abs(diff(.x))))) class(ped) <- c("fped", class(ped)) diff --git a/R/formula-specials.R b/R/formula-specials.R index c235b878..6722d4ce 100644 --- a/R/formula-specials.R +++ b/R/formula-specials.R @@ -96,7 +96,7 @@ concurrent <- function(..., #' @importFrom purrr flatten map #' @importFrom stats terms #' @keywords internal -get_cumulative <- function(data, formula) { +get_cumulative <- function(data, formula, ped, timescale = NULL) { stopifnot(has_tdc_form(formula)) @@ -111,7 +111,8 @@ get_cumulative <- function(data, formula) { ## create matrices func_mats <- map(func_list, - ~ expand_cumulative(data = data, ., n_func = n_func)) %>% + ~ expand_cumulative(data = data, ., n_func = n_func, + ped = ped, timescale = timescale)) %>% flatten() list( @@ -155,7 +156,7 @@ has_special <- function(formula, special = "cumulative") { #' @param func Single evaluated \code{\link{cumulative}} term. #' @importFrom purrr map invoke_map #' @keywords internal -expand_cumulative <- function(data, func, n_func) { +expand_cumulative <- function(data, func, n_func, ped, timescale) { col_vars <- func$col_vars tz_var <- func$tz_var @@ -182,14 +183,14 @@ expand_cumulative <- function(data, func, n_func) { hist_mats[[i]] <- if (col_vars[i] == attr(data, "time_var")) { make_time_mat(data, nz) } else if (col_vars[i] == func$latency_var) { - make_latency_mat(data, tz) + make_latency_mat(data, tz, timescale) } else { - make_z_mat(data, col_vars[i], nz) + make_z_mat(data, col_vars[i], nz, ped, timescale) } } if (any(c(time_var, tz_var) %in% col_vars)) { - hist_mats <- c(hist_mats, list(make_lag_lead_mat(data, tz, func$ll_fun))) + hist_mats <- c(hist_mats, list(make_lag_lead_mat(data, tz, func$ll_fun, timescale))) names(hist_mats) <- make_mat_names(c(col_vars, "LL"), func$latency_var, tz_var, func$suffix, n_func) time_mat_ind <- grepl(time_var, names(hist_mats)) @@ -331,9 +332,9 @@ add_concurrent <- function(ped, data, id_var, ...) { } #' @keywords internal -add_cumulative <- function(ped, data, formula) { +add_cumulative <- function(ped, data, formula, timescale) { - func_components <- get_cumulative(data, formula) + func_components <- get_cumulative(data, formula, ped, timescale) func_matrices <- func_components$func_mats ## check that all individuals share same tz pattern @@ -448,12 +449,23 @@ make_time_mat <- function(data, nz) { #' @rdname elra_matrix #' @inherit make_time_mat #' @keywords internal -make_latency_mat <- function(data, tz) { +make_latency_mat <- function(data, tz, timescale = NULL) { time <- attr(data, "breaks") id_tseq <- attr(data, "id_tseq") Latency_mat <- outer(time, tz, FUN = "-") Latency_mat[Latency_mat < 0] <- 0 + if (!is.null(timescale)) { + if (timescale == "gap") { + idx <- unique(id_tseq[id_tseq > nrow(Latency_mat)]) + l_idx <- length(idx) + if (l_idx) { + Latency_mat <- Latency_mat[c(1:nrow(Latency_mat), rep(nrow(Latency_mat), l_idx)),] + Latency_mat[idx,] <- map(idx, function(i) lag(Latency_mat[i,], n = order(i), default = 0)) %>% + reduce(rbind) + } + } + } Latency_mat[id_tseq, , drop = FALSE] } @@ -464,13 +476,26 @@ make_latency_mat <- function(data, tz) { make_lag_lead_mat <- function( data, tz, - ll_fun = function(t, tz) t >= tz) { + ll_fun = function(t, tz) t >= tz, + timescale = NULL) { LL <- outer(attr(data, "breaks"), tz, FUN = ll_fun) * 1L delta <- abs(diff(tz)) IW <- matrix(c(mean(delta), delta), ncol = length(tz), nrow = nrow(LL), byrow = TRUE) LL <- LL * IW + if (!is.null(timescale)) { + if (timescale == "gap") { + id_tseq <- attr(data, "id_tseq") + idx <- unique(id_tseq[id_tseq > nrow(LL)]) + l_idx <- length(idx) + if (l_idx) { + LL <- LL[c(1:nrow(LL), rep(nrow(LL), l_idx)),] + LL[idx,] <- map(idx, function(i) lag(LL[i,], n = order(i), default = 0)) %>% + reduce(rbind) + } + } + } LL[attr(data, "id_tseq"), , drop = FALSE] } @@ -482,15 +507,23 @@ make_lag_lead_mat <- function( #' @importFrom purrr map map_int #' @importFrom dplyr pull #' @keywords internal -make_z_mat <- function(data, z_var, nz, ...) { +make_z_mat <- function(data, z_var, nz, ped = NULL, timescale = NULL) { tz_ind <- seq_len(nz) Z <- map(data[[z_var]], .f = ~ unlist(.x)[tz_ind]) Z <- do.call(rbind, Z) colnames(Z) <- paste0(z_var, tz_ind) Z[is.na(Z)] <- 0 - Z[attr(data, "id_tz_seq"), , drop = FALSE] - + if (is.null(timescale)) { + Z[attr(data, "id_tz_seq"), , drop = FALSE] + } else { + Z <- Z[attr(data, "id_tz_seq"), , drop = FALSE] + id_var <- attr(data, "id_var") + ## reorder as in ped data frame! + posx_df <- data.frame(pos_p = order(ped[[id_var]]), pos_Z = 1:nrow(ped)) + posx_df <- posx_df[order(posx_df$pos_p),] + Z[posx_df$pos_Z,] + } } get_ncols <- function(data, col_vars) { diff --git a/R/viz-elra.R b/R/viz-elra.R index 81a73a97..dca5f8e8 100644 --- a/R/viz-elra.R +++ b/R/viz-elra.R @@ -68,6 +68,8 @@ gg_partial_ll <- function( ci = FALSE, time_var = "tend") { + assert_class(data, "fped") + ind_term <- which(map_lgl(attr(data, "func_mat_names"), ~any(grepl(term, .x)))) tv_sym <- sym(time_var) @@ -94,7 +96,7 @@ gg_partial_ll <- function( levels = c("ci_lower", "fit", "ci_upper"))) } - gg_base <- ggplot(ll_df, aes(x = .data[["intmid"]], y = tz_var)) + + gg_base <- ggplot(ll_df, aes(x = .data[["intmid"]], y = .data[[tz_var]])) + geom_tile(aes(fill = .data[["fit"]]), colour = "grey30") + scale_fill_gradient2(high = "firebrick2", low = "steelblue", na.value = "grey30") + From a4768ee10a583499cb637c5dc020c5ce2d58091a Mon Sep 17 00:00:00 2001 From: Lore Zumeta Olaskoaga Date: Tue, 8 Aug 2023 22:59:00 +0200 Subject: [PATCH 24/24] get_ll() changed --- R/viz-elra.R | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/R/viz-elra.R b/R/viz-elra.R index dca5f8e8..e20ee1a3 100644 --- a/R/viz-elra.R +++ b/R/viz-elra.R @@ -235,9 +235,5 @@ get_ll <- function(x, ind_term, ..., time_var = "tend") { nd %>% filter(!!sym(tz_var) %in% tz_val) %>% mutate( !!sym(ll_var_mat) := ll_fun(.data[[time_var]], .data[[tz_var]]) * 1L) %>% - arrange(.data[[time_var]], .data[[tz_var]]) %>% - group_by(.data[[tz_var]]) %>% - mutate(!!sym(ll_var_mat) := lag(!!sym(ll_var_mat), default = 0)) %>% - ungroup() - + arrange(.data[[time_var]], .data[[tz_var]]) }