Skip to content

Incorporate resolve_delayed_datasets into resolve_delayed #255

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 31 commits into
base: main
Choose a base branch
from

Conversation

chlebowa
Copy link
Contributor

@chlebowa chlebowa commented Feb 18, 2025

Upgrade of #252

example
pkgload::load_all("teal.transform")
library(teal.modules.general)
library(teal)

data <- within(teal_data(), {
  iris <- iris
  mtcars <- mtcars
  co2 <- CO2
})


app <- init(
  data = data,
  modules = modules(
    tm_g_scatterplot(
      x = data_extract_spec(
        select = select_spec(
          choices = variable_choices(data = "all", subset = function(data) names(Filter(is.numeric, data))),
          selected = first_choice()
        )
      ),
      y = list(
        data_extract_spec(
          dataname = "co2",
          select = select_spec(selected = nth_choice(2))
        ),
        data_extract_spec(
          dataname = "iris",
          select = select_spec(selected = nth_choice(2))
        )
      )
    ),
    tm_g_scatterplotmatrix(
      variables = data_extract_spec()
    )
  )
)

runApp(app)

Some conclusions (for later):

  • choices should never be specified via variable_choices or value_choices, only eager values or "delayed_choices" (first_choice(), nth_choice(), last_choice(), all_choices(), first_choices(), last_choices()).
  • data in variable_choices and value_choices shouldn't be of class character.

@gogonzo gogonzo self-assigned this Feb 18, 2025
@gogonzo gogonzo added the core label Feb 18, 2025
- Fix rcmdcheck
- more tests TBD
- fix get_extract_datanames
@@ -63,6 +59,7 @@ export(merge_datasets)
export(merge_expression_module)
export(merge_expression_srv)
export(no_selected_as_NULL)
export(nth_choice)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bad idea. What if n = 6 but there are only 3 choices?
And why not slice_choices as well?

@@ -171,35 +175,48 @@ choices_labeled <- function(choices, labels, subset = NULL, types = NULL) {
#' })
#' @export
#'
variable_choices <- function(data, subset = NULL, fill = FALSE, key = NULL) {
variable_choices <- function(data = "all", subset = function(data) names(data), fill = FALSE, key = NULL) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
variable_choices <- function(data = "all", subset = function(data) names(data), fill = FALSE, key = NULL) {
variable_choices <- function(data = "inherit", subset = function(data) names(data), fill = FALSE, key = NULL) {

Comment on lines 180 to +181
checkmate::check_character(subset, null.ok = TRUE, any.missing = FALSE),
checkmate::check_function(subset)
checkmate::check_function(subset, args = "data")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since function is now default, why allow NULL?

Suggested change
checkmate::check_character(subset, null.ok = TRUE, any.missing = FALSE),
checkmate::check_function(subset)
checkmate::check_function(subset, args = "data")
checkmate::check_character(subset, any.missing = FALSE),
checkmate::check_function(subset, args = "data")

Also remove NULL provisions in delayed_choices.

)
checkmate::assert_flag(fill)
checkmate::assert_character(key, null.ok = TRUE, any.missing = FALSE)

UseMethod("variable_choices")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not S3?

subset <- resolve_delayed_expr(subset, ds = data, is_value_choices = FALSE)
subset <- subset(data)
if (
!checkmate::test_character(subset, any.missing = FALSE) ||
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing values are rejected by the caller.

Suggested change
!checkmate::test_character(subset, any.missing = FALSE) ||
!checkmate::test_character(subset) ||

@@ -451,7 +391,7 @@ data_extract_srv.FilteredData <- function(id, datasets, data_extract_spec, ...)
data_extract_srv.list <- function(id,
datasets,
data_extract_spec,
join_keys = NULL,
join_keys = teal.data::join_keys(),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

checkmate::assert_class(join_keys, "join_keys")

if (anyDuplicated(datanames)) {
stop("list contains data_extract_spec objects with the same dataset")
}
names(data_extract_spec) <- datanames # so the lapply/sapply results are named
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then why USE.NAMES = FALSE?

class(ans) <- c("multiple_choices", class(ans))
ans
}

#' @keywords internal
#' @noRd
.delayed_choices <- function(fun) {
.delayed_choices <- function(selected_fun) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you're doing this, I suggest fun_choices and fun_selected because it is not immediately abvious that you're refering to "function passed to selected argument".

#' @rdname delayed_choices
nth_choice <- function(n) {
.delayed_choices(selected_fun = function(x) {
new_n <- min(length(x), n)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not abvious. Why not x[1]?

choices = NULL,
selected = `if`(inherits(choices, "delayed_data"), NULL, choices[1]),
selected = first_choice(),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

adjust argument checks

Copy link

@llrs-roche llrs-roche left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When I try the example app I see that even if I have a selected column above it (on the red arrow) there is the option to select a different column and value (green arrow):

image

This is confusing for developers (or at least for me) and users. If I select two variables on the dropdown menu right after the dataset I see an error message: Cannot merge at least two dataset extracts. Make sure all datasets used for merging have appropriate keys.:

image

Besides the confusing message ("cannot merge at least two datasets" -> "cannot merge datasets extracts" ?) it is the same dataset, there shouldn't be any merge of datasets. I'm not sure if this is due to changes on data_extract* or on merged_datasets, but it might go away if the previous comment is resolved by removing these menus.

Code/diff review:
Many tests were deleted, probably because most of them are internal functions and they are quite restrictive in a PR like this. Still many are not replaced and resolve.delayed_choices_selected is not tested, resolve_delayed.FilteredData or resolve.delayed_value_choices with inherits(x$var_choices, "delayed_variable_choices") (detected via covr::report()).

This might be out of the scope but there are paths on data_extract_srv.list not tested such as when length(data_extract_spec) != 1. This might create some corner cases with bugs.

@@ -51,51 +51,64 @@ all_choices <- function() {
class(ans) <- c("multiple_choices", class(ans))
ans
}
#' @export
#' @rdname delayed_choices
nth_choice <- function(n) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no example using nth_choice.
If I explore the output of the function it doesn't show the input value in any place, instead it shows .delayed_choices.

This happens for several delayed_choices exported, it is not the focus of this PR, but it would be nice to address or at least open an issue to improve this later.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

.delayed_choices (the internal) is a template for creating the currently existing delayed_choices functions.
delayed_choices functions will never display the whole body because of the way it is used for composition with the subset function of a delayed_data object. But that is not a problem because it doesn't have to display it.

Comment on lines 99 to 101
if (inherits(x, "delayed_choices")) {
x
} else if (length(x) == 0L) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This if else clause has several conditions that return the same maybe it can be simplified. I mention this because currently it is shown to the user.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is? 🤔

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is:

library("teal.transform")
a1 <- all_choices()
a1
#> function(x) {
#>       if (inherits(x, "delayed_choices")) {
#>         x
#>       } else if (length(x) == 0L) {
#>         x
#>       } else if (is.atomic(x)) {
#>         fun(x)
#>       } else if (inherits(x, "delayed_data")) {
#>         if (is.null(x$subset)) {
#>           return(x)
#>         }
#>         original_fun <- x$subset
#>         x$subset <- function(data) {
#>           fun(original_fun(data))
#>         }
#>         x
#>       }
#>     }
#> <bytecode: 0x0000021b95a48610>
#> <environment: 0x0000021b95a5f500>
#> attr(,"class")
#> [1] "multiple_choices" "delayed_choices"  "delayed_data"

Created on 2025-02-24 with reprex v2.1.1

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, you mean the source is visible. It doesn't end up in the reproducible code, though. I don't think anybody has any business looking inside this one but you're right, as it is now, it could be written better, e.g.

function(x) {
  if (is.atomic(x)) {
    fun(x)
  } else if (inherits(x, "delayed_data") && !is.null(x$subset)) {
    original_fun <- x$subset
    x$subset <- function(data) {
      fun(original_fun(data))
    }
    x
  } else {
    x
  }
}

When it was written, possible cases were added as they were discovered, so there may be some edge cases that we didn't think of or encounter.


resolve <- function(x, datasets, join_keys = teal.data::join_keys()) {
checkmate::assert_list(datasets, min.len = 1, names = "named")
checkmate::assert_class(join_keys, "join_keys")
UseMethod("resolve")

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a bit confused but here a method is defined but then there is no NextMethod or UseMethod call or similar between the different classes: "delayed_variable_choices", "delayed_value_choices", "delayed_choices_selected", "delayed_select_spec", "delayed_filter_spec" and "delayed_data_extract_spec". I think it would simplify the code a bit or at least make it a bit more easy to follow the jumps between classes.

checkmate::check_null(keys)
)

resolve <- function(x, datasets, join_keys = teal.data::join_keys()) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function is marked as internal, but x is not informative: perhaps renaming it to dd for delayed data will make it easier for us/developers to use.
I mention this because renaming keys to join_keys is also a breaking change.
It seems like the resolve_delayed function is the user facing function for this functionality (same description, arguments and value sections). I would link on the documentation from there to that exported method.

x
}

#' Resolve expression after delayed data are loaded

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice catch for deleting this function that was not used internally.

@@ -2,15 +2,15 @@ ADSL <- teal.data::rADSL
ADLB <- teal.data::rADLB
ADTTE <- teal.data::rADTTE

data_list <- list(ADSL = reactive(ADSL), ADTTE = reactive(ADTTE), ADLB = reactive(ADLB))
data_list <- list(ADSL = reactive(ADSL), ADTTE = reactive(ADTTE), ADLB = reactive(ADLB), iris = reactive(iris))

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

iris is added on specific tests (see line 22). But I also see you replaced some that use it by ADSL. Use it or remove it from that test too unless there is something specific about the dataset that needs to be used.

@gogonzo
Copy link
Contributor

gogonzo commented Feb 24, 2025

PR needs more work (I don't know yet how much). Some modules in efficacy app throw some errors. Same happens in #252. Can somebody confirm?
I think we messed up reactivity of the data_extract_srv. We will decide with @llrs-roche what do do next

image
app
pkgload::load_all("temp//teal.transform")
library(teal)
library(teal.modules.general)
library(teal.modules.clinical)
options(shiny.useragg = FALSE)

## Data reproducible code ----
data <- teal_data()
data <- within(data, {
  library(dplyr)
  library(random.cdisc.data)
  library(nestcolor)
  # optional libraries
  library(sparkline)

  ADSL <- radsl(seed = 1)
  .adsl_labels <- teal.data::col_labels(ADSL, fill = FALSE)

  .char_vars_asl <- names(Filter(isTRUE, sapply(ADSL, is.character)))

  .adsl_labels <- c(
    .adsl_labels,
    AGEGR1 = "Age Group"
  )
  ADSL <- ADSL %>%
    mutate(
      AGEGR1 = factor(case_when(
        AGE < 45 ~ "<45",
        AGE >= 45 ~ ">=45"
      ))
    ) %>%
    mutate_at(.char_vars_asl, factor)

  teal.data::col_labels(ADSL) <- .adsl_labels

  ADTTE <- radtte(ADSL, seed = 1)

  ADRS <- radrs(ADSL, seed = 1)
  .adrs_labels <- teal.data::col_labels(ADRS, fill = FALSE)
  ADRS <- filter(ADRS, PARAMCD == "BESRSPI" | AVISIT == "FOLLOW UP")
  teal.data::col_labels(ADRS) <- .adrs_labels

  ADQS <- radqs(ADSL, seed = 1)
  .adqs_labels <- teal.data::col_labels(ADQS, fill = FALSE)
  ADQS <- ADQS %>%
    filter(ABLFL != "Y" & ABLFL2 != "Y") %>%
    filter(AVISIT %in% c("WEEK 1 DAY 8", "WEEK 2 DAY 15", "WEEK 3 DAY 22")) %>%
    mutate(
      AVISIT = as.factor(AVISIT),
      AVISITN = rank(AVISITN) %>%
        as.factor() %>%
        as.numeric() %>%
        as.factor()
    )
  teal.data::col_labels(ADQS) <- .adqs_labels
})

# set join_keys
join_keys(data) <- default_cdisc_join_keys[c("ADSL", "ADTTE", "ADRS", "ADQS")]

## Reusable Configuration For Modules
ADSL <- data[["ADSL"]]
ADTTE <- data[["ADTTE"]]
ADRS <- data[["ADRS"]]
ADQS <- data[["ADQS"]]
char_vars_asl <- data[["char_vars_asl"]]

arm_vars <- c("ARMCD", "ARM")
strata_vars <- c("STRATA1", "STRATA2")
facet_vars <- c("AGEGR1", "BMRKR2", "SEX", "COUNTRY")
cov_vars <- c("AGE", "SEX", "BMRKR1", "BMRKR2", "REGION1")
visit_vars <- c("AVISIT", "AVISITN")

cs_arm_var <- choices_selected(
  choices = variable_choices(ADSL, subset = arm_vars),
  selected = "ARM"
)

cs_strata_var <- choices_selected(
  choices = variable_choices(ADSL, subset = strata_vars),
  selected = "STRATA1"
)

cs_facet_var <- choices_selected(
  choices = variable_choices(ADSL, subset = facet_vars),
  selected = "AGEGR1"
)

cs_cov_var <- choices_selected(
  choices = variable_choices(ADSL, subset = cov_vars),
  selected = "AGE"
)

cs_paramcd_tte <- choices_selected(
  choices = value_choices(ADTTE, "PARAMCD", "PARAM"),
  selected = "OS"
)

cs_paramcd_rsp <- choices_selected(
  choices = value_choices(ADRS, "PARAMCD", "PARAM"),
  selected = "BESRSPI"
)

cs_paramcd_qs <- choices_selected(
  choices = value_choices(ADQS, "PARAMCD", "PARAM"),
  selected = "FKSI-FWB"
)

cs_visit_var_qs <- choices_selected(
  choices = variable_choices(ADQS, subset = visit_vars),
  selected = "AVISIT"
)

fact_vars_asl <- names(Filter(isTRUE, sapply(ADSL, is.factor)))
fact_vars_asl_orig <- fact_vars_asl[!fact_vars_asl %in% char_vars_asl]

date_vars_asl <- names(ADSL)[vapply(ADSL, function(x) inherits(x, c("Date", "POSIXct", "POSIXlt")), logical(1))]
demog_vars_asl <- names(ADSL)[!(names(ADSL) %in% c("USUBJID", "STUDYID", date_vars_asl))]

# reference & comparison arm selection when switching the arm variable
# ARMCD is given in a delayed fashion using value choices and
# ARM is given with the ref and comp levels supplied explicitly
arm_ref_comp <- list(
  ARMCD = list(
    ref = value_choices("ADSL", var_choices = "ARMCD", var_label = "ARM", subset = "ARM A"),
    comp = value_choices("ADSL", var_choices = "ARMCD", var_label = "ARM", subset = c("ARM B", "ARM C"))
  ),
  ARM = list(ref = "A: Drug X", comp = c("B: Placebo", "C: Combination"))
)

## App header and footer ----
nest_logo <- "https://raw.githubusercontent.com/insightsengineering/hex-stickers/main/PNG/nest.png"
app_source <- "https://github.com/insightsengineering/teal.gallery/tree/main/efficacy"
gh_issues_page <- "https://github.com/insightsengineering/teal.gallery/issues"

header <- tags$span(
  style = "display: flex; align-items: center; justify-content: space-between; margin: 10px 0 10px 0;",
  tags$span("My first teal app", style = "font-size: 30px;"),
  tags$span(
    style = "display: flex; align-items: center;",
    tags$img(src = nest_logo, alt = "NEST logo", height = "45px", style = "margin-right:10px;"),
    tags$span(style = "font-size: 24px;", "NEST @ Roche")
  )
)

footer <- tags$p(
  "This teal app is brought to you by the NEST Team at Roche/Genentech.
        For more information, please visit:",
  tags$a(href = app_source, target = "_blank", "Source Code"), ", ",
  tags$a(href = gh_issues_page, target = "_blank", "Report Issues")
)

## Setup App
app <- init(
  data = data,
  filter = teal_slices(
    count_type = "all",
    teal_slice(dataname = "ADSL", varname = "ITTFL", selected = "Y"),
    teal_slice(dataname = "ADSL", varname = "SEX"),
    teal_slice(dataname = "ADSL", varname = "AGE")
  ),
  modules = modules(
    tm_front_page(
      label = "App Info",
      header_text = c(
        "Info about input data source" =
          "This app uses CDISC ADaM datasets randomly generated by `random.cdisc.data` R packages"
      ),
      tables = list(`NEST packages used in this demo app` = data.frame(
        Packages = c("teal.modules.general", "teal.modules.clinical", "random.cdisc.data")
      ))
    ),
    tm_data_table("Data Table"),
    tm_variable_browser("Variable Browser"),
    tm_t_summary(
      label = "Demographic Table",
      dataname = "ADSL",
      arm_var = cs_arm_var,
      summarize_vars = choices_selected(
        choices = variable_choices(ADSL, demog_vars_asl),
        selected = c("SEX", "AGE", "RACE")
      )
    ),
    modules(
      label = "Forest Plots",
      tm_g_forest_tte(
        label = "Survival Forest Plot",
        dataname = "ADTTE",
        arm_var = cs_arm_var,
        strata_var = cs_strata_var,
        subgroup_var = cs_facet_var,
        paramcd = cs_paramcd_tte,
        plot_height = c(800L, 200L, 4000L)
      ),
      tm_g_forest_rsp(
        label = "Response Forest Plot",
        dataname = "ADRS",
        arm_var = cs_arm_var,
        strata_var = cs_strata_var,
        subgroup_var = cs_facet_var,
        paramcd = cs_paramcd_rsp,
        plot_height = c(800L, 200L, 4000L)
      )
    ),
    tm_g_km(
      label = "Kaplan Meier Plot",
      dataname = "ADTTE",
      arm_var = cs_arm_var,
      arm_ref_comp = arm_ref_comp,
      paramcd = cs_paramcd_tte,
      facet_var = cs_facet_var,
      strata_var = cs_strata_var,
      plot_height = c(1800L, 200L, 4000L)
    ),
    tm_t_binary_outcome(
      label = "Response Table",
      dataname = "ADRS",
      arm_var = cs_arm_var,
      arm_ref_comp = arm_ref_comp,
      paramcd = cs_paramcd_rsp,
      strata_var = cs_strata_var,
      rsp_table = TRUE
    ),
    tm_t_tte(
      label = "Time To Event Table",
      dataname = "ADTTE",
      arm_var = cs_arm_var,
      paramcd = cs_paramcd_tte,
      strata_var = cs_strata_var,
      time_points = choices_selected(c(182, 365, 547), 182),
      event_desc_var = choices_selected(
        choices = variable_choices("ADTTE", "EVNTDESC"),
        selected = "EVNTDESC",
        fixed = TRUE
      )
    ),
    tm_t_crosstable(
      "Cross Table",
      x = data_extract_spec(
        dataname = "ADSL",
        select = select_spec(
          choices = variable_choices(ADSL, fact_vars_asl_orig),
          selected = fact_vars_asl_orig[1]
        )
      ),
      y = data_extract_spec(
        dataname = "ADSL",
        select = select_spec(
          choices = variable_choices(ADSL, fact_vars_asl_orig),
          selected = fact_vars_asl_orig[4]
        )
      )
    ),
    tm_t_coxreg(
      label = "Cox Reg",
      dataname = "ADTTE",
      arm_var = cs_arm_var,
      arm_ref_comp = arm_ref_comp,
      paramcd = cs_paramcd_tte,
      strata_var = cs_strata_var,
      cov_var = cs_cov_var
    ),
    tm_t_logistic(
      label = "Logistic Reg",
      dataname = "ADRS",
      arm_var = cs_arm_var,
      arm_ref_comp = NULL,
      paramcd = cs_paramcd_rsp,
      cov_var = cs_cov_var
    ),
    tm_a_mmrm(
      label = "MMRM",
      dataname = "ADQS",
      aval_var = choices_selected(c("AVAL", "CHG"), "AVAL"),
      id_var = choices_selected(c("USUBJID", "SUBJID"), "USUBJID"),
      arm_var = cs_arm_var,
      visit_var = cs_visit_var_qs,
      arm_ref_comp = arm_ref_comp,
      paramcd = cs_paramcd_qs,
      cov_var = choices_selected(c("BASE", "AGE", "SEX", "BASE:AVISIT"), NULL),
      conf_level = choices_selected(c(0.95, 0.9, 0.8), 0.95)
    ),
    tm_t_binary_outcome(
      label = "Binary Response",
      dataname = "ADRS",
      arm_var = cs_arm_var,
      paramcd = cs_paramcd_rsp,
      strata_var = cs_strata_var
    ),
    tm_t_ancova(
      label = "ANCOVA",
      dataname = "ADQS",
      avisit = choices_selected(value_choices(ADQS, "AVISIT"), "WEEK 1 DAY 8"),
      arm_var = cs_arm_var,
      arm_ref_comp = arm_ref_comp,
      aval_var = choices_selected(variable_choices(ADQS, c("AVAL", "CHG", "PCHG")), "CHG"),
      cov_var = choices_selected(variable_choices(ADQS, c("BASE", "STRATA1", "SEX")), "STRATA1"),
      paramcd = cs_paramcd_qs
    )
  )
) |>
  modify_title(
    title = "Efficacy Analysis Teal Demo App",
    favicon = nest_logo
  ) |>
  modify_header(header) |>
  modify_footer(footer)

shinyApp(app$ui, app$server)

@llrs-roche
Copy link

@gogonzo From Forest plot to Time to Event Table I see failures (without modifying the input). However, on Forest plot I see a different error message: "Value of the endpoint variable should not be empty.":

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants