From 67c82a9aab5471c8b81990dbe9768ef88bd9c3a8 Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov <dshemetov@ucdavis.edu>
Date: Mon, 17 Mar 2025 15:47:55 -0700
Subject: [PATCH 01/20] feat: add yeo-johnson * step and layer work with a
 single outcome and layer_yj(.pred) * need to work on multiple outcomes case

---
 R/new_epipredict_steps/layer_yeo_johnson.R | 248 +++++++++++++
 R/new_epipredict_steps/step_yeo_johnson.R  | 409 +++++++++++++++++++++
 _targets.yaml                              |   6 +-
 test-yeo-johnson.Rmd                       |  85 +++++
 tests/testthat/test-yeo-johnson.R          | 103 ++++++
 5 files changed, 850 insertions(+), 1 deletion(-)
 create mode 100644 R/new_epipredict_steps/layer_yeo_johnson.R
 create mode 100644 R/new_epipredict_steps/step_yeo_johnson.R
 create mode 100644 test-yeo-johnson.Rmd
 create mode 100644 tests/testthat/test-yeo-johnson.R

diff --git a/R/new_epipredict_steps/layer_yeo_johnson.R b/R/new_epipredict_steps/layer_yeo_johnson.R
new file mode 100644
index 00000000..aa6c6feb
--- /dev/null
+++ b/R/new_epipredict_steps/layer_yeo_johnson.R
@@ -0,0 +1,248 @@
+#' Unormalizing transformation
+#'
+#' Will undo a step_epi_YeoJohnson transformation.
+#'
+#' @param frosting a `frosting` postprocessor. The layer will be added to the
+#'   sequence of operations for this frosting.
+#' @param ... One or more selector functions to scale variables
+#'   for this step. See [recipes::selections()] for more details.
+#' @param df a data frame that contains the population data to be used for
+#'   inverting the existing scaling.
+#' @param by A (possibly named) character vector of variables to join by.
+#' @param id a random id string
+#'
+#' @return an updated `frosting` postprocessor
+#' @export
+#' @examples
+#' library(dplyr)
+#' jhu <- epidatasets::cases_deaths_subset %>%
+#'   filter(time_value > "2021-11-01", geo_value %in% c("ca", "ny")) %>%
+#'   select(geo_value, time_value, cases)
+#'
+#' pop_data <- data.frame(states = c("ca", "ny"), value = c(20000, 30000))
+#'
+#' r <- epi_recipe(jhu) %>%
+#'   step_epi_YeoJohnson(
+#'     df = pop_data,
+#'     df_pop_col = "value",
+#'     by = c("geo_value" = "states"),
+#'     cases, suffix = "_scaled"
+#'   ) %>%
+#'   step_epi_lag(cases_scaled, lag = c(0, 7, 14)) %>%
+#'   step_epi_ahead(cases_scaled, ahead = 7, role = "outcome") %>%
+#'   step_epi_naomit()
+#'
+#' f <- frosting() %>%
+#'   layer_predict() %>%
+#'   layer_threshold(.pred) %>%
+#'   layer_naomit(.pred) %>%
+#'   layer_epi_YeoJohnson(.pred,
+#'     df = pop_data,
+#'     by = c("geo_value" = "states"),
+#'     df_pop_col = "value"
+#'   )
+#'
+#' wf <- epi_workflow(r, linear_reg()) %>%
+#'   fit(jhu) %>%
+#'   add_frosting(f)
+#'
+#' forecast(wf)
+layer_epi_YeoJohnson <- function(frosting, ..., lambdas = NULL, by = NULL, id = rand_id("epi_YeoJohnson")) {
+  checkmate::assert_tibble(lambdas, min.rows = 1, null.ok = TRUE)
+
+  add_layer(
+    frosting,
+    layer_epi_YeoJohnson_new(
+      lambdas = lambdas,
+      by = by,
+      terms = dplyr::enquos(...),
+      id = id
+    )
+  )
+}
+
+layer_epi_YeoJohnson_new <- function(lambdas, by, terms, id) {
+  layer("epi_YeoJohnson", lambdas = lambdas, by = by, terms = terms, id = id)
+}
+
+#' @export
+#' @importFrom workflows extract_preprocessor
+slather.layer_epi_YeoJohnson <- function(object, components, workflow, new_data, ...) {
+  rlang::check_dots_empty()
+
+  # Get the lambdas from the layer or from the workflow.
+  lambdas <- object$lambdas %||% get_lambdas_in_layer(workflow)
+
+  # If the by is not specified, try to infer it from the lambdas.
+  if (is.null(object$by)) {
+    # Assume `layer_predict` has calculated the prediction keys and other
+    # layers don't change the prediction key colnames:
+    prediction_key_colnames <- names(components$keys)
+    lhs_potential_keys <- prediction_key_colnames
+    rhs_potential_keys <- colnames(select(lambdas, -starts_with("lambda_")))
+    object$by <- intersect(lhs_potential_keys, rhs_potential_keys)
+    suggested_min_keys <- setdiff(lhs_potential_keys, "time_value")
+    if (!all(suggested_min_keys %in% object$by)) {
+      cli_warn(
+        c(
+          "{setdiff(suggested_min_keys, object$by)} {?was an/were} epikey column{?s} in the predictions,
+          but {?wasn't/weren't} found in the population `df`.",
+          "i" = "Defaulting to join by {object$by}",
+          ">" = "Double-check whether column names on the population `df` match those expected in your predictions",
+          ">" = "Consider using population data with breakdowns by {suggested_min_keys}",
+          ">" = "Manually specify `by =` to silence"
+        ),
+        class = "epipredict__layer_population_scaling__default_by_missing_suggested_keys"
+      )
+    }
+  }
+
+  # Establish the join columns.
+  object$by <- object$by %||%
+    intersect(
+      epipredict:::epi_keys_only(components$predictions),
+      colnames(select(lambdas, -starts_with("lambda_")))
+    )
+  joinby <- list(x = names(object$by) %||% object$by, y = object$by)
+  hardhat::validate_column_names(components$predictions, joinby$x)
+  hardhat::validate_column_names(lambdas, joinby$y)
+
+  # Join the lambdas.
+  components$predictions <- inner_join(
+    components$predictions,
+    lambdas,
+    by = object$by,
+    relationship = "many-to-one",
+    unmatched = c("error", "drop")
+  )
+
+  # TODO: There are many possibilities here:
+  # - (a) the terms can be empty, where we should probably default to
+  #   all_outcomes().
+  # - (b) explicitly giving all_outcomes(), we end here with terms being empty,
+  #   which doesn't seem right; need to make sure we pull in all the outcome
+  #   columns here. The question is what form should they have?
+  # - (c) if the user just specifies .pred, then we have to infer the outcome
+  #   from the mold, which is simple enough and the main case I have working.
+  # - (d) the user might specify outcomes of the form .pred_ahead_1_cases,
+  #   .pred_ahead_7_cases, etc. Is that the right format? Trying those out now
+  #   and getting errors downstream from forecast().
+  # Get the columns to transform.
+  exprs <- rlang::expr(c(!!!object$terms))
+  pos <- tidyselect::eval_select(exprs, components$predictions)
+  col_names <- names(pos)
+
+  # For every column, we need to use the appropriate lambda column, which differs per row.
+  # Note that yj_inverse() is vectorized.
+  if (identical(col_names, ".pred")) {
+    # In this case, we don't get a hint for the outcome column name, so we need to
+    # infer it from the mold. `outcomes` is a vector of objects like
+    # ahead_1_cases, ahead_7_cases, etc. We want to extract the cases part.
+    outcome_cols <- names(components$mold$outcomes) %>%
+      stringr::str_match("ahead_\\d+_(.*)") %>%
+      extract(, 2)
+
+    components$predictions <- components$predictions %>%
+      rowwise() %>%
+      mutate(.pred := yj_inverse(.pred, !!sym(paste0("lambda_", outcome_cols))))
+  } else if (identical(col_names, character(0))) {
+    # In this case, we should assume the user wants to transform all outcomes.
+    cli::cli_abort("Not specifying columns to layer Yeo-Johnson is not implemented yet.", call = rlang::caller_env())
+  } else {
+    # In this case, we assume that the user has specified the columns they want
+    # transformed here. We then need to determine the lambda columns for each of
+    # these columns. That is, we need to convert a vector of column names like
+    # c(".pred_ahead_1_case_rate", ".pred_ahead_7_case_rate") to
+    # c("lambda_ahead_1_case_rate", "lambda_ahead_7_case_rate").
+    original_outcome_cols <- str_match(col_names, ".pred_ahead_\\d+_(.*)")[, 2]
+    if (all(original_outcome_cols %nin% names(components$mold$outcomes))) {
+      cli_abort("All columns specified in `...` must be outcome columns.", call = rlang::caller_env())
+    }
+
+    for (i in seq_along(col_names)) {
+      col <- col_names[i]
+      lambda_col <- paste0("lambda_", original_outcome_cols[i])
+      components$predictions <- components$predictions %>%
+        rowwise() %>%
+        mutate(!!sym(col) := yj_inverse(!!sym(col), !!sym(lambda_col)))
+    }
+  }
+
+  # Remove the lambda columns.
+  components$predictions <- components$predictions %>%
+    select(-any_of(starts_with("lambda_"))) %>%
+    ungroup()
+  components
+}
+
+#' @export
+print.layer_epi_YeoJohnson <- function(x, width = max(20, options()$width - 30), ...) {
+  title <- "Yeo-Johnson transformation (see `lambdas` object for values) on "
+  epipredict:::print_layer(x$terms, title = title, width = width)
+}
+
+#' Inverse Yeo-Johnson transformation
+#'
+#' Inverse of `yj_transform` in step_yeo_johnson.R.
+#'
+#' @keywords internal
+yj_inverse <- function(x, lambda, eps = 0.001) {
+  if (is.na(lambda)) {
+    return(x)
+  }
+  if (!inherits(x, "tbl_df") || is.data.frame(x)) {
+    x <- unlist(x, use.names = FALSE)
+  } else {
+    if (!is.vector(x)) {
+      x <- as.vector(x)
+    }
+  }
+
+  dat_neg <- x < 0
+  ind_neg <- list(is = which(dat_neg), not = which(!dat_neg))
+  not_neg <- ind_neg[["not"]]
+  is_neg <- ind_neg[["is"]]
+
+  nn_inv_trans <- function(x, lambda) {
+    if (abs(lambda) < eps) {
+      # log(x + 1)
+      exp(x) - 1
+    } else {
+      # ((x + 1)^lambda - 1) / lambda
+      (lambda * x + 1)^(1 / lambda) - 1
+    }
+  }
+
+  ng_inv_trans <- function(x, lambda) {
+    if (abs(lambda - 2) < eps) {
+      # -log(-x + 1)
+      -(exp(-x) - 1)
+    } else {
+      # -((-x + 1)^(2 - lambda) - 1) / (2 - lambda)
+      -(((lambda - 2) * x + 1)^(1 / (2 - lambda)) - 1)
+    }
+  }
+
+  if (length(not_neg) > 0) {
+    x[not_neg] <- nn_inv_trans(x[not_neg], lambda)
+  }
+
+  if (length(is_neg) > 0) {
+    x[is_neg] <- ng_inv_trans(x[is_neg], lambda)
+  }
+  x
+}
+
+get_lambdas_in_layer <- function(workflow) {
+  this_recipe <- hardhat::extract_recipe(workflow)
+  if (!(this_recipe %>% recipes::detect_step("epi_YeoJohnson"))) {
+    cli_abort("`layer_epi_YeoJohnson` requires `step_epi_YeoJohnson` in the recipe.", call = rlang::caller_env())
+  }
+  for (step in this_recipe$steps) {
+    if (inherits(step, "step_epi_YeoJohnson")) {
+      lambdas <- step$lambdas
+      break
+    }
+  }
+  lambdas
+}
diff --git a/R/new_epipredict_steps/step_yeo_johnson.R b/R/new_epipredict_steps/step_yeo_johnson.R
new file mode 100644
index 00000000..81e66ba8
--- /dev/null
+++ b/R/new_epipredict_steps/step_yeo_johnson.R
@@ -0,0 +1,409 @@
+#' Yeo-Johnson transformation
+#'
+#' `step_epi_YeoJohnson()` creates a *specification* of a recipe step that will
+#' transform data using a Yeo-Johnson transformation. This fork works with panel
+#' data and is meant for epidata.
+#' TODO: Do an edit pass on this docstring.
+#'
+#' @inheritParams step_center
+#' @param lambdas A numeric vector of transformation values. This
+#'  is `NULL` until computed by [prep()].
+#' @param na_lambda_fill A numeric value to fill in for any
+#'  geos where the lambda cannot be estimated.
+#' @param limits A length 2 numeric vector defining the range to
+#'  compute the transformation parameter lambda.
+#' @param num_unique An integer where data that have less possible
+#'  values will not be evaluated for a transformation.
+#' @param na_rm A logical indicating whether missing values should be
+#'  removed.
+#' @param epi_keys_checked Internal. A character vector of key columns
+#'  that are expected in the data.
+#' @param skip A logical. Should the step be skipped when the recipe is
+#'  baked by [bake()]. On the `training` data, the step will always be
+#'  conducted (even if `skip = TRUE`).
+#' @template step-return
+#' @family individual transformation steps
+#' @export
+#' @details The Yeo-Johnson transformation is very similar to the
+#'  Box-Cox but does not require the input variables to be strictly
+#'  positive. In the package, the partial log-likelihood function is
+#'  directly optimized within a reasonable set of transformation
+#'  values (which can be changed by the user).
+#'
+#' This transformation is typically done on the outcome variable
+#'  using the residuals for a statistical model (such as ordinary
+#'  least squares). Here, a simple null model (intercept only) is
+#'  used to apply the transformation to the *predictor*
+#'  variables individually. This can have the effect of making the
+#'  variable distributions more symmetric.
+#'
+#' If the transformation parameters are estimated to be very
+#'  closed to the bounds, or if the optimization fails, a value of
+#'  `NA` is used and no transformation is applied.
+#'
+#' # Tidying
+#'
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
+#' columns `terms`, `value` , and `id`:
+#'
+#' \describe{
+#'   \item{terms}{character, the selectors or variables selected}
+#'   \item{value}{numeric, the lambda estimate}
+#'   \item{id}{character, id of this step}
+#' }
+#'
+#' @template case-weights-not-supported
+#'
+#' @references Yeo, I. K., and Johnson, R. A. (2000). A new family of power
+#'   transformations to improve normality or symmetry. *Biometrika*.
+#' @examplesIf rlang::is_installed("modeldata")
+#' data(biomass, package = "modeldata")
+#'
+#' biomass_tr <- biomass[biomass$dataset == "Training", ]
+#' biomass_te <- biomass[biomass$dataset == "Testing", ]
+#'
+#' rec <- recipe(
+#'   HHV ~ carbon + hydrogen + oxygen + nitrogen + sulfur,
+#'   data = biomass_tr
+#' )
+#'
+#' yj_transform <- step_epi_YeoJohnson(rec, all_numeric())
+#'
+#' yj_estimates <- prep(yj_transform, training = biomass_tr)
+#'
+#' yj_te <- bake(yj_estimates, biomass_te)
+#'
+#' plot(density(biomass_te$sulfur), main = "before")
+#' plot(density(yj_te$sulfur), main = "after")
+#'
+#' tidy(yj_transform, number = 1)
+#' tidy(yj_estimates, number = 1)
+step_epi_YeoJohnson <- function(
+  recipe,
+  ...,
+  role = NA,
+  trained = FALSE,
+  lambdas = NULL,
+  na_lambda_fill = 1 / 4,
+  limits = c(-5, 5),
+  num_unique = 5,
+  na_rm = TRUE,
+  epi_keys_checked = NULL,
+  skip = FALSE,
+  id = rand_id("epi_YeoJohnson")
+) {
+  checkmate::assert_numeric(limits, len = 2)
+  checkmate::assert_numeric(na_lambda_fill, lower = min(limits), upper = max(limits), len = 1)
+  checkmate::assert_numeric(num_unique, lower = 2, upper = Inf, len = 1)
+  checkmate::assert_logical(na_rm, len = 1)
+  checkmate::assert_logical(skip, len = 1)
+  if (is.null(epi_keys_checked)) {
+    epi_keys_checked <- key_colnames(recipe$template, exclude = "time_value")
+  }
+  add_step(
+    recipe,
+    step_epi_YeoJohnson_new(
+      terms = enquos(...),
+      role = role,
+      trained = trained,
+      lambdas = lambdas,
+      na_lambda_fill = na_lambda_fill,
+      limits = sort(limits)[1:2],
+      num_unique = num_unique,
+      na_rm = na_rm,
+      epi_keys_checked = epi_keys_checked,
+      forecast_date = NULL,
+      metadata = NULL,
+      columns = NULL,
+      skip = skip,
+      id = id
+    )
+  )
+}
+
+step_epi_YeoJohnson_new <- function(
+  terms,
+  role,
+  trained,
+  lambdas,
+  na_lambda_fill,
+  limits,
+  num_unique,
+  na_rm,
+  epi_keys_checked,
+  forecast_date,
+  metadata,
+  columns,
+  skip,
+  id
+) {
+  step(
+    subclass = "epi_YeoJohnson",
+    terms = terms,
+    role = role,
+    trained = trained,
+    lambdas = lambdas,
+    na_lambda_fill = na_lambda_fill,
+    limits = limits,
+    num_unique = num_unique,
+    na_rm = na_rm,
+    epi_keys_checked = epi_keys_checked,
+    forecast_date = forecast_date,
+    metadata = metadata,
+    columns = columns,
+    skip = skip,
+    id = id
+  )
+}
+
+#' @export
+prep.step_epi_YeoJohnson <- function(x, training, info = NULL, ...) {
+  # Check that the columns selected for transformation are numeric.
+  col_names <- recipes_eval_select(x$terms, training, info)
+  check_type(training[, col_names], types = c("double", "integer"))
+
+  lambdas <- get_lambdas_yj_table(
+    training,
+    col_names,
+    x$limits,
+    x$num_unique,
+    x$na_lambda_fill,
+    x$na_rm,
+    x$epi_keys_checked
+  )
+
+  step_epi_YeoJohnson_new(
+    terms = x$terms,
+    role = x$role,
+    trained = TRUE,
+    lambdas = lambdas,
+    na_lambda_fill = x$na_lambda_fill,
+    limits = x$limits,
+    num_unique = x$num_unique,
+    na_rm = x$na_rm,
+    epi_keys_checked = x$epi_keys_checked,
+    forecast_date = attributes(training)$metadata$as_of,
+    metadata = attributes(training)$metadata,
+    columns = col_names,
+    skip = x$skip,
+    id = x$id
+  )
+}
+
+#' @export
+bake.step_epi_YeoJohnson <- function(object, new_data, ...) {
+  # If not an epi_df, make it one assuming the template of training data.
+  # If it is an epi_df, check that the keys match.
+  # Imitating the pattern in step_adjust_latency().
+  if (!inherits(new_data, "epi_df") || is.null(attributes(new_data)$metadata$as_of)) {
+    new_data <- as_epi_df(
+      new_data,
+      as_of = object$forecast_date,
+      other_keys = object$metadata$other_keys %||% character()
+    )
+    new_data %@% metadata <- object$metadata
+    keys <- object$epi_keys_checked
+  }
+  # Check that the keys match.
+  keys <- key_colnames(new_data, exclude = "time_value")
+  if (!identical(keys, object$epi_keys_checked)) {
+    cli::cli_abort(
+      "The keys of the new data do not match the keys of the training data.",
+      call = rlang::caller_fn()
+    )
+  }
+  # Check that the columns for transformation are present in new_data.
+  col_names <- object$columns
+  check_new_data(col_names, object, new_data)
+
+  # Transform each column, using the appropriate lambda column per row.
+  # Note that yj_transform() is vectorized.
+  new_data %<>% left_join(object$lambdas, by = keys)
+  for (col in col_names) {
+    new_data <- new_data %>%
+      rowwise() %>%
+      mutate(!!col := yj_transform(!!sym(col), !!sym(paste0("lambda_", col))))
+  }
+  # Remove the lambda columns.
+  new_data %>%
+    select(-starts_with("lambda_")) %>%
+    ungroup()
+}
+
+#' @export
+print.step_epi_YeoJohnson <- function(x, width = max(20, options()$width - 39), ...) {
+  title <- "Yeo-Johnson transformation (see `lambdas` object for values) on "
+  epipredict:::print_epi_step(x$terms, x$terms, title = title, width = width)
+  invisible(x)
+}
+
+#' Compute the lambda values per group for each column.
+#'
+#' @keywords internal
+#' @rdname recipes-internal
+get_lambdas_yj_table <- function(training, col_names, limits, num_unique, na_lambda_fill, na_rm, epi_keys_checked) {
+  # Estimate the lambda for each column, creating a lambda_ column for each.
+  # Note that estimate_yj() operates on a vector.
+  lambdas <- training %>%
+    summarise(
+      across(all_of(col_names), ~ estimate_yj(.x, limits, num_unique, na_rm)),
+      .by = epi_keys_checked
+    ) %>%
+    rename_with(~ paste0("lambda_", .x), -all_of(epi_keys_checked))
+
+  # Check for NAs in any of the lambda_ columns.
+  # EDIT: This warning was too noisy. Keeping code around, in case we want it.
+  # for (col in col_names) {
+  #   if (any(is.na(values[[paste0("lambda_", col)]]))) {
+  #     cli::cli_warn(
+  #       c(
+  #         x = "Yeo-Johnson lambda could not be estimated for some geos for {col}.",
+  #         i = "Using lambda={x$na_lambda_fill} in these cases."
+  #       ),
+  #       call = rlang::caller_fn()
+  #     )
+  #   }
+  # }
+
+  # Fill in NAs with the default lambda.
+  lambdas %>%
+    mutate(across(starts_with("lambda_"), \(col) ifelse(is.na(col), na_lambda_fill, col)))
+}
+
+
+### Code below taken from recipes::step_YeoJohnson.
+### https://github.com/tidymodels/recipes/blob/v1.1.1/R/YeoJohnson.R#L172
+
+#' Internal Functions
+#'
+#' @keywords internal
+#' @rdname recipes-internal
+#' @export
+yj_transform <- function(x, lambda, ind_neg = NULL, eps = 0.001) {
+  if (is.na(lambda)) {
+    return(x)
+  }
+  if (!inherits(x, "tbl_df") || is.data.frame(x)) {
+    x <- unlist(x, use.names = FALSE)
+  } else {
+    if (!is.vector(x)) {
+      x <- as.vector(x)
+    }
+  }
+  # TODO case weights: can we use weights here?
+  if (is.null(ind_neg)) {
+    dat_neg <- x < 0
+    ind_neg <- list(is = which(dat_neg), not = which(!dat_neg))
+  }
+  not_neg <- ind_neg[["not"]]
+  is_neg <- ind_neg[["is"]]
+
+  nn_trans <- function(x, lambda) {
+    if (abs(lambda) < eps) {
+      log(x + 1)
+    } else {
+      ((x + 1)^lambda - 1) / lambda
+    }
+  }
+
+  ng_trans <- function(x, lambda) {
+    if (abs(lambda - 2) < eps) {
+      -log(-x + 1)
+    } else {
+      -((-x + 1)^(2 - lambda) - 1) / (2 - lambda)
+    }
+  }
+
+  if (length(not_neg) > 0) {
+    x[not_neg] <- nn_trans(x[not_neg], lambda)
+  }
+
+  if (length(is_neg) > 0) {
+    x[is_neg] <- ng_trans(x[is_neg], lambda)
+  }
+  x
+}
+
+
+## Helper for the log-likelihood calc for eq 3.1 of Yeo, I. K.,
+## & Johnson, R. A. (2000). A new family of power transformations
+## to improve normality or symmetry. Biometrika. page 957
+ll_yj <- function(lambda, y, ind_neg, const, eps = 0.001) {
+  n <- length(y)
+  y_t <- yj_transform(y, lambda, ind_neg)
+  mu_t <- mean(y_t)
+  var_t <- var(y_t) * (n - 1) / n
+  res <- -.5 * n * log(var_t) + (lambda - 1) * const
+  res
+}
+
+## eliminates missing data and returns -llh
+yj_obj <- function(lam, dat, ind_neg, const) {
+  ll_yj(lambda = lam, y = dat, ind_neg = ind_neg, const = const)
+}
+
+## estimates the values
+#' @keywords internal
+#' @rdname recipes-internal
+#' @export
+estimate_yj <- function(dat, limits = c(-5, 5), num_unique = 5, na_rm = TRUE, call = caller_env(2)) {
+  na_rows <- which(is.na(dat))
+  if (length(na_rows) > 0) {
+    if (na_rm) {
+      dat <- dat[-na_rows]
+    } else {
+      cli::cli_abort(
+        c(
+          x = "Missing values are not allowed for the YJ transformation.",
+          i = "See {.arg na_rm} option."
+        ),
+        call = call
+      )
+    }
+  }
+
+  eps <- .001
+  if (length(unique(dat)) < num_unique) {
+    return(NA)
+  }
+  dat_neg <- dat < 0
+  ind_neg <- list(is = which(dat_neg), not = which(!dat_neg))
+
+  const <- sum(sign(dat) * log(abs(dat) + 1))
+
+  res <- optimize(
+    yj_obj,
+    interval = limits,
+    maximum = TRUE,
+    dat = dat,
+    ind_neg = ind_neg,
+    const = const,
+    tol = .0001
+  )
+  lam <- res$maximum
+  if (abs(limits[1] - lam) <= eps | abs(limits[2] - lam) <= eps) {
+    lam <- NA
+  }
+  lam
+}
+
+# Copied from recipes:::tidy.step_BoxCox
+#
+#' @rdname tidy.recipe
+#' @export
+tidy.step_epi_YeoJohnson <- function(x, ...) {
+  if (is_trained(x)) {
+    res <- tibble(
+      terms = names(x$lambdas),
+      value = unname(x$lambdas)
+    )
+  } else {
+    term_names <- sel2char(x$terms)
+    res <- tibble(
+      terms = term_names,
+      value = na_dbl
+    )
+  }
+  res$id <- x$id
+  res
+}
diff --git a/_targets.yaml b/_targets.yaml
index 3f31a6e6..c76259e4 100644
--- a/_targets.yaml
+++ b/_targets.yaml
@@ -18,4 +18,8 @@ covid_hosp_prod:
   store: covid_hosp_prod
   use_crew: yes
   reporter_make: timestamp
-
+# test_proj:
+#   script: scripts/test_proj.R
+#   store: test_proj
+#   use_crew: yes
+#   reporter_make: timestamp
diff --git a/test-yeo-johnson.Rmd b/test-yeo-johnson.Rmd
new file mode 100644
index 00000000..24c395af
--- /dev/null
+++ b/test-yeo-johnson.Rmd
@@ -0,0 +1,85 @@
+---
+title: "Yeo-Johnson Transformation Testing"
+output:
+  html_document:
+    self_contained: True
+editor_options:
+  chunk_output_type: console
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(
+  digits = 3,
+  comment = "#>",
+  collapse = TRUE,
+  cache = FALSE,
+  dev.args = list(bg = "transparent"),
+  dpi = 300,
+  cache.lazy = FALSE,
+  out.width = "90%",
+  fig.align = "center",
+  fig.width = 9,
+  fig.height = 6
+)
+ggplot2::theme_set(ggplot2::theme_bw())
+options(
+  dplyr.print_min = 6,
+  dplyr.print_max = 6,
+  pillar.max_footer_lines = 2,
+  pillar.min_chars = 15,
+  stringr.view_n = 6,
+  pillar.bold = TRUE,
+  width = 77
+)
+suppressPackageStartupMessages(source(here::here("R", "load_all.R")))
+```
+
+## Setup and Data Loading
+
+First, we'll set up the environment and load the necessary data:
+
+```{r setup-env}
+# Simple case with keys = geo_value.
+filtered_data <- cases_deaths_subset %>%
+  filter(time_value > "2021-01-01", geo_value %in% c("ca", "ny")) %>%
+  select(geo_value, time_value, cases)
+```
+
+## Yeo-Johnson Transformation
+
+Let's apply the Yeo-Johnson transformation to our data:
+
+```{r yeo-johnson-transform}
+r <- epi_recipe(filtered_data) %>%
+  step_epi_YeoJohnson(cases) %>%
+  prep(filtered_data)
+
+# Display the recipe
+r
+
+# Inspect the lambda values for each state
+r$steps[[1]]$lambdas
+```
+
+## Manual Whitening Comparison
+
+Now, let's compare the Yeo-Johnson transformation with a manual whitening approach using quarter root scaling:
+
+```{r manual-whitening}
+# Apply the transformation
+out1 <- r %>% bake(filtered_data)
+out2 <- filtered_data %>%
+  mutate(cases = (cases + 0.01)^(1 / 4))
+
+filtered_data %>%
+  mutate(cases = log(cases)) %>%
+  ggplot(aes(time_value, cases)) +
+  geom_line(color = "blue") +
+  geom_line(data = out1 %>% mutate(cases = log(cases)),
+            aes(time_value, cases), color = "green") +
+  geom_line(data = out2 %>% mutate(cases = log(cases)),
+            aes(time_value, cases), color = "red") +
+  facet_wrap(~geo_value, scales = "free_y") +
+  theme_minimal() +
+  labs(title = "Yeo-Johnson transformation", x = "Time", y = "Log Cases")
+```
diff --git a/tests/testthat/test-yeo-johnson.R b/tests/testthat/test-yeo-johnson.R
new file mode 100644
index 00000000..b45deacf
--- /dev/null
+++ b/tests/testthat/test-yeo-johnson.R
@@ -0,0 +1,103 @@
+suppressPackageStartupMessages(source(here::here("R", "load_all.R")))
+
+test_that("Yeo-Johnson transformation inverts correctly", {
+  expect_true(
+    map_lgl(seq(-5, 5, 0.1), function(lambda) {
+      map_lgl(seq(0, 10, 0.1), \(x) abs(yj_inverse(yj_transform(x, lambda), lambda) - x) < 0.00001) %>% all()
+    }) %>%
+      all()
+  )
+})
+
+test_that("Yeo-Johnson steps and layers invert each other", {
+  jhu <- cases_deaths_subset %>%
+    filter(time_value > "2021-01-01", geo_value %in% c("ca", "ny")) %>%
+    select(geo_value, time_value, cases)
+  filtered_data <- jhu
+
+  # Get some lambda values
+  r <- epi_recipe(filtered_data) %>%
+    step_epi_YeoJohnson(cases) %>%
+    step_epi_lag(cases, lag = 0) %>%
+    step_epi_ahead(cases, ahead = 0, role = "outcome") %>%
+    step_epi_naomit()
+  tr <- r %>% prep(filtered_data)
+
+  # Check general lambda values tibble structure
+  expect_true("lambda_cases" %in% names(tr$steps[[1]]$lambdas))
+  expect_true(is.numeric(tr$steps[[1]]$lambdas$lambda_cases))
+  # Still works on a tibble
+  expect_equal(
+    tr %>% bake(filtered_data %>% as_tibble()),
+    tr %>% bake(filtered_data)
+  )
+
+  # Make sure that the inverse transformation works
+  f <- frosting() %>%
+    layer_predict() %>%
+    layer_epi_YeoJohnson(.pred)
+  wf <- epi_workflow(r, linear_reg()) %>%
+    fit(filtered_data) %>%
+    add_frosting(f)
+  out1 <- filtered_data %>% as_tibble() %>% slice_max(time_value, by = geo_value)
+  out2 <- forecast(wf) %>% rename(cases = .pred)
+  expect_equal(out1, out2)
+
+  # Make sure it works when there are multiple predictors and outcomes
+  jhu_multi <- epidatasets::covid_case_death_rates_extended %>%
+    filter(time_value > "2021-01-01", geo_value %in% c("ca", "ny")) %>%
+    select(geo_value, time_value, case_rate, death_rate)
+  filtered_data <- jhu_multi
+  r <- epi_recipe(filtered_data) %>%
+    step_epi_YeoJohnson(case_rate, death_rate) %>%
+    step_epi_lag(case_rate, death_rate, lag = 0) %>%
+    step_epi_ahead(case_rate, death_rate, ahead = 0, role = "outcome") %>%
+    step_epi_naomit()
+  tr <- r %>% prep(filtered_data)
+
+  # Check general lambda values tibble structure
+  expect_true("lambda_case_rate" %in% names(tr$steps[[1]]$lambdas))
+  expect_true("lambda_death_rate" %in% names(tr$steps[[1]]$lambdas))
+  expect_true(is.numeric(tr$steps[[1]]$lambdas$lambda_case_rate))
+  expect_true(is.numeric(tr$steps[[1]]$lambdas$lambda_death_rate))
+
+  # TODO: Make sure that the inverse transformation works
+  f <- frosting() %>%
+    layer_predict() %>%
+    layer_epi_YeoJohnson(.pred_ahead_0_case_rate)
+  wf <- epi_workflow(r, linear_reg()) %>%
+    fit(filtered_data) %>%
+    add_frosting(f)
+  out1 <- filtered_data %>% as_tibble() %>% slice_max(time_value, by = geo_value)
+  # debugonce(slather.layer_epi_YeoJohnson)
+  out2 <- forecast(wf) %>% rename(case_rate = .pred)
+  expect_equal(out1, out2)
+})
+
+test_that("Yeo-Johnson steps and layers invert each other when other_keys are present", {
+  jhu <- cases_deaths_subset %>%
+    filter(time_value > "2021-01-01", geo_value %in% c("ca", "ny")) %>%
+    select(geo_value, time_value, cases)
+  filtered_data <- jhu
+
+  # Get some lambda values
+  r <- epi_recipe(filtered_data) %>%
+    step_epi_YeoJohnson(cases) %>%
+    step_epi_lag(cases, lag = 0) %>%
+    step_epi_ahead(cases, ahead = 0, role = "outcome") %>%
+    step_epi_naomit()
+  tr <- r %>% prep(filtered_data)
+  # Check for fixed lambda values
+  expect_true(all(near(tr$steps[[1]]$lambdas$lambda_cases, c(0.856, 0.207), tol = 0.001)))
+
+  # Make sure that the inverse transformation works
+  f <- frosting() %>%
+    layer_predict() %>%
+    layer_epi_YeoJohnson(.pred)
+  wf <- epi_workflow(r, linear_reg()) %>%
+    fit(filtered_data) %>%
+    add_frosting(f)
+  out1 <- filtered_data %>% as_tibble() %>% slice_max(time_value, by = geo_value)
+  out2 <- forecast(wf) %>% rename(cases = .pred)
+  expect_equal(out1, out2)
+})

From 0319acc846dedda0125bc880f68f4443fc6cc843 Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov <dshemetov@ucdavis.edu>
Date: Wed, 19 Mar 2025 10:29:25 -0700
Subject: [PATCH 02/20] Update R/new_epipredict_steps/step_yeo_johnson.R

Co-authored-by: Daniel McDonald <dajmcdon@gmail.com>
---
 R/new_epipredict_steps/step_yeo_johnson.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/new_epipredict_steps/step_yeo_johnson.R b/R/new_epipredict_steps/step_yeo_johnson.R
index 81e66ba8..d4ca0765 100644
--- a/R/new_epipredict_steps/step_yeo_johnson.R
+++ b/R/new_epipredict_steps/step_yeo_johnson.R
@@ -81,7 +81,7 @@
 step_epi_YeoJohnson <- function(
   recipe,
   ...,
-  role = NA,
+  role = "predictor",
   trained = FALSE,
   lambdas = NULL,
   na_lambda_fill = 1 / 4,

From 8d029c22f799b443bad1ad06100cb87cc1f93180 Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov <dshemetov@ucdavis.edu>
Date: Wed, 19 Mar 2025 14:43:31 -0700
Subject: [PATCH 03/20] Update R/new_epipredict_steps/step_yeo_johnson.R

Co-authored-by: Daniel McDonald <dajmcdon@gmail.com>
---
 R/new_epipredict_steps/step_yeo_johnson.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/new_epipredict_steps/step_yeo_johnson.R b/R/new_epipredict_steps/step_yeo_johnson.R
index d4ca0765..b4276c4f 100644
--- a/R/new_epipredict_steps/step_yeo_johnson.R
+++ b/R/new_epipredict_steps/step_yeo_johnson.R
@@ -218,7 +218,7 @@ bake.step_epi_YeoJohnson <- function(object, new_data, ...) {
 
   # Transform each column, using the appropriate lambda column per row.
   # Note that yj_transform() is vectorized.
-  new_data %<>% left_join(object$lambdas, by = keys)
+  new_data <- left_join(new_data, object$lambdas, by = keys)
   for (col in col_names) {
     new_data <- new_data %>%
       rowwise() %>%

From cb6b431c4c8a901557f4fd2c770e16e9d507075e Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov <dshemetov@ucdavis.edu>
Date: Wed, 19 Mar 2025 14:49:20 -0700
Subject: [PATCH 04/20] Update R/new_epipredict_steps/step_yeo_johnson.R

Co-authored-by: Daniel McDonald <dajmcdon@gmail.com>
---
 R/new_epipredict_steps/step_yeo_johnson.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/new_epipredict_steps/step_yeo_johnson.R b/R/new_epipredict_steps/step_yeo_johnson.R
index b4276c4f..53bd5515 100644
--- a/R/new_epipredict_steps/step_yeo_johnson.R
+++ b/R/new_epipredict_steps/step_yeo_johnson.R
@@ -160,7 +160,7 @@ step_epi_YeoJohnson_new <- function(
 prep.step_epi_YeoJohnson <- function(x, training, info = NULL, ...) {
   # Check that the columns selected for transformation are numeric.
   col_names <- recipes_eval_select(x$terms, training, info)
-  check_type(training[, col_names], types = c("double", "integer"))
+  recipes::check_type(training[, col_names], types = c("double", "integer"))
 
   lambdas <- get_lambdas_yj_table(
     training,

From 415d99a61690b3323d00f437f2ab1db3d9e89c7d Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov <dshemetov@ucdavis.edu>
Date: Wed, 19 Mar 2025 15:52:29 -0700
Subject: [PATCH 05/20] fix: temp columns lambda_ -> .lambda_

---
 R/new_epipredict_steps/layer_yeo_johnson.R | 10 +++++-----
 R/new_epipredict_steps/step_yeo_johnson.R  | 10 +++++-----
 tests/testthat/test-yeo-johnson.R          | 16 +++++++++-------
 3 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/R/new_epipredict_steps/layer_yeo_johnson.R b/R/new_epipredict_steps/layer_yeo_johnson.R
index aa6c6feb..6eb5f9d9 100644
--- a/R/new_epipredict_steps/layer_yeo_johnson.R
+++ b/R/new_epipredict_steps/layer_yeo_johnson.R
@@ -101,7 +101,7 @@ slather.layer_epi_YeoJohnson <- function(object, components, workflow, new_data,
   object$by <- object$by %||%
     intersect(
       epipredict:::epi_keys_only(components$predictions),
-      colnames(select(lambdas, -starts_with("lambda_")))
+      colnames(select(lambdas, -starts_with(".lambda_")))
     )
   joinby <- list(x = names(object$by) %||% object$by, y = object$by)
   hardhat::validate_column_names(components$predictions, joinby$x)
@@ -133,7 +133,7 @@ slather.layer_epi_YeoJohnson <- function(object, components, workflow, new_data,
   col_names <- names(pos)
 
   # For every column, we need to use the appropriate lambda column, which differs per row.
-  # Note that yj_inverse() is vectorized.
+  # Note that yj_inverse() is vectorized in x, but not in lambda.
   if (identical(col_names, ".pred")) {
     # In this case, we don't get a hint for the outcome column name, so we need to
     # infer it from the mold. `outcomes` is a vector of objects like
@@ -144,7 +144,7 @@ slather.layer_epi_YeoJohnson <- function(object, components, workflow, new_data,
 
     components$predictions <- components$predictions %>%
       rowwise() %>%
-      mutate(.pred := yj_inverse(.pred, !!sym(paste0("lambda_", outcome_cols))))
+      mutate(.pred := yj_inverse(.pred, !!sym(paste0(".lambda_", outcome_cols))))
   } else if (identical(col_names, character(0))) {
     # In this case, we should assume the user wants to transform all outcomes.
     cli::cli_abort("Not specifying columns to layer Yeo-Johnson is not implemented yet.", call = rlang::caller_env())
@@ -161,7 +161,7 @@ slather.layer_epi_YeoJohnson <- function(object, components, workflow, new_data,
 
     for (i in seq_along(col_names)) {
       col <- col_names[i]
-      lambda_col <- paste0("lambda_", original_outcome_cols[i])
+      lambda_col <- paste0(".lambda_", original_outcome_cols[i])
       components$predictions <- components$predictions %>%
         rowwise() %>%
         mutate(!!sym(col) := yj_inverse(!!sym(col), !!sym(lambda_col)))
@@ -170,7 +170,7 @@ slather.layer_epi_YeoJohnson <- function(object, components, workflow, new_data,
 
   # Remove the lambda columns.
   components$predictions <- components$predictions %>%
-    select(-any_of(starts_with("lambda_"))) %>%
+    select(-any_of(starts_with(".lambda_"))) %>%
     ungroup()
   components
 }
diff --git a/R/new_epipredict_steps/step_yeo_johnson.R b/R/new_epipredict_steps/step_yeo_johnson.R
index 53bd5515..e9a2633e 100644
--- a/R/new_epipredict_steps/step_yeo_johnson.R
+++ b/R/new_epipredict_steps/step_yeo_johnson.R
@@ -217,16 +217,16 @@ bake.step_epi_YeoJohnson <- function(object, new_data, ...) {
   check_new_data(col_names, object, new_data)
 
   # Transform each column, using the appropriate lambda column per row.
-  # Note that yj_transform() is vectorized.
+  # Note that yj_transform() is vectorized in x, but not in lambda.
   new_data <- left_join(new_data, object$lambdas, by = keys)
   for (col in col_names) {
     new_data <- new_data %>%
       rowwise() %>%
-      mutate(!!col := yj_transform(!!sym(col), !!sym(paste0("lambda_", col))))
+      mutate(!!col := yj_transform(!!sym(col), !!sym(paste0(".lambda_", col))))
   }
   # Remove the lambda columns.
   new_data %>%
-    select(-starts_with("lambda_")) %>%
+    select(-starts_with(".lambda_")) %>%
     ungroup()
 }
 
@@ -249,7 +249,7 @@ get_lambdas_yj_table <- function(training, col_names, limits, num_unique, na_lam
       across(all_of(col_names), ~ estimate_yj(.x, limits, num_unique, na_rm)),
       .by = epi_keys_checked
     ) %>%
-    rename_with(~ paste0("lambda_", .x), -all_of(epi_keys_checked))
+    rename_with(~ paste0(".lambda_", .x), -all_of(epi_keys_checked))
 
   # Check for NAs in any of the lambda_ columns.
   # EDIT: This warning was too noisy. Keeping code around, in case we want it.
@@ -267,7 +267,7 @@ get_lambdas_yj_table <- function(training, col_names, limits, num_unique, na_lam
 
   # Fill in NAs with the default lambda.
   lambdas %>%
-    mutate(across(starts_with("lambda_"), \(col) ifelse(is.na(col), na_lambda_fill, col)))
+    mutate(across(starts_with(".lambda_"), \(col) ifelse(is.na(col), na_lambda_fill, col)))
 }
 
 
diff --git a/tests/testthat/test-yeo-johnson.R b/tests/testthat/test-yeo-johnson.R
index b45deacf..1c5d42dd 100644
--- a/tests/testthat/test-yeo-johnson.R
+++ b/tests/testthat/test-yeo-johnson.R
@@ -24,8 +24,8 @@ test_that("Yeo-Johnson steps and layers invert each other", {
   tr <- r %>% prep(filtered_data)
 
   # Check general lambda values tibble structure
-  expect_true("lambda_cases" %in% names(tr$steps[[1]]$lambdas))
-  expect_true(is.numeric(tr$steps[[1]]$lambdas$lambda_cases))
+  expect_true(".lambda_cases" %in% names(tr$steps[[1]]$lambdas))
+  expect_true(is.numeric(tr$steps[[1]]$lambdas$.lambda_cases))
   # Still works on a tibble
   expect_equal(
     tr %>% bake(filtered_data %>% as_tibble()),
@@ -56,12 +56,13 @@ test_that("Yeo-Johnson steps and layers invert each other", {
   tr <- r %>% prep(filtered_data)
 
   # Check general lambda values tibble structure
-  expect_true("lambda_case_rate" %in% names(tr$steps[[1]]$lambdas))
-  expect_true("lambda_death_rate" %in% names(tr$steps[[1]]$lambdas))
-  expect_true(is.numeric(tr$steps[[1]]$lambdas$lambda_case_rate))
-  expect_true(is.numeric(tr$steps[[1]]$lambdas$lambda_death_rate))
+  expect_true(".lambda_case_rate" %in% names(tr$steps[[1]]$lambdas))
+  expect_true(".lambda_death_rate" %in% names(tr$steps[[1]]$lambdas))
+  expect_true(is.numeric(tr$steps[[1]]$lambdas$.lambda_case_rate))
+  expect_true(is.numeric(tr$steps[[1]]$lambdas$.lambda_death_rate))
 
   # TODO: Make sure that the inverse transformation works
+  skip("TODO")
   f <- frosting() %>%
     layer_predict() %>%
     layer_epi_YeoJohnson(.pred_ahead_0_case_rate)
@@ -75,6 +76,7 @@ test_that("Yeo-Johnson steps and layers invert each other", {
 })
 
 test_that("Yeo-Johnson steps and layers invert each other when other_keys are present", {
+  skip("TODO")
   jhu <- cases_deaths_subset %>%
     filter(time_value > "2021-01-01", geo_value %in% c("ca", "ny")) %>%
     select(geo_value, time_value, cases)
@@ -88,7 +90,7 @@ test_that("Yeo-Johnson steps and layers invert each other when other_keys are pr
     step_epi_naomit()
   tr <- r %>% prep(filtered_data)
   # Check for fixed lambda values
-  expect_true(all(near(tr$steps[[1]]$lambdas$lambda_cases, c(0.856, 0.207), tol = 0.001)))
+  expect_true(all(near(tr$steps[[1]]$lambdas$.lambda_cases, c(0.856, 0.207), tol = 0.001)))
 
   # Make sure that the inverse transformation works
   f <- frosting() %>%

From ca2df2f47663519c48c5767bc7d00ec146dbed4a Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov <dshemetov@ucdavis.edu>
Date: Wed, 19 Mar 2025 16:12:14 -0700
Subject: [PATCH 06/20] fix: remove epi_keys_checked

---
 R/new_epipredict_steps/step_yeo_johnson.R | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/R/new_epipredict_steps/step_yeo_johnson.R b/R/new_epipredict_steps/step_yeo_johnson.R
index e9a2633e..384fc272 100644
--- a/R/new_epipredict_steps/step_yeo_johnson.R
+++ b/R/new_epipredict_steps/step_yeo_johnson.R
@@ -16,8 +16,6 @@
 #'  values will not be evaluated for a transformation.
 #' @param na_rm A logical indicating whether missing values should be
 #'  removed.
-#' @param epi_keys_checked Internal. A character vector of key columns
-#'  that are expected in the data.
 #' @param skip A logical. Should the step be skipped when the recipe is
 #'  baked by [bake()]. On the `training` data, the step will always be
 #'  conducted (even if `skip = TRUE`).
@@ -88,7 +86,6 @@ step_epi_YeoJohnson <- function(
   limits = c(-5, 5),
   num_unique = 5,
   na_rm = TRUE,
-  epi_keys_checked = NULL,
   skip = FALSE,
   id = rand_id("epi_YeoJohnson")
 ) {
@@ -97,9 +94,6 @@ step_epi_YeoJohnson <- function(
   checkmate::assert_numeric(num_unique, lower = 2, upper = Inf, len = 1)
   checkmate::assert_logical(na_rm, len = 1)
   checkmate::assert_logical(skip, len = 1)
-  if (is.null(epi_keys_checked)) {
-    epi_keys_checked <- key_colnames(recipe$template, exclude = "time_value")
-  }
   add_step(
     recipe,
     step_epi_YeoJohnson_new(
@@ -111,7 +105,6 @@ step_epi_YeoJohnson <- function(
       limits = sort(limits)[1:2],
       num_unique = num_unique,
       na_rm = na_rm,
-      epi_keys_checked = epi_keys_checked,
       forecast_date = NULL,
       metadata = NULL,
       columns = NULL,
@@ -130,7 +123,6 @@ step_epi_YeoJohnson_new <- function(
   limits,
   num_unique,
   na_rm,
-  epi_keys_checked,
   forecast_date,
   metadata,
   columns,
@@ -147,7 +139,6 @@ step_epi_YeoJohnson_new <- function(
     limits = limits,
     num_unique = num_unique,
     na_rm = na_rm,
-    epi_keys_checked = epi_keys_checked,
     forecast_date = forecast_date,
     metadata = metadata,
     columns = columns,
@@ -169,7 +160,7 @@ prep.step_epi_YeoJohnson <- function(x, training, info = NULL, ...) {
     x$num_unique,
     x$na_lambda_fill,
     x$na_rm,
-    x$epi_keys_checked
+    key_colnames(training, exclude = "time_value")
   )
 
   step_epi_YeoJohnson_new(
@@ -181,7 +172,6 @@ prep.step_epi_YeoJohnson <- function(x, training, info = NULL, ...) {
     limits = x$limits,
     num_unique = x$num_unique,
     na_rm = x$na_rm,
-    epi_keys_checked = x$epi_keys_checked,
     forecast_date = attributes(training)$metadata$as_of,
     metadata = attributes(training)$metadata,
     columns = col_names,
@@ -202,11 +192,11 @@ bake.step_epi_YeoJohnson <- function(object, new_data, ...) {
       other_keys = object$metadata$other_keys %||% character()
     )
     new_data %@% metadata <- object$metadata
-    keys <- object$epi_keys_checked
   }
   # Check that the keys match.
   keys <- key_colnames(new_data, exclude = "time_value")
-  if (!identical(keys, object$epi_keys_checked)) {
+  old_keys <- object$lambdas %>% select(-starts_with(".lambda_")) %>% colnames()
+  if (!identical(keys, old_keys)) {
     cli::cli_abort(
       "The keys of the new data do not match the keys of the training data.",
       call = rlang::caller_fn()

From 17fac6a3b9f57ee4cf0136409ed0cbc2973b5dcc Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov <dshemetov@ucdavis.edu>
Date: Thu, 20 Mar 2025 17:56:34 -0700
Subject: [PATCH 07/20] Update R/new_epipredict_steps/step_yeo_johnson.R

Co-authored-by: David Weber <david.weber2@pm.me>
---
 R/new_epipredict_steps/step_yeo_johnson.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/new_epipredict_steps/step_yeo_johnson.R b/R/new_epipredict_steps/step_yeo_johnson.R
index 384fc272..91ecb4d7 100644
--- a/R/new_epipredict_steps/step_yeo_johnson.R
+++ b/R/new_epipredict_steps/step_yeo_johnson.R
@@ -196,7 +196,7 @@ bake.step_epi_YeoJohnson <- function(object, new_data, ...) {
   # Check that the keys match.
   keys <- key_colnames(new_data, exclude = "time_value")
   old_keys <- object$lambdas %>% select(-starts_with(".lambda_")) %>% colnames()
-  if (!identical(keys, old_keys)) {
+  if (!all(keys %in% old_keys)) {
     cli::cli_abort(
       "The keys of the new data do not match the keys of the training data.",
       call = rlang::caller_fn()

From a14c932623bae69976330ea2e71663bffc0552af Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov <dshemetov@ucdavis.edu>
Date: Thu, 20 Mar 2025 18:02:09 -0700
Subject: [PATCH 08/20] Update test-yeo-johnson.Rmd

Co-authored-by: David Weber <david.weber2@pm.me>
---
 test-yeo-johnson.Rmd | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/test-yeo-johnson.Rmd b/test-yeo-johnson.Rmd
index 24c395af..27379ad7 100644
--- a/test-yeo-johnson.Rmd
+++ b/test-yeo-johnson.Rmd
@@ -71,15 +71,18 @@ out1 <- r %>% bake(filtered_data)
 out2 <- filtered_data %>%
   mutate(cases = (cases + 0.01)^(1 / 4))
 
-filtered_data %>%
-  mutate(cases = log(cases)) %>%
-  ggplot(aes(time_value, cases)) +
-  geom_line(color = "blue") +
-  geom_line(data = out1 %>% mutate(cases = log(cases)),
-            aes(time_value, cases), color = "green") +
-  geom_line(data = out2 %>% mutate(cases = log(cases)),
-            aes(time_value, cases), color = "red") +
-  facet_wrap(~geo_value, scales = "free_y") +
+all_together <- rbind(
+  filtered_data %>%
+  mutate(name = "raw"),
+  out1 %>% mutate(name = "yeo-johnson"),
+  out2 %>% mutate(name = "quarter-root")
+)
+
+all_together %>%
+  ggplot(aes(time_value, cases, color = name)) +
+  geom_line() +
+  facet_grid(~geo_value, scales = "free_y") +
   theme_minimal() +
-  labs(title = "Yeo-Johnson transformation", x = "Time", y = "Log Cases")
+  labs(title = "Yeo-Johnson transformation", x = "Time", y = "Log Cases") +
+  scale_y_log10()
 ```

From 66a15efb98e4ff16c60b5c2a16874d72f722c7f3 Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov <dshemetov@ucdavis.edu>
Date: Thu, 20 Mar 2025 18:05:08 -0700
Subject: [PATCH 09/20] Update tests/testthat/test-yeo-johnson.R

Co-authored-by: David Weber <david.weber2@pm.me>
---
 tests/testthat/test-yeo-johnson.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/testthat/test-yeo-johnson.R b/tests/testthat/test-yeo-johnson.R
index 1c5d42dd..07b9ab16 100644
--- a/tests/testthat/test-yeo-johnson.R
+++ b/tests/testthat/test-yeo-johnson.R
@@ -3,7 +3,7 @@ suppressPackageStartupMessages(source(here::here("R", "load_all.R")))
 test_that("Yeo-Johnson transformation inverts correctly", {
   expect_true(
     map_lgl(seq(-5, 5, 0.1), function(lambda) {
-      map_lgl(seq(0, 10, 0.1), \(x) abs(yj_inverse(yj_transform(x, lambda), lambda) - x) < 0.00001) %>% all()
+      map_lgl(seq(-10, 10, 0.1), \(x) abs(yj_inverse(yj_transform(x, lambda), lambda) - x) < 0.00001) %>% all()
     }) %>%
       all()
   )

From 80c5cd0069e1cb19c1352d88f8382563e714ad1a Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov <dshemetov@ucdavis.edu>
Date: Thu, 20 Mar 2025 18:08:32 -0700
Subject: [PATCH 10/20] Update R/new_epipredict_steps/layer_yeo_johnson.R

Co-authored-by: David Weber <david.weber2@pm.me>
---
 R/new_epipredict_steps/layer_yeo_johnson.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/new_epipredict_steps/layer_yeo_johnson.R b/R/new_epipredict_steps/layer_yeo_johnson.R
index 6eb5f9d9..0c697f71 100644
--- a/R/new_epipredict_steps/layer_yeo_johnson.R
+++ b/R/new_epipredict_steps/layer_yeo_johnson.R
@@ -155,7 +155,8 @@ slather.layer_epi_YeoJohnson <- function(object, components, workflow, new_data,
     # c(".pred_ahead_1_case_rate", ".pred_ahead_7_case_rate") to
     # c("lambda_ahead_1_case_rate", "lambda_ahead_7_case_rate").
     original_outcome_cols <- str_match(col_names, ".pred_ahead_\\d+_(.*)")[, 2]
-    if (all(original_outcome_cols %nin% names(components$mold$outcomes))) {
+    outcomes_wout_ahead <- str_match(names(components$mold$outcomes), "ahead_\\d+_(.*)")[,2]
+    if (all(original_outcome_cols %nin% outcomes_wout_ahead)) {
       cli_abort("All columns specified in `...` must be outcome columns.", call = rlang::caller_env())
     }
 

From cd87b0bc6c969b290061e07df037000e958e23b7 Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov <dshemetov@ucdavis.edu>
Date: Thu, 20 Mar 2025 18:08:38 -0700
Subject: [PATCH 11/20] Update tests/testthat/test-yeo-johnson.R

Co-authored-by: David Weber <david.weber2@pm.me>
---
 tests/testthat/test-yeo-johnson.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/testthat/test-yeo-johnson.R b/tests/testthat/test-yeo-johnson.R
index 07b9ab16..03a22a93 100644
--- a/tests/testthat/test-yeo-johnson.R
+++ b/tests/testthat/test-yeo-johnson.R
@@ -65,7 +65,7 @@ test_that("Yeo-Johnson steps and layers invert each other", {
   skip("TODO")
   f <- frosting() %>%
     layer_predict() %>%
-    layer_epi_YeoJohnson(.pred_ahead_0_case_rate)
+    layer_epi_YeoJohnson(.pred_ahead_0_case_rate, .pred_ahead_0_death_rate)
   wf <- epi_workflow(r, linear_reg()) %>%
     fit(filtered_data) %>%
     add_frosting(f)

From 88cb4753e4d6cac206eeba3996e9d12c0b880c01 Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov <dshemetov@ucdavis.edu>
Date: Thu, 20 Mar 2025 18:08:48 -0700
Subject: [PATCH 12/20] Update tests/testthat/test-yeo-johnson.R

Co-authored-by: David Weber <david.weber2@pm.me>
---
 tests/testthat/test-yeo-johnson.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/testthat/test-yeo-johnson.R b/tests/testthat/test-yeo-johnson.R
index 03a22a93..0980348a 100644
--- a/tests/testthat/test-yeo-johnson.R
+++ b/tests/testthat/test-yeo-johnson.R
@@ -71,7 +71,7 @@ test_that("Yeo-Johnson steps and layers invert each other", {
     add_frosting(f)
   out1 <- filtered_data %>% as_tibble() %>% slice_max(time_value, by = geo_value)
   # debugonce(slather.layer_epi_YeoJohnson)
-  out2 <- forecast(wf) %>% rename(case_rate = .pred)
+  out2 <- forecast(wf) %>% rename(case_rate = .pred_ahead_0_case_rate, death_rate = .pred_ahead_0_death_rate)
   expect_equal(out1, out2)
 })
 

From 8f200f39f1c2e5ac5ec840026b623e4315ce472d Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov <dshemetov@ucdavis.edu>
Date: Thu, 20 Mar 2025 18:10:05 -0700
Subject: [PATCH 13/20] merge

---
 R/new_epipredict_steps/layer_yeo_johnson.R | 2 +-
 R/new_epipredict_steps/step_yeo_johnson.R  | 2 +-
 tests/testthat/test-yeo-johnson.R          | 2 ++
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/R/new_epipredict_steps/layer_yeo_johnson.R b/R/new_epipredict_steps/layer_yeo_johnson.R
index 0c697f71..731297cc 100644
--- a/R/new_epipredict_steps/layer_yeo_johnson.R
+++ b/R/new_epipredict_steps/layer_yeo_johnson.R
@@ -156,7 +156,7 @@ slather.layer_epi_YeoJohnson <- function(object, components, workflow, new_data,
     # c("lambda_ahead_1_case_rate", "lambda_ahead_7_case_rate").
     original_outcome_cols <- str_match(col_names, ".pred_ahead_\\d+_(.*)")[, 2]
     outcomes_wout_ahead <- str_match(names(components$mold$outcomes), "ahead_\\d+_(.*)")[,2]
-    if (all(original_outcome_cols %nin% outcomes_wout_ahead)) {
+    if (any(original_outcome_cols %nin% outcomes_wout_ahead)) {
       cli_abort("All columns specified in `...` must be outcome columns.", call = rlang::caller_env())
     }
 
diff --git a/R/new_epipredict_steps/step_yeo_johnson.R b/R/new_epipredict_steps/step_yeo_johnson.R
index 91ecb4d7..1abae34d 100644
--- a/R/new_epipredict_steps/step_yeo_johnson.R
+++ b/R/new_epipredict_steps/step_yeo_johnson.R
@@ -237,7 +237,7 @@ get_lambdas_yj_table <- function(training, col_names, limits, num_unique, na_lam
   lambdas <- training %>%
     summarise(
       across(all_of(col_names), ~ estimate_yj(.x, limits, num_unique, na_rm)),
-      .by = epi_keys_checked
+      .by = all_of(epi_keys_checked)
     ) %>%
     rename_with(~ paste0(".lambda_", .x), -all_of(epi_keys_checked))
 
diff --git a/tests/testthat/test-yeo-johnson.R b/tests/testthat/test-yeo-johnson.R
index 0980348a..4c490325 100644
--- a/tests/testthat/test-yeo-johnson.R
+++ b/tests/testthat/test-yeo-johnson.R
@@ -1,6 +1,8 @@
 suppressPackageStartupMessages(source(here::here("R", "load_all.R")))
 
 test_that("Yeo-Johnson transformation inverts correctly", {
+  # Note that the special lambda values of 0 and 2 are covered by the tests
+  # below.
   expect_true(
     map_lgl(seq(-5, 5, 0.1), function(lambda) {
       map_lgl(seq(-10, 10, 0.1), \(x) abs(yj_inverse(yj_transform(x, lambda), lambda) - x) < 0.00001) %>% all()

From b56559883d824fc2166be09a390611cf0b327bed Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov <dshemetov@ucdavis.edu>
Date: Thu, 20 Mar 2025 18:11:07 -0700
Subject: [PATCH 14/20] test: inverse transform with multiple outcomes works

---
 tests/testthat/test-yeo-johnson.R | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/testthat/test-yeo-johnson.R b/tests/testthat/test-yeo-johnson.R
index 4c490325..ea5653a8 100644
--- a/tests/testthat/test-yeo-johnson.R
+++ b/tests/testthat/test-yeo-johnson.R
@@ -63,8 +63,7 @@ test_that("Yeo-Johnson steps and layers invert each other", {
   expect_true(is.numeric(tr$steps[[1]]$lambdas$.lambda_case_rate))
   expect_true(is.numeric(tr$steps[[1]]$lambdas$.lambda_death_rate))
 
-  # TODO: Make sure that the inverse transformation works
-  skip("TODO")
+  # Make sure that the inverse transformation works
   f <- frosting() %>%
     layer_predict() %>%
     layer_epi_YeoJohnson(.pred_ahead_0_case_rate, .pred_ahead_0_death_rate)

From d579ef8a48498e7645cbea1d47e148fd1ba6ac35 Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov <dshemetov@ucdavis.edu>
Date: Thu, 20 Mar 2025 19:41:19 -0700
Subject: [PATCH 15/20] ci+test: update r version, remove snap

---
 .github/workflows/tests.yaml                | 3 +++
 tests/testthat/_snaps/forecasters-basics.md | 5 -----
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 5cf55c1b..6dbadacc 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -19,6 +19,9 @@ jobs:
       fail-fast: false
       matrix:
         config:
+          # The one we use in production.
+          - { os: ubuntu-latest, r: "renv" }
+          # See if the latest release works.
           - { os: ubuntu-latest, r: "release" }
 
     env:
diff --git a/tests/testthat/_snaps/forecasters-basics.md b/tests/testthat/_snaps/forecasters-basics.md
index d82ac804..15216ea7 100644
--- a/tests/testthat/_snaps/forecasters-basics.md
+++ b/tests/testthat/_snaps/forecasters-basics.md
@@ -17,8 +17,3 @@
       ! Can't rename columns that don't exist.
       x Column `slide_value_case_rate` doesn't exist.
 
-# no_recent_outcome deals with no as_of
-
-    Code
-      res <- forecaster[[2]](jhu, "case_rate", extra_sources = "death_rate", ahead = 2L)
-

From 28c8471ab6c46e0534e80f9dcd5d5d4dc28f7a22 Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov <dshemetov@ucdavis.edu>
Date: Fri, 21 Mar 2025 11:13:21 -0700
Subject: [PATCH 16/20] fix: tests

---
 R/new_epipredict_steps/layer_yeo_johnson.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/new_epipredict_steps/layer_yeo_johnson.R b/R/new_epipredict_steps/layer_yeo_johnson.R
index 731297cc..84815697 100644
--- a/R/new_epipredict_steps/layer_yeo_johnson.R
+++ b/R/new_epipredict_steps/layer_yeo_johnson.R
@@ -62,7 +62,7 @@ layer_epi_YeoJohnson <- function(frosting, ..., lambdas = NULL, by = NULL, id =
 }
 
 layer_epi_YeoJohnson_new <- function(lambdas, by, terms, id) {
-  layer("epi_YeoJohnson", lambdas = lambdas, by = by, terms = terms, id = id)
+  epipredict:::layer("epi_YeoJohnson", lambdas = lambdas, by = by, terms = terms, id = id)
 }
 
 #' @export
@@ -140,7 +140,7 @@ slather.layer_epi_YeoJohnson <- function(object, components, workflow, new_data,
     # ahead_1_cases, ahead_7_cases, etc. We want to extract the cases part.
     outcome_cols <- names(components$mold$outcomes) %>%
       stringr::str_match("ahead_\\d+_(.*)") %>%
-      extract(, 2)
+      magrittr::extract(, 2)
 
     components$predictions <- components$predictions %>%
       rowwise() %>%

From 812d00c978a73097ffc8ee39348a857d4a73ec43 Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov <dshemetov@ucdavis.edu>
Date: Fri, 21 Mar 2025 13:31:28 -0700
Subject: [PATCH 17/20] doc+fix+test: fix other_keys tests, terms handling,
 docs pass

---
 R/new_epipredict_steps/layer_yeo_johnson.R | 85 ++++++++++++----------
 R/new_epipredict_steps/step_yeo_johnson.R  | 67 ++++++++---------
 tests/testthat/test-yeo-johnson.R          | 50 ++++++++++---
 3 files changed, 119 insertions(+), 83 deletions(-)

diff --git a/R/new_epipredict_steps/layer_yeo_johnson.R b/R/new_epipredict_steps/layer_yeo_johnson.R
index 84815697..abd3ae41 100644
--- a/R/new_epipredict_steps/layer_yeo_johnson.R
+++ b/R/new_epipredict_steps/layer_yeo_johnson.R
@@ -19,34 +19,28 @@
 #'   filter(time_value > "2021-11-01", geo_value %in% c("ca", "ny")) %>%
 #'   select(geo_value, time_value, cases)
 #'
-#' pop_data <- data.frame(states = c("ca", "ny"), value = c(20000, 30000))
-#'
+#' # Create a recipe with a Yeo-Johnson transformation.
 #' r <- epi_recipe(jhu) %>%
-#'   step_epi_YeoJohnson(
-#'     df = pop_data,
-#'     df_pop_col = "value",
-#'     by = c("geo_value" = "states"),
-#'     cases, suffix = "_scaled"
-#'   ) %>%
-#'   step_epi_lag(cases_scaled, lag = c(0, 7, 14)) %>%
-#'   step_epi_ahead(cases_scaled, ahead = 7, role = "outcome") %>%
+#'   step_epi_YeoJohnson(cases) %>%
+#'   step_epi_lag(cases, lag = 0) %>%
+#'   step_epi_ahead(cases, ahead = 0, role = "outcome") %>%
 #'   step_epi_naomit()
 #'
+#' # Create a frosting layer that will undo the Yeo-Johnson transformation.
 #' f <- frosting() %>%
 #'   layer_predict() %>%
-#'   layer_threshold(.pred) %>%
-#'   layer_naomit(.pred) %>%
-#'   layer_epi_YeoJohnson(.pred,
-#'     df = pop_data,
-#'     by = c("geo_value" = "states"),
-#'     df_pop_col = "value"
-#'   )
+#'   layer_epi_YeoJohnson(.pred)
 #'
+#' # Create a workflow and fit it.
 #' wf <- epi_workflow(r, linear_reg()) %>%
 #'   fit(jhu) %>%
 #'   add_frosting(f)
 #'
+#' # Forecast the workflow, which should reverse the Yeo-Johnson transformation.
 #' forecast(wf)
+#' # Compare to the original data.
+#' plot(density(jhu$cases))
+#' plot(density(forecast(wf)$cases))
 layer_epi_YeoJohnson <- function(frosting, ..., lambdas = NULL, by = NULL, id = rand_id("epi_YeoJohnson")) {
   checkmate::assert_tibble(lambdas, min.rows = 1, null.ok = TRUE)
 
@@ -116,28 +110,21 @@ slather.layer_epi_YeoJohnson <- function(object, components, workflow, new_data,
     unmatched = c("error", "drop")
   )
 
-  # TODO: There are many possibilities here:
-  # - (a) the terms can be empty, where we should probably default to
-  #   all_outcomes().
-  # - (b) explicitly giving all_outcomes(), we end here with terms being empty,
-  #   which doesn't seem right; need to make sure we pull in all the outcome
-  #   columns here. The question is what form should they have?
-  # - (c) if the user just specifies .pred, then we have to infer the outcome
-  #   from the mold, which is simple enough and the main case I have working.
-  # - (d) the user might specify outcomes of the form .pred_ahead_1_cases,
-  #   .pred_ahead_7_cases, etc. Is that the right format? Trying those out now
-  #   and getting errors downstream from forecast().
-  # Get the columns to transform.
   exprs <- rlang::expr(c(!!!object$terms))
   pos <- tidyselect::eval_select(exprs, components$predictions)
   col_names <- names(pos)
 
-  # For every column, we need to use the appropriate lambda column, which differs per row.
-  # Note that yj_inverse() is vectorized in x, but not in lambda.
+  # The `object$terms` is where the user specifies the columns they want to
+  # untransform. We need to match the outcomes with their lambda columns in our
+  # parameter table and then apply the inverse transformation.
   if (identical(col_names, ".pred")) {
-    # In this case, we don't get a hint for the outcome column name, so we need to
-    # infer it from the mold. `outcomes` is a vector of objects like
-    # ahead_1_cases, ahead_7_cases, etc. We want to extract the cases part.
+    # In this case, we don't get a hint for the outcome column name, so we need
+    # to infer it from the mold.
+    if (length(components$mold$outcomes) > 1) {
+      cli_abort("Only one outcome is allowed when specifying `.pred`.", call = rlang::caller_env())
+    }
+    # `outcomes` is a vector of objects like ahead_1_cases, ahead_7_cases, etc.
+    # We want to extract the cases part.
     outcome_cols <- names(components$mold$outcomes) %>%
       stringr::str_match("ahead_\\d+_(.*)") %>%
       magrittr::extract(, 2)
@@ -146,8 +133,14 @@ slather.layer_epi_YeoJohnson <- function(object, components, workflow, new_data,
       rowwise() %>%
       mutate(.pred := yj_inverse(.pred, !!sym(paste0(".lambda_", outcome_cols))))
   } else if (identical(col_names, character(0))) {
-    # In this case, we should assume the user wants to transform all outcomes.
-    cli::cli_abort("Not specifying columns to layer Yeo-Johnson is not implemented yet.", call = rlang::caller_env())
+    # Wish I could suggest `all_outcomes()` here, but currently it's the same as
+    # not specifying any terms. I don't want to spend time with dealing with
+    # this case until someone asks for it.
+    cli::cli_abort("Not specifying columns to layer Yeo-Johnson is not implemented.
+    If you had a single outcome, you can use `.pred` as a column name.
+    If you had multiple outcomes, you'll need to specify them like
+    `.pred_ahead_1_<outcome_col>`, `.pred_ahead_7_<outcome_col>`, etc.
+    ", call = rlang::caller_env())
   } else {
     # In this case, we assume that the user has specified the columns they want
     # transformed here. We then need to determine the lambda columns for each of
@@ -157,7 +150,9 @@ slather.layer_epi_YeoJohnson <- function(object, components, workflow, new_data,
     original_outcome_cols <- str_match(col_names, ".pred_ahead_\\d+_(.*)")[, 2]
     outcomes_wout_ahead <- str_match(names(components$mold$outcomes), "ahead_\\d+_(.*)")[,2]
     if (any(original_outcome_cols %nin% outcomes_wout_ahead)) {
-      cli_abort("All columns specified in `...` must be outcome columns.", call = rlang::caller_env())
+      cli_abort("All columns specified in `...` must be outcome columns.
+      They must be of the form `.pred_ahead_1_<outcome_col>`, `.pred_ahead_7_<outcome_col>`, etc.
+      ", call = rlang::caller_env())
     }
 
     for (i in seq_along(col_names)) {
@@ -184,7 +179,8 @@ print.layer_epi_YeoJohnson <- function(x, width = max(20, options()$width - 30),
 
 #' Inverse Yeo-Johnson transformation
 #'
-#' Inverse of `yj_transform` in step_yeo_johnson.R.
+#' Inverse of `yj_transform` in step_yeo_johnson.R. Note that this function is
+#' vectorized in x, but not in lambda.
 #'
 #' @keywords internal
 yj_inverse <- function(x, lambda, eps = 0.001) {
@@ -247,3 +243,16 @@ get_lambdas_in_layer <- function(workflow) {
   }
   lambdas
 }
+
+get_transformed_cols_in_layer <- function(workflow) {
+  this_recipe <- hardhat::extract_recipe(workflow)
+  if (!(this_recipe %>% recipes::detect_step("epi_YeoJohnson"))) {
+    cli_abort("`layer_epi_YeoJohnson` requires `step_epi_YeoJohnson` in the recipe.", call = rlang::caller_env())
+  }
+  for (step in this_recipe$steps) {
+    if (inherits(step, "step_epi_YeoJohnson")) {
+      lambdas <- step$lambdas
+      break
+    }
+  }
+}
diff --git a/R/new_epipredict_steps/step_yeo_johnson.R b/R/new_epipredict_steps/step_yeo_johnson.R
index 1abae34d..65290628 100644
--- a/R/new_epipredict_steps/step_yeo_johnson.R
+++ b/R/new_epipredict_steps/step_yeo_johnson.R
@@ -3,17 +3,16 @@
 #' `step_epi_YeoJohnson()` creates a *specification* of a recipe step that will
 #' transform data using a Yeo-Johnson transformation. This fork works with panel
 #' data and is meant for epidata.
-#' TODO: Do an edit pass on this docstring.
 #'
 #' @inheritParams step_center
-#' @param lambdas A numeric vector of transformation values. This
+#' @param lambdas Internal. A numeric vector of transformation values. This
 #'  is `NULL` until computed by [prep()].
 #' @param na_lambda_fill A numeric value to fill in for any
 #'  geos where the lambda cannot be estimated.
 #' @param limits A length 2 numeric vector defining the range to
 #'  compute the transformation parameter lambda.
-#' @param num_unique An integer where data that have less possible
-#'  values will not be evaluated for a transformation.
+#' @param num_unique An integer where data that have fewer than this
+#'  many unique values will not be evaluated for a transformation.
 #' @param na_rm A logical indicating whether missing values should be
 #'  removed.
 #' @param skip A logical. Should the step be skipped when the recipe is
@@ -22,11 +21,13 @@
 #' @template step-return
 #' @family individual transformation steps
 #' @export
-#' @details The Yeo-Johnson transformation is very similar to the
-#'  Box-Cox but does not require the input variables to be strictly
-#'  positive. In the package, the partial log-likelihood function is
-#'  directly optimized within a reasonable set of transformation
-#'  values (which can be changed by the user).
+#' @details The Yeo-Johnson transformation is variance-stabilizing
+#'  transformation, similar to the Box-Cox but does not require the input
+#'  variables to be strictly positive. In the package, the partial
+#'  log-likelihood function is directly optimized within a reasonable set of
+#'  transformation values (which can be changed by the user). The optimization
+#'  finds a lambda parameter for each group in the data that minimizes the
+#'  variance of the transformed data.
 #'
 #' This transformation is typically done on the outcome variable
 #'  using the residuals for a statistical model (such as ordinary
@@ -36,7 +37,7 @@
 #'  variable distributions more symmetric.
 #'
 #' If the transformation parameters are estimated to be very
-#'  closed to the bounds, or if the optimization fails, a value of
+#'  close to the bounds, or if the optimization fails, a value of
 #'  `NA` is used and no transformation is applied.
 #'
 #' # Tidying
@@ -54,28 +55,24 @@
 #'
 #' @references Yeo, I. K., and Johnson, R. A. (2000). A new family of power
 #'   transformations to improve normality or symmetry. *Biometrika*.
-#' @examplesIf rlang::is_installed("modeldata")
-#' data(biomass, package = "modeldata")
+#' @examplesIf
+#' jhu <- cases_deaths_subset %>%
+#'   filter(time_value > "2021-01-01", geo_value %in% c("ca", "ny")) %>%
+#'   select(geo_value, time_value, cases)
+#' filtered_data <- jhu
 #'
-#' biomass_tr <- biomass[biomass$dataset == "Training", ]
-#' biomass_te <- biomass[biomass$dataset == "Testing", ]
-#'
-#' rec <- recipe(
-#'   HHV ~ carbon + hydrogen + oxygen + nitrogen + sulfur,
-#'   data = biomass_tr
-#' )
-#'
-#' yj_transform <- step_epi_YeoJohnson(rec, all_numeric())
-#'
-#' yj_estimates <- prep(yj_transform, training = biomass_tr)
-#'
-#' yj_te <- bake(yj_estimates, biomass_te)
-#'
-#' plot(density(biomass_te$sulfur), main = "before")
-#' plot(density(yj_te$sulfur), main = "after")
-#'
-#' tidy(yj_transform, number = 1)
-#' tidy(yj_estimates, number = 1)
+#' r <- epi_recipe(filtered_data) %>%
+#'   step_epi_YeoJohnson(cases)
+#' # View the recipe
+#' r
+#' # Fit the recipe
+#' tr <- r %>% prep(filtered_data)
+#' # View the lambda values
+#' tr$steps[[1]]$lambdas
+#' # View the transformed data
+#' df <- tr %>% bake(filtered_data)
+#' plot(density(df$cases))
+#' plot(density(filtered_data$cases))
 step_epi_YeoJohnson <- function(
   recipe,
   ...,
@@ -266,6 +263,8 @@ get_lambdas_yj_table <- function(training, col_names, limits, num_unique, na_lam
 
 #' Internal Functions
 #'
+#' Note that this function is vectorized in x, but not in lambda.
+#'
 #' @keywords internal
 #' @rdname recipes-internal
 #' @export
@@ -314,14 +313,14 @@ yj_transform <- function(x, lambda, ind_neg = NULL, eps = 0.001) {
   x
 }
 
-
 ## Helper for the log-likelihood calc for eq 3.1 of Yeo, I. K.,
 ## & Johnson, R. A. (2000). A new family of power transformations
 ## to improve normality or symmetry. Biometrika. page 957
 ll_yj <- function(lambda, y, ind_neg, const, eps = 0.001) {
   n <- length(y)
   y_t <- yj_transform(y, lambda, ind_neg)
-  mu_t <- mean(y_t)
+  # EDIT: Unused in the original recipes code.
+  # mu_t <- mean(y_t)
   var_t <- var(y_t) * (n - 1) / n
   res <- -.5 * n * log(var_t) + (lambda - 1) * const
   res
@@ -361,6 +360,7 @@ estimate_yj <- function(dat, limits = c(-5, 5), num_unique = 5, na_rm = TRUE, ca
 
   const <- sum(sign(dat) * log(abs(dat) + 1))
 
+  suppressWarnings(
   res <- optimize(
     yj_obj,
     interval = limits,
@@ -370,6 +370,7 @@ estimate_yj <- function(dat, limits = c(-5, 5), num_unique = 5, na_rm = TRUE, ca
     const = const,
     tol = .0001
   )
+  )
   lam <- res$maximum
   if (abs(limits[1] - lam) <= eps | abs(limits[2] - lam) <= eps) {
     lam <- NA
diff --git a/tests/testthat/test-yeo-johnson.R b/tests/testthat/test-yeo-johnson.R
index ea5653a8..28ca7b72 100644
--- a/tests/testthat/test-yeo-johnson.R
+++ b/tests/testthat/test-yeo-johnson.R
@@ -77,21 +77,47 @@ test_that("Yeo-Johnson steps and layers invert each other", {
 })
 
 test_that("Yeo-Johnson steps and layers invert each other when other_keys are present", {
-  skip("TODO")
-  jhu <- cases_deaths_subset %>%
-    filter(time_value > "2021-01-01", geo_value %in% c("ca", "ny")) %>%
-    select(geo_value, time_value, cases)
-  filtered_data <- jhu
+  # Small synthetic grad_employ_dataset version.
+  filtered_data <- tribble(
+    ~geo_value, ~age_group, ~edu_qual, ~time_value, ~med_income_2y,
+    "ca", "25-34", "bachelor", 2017, 50000,
+    "ca", "25-34", "bachelor", 2018, 50500,
+    "ca", "25-34", "bachelor", 2019, 51000,
+    "ca", "25-34", "bachelor", 2020, 51500,
+    "ca", "25-34", "bachelor", 2021, 52000,
+    "ca", "25-34", "bachelor", 2022, 52500,
+    "ca", "35-1000", "bachelor", 2017, 3e10,
+    "ca", "35-1000", "bachelor", 2018, 3e10 + 10,
+    "ca", "35-1000", "bachelor", 2019, 3e10 + 20,
+    "ca", "35-1000", "bachelor", 2020, 3e10 + 30,
+    "ca", "35-1000", "bachelor", 2021, 3e10 + 40,
+    "ca", "35-1000", "bachelor", 2022, 3e10 + 50,
+    "ca", "25-34", "master", 2017, 2 * 50000,
+    "ca", "25-34", "master", 2018, 2 * 50500,
+    "ca", "25-34", "master", 2019, 2 * 51000,
+    "ca", "25-34", "master", 2020, 2 * 51500,
+    "ca", "25-34", "master", 2021, 2 * 52000,
+    "ca", "25-34", "master", 2022, 2 * 52500,
+    "ca", "35-1000", "master", 2017, 2 * 3e10,
+    "ca", "35-1000", "master", 2018, 2 * (3e10 + 10),
+    "ca", "35-1000", "master", 2019, 2 * (3e10 + 20),
+    "ca", "35-1000", "master", 2020, 2 * (3e10 + 30),
+    "ca", "35-1000", "master", 2021, 2 * (3e10 + 40),
+    "ca", "35-1000", "master", 2022, 2 * (3e10 + 50)
+  ) %>% as_epi_df(other_keys = c("age_group", "edu_qual"))
 
   # Get some lambda values
   r <- epi_recipe(filtered_data) %>%
-    step_epi_YeoJohnson(cases) %>%
-    step_epi_lag(cases, lag = 0) %>%
-    step_epi_ahead(cases, ahead = 0, role = "outcome") %>%
+    step_epi_YeoJohnson(med_income_2y) %>%
+    step_epi_lag(med_income_2y, lag = 0) %>%
+    step_epi_ahead(med_income_2y, ahead = 0, role = "outcome") %>%
     step_epi_naomit()
   tr <- r %>% prep(filtered_data)
-  # Check for fixed lambda values
-  expect_true(all(near(tr$steps[[1]]$lambdas$.lambda_cases, c(0.856, 0.207), tol = 0.001)))
+  expect_true(".lambda_med_income_2y" %in% names(tr$steps[[1]]$lambdas))
+  expect_true("geo_value" %in% names(tr$steps[[1]]$lambdas))
+  expect_true("age_group" %in% names(tr$steps[[1]]$lambdas))
+  expect_true("edu_qual" %in% names(tr$steps[[1]]$lambdas))
+  expect_true(is.numeric(tr$steps[[1]]$lambdas$.lambda_med_income_2y))
 
   # Make sure that the inverse transformation works
   f <- frosting() %>%
@@ -100,7 +126,7 @@ test_that("Yeo-Johnson steps and layers invert each other when other_keys are pr
   wf <- epi_workflow(r, linear_reg()) %>%
     fit(filtered_data) %>%
     add_frosting(f)
-  out1 <- filtered_data %>% as_tibble() %>% slice_max(time_value, by = geo_value)
-  out2 <- forecast(wf) %>% rename(cases = .pred)
+  out1 <- filtered_data %>% as_tibble() %>% slice_max(time_value, by = geo_value) %>% select(geo_value, age_group, time_value, med_income_2y) %>% arrange(geo_value, age_group, time_value)
+  out2 <- forecast(wf) %>% rename(med_income_2y = .pred) %>% select(geo_value, age_group, time_value, med_income_2y) %>% arrange(geo_value, age_group, time_value)
   expect_equal(out1, out2)
 })

From e8eef18a377e3197b5d127da8d2a82fec5f9e4d7 Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov <dshemetov@ucdavis.edu>
Date: Fri, 21 Mar 2025 13:48:16 -0700
Subject: [PATCH 18/20] Update R/new_epipredict_steps/layer_yeo_johnson.R

Co-authored-by: David Weber <david.weber2@pm.me>
---
 R/new_epipredict_steps/layer_yeo_johnson.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/new_epipredict_steps/layer_yeo_johnson.R b/R/new_epipredict_steps/layer_yeo_johnson.R
index abd3ae41..1fac3261 100644
--- a/R/new_epipredict_steps/layer_yeo_johnson.R
+++ b/R/new_epipredict_steps/layer_yeo_johnson.R
@@ -39,8 +39,8 @@
 #' # Forecast the workflow, which should reverse the Yeo-Johnson transformation.
 #' forecast(wf)
 #' # Compare to the original data.
-#' plot(density(jhu$cases))
-#' plot(density(forecast(wf)$cases))
+#' jhu %>% filter(time_value == "2021-12-31")
+#' forecast(wf)
 layer_epi_YeoJohnson <- function(frosting, ..., lambdas = NULL, by = NULL, id = rand_id("epi_YeoJohnson")) {
   checkmate::assert_tibble(lambdas, min.rows = 1, null.ok = TRUE)
 

From 9e5748802547430818838707c2cd477baca99df4 Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov <dshemetov@ucdavis.edu>
Date: Fri, 21 Mar 2025 13:50:52 -0700
Subject: [PATCH 19/20] fix: delete unused function

---
 R/new_epipredict_steps/layer_yeo_johnson.R | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/R/new_epipredict_steps/layer_yeo_johnson.R b/R/new_epipredict_steps/layer_yeo_johnson.R
index 1fac3261..b3ae0bd7 100644
--- a/R/new_epipredict_steps/layer_yeo_johnson.R
+++ b/R/new_epipredict_steps/layer_yeo_johnson.R
@@ -243,16 +243,3 @@ get_lambdas_in_layer <- function(workflow) {
   }
   lambdas
 }
-
-get_transformed_cols_in_layer <- function(workflow) {
-  this_recipe <- hardhat::extract_recipe(workflow)
-  if (!(this_recipe %>% recipes::detect_step("epi_YeoJohnson"))) {
-    cli_abort("`layer_epi_YeoJohnson` requires `step_epi_YeoJohnson` in the recipe.", call = rlang::caller_env())
-  }
-  for (step in this_recipe$steps) {
-    if (inherits(step, "step_epi_YeoJohnson")) {
-      lambdas <- step$lambdas
-      break
-    }
-  }
-}

From 5d5f1a6f1d01fdc2a80145293e781a93a9129e47 Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov <dshemetov@ucdavis.edu>
Date: Fri, 21 Mar 2025 13:51:48 -0700
Subject: [PATCH 20/20] doc: fix @examples

---
 R/new_epipredict_steps/step_yeo_johnson.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/new_epipredict_steps/step_yeo_johnson.R b/R/new_epipredict_steps/step_yeo_johnson.R
index 65290628..ef83e172 100644
--- a/R/new_epipredict_steps/step_yeo_johnson.R
+++ b/R/new_epipredict_steps/step_yeo_johnson.R
@@ -55,7 +55,7 @@
 #'
 #' @references Yeo, I. K., and Johnson, R. A. (2000). A new family of power
 #'   transformations to improve normality or symmetry. *Biometrika*.
-#' @examplesIf
+#' @examples
 #' jhu <- cases_deaths_subset %>%
 #'   filter(time_value > "2021-01-01", geo_value %in% c("ca", "ny")) %>%
 #'   select(geo_value, time_value, cases)