From 577a866d213b59bde59522f6de7c36d4bb91ae3b Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov <dshemetov@ucdavis.edu>
Date: Fri, 21 Mar 2025 13:41:15 -0700
Subject: [PATCH 1/7] feat: add step_/layer_ epi_YeoJohnson

---
 DESCRIPTION                       |   1 +
 NAMESPACE                         |   8 +
 R/layer_yeo_johnson.R             | 249 +++++++++++++++++++
 R/step_yeo_johnson.R              | 397 ++++++++++++++++++++++++++++++
 man/layer_epi_YeoJohnson.Rd       |  63 +++++
 man/step_adjust_latency.Rd        |   8 +-
 man/step_epi_YeoJohnson.Rd        | 118 +++++++++
 tests/testthat/test-yeo-johnson.R | 141 +++++++++++
 8 files changed, 981 insertions(+), 4 deletions(-)
 create mode 100644 R/layer_yeo_johnson.R
 create mode 100644 R/step_yeo_johnson.R
 create mode 100644 man/layer_epi_YeoJohnson.Rd
 create mode 100644 man/step_epi_YeoJohnson.Rd
 create mode 100644 tests/testthat/test-yeo-johnson.R

diff --git a/DESCRIPTION b/DESCRIPTION
index 81a35b30..e3594a50 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -42,6 +42,7 @@ Imports:
     recipes (>= 1.0.4),
     rlang (>= 1.1.0),
     stats,
+    stringr,
     tibble,
     tidyr,
     tidyselect,
diff --git a/NAMESPACE b/NAMESPACE
index c2fa9494..351530de 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -19,6 +19,7 @@ S3method(bake,check_enough_data)
 S3method(bake,epi_recipe)
 S3method(bake,step_adjust_latency)
 S3method(bake,step_climate)
+S3method(bake,step_epi_YeoJohnson)
 S3method(bake,step_epi_ahead)
 S3method(bake,step_epi_lag)
 S3method(bake,step_epi_slide)
@@ -55,6 +56,7 @@ S3method(prep,check_enough_data)
 S3method(prep,epi_recipe)
 S3method(prep,step_adjust_latency)
 S3method(prep,step_climate)
+S3method(prep,step_epi_YeoJohnson)
 S3method(prep,step_epi_ahead)
 S3method(prep,step_epi_lag)
 S3method(prep,step_epi_slide)
@@ -76,6 +78,7 @@ S3method(print,flatline)
 S3method(print,frosting)
 S3method(print,layer_add_forecast_date)
 S3method(print,layer_add_target_date)
+S3method(print,layer_epi_YeoJohnson)
 S3method(print,layer_naomit)
 S3method(print,layer_point_from_distn)
 S3method(print,layer_population_scaling)
@@ -86,6 +89,7 @@ S3method(print,layer_threshold)
 S3method(print,layer_unnest)
 S3method(print,step_adjust_latency)
 S3method(print,step_climate)
+S3method(print,step_epi_YeoJohnson)
 S3method(print,step_epi_ahead)
 S3method(print,step_epi_lag)
 S3method(print,step_epi_slide)
@@ -101,6 +105,7 @@ S3method(run_mold,default_epi_recipe_blueprint)
 S3method(slather,layer_add_forecast_date)
 S3method(slather,layer_add_target_date)
 S3method(slather,layer_cdc_flatline_quantiles)
+S3method(slather,layer_epi_YeoJohnson)
 S3method(slather,layer_naomit)
 S3method(slather,layer_point_from_distn)
 S3method(slather,layer_population_scaling)
@@ -114,6 +119,7 @@ S3method(snap,quantile_pred)
 S3method(tidy,check_enough_data)
 S3method(tidy,frosting)
 S3method(tidy,layer)
+S3method(tidy,step_epi_YeoJohnson)
 S3method(update,layer)
 S3method(vec_arith,quantile_pred)
 S3method(vec_arith.numeric,quantile_pred)
@@ -176,6 +182,7 @@ export(layer)
 export(layer_add_forecast_date)
 export(layer_add_target_date)
 export(layer_cdc_flatline_quantiles)
+export(layer_epi_YeoJohnson)
 export(layer_naomit)
 export(layer_point_from_distn)
 export(layer_population_scaling)
@@ -207,6 +214,7 @@ export(smooth_quantile_reg)
 export(snap)
 export(step_adjust_latency)
 export(step_climate)
+export(step_epi_YeoJohnson)
 export(step_epi_ahead)
 export(step_epi_lag)
 export(step_epi_naomit)
diff --git a/R/layer_yeo_johnson.R b/R/layer_yeo_johnson.R
new file mode 100644
index 00000000..826bd23d
--- /dev/null
+++ b/R/layer_yeo_johnson.R
@@ -0,0 +1,249 @@
+#' Unnormalizing transformation
+#'
+#' Will undo a step_epi_YeoJohnson transformation.
+#'
+#' @param frosting a `frosting` postprocessor. The layer will be added to the
+#'   sequence of operations for this frosting.
+#' @param lambdas Internal. A data frame of lambda values to be used for
+#'   inverting the transformation.
+#' @param ... One or more selector functions choosing the prediction columns
+#'   to untransform. See [recipes::selections()] for more details.
+#' @param by A (possibly named) character vector of variables to join by.
+#' @param id a random id string
+#'
+#' @return an updated `frosting` postprocessor
+#' @export
+#' @examples
+#' library(dplyr)
+#' jhu <- epidatasets::cases_deaths_subset %>%
+#'   filter(time_value > "2021-11-01", geo_value %in% c("ca", "ny")) %>%
+#'   select(geo_value, time_value, cases)
+#'
+#' # Create a recipe with a Yeo-Johnson transformation.
+#' r <- epi_recipe(jhu) %>%
+#'   step_epi_YeoJohnson(cases) %>%
+#'   step_epi_lag(cases, lag = 0) %>%
+#'   step_epi_ahead(cases, ahead = 0, role = "outcome") %>%
+#'   step_epi_naomit()
+#'
+#' # Create a frosting layer that will undo the Yeo-Johnson transformation.
+#' f <- frosting() %>%
+#'   layer_predict() %>%
+#'   layer_epi_YeoJohnson(.pred)
+#'
+#' # Create a workflow and fit it.
+#' wf <- epi_workflow(r, linear_reg()) %>%
+#'   fit(jhu) %>%
+#'   add_frosting(f)
+#'
+#' # Forecast the workflow, which should reverse the Yeo-Johnson transformation.
+#' forecast(wf)
+#' # Compare to the original data.
+#' jhu %>% filter(time_value == "2021-12-31")
+#' forecast(wf)
+layer_epi_YeoJohnson <- function(frosting, ..., lambdas = NULL, by = NULL, id = rand_id("epi_YeoJohnson")) {
+  # Validate eagerly so that malformed arguments fail here rather than deep
+  # inside `slather()` at prediction time.
+  checkmate::assert_tibble(lambdas, min.rows = 1, null.ok = TRUE)
+  checkmate::assert_character(by, any.missing = FALSE, null.ok = TRUE)
+
+  # `...` is captured unevaluated; the selectors are resolved in `slather()`.
+  add_layer(
+    frosting,
+    layer_epi_YeoJohnson_new(lambdas = lambdas, by = by, terms = dplyr::enquos(...), id = id)
+  )
+}
+
+# Internal constructor: bundles the layer's fields into an "epi_YeoJohnson" layer.
+layer_epi_YeoJohnson_new <- function(lambdas, by, terms, id) {
+  layer("epi_YeoJohnson", lambdas = lambdas, by = by, terms = terms, id = id)
+}
+
+#' @export
+#' @importFrom workflows extract_preprocessor
+slather.layer_epi_YeoJohnson <- function(object, components, workflow, new_data, ...) {
+  rlang::check_dots_empty()
+
+  # Get the lambdas from the layer or from the workflow.
+  lambdas <- object$lambdas %||% get_lambdas_in_layer(workflow)
+
+  # If the by is not specified, try to infer it from the lambdas.
+  if (is.null(object$by)) {
+    # Assume `layer_predict` has calculated the prediction keys and other
+    # layers don't change the prediction key colnames:
+    prediction_key_colnames <- names(components$keys)
+    lhs_potential_keys <- prediction_key_colnames
+    # The lambda columns are named `.lambda_<col>`; everything else is a key.
+    rhs_potential_keys <- colnames(select(lambdas, -starts_with(".lambda_")))
+    object$by <- intersect(lhs_potential_keys, rhs_potential_keys)
+    suggested_min_keys <- setdiff(lhs_potential_keys, "time_value")
+    if (!all(suggested_min_keys %in% object$by)) {
+      cli_warn(
+        c(
+          "{setdiff(suggested_min_keys, object$by)} {?was an/were} epikey column{?s} in the predictions,
+          but {?wasn't/weren't} found in the Yeo-Johnson `lambdas`.",
+          "i" = "Defaulting to join by {object$by}",
+          ">" = "Double-check whether column names in `lambdas` match those expected in your predictions",
+          ">" = "Consider using lambdas with breakdowns by {suggested_min_keys}",
+          ">" = "Manually specify `by =` to silence"
+        ),
+        # NOTE(review): class name is inherited from `layer_population_scaling`;
+        # left unchanged so existing condition handlers keep working.
+        class = "epipredict__layer_population_scaling__default_by_missing_suggested_keys"
+      )
+    }
+  }
+
+  # Establish the join columns. `object$by` is always set by this point, so no
+  # further fallback is needed. A named `by` maps prediction columns (names)
+  # onto lambda columns (values).
+  joinby <- list(x = names(object$by) %||% object$by, y = object$by)
+  hardhat::validate_column_names(components$predictions, joinby$x)
+  hardhat::validate_column_names(lambdas, joinby$y)
+
+  # Join the lambdas.
+  components$predictions <- inner_join(
+    components$predictions,
+    lambdas,
+    by = object$by,
+    relationship = "many-to-one",
+    unmatched = c("error", "drop")
+  )
+
+  exprs <- rlang::expr(c(!!!object$terms))
+  pos <- tidyselect::eval_select(exprs, components$predictions)
+  col_names <- names(pos)
+
+  # The `object$terms` is where the user specifies the columns they want to
+  # untransform. We need to match the outcomes with their lambda columns in our
+  # parameter table and then apply the inverse transformation.
+  if (identical(col_names, ".pred")) {
+    # In this case, we don't get a hint for the outcome column name, so we need
+    # to infer it from the mold.
+    if (length(components$mold$outcomes) > 1) {
+      cli_abort("Only one outcome is allowed when specifying `.pred`.", call = rlang::caller_env())
+    }
+    # Outcome names look like ahead_1_cases, ahead_7_cases, etc.; keep the
+    # trailing column name (here `cases`) to find its lambda column.
+    outcome_col <- stringr::str_match(names(components$mold$outcomes), "ahead_\\d+_(.*)")[, 2]
+    lambda_col <- paste0(".lambda_", outcome_col)
+    # Fail clearly if the outcome name did not parse or was never transformed.
+    if (!(lambda_col %in% names(components$predictions))) {
+      cli_abort("No `{lambda_col}` column found in the lambdas for the outcome.", call = rlang::caller_env())
+    }
+
+    components$predictions <- components$predictions %>%
+      rowwise() %>%
+      mutate(.pred := yj_inverse(.pred, !!sym(lambda_col)))
+  } else if (identical(col_names, character(0))) {
+    # `all_outcomes()` can't be suggested here: it is currently the same as not
+    # specifying any terms. Left unimplemented until someone asks for it.
+    cli::cli_abort(
+      "Not specifying columns to layer Yeo-Johnson is not implemented.
+    If you had a single outcome, you can use `.pred` as a column name.
+    If you had multiple outcomes, you'll need to specify them like
+    `.pred_ahead_1_<outcome_col>`, `.pred_ahead_7_<outcome_col>`, etc.
+    ",
+      call = rlang::caller_env()
+    )
+  } else {
+    # The user has named the prediction columns to untransform. Map each one,
+    # e.g. `.pred_ahead_1_case_rate`, to the lambda column of its original
+    # outcome, e.g. `.lambda_case_rate`.
+    original_outcome_cols <- stringr::str_match(col_names, ".pred_ahead_\\d+_(.*)")[, 2]
+    outcomes_wout_ahead <- stringr::str_match(names(components$mold$outcomes), "ahead_\\d+_(.*)")[, 2]
+    if (any(original_outcome_cols %nin% outcomes_wout_ahead)) {
+      cli_abort(
+        "All columns specified in `...` must be outcome columns.
+      They must be of the form `.pred_ahead_1_<outcome_col>`, `.pred_ahead_7_<outcome_col>`, etc.
+      ",
+        call = rlang::caller_env()
+      )
+    }
+
+    for (i in seq_along(col_names)) {
+      col <- col_names[i]
+      lambda_col <- paste0(".lambda_", original_outcome_cols[i])
+      components$predictions <- components$predictions %>%
+        rowwise() %>%
+        mutate(!!sym(col) := yj_inverse(!!sym(col), !!sym(lambda_col)))
+    }
+  }
+
+  # Remove the lambda columns.
+  components$predictions <- components$predictions %>%
+    select(-starts_with(".lambda_")) %>%
+    ungroup()
+  components
+}
+
+#' @export
+print.layer_epi_YeoJohnson <- function(x, width = max(20, options()$width - 30), ...) {
+  # Hand the layer's selector terms to the shared layer printer under a fixed title.
+  print_layer(x$terms, width = width, title = "Yeo-Johnson transformation (see `lambdas` object for values) on ")
+}
+
+# Inverse Yeo-Johnson transformation
+#
+# Inverse of `yj_transform` in step_yeo_johnson.R. Note that this function is
+# vectorized in x, but not in lambda: `lambda` must be a single value. If
+# `lambda` is NA, `x` is returned unchanged. `eps` is the tolerance around the
+# special values lambda = 0 and lambda = 2, where the log form is inverted.
+yj_inverse <- function(x, lambda, eps = 0.001) {
+  if (is.na(lambda)) {
+    return(x)
+  }
+  # Flatten list or data frame input to a plain vector. (This was previously
+  # guarded by `!inherits(x, "tbl_df") || is.data.frame(x)`, which holds for
+  # vectors and tibbles alike, so the alternative branch was unreachable.)
+  x <- unlist(x, use.names = FALSE)
+
+  # The transform preserves sign, so the sign of the transformed value tells us
+  # which piece of the transformation to invert. NAs fall in neither set.
+  dat_neg <- x < 0
+  not_neg <- which(!dat_neg)
+  is_neg <- which(dat_neg)
+
+  nn_inv_trans <- function(x, lambda) {
+    if (abs(lambda) < eps) {
+      # inverts log(x + 1)
+      exp(x) - 1
+    } else {
+      # inverts ((x + 1)^lambda - 1) / lambda
+      (lambda * x + 1)^(1 / lambda) - 1
+    }
+  }
+
+  ng_inv_trans <- function(x, lambda) {
+    if (abs(lambda - 2) < eps) {
+      # inverts -log(-x + 1)
+      -(exp(-x) - 1)
+    } else {
+      # inverts -((-x + 1)^(2 - lambda) - 1) / (2 - lambda)
+      -(((lambda - 2) * x + 1)^(1 / (2 - lambda)) - 1)
+    }
+  }
+
+  if (length(not_neg) > 0) {
+    x[not_neg] <- nn_inv_trans(x[not_neg], lambda)
+  }
+
+  if (length(is_neg) > 0) {
+    x[is_neg] <- ng_inv_trans(x[is_neg], lambda)
+  }
+  x
+}
+
+# Look up the lambda table stored on the recipe's `step_epi_YeoJohnson` step.
+# Aborts when the workflow's recipe contains no such step.
+get_lambdas_in_layer <- function(workflow) {
+  rec <- hardhat::extract_recipe(workflow)
+  has_yj_step <- recipes::detect_step(rec, "epi_YeoJohnson")
+  if (!has_yj_step) {
+    cli_abort("`layer_epi_YeoJohnson` requires `step_epi_YeoJohnson` in the recipe.", call = rlang::caller_env())
+  }
+  # Only the first matching step is consulted, even if the recipe has several.
+  is_yj_step <- function(s) inherits(s, "step_epi_YeoJohnson")
+  first_yj_step <- Find(is_yj_step, rec$steps)
+  first_yj_step$lambdas
+}
diff --git a/R/step_yeo_johnson.R b/R/step_yeo_johnson.R
new file mode 100644
index 00000000..3a5fdce6
--- /dev/null
+++ b/R/step_yeo_johnson.R
@@ -0,0 +1,397 @@
+#' Yeo-Johnson transformation
+#'
+#' `step_epi_YeoJohnson()` creates a *specification* of a recipe step that will
+#' transform data using a Yeo-Johnson transformation. This fork works with panel
+#' data and is meant for epidata.
+#'
+#' @param recipe A recipe object. The step will be added to the
+#'  sequence of operations for this recipe.
+#' @param ... One or more selector functions to choose variables
+#'  for this step. See [recipes::selections()] for more details.
+#' @param role For model terms created by this step, what analysis role should
+#'  they be assigned? `lag` is default a predictor while `ahead` is an outcome.
+#' @param trained A logical for whether the selectors in `...`
+#' have been resolved by [prep()].
+#' @param lambdas Internal. A data frame of lambda values, one row per epikey
+#'  group. This is `NULL` until computed by [prep()].
+#' @param na_lambda_fill A numeric value to fill in for any
+#'  geos where the lambda cannot be estimated.
+#' @param limits A length 2 numeric vector defining the range to
+#'  compute the transformation parameter lambda.
+#' @param num_unique An integer where data that have fewer than this
+#'  many unique values will not be evaluated for a transformation.
+#' @param na_rm A logical indicating whether missing values should be
+#'  removed.
+#' @param skip A logical. Should the step be skipped when the recipe is
+#'  baked by [bake()]. On the `training` data, the step will always be
+#'  conducted (even if `skip = TRUE`).
+#' @param id A unique identifier for the step
+#' @template step-return
+#' @family individual transformation steps
+#' @export
+#' @details The Yeo-Johnson transformation is variance-stabilizing
+#'  transformation, similar to the Box-Cox but does not require the input
+#'  variables to be strictly positive. In the package, the partial
+#'  log-likelihood function is directly optimized within a reasonable set of
+#'  transformation values (which can be changed by the user). The optimization
+#'  finds a lambda parameter for each group in the data that minimizes the
+#'  variance of the transformed data.
+#'
+#' This transformation is typically done on the outcome variable
+#'  using the residuals for a statistical model (such as ordinary
+#'  least squares). Here, a simple null model (intercept only) is
+#'  used to apply the transformation to the *predictor*
+#'  variables individually. This can have the effect of making the
+#'  variable distributions more symmetric.
+#'
+#' If the transformation parameters are estimated to be very
+#'  close to the bounds, or if there are too few unique values, the
+#'  lambda for that group is set to `na_lambda_fill`.
+#'
+#' # Tidying
+#'
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
+#' columns `terms`, `value` , and `id`:
+#'
+#' \describe{
+#'   \item{terms}{character, the selectors or variables selected}
+#'   \item{value}{numeric, the lambda estimate}
+#'   \item{id}{character, id of this step}
+#' }
+#'
+#' @references Yeo, I. K., and Johnson, R. A. (2000). A new family of power
+#'   transformations to improve normality or symmetry. *Biometrika*.
+#' @examples
+#' jhu <- cases_deaths_subset %>%
+#'   filter(time_value > "2021-01-01", geo_value %in% c("ca", "ny")) %>%
+#'   select(geo_value, time_value, cases)
+#' filtered_data <- jhu
+#'
+#' r <- epi_recipe(filtered_data) %>%
+#'   step_epi_YeoJohnson(cases)
+#' # View the recipe
+#' r
+#' # Fit the recipe
+#' tr <- r %>% prep(filtered_data)
+#' # View the lambda values
+#' tr$steps[[1]]$lambdas
+#' # View the transformed data
+#' df <- tr %>% bake(filtered_data)
+#' plot(density(df$cases))
+#' plot(density(filtered_data$cases))
+step_epi_YeoJohnson <- function(
+  recipe,
+  ...,
+  role = "predictor",
+  trained = FALSE,
+  lambdas = NULL,
+  na_lambda_fill = 1 / 4,
+  limits = c(-5, 5),
+  num_unique = 5,
+  na_rm = TRUE,
+  skip = FALSE,
+  id = rand_id("epi_YeoJohnson")
+) {
+  # `limits` feeds `optimize()` in `estimate_yj()`, so it must be two finite, non-missing numbers.
+  checkmate::assert_numeric(limits, len = 2, any.missing = FALSE, finite = TRUE)
+  checkmate::assert_numeric(na_lambda_fill, lower = min(limits), upper = max(limits), len = 1)
+  checkmate::assert_numeric(num_unique, lower = 2, upper = Inf, len = 1)
+  checkmate::assert_logical(na_rm, len = 1)
+  checkmate::assert_logical(skip, len = 1)
+  add_step(
+    recipe,
+    step_epi_YeoJohnson_new(
+      terms = enquos(...),
+      role = role,
+      trained = trained,
+      lambdas = lambdas,
+      na_lambda_fill = na_lambda_fill,
+      limits = sort(limits),
+      num_unique = num_unique,
+      na_rm = na_rm,
+      # Placeholders that `prep()` fills in from the training data.
+      forecast_date = NULL, metadata = NULL, columns = NULL,
+      skip = skip,
+      id = id
+    )
+  )
+}
+
+# Low-level constructor for the step.
+#
+# Performs no validation: every argument is forwarded unchanged to `step()`
+# under the subclass "epi_YeoJohnson". Called by both `step_epi_YeoJohnson()`
+# (untrained) and `prep.step_epi_YeoJohnson()` (trained).
+step_epi_YeoJohnson_new <- function(
+  terms, role, trained,
+  lambdas, na_lambda_fill, limits, num_unique, na_rm,
+  forecast_date, metadata, columns,
+  skip, id
+) {
+  step(
+    subclass = "epi_YeoJohnson",
+    # Selection and bookkeeping.
+    terms = terms,
+    role = role,
+    trained = trained,
+    # Lambda table and the settings used to estimate it.
+    lambdas = lambdas,
+    na_lambda_fill = na_lambda_fill,
+    limits = limits,
+    num_unique = num_unique,
+    na_rm = na_rm,
+    # Recorded at `prep()` time; `bake()` reads them back.
+    forecast_date = forecast_date,
+    metadata = metadata,
+    columns = columns,
+    # Standard step fields.
+    skip = skip,
+    id = id
+  )
+}
+
+#' @export
+prep.step_epi_YeoJohnson <- function(x, training, info = NULL, ...) {
+  # Resolve the selectors and insist that the chosen columns are numeric.
+  selected <- recipes_eval_select(x$terms, training, info)
+  recipes::check_type(training[, selected], types = c("double", "integer"))
+
+  # Estimate one lambda per selected column within each key group; time_value
+  # is excluded from the grouping keys.
+  group_keys <- key_colnames(training, exclude = "time_value")
+  lambda_table <- get_lambdas_yj_table(
+    training = training,
+    col_names = selected,
+    limits = x$limits,
+    num_unique = x$num_unique,
+    na_lambda_fill = x$na_lambda_fill,
+    na_rm = x$na_rm,
+    epi_keys_checked = group_keys
+  )
+
+  # Rebuild the step as trained, recording what `bake()` reads back later.
+  training_meta <- attr(training, "metadata")
+  step_epi_YeoJohnson_new(
+    terms = x$terms, role = x$role, trained = TRUE,
+    lambdas = lambda_table,
+    na_lambda_fill = x$na_lambda_fill, limits = x$limits,
+    num_unique = x$num_unique, na_rm = x$na_rm,
+    forecast_date = training_meta$as_of,
+    metadata = training_meta,
+    columns = selected,
+    skip = x$skip, id = x$id
+  )
+}
+
+#' @export
+bake.step_epi_YeoJohnson <- function(object, new_data, ...) {
+  # If not an epi_df, make it one assuming the template of training data.
+  # If it is an epi_df, check that the keys match.
+  # Imitating the pattern in step_adjust_latency().
+  if (!inherits(new_data, "epi_df") || is.null(attr(new_data, "metadata")$as_of)) {
+    new_data <- as_epi_df(
+      new_data,
+      as_of = object$forecast_date,
+      other_keys = object$metadata$other_keys %||% character()
+    )
+    attr(new_data, "metadata") <- object$metadata
+  }
+  # Check that every key column of the new data is a key column of the lambdas.
+  keys <- key_colnames(new_data, exclude = "time_value")
+  old_keys <- object$lambdas %>%
+    select(-starts_with(".lambda_")) %>%
+    colnames()
+  if (!all(keys %in% old_keys)) {
+    cli::cli_abort(
+      "The keys of the new data do not match the keys of the training data.",
+      call = rlang::caller_env()
+    )
+  }
+  # Check that the columns for transformation are present in new_data.
+  col_names <- object$columns
+  check_new_data(col_names, object, new_data)
+
+  # Transform each column row by row (yj_transform() is vectorized in x, but
+  # not in lambda). Rows whose keys have no lambda are left untransformed.
+  new_data <- left_join(new_data, object$lambdas, by = keys)
+  for (col in col_names) {
+    new_data <- new_data %>%
+      rowwise() %>%
+      mutate(!!col := yj_transform(!!sym(col), !!sym(paste0(".lambda_", col))))
+  }
+  # Remove the lambda columns.
+  new_data %>%
+    select(-starts_with(".lambda_")) %>%
+    ungroup()
+}
+
+#' @export
+print.step_epi_YeoJohnson <- function(x, width = max(20, options()$width - 39), ...) {
+  # Print the selector terms under a fixed title; the step is returned invisibly.
+  print_epi_step(x$terms, x$terms, width = width, title = "Yeo-Johnson transformation (see `lambdas` object for values) on ")
+  invisible(x)
+}
+
+# Compute the lambda values per group for each column.
+get_lambdas_yj_table <- function(training, col_names, limits, num_unique, na_lambda_fill, na_rm, epi_keys_checked) {
+  # `estimate_yj()` operates on one vector at a time, so apply it to each
+  # column within each key group; `.names` yields the `.lambda_<col>` columns.
+  estimate_one <- function(values) estimate_yj(values, limits, num_unique, na_rm)
+  raw_lambdas <- summarise(
+    training,
+    across(all_of(col_names), estimate_one, .names = ".lambda_{.col}"),
+    .by = all_of(epi_keys_checked)
+  )
+
+  # Check for NAs in any of the lambda_ columns.
+  # EDIT: This warning was too noisy. Keeping code around, in case we want it.
+  # for (col in col_names) {
+  #   if (any(is.na(values[[paste0("lambda_", col)]]))) {
+  #     cli::cli_warn(
+  #       c(
+  #         x = "Yeo-Johnson lambda could not be estimated for some geos for {col}.",
+  #         i = "Using lambda={x$na_lambda_fill} in these cases."
+  #       ),
+  #       call = rlang::caller_fn()
+  #     )
+  #   }
+  # }
+
+  # Fill in NAs with the default lambda.
+  fill_missing <- function(lam) ifelse(is.na(lam), na_lambda_fill, lam)
+  mutate(raw_lambdas, across(starts_with(".lambda_"), fill_missing))
+}
+
+
+### Code below taken from recipes::step_YeoJohnson.
+### https://github.com/tidymodels/recipes/blob/v1.1.1/R/YeoJohnson.R#L172
+
+# Yeo-Johnson transformation
+#
+# Note that this function is vectorized in x, but not in lambda: `lambda` must
+# be a single value, and if it is NA then `x` is returned unchanged. `ind_neg`
+# may carry precomputed indices of the negative (`is`) and non-negative (`not`)
+# entries of `x`; `eps` is the tolerance around the special lambdas 0 and 2.
+yj_transform <- function(x, lambda, ind_neg = NULL, eps = 0.001) {
+  if (is.na(lambda)) {
+    return(x)
+  }
+  # Flatten list or data frame input to a plain vector. (This was previously
+  # guarded by `!inherits(x, "tbl_df") || is.data.frame(x)`, which holds for
+  # vectors and tibbles alike, so the alternative branch was unreachable.)
+  x <- unlist(x, use.names = FALSE)
+  # TODO case weights: can we use weights here?
+  if (is.null(ind_neg)) {
+    dat_neg <- x < 0
+    ind_neg <- list(is = which(dat_neg), not = which(!dat_neg))
+  }
+  not_neg <- ind_neg[["not"]]
+  is_neg <- ind_neg[["is"]]
+
+  nn_trans <- function(x, lambda) {
+    if (abs(lambda) < eps) {
+      log(x + 1)
+    } else {
+      ((x + 1)^lambda - 1) / lambda
+    }
+  }
+
+  ng_trans <- function(x, lambda) {
+    if (abs(lambda - 2) < eps) {
+      -log(-x + 1)
+    } else {
+      -((-x + 1)^(2 - lambda) - 1) / (2 - lambda)
+    }
+  }
+
+  if (length(not_neg) > 0) {
+    x[not_neg] <- nn_trans(x[not_neg], lambda)
+  }
+
+  if (length(is_neg) > 0) {
+    x[is_neg] <- ng_trans(x[is_neg], lambda)
+  }
+  x
+}
+
+## Helper for the log-likelihood calculation of eq 3.1 in Yeo, I. K., &
+## Johnson, R. A. (2000), "A new family of power transformations to improve
+## normality or symmetry", Biometrika, page 957. Evaluated at a single
+## `lambda`. `eps` is accepted but not used.
+ll_yj <- function(lambda, y, ind_neg, const, eps = 0.001) {
+  transformed <- yj_transform(y, lambda, ind_neg)
+  n_obs <- length(y)
+  # `var()` divides by n - 1; rescale it to the divide-by-n variance of the
+  # transformed data.
+  ml_variance <- var(transformed) * (n_obs - 1) / n_obs
+  (lambda - 1) * const - 0.5 * n_obs * log(ml_variance)
+}
+
+## Objective handed to `optimize()` in `estimate_yj()`: the `ll_yj()` value at `lam`.
+yj_obj <- function(lam, dat, ind_neg, const) {
+  ll_yj(lam, dat, ind_neg, const)
+}
+
+## Estimates the Yeo-Johnson lambda for a single vector; NA means no usable estimate.
+estimate_yj <- function(dat, limits = c(-5, 5), num_unique = 5, na_rm = TRUE, call = caller_env(2)) {
+  na_rows <- which(is.na(dat))
+  if (length(na_rows) > 0) {
+    if (na_rm) {
+      dat <- dat[-na_rows]
+    } else {
+      cli::cli_abort(
+        c(
+          x = "Missing values are not allowed for the YJ transformation.",
+          i = "See {.arg na_rm} option."
+        ),
+        call = call
+      )
+    }
+  }
+
+  eps <- .001
+  # Too few distinct values to estimate from. A typed NA keeps the result numeric.
+  if (length(unique(dat)) < num_unique) {
+    return(NA_real_)
+  }
+  dat_neg <- dat < 0
+  ind_neg <- list(is = which(dat_neg), not = which(!dat_neg))
+  const <- sum(sign(dat) * log(abs(dat) + 1))
+  res <- suppressWarnings(
+    optimize(
+      yj_obj,
+      interval = limits,
+      maximum = TRUE,
+      dat = dat,
+      ind_neg = ind_neg,
+      const = const,
+      tol = .0001
+    )
+  )
+  lam <- res$maximum
+  # An optimum within `eps` of either search limit is treated as not estimable.
+  if (abs(limits[1] - lam) <= eps || abs(limits[2] - lam) <= eps) {
+    lam <- NA_real_
+  }
+  lam
+}
+
+# Adapted from recipes::tidy.step_BoxCox. Once trained, `lambdas` is a table
+# with one row per key group, so the result has one row per (key, term) pair.
+#' @export
+tidy.step_epi_YeoJohnson <- function(x, ...) {
+  if (is_trained(x)) {
+    # Reshape the `.lambda_<term>` columns into `terms`/`value`, keeping the keys.
+    res <- tidyr::pivot_longer(
+      tibble::as_tibble(x$lambdas),
+      cols = starts_with(".lambda_"),
+      names_to = "terms",
+      names_prefix = "\\.lambda_",
+      values_to = "value"
+    )
+  } else {
+    res <- tibble(terms = sel2char(x$terms), value = na_dbl)
+  }
+  res$id <- x$id
+  res
+}
diff --git a/man/layer_epi_YeoJohnson.Rd b/man/layer_epi_YeoJohnson.Rd
new file mode 100644
index 00000000..1ca4d9cc
--- /dev/null
+++ b/man/layer_epi_YeoJohnson.Rd
@@ -0,0 +1,63 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/layer_yeo_johnson.R
+\name{layer_epi_YeoJohnson}
+\alias{layer_epi_YeoJohnson}
+\title{Unnormalizing transformation}
+\usage{
+layer_epi_YeoJohnson(
+  frosting,
+  ...,
+  lambdas = NULL,
+  by = NULL,
+  id = rand_id("epi_YeoJohnson")
+)
+}
+\arguments{
+\item{frosting}{a \code{frosting} postprocessor. The layer will be added to the
+sequence of operations for this frosting.}
+
+\item{...}{One or more selector functions choosing the prediction columns
+to untransform. See \code{\link[recipes:selections]{recipes::selections()}} for more details.}
+
+\item{lambdas}{Internal. A data frame of lambda values to be used for
+inverting the transformation.}
+
+\item{by}{A (possibly named) character vector of variables to join by.}
+
+\item{id}{a random id string}
+}
+\value{
+an updated \code{frosting} postprocessor
+}
+\description{
+Will undo a step_epi_YeoJohnson transformation.
+}
+\examples{
+library(dplyr)
+jhu <- epidatasets::cases_deaths_subset \%>\%
+  filter(time_value > "2021-11-01", geo_value \%in\% c("ca", "ny")) \%>\%
+  select(geo_value, time_value, cases)
+
+# Create a recipe with a Yeo-Johnson transformation.
+r <- epi_recipe(jhu) \%>\%
+  step_epi_YeoJohnson(cases) \%>\%
+  step_epi_lag(cases, lag = 0) \%>\%
+  step_epi_ahead(cases, ahead = 0, role = "outcome") \%>\%
+  step_epi_naomit()
+
+# Create a frosting layer that will undo the Yeo-Johnson transformation.
+f <- frosting() \%>\%
+  layer_predict() \%>\%
+  layer_epi_YeoJohnson(.pred)
+
+# Create a workflow and fit it.
+wf <- epi_workflow(r, linear_reg()) \%>\%
+  fit(jhu) \%>\%
+  add_frosting(f)
+
+# Forecast the workflow, which should reverse the Yeo-Johnson transformation.
+forecast(wf)
+# Compare to the original data.
+jhu \%>\% filter(time_value == "2021-12-31")
+forecast(wf)
+}
diff --git a/man/step_adjust_latency.Rd b/man/step_adjust_latency.Rd
index 9e1bafbd..c904a5ad 100644
--- a/man/step_adjust_latency.Rd
+++ b/man/step_adjust_latency.Rd
@@ -140,7 +140,7 @@ toy_recipe \%>\%
 #> * geo_type  = state
 #> * time_type = day
 #> * as_of     = 2015-01-14
-#> 
+#>
 #> # A tibble: 8 x 4
 #>   geo_value time_value     a     b
 #>   <chr>     <date>     <dbl> <dbl>
@@ -176,7 +176,7 @@ toy_recipe \%>\%
 #> * geo_type  = state
 #> * time_type = day
 #> * as_of     = 2015-01-14
-#> 
+#>
 #> # A tibble: 21 x 7
 #>    geo_value time_value     a     b lag_3_a lag_4_b ahead_1_a
 #>    <chr>     <date>     <dbl> <dbl>   <dbl>   <dbl>     <dbl>
@@ -224,7 +224,7 @@ toy_recipe \%>\%
 #> * geo_type  = state
 #> * time_type = day
 #> * as_of     = 2015-01-14
-#> 
+#>
 #> # A tibble: 10 x 6
 #>    geo_value time_value     a     b lag_0_a ahead_3_a
 #>    <chr>     <date>     <dbl> <dbl>   <dbl>     <dbl>
@@ -296,7 +296,7 @@ rates_fit
 
 }
 \seealso{
-Other row operation steps: 
+Other row operation steps:
 \code{\link{step_epi_lag}()},
 \code{\link{step_growth_rate}()},
 \code{\link{step_lag_difference}()}
diff --git a/man/step_epi_YeoJohnson.Rd b/man/step_epi_YeoJohnson.Rd
new file mode 100644
index 00000000..1fa63761
--- /dev/null
+++ b/man/step_epi_YeoJohnson.Rd
@@ -0,0 +1,118 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/step_yeo_johnson.R
+\name{step_epi_YeoJohnson}
+\alias{step_epi_YeoJohnson}
+\title{Yeo-Johnson transformation}
+\usage{
+step_epi_YeoJohnson(
+  recipe,
+  ...,
+  role = "predictor",
+  trained = FALSE,
+  lambdas = NULL,
+  na_lambda_fill = 1/4,
+  limits = c(-5, 5),
+  num_unique = 5,
+  na_rm = TRUE,
+  skip = FALSE,
+  id = rand_id("epi_YeoJohnson")
+)
+}
+\arguments{
+\item{recipe}{A recipe object. The step will be added to the
+sequence of operations for this recipe.}
+
+\item{...}{One or more selector functions to choose variables
+for this step. See \code{\link[recipes:selections]{recipes::selections()}} for more details.}
+
+\item{role}{For model terms created by this step, what analysis role should
+they be assigned? \code{lag} is default a predictor while \code{ahead} is an outcome.}
+
+\item{trained}{A logical for whether the selectors in \code{...}
+have been resolved by \code{\link[=prep]{prep()}}.}
+
+\item{lambdas}{Internal. A data frame of lambda values, one row per epikey
+group. This is \code{NULL} until computed by \code{\link[=prep]{prep()}}.}
+
+\item{na_lambda_fill}{A numeric value to fill in for any
+geos where the lambda cannot be estimated.}
+
+\item{limits}{A length 2 numeric vector defining the range to
+compute the transformation parameter lambda.}
+
+\item{num_unique}{An integer where data that have fewer than this
+many unique values will not be evaluated for a transformation.}
+
+\item{na_rm}{A logical indicating whether missing values should be
+removed.}
+
+\item{skip}{A logical. Should the step be skipped when the recipe is
+baked by \code{\link[=bake]{bake()}}. On the \code{training} data, the step will always be
+conducted (even if \code{skip = TRUE}).}
+
+\item{id}{A unique identifier for the step}
+}
+\value{
+An updated version of \code{recipe} with the new step added to the
+sequence of any existing operations.
+}
+\description{
+\code{step_epi_YeoJohnson()} creates a \emph{specification} of a recipe step that will
+transform data using a Yeo-Johnson transformation. This fork works with panel
+data and is meant for epidata.
+}
+\details{
+The Yeo-Johnson transformation is variance-stabilizing
+transformation, similar to the Box-Cox but does not require the input
+variables to be strictly positive. In the package, the partial
+log-likelihood function is directly optimized within a reasonable set of
+transformation values (which can be changed by the user). The optimization
+finds a lambda parameter for each group in the data that minimizes the
+variance of the transformed data.
+
+This transformation is typically done on the outcome variable
+using the residuals for a statistical model (such as ordinary
+least squares). Here, a simple null model (intercept only) is
+used to apply the transformation to the \emph{predictor}
+variables individually. This can have the effect of making the
+variable distributions more symmetric.
+
+If the transformation parameters are estimated to be very
+close to the bounds, or if there are too few unique values, the
+lambda for that group is set to \code{na_lambda_fill}.
+}
+\section{Tidying}{
+When you \code{\link[=tidy.recipe]{tidy()}} this step, a tibble is returned with
+columns \code{terms}, \code{value} , and \code{id}:
+
+\describe{
+\item{terms}{character, the selectors or variables selected}
+\item{value}{numeric, the lambda estimate}
+\item{id}{character, id of this step}
+}
+}
+
+\examples{
+jhu <- cases_deaths_subset \%>\%
+  filter(time_value > "2021-01-01", geo_value \%in\% c("ca", "ny")) \%>\%
+  select(geo_value, time_value, cases)
+filtered_data <- jhu
+
+r <- epi_recipe(filtered_data) \%>\%
+  step_epi_YeoJohnson(cases)
+# View the recipe
+r
+# Fit the recipe
+tr <- r \%>\% prep(filtered_data)
+# View the lambda values
+tr$steps[[1]]$lambdas
+# View the transformed data
+df <- tr \%>\% bake(filtered_data)
+plot(density(df$cases))
+plot(density(filtered_data$cases))
+}
+\references{
+Yeo, I. K., and Johnson, R. A. (2000). A new family of power
+transformations to improve normality or symmetry. \emph{Biometrika}.
+}
+\concept{individual transformation steps}
diff --git a/tests/testthat/test-yeo-johnson.R b/tests/testthat/test-yeo-johnson.R
new file mode 100644
index 00000000..124f9648
--- /dev/null
+++ b/tests/testthat/test-yeo-johnson.R
@@ -0,0 +1,141 @@
+test_that("Yeo-Johnson transformation inverts correctly", {
+  # Note that the special lambda values of 0 and 2 are covered by the tests
+  # below.
+  expect_true(
+    map_lgl(seq(-5, 5, 0.1), function(lambda) {
+      map_lgl(seq(-10, 10, 0.1), \(x) abs(yj_inverse(yj_transform(x, lambda), lambda) - x) < 0.00001) %>% all()
+    }) %>%
+      all()
+  )
+})
+
+test_that("Yeo-Johnson steps and layers invert each other", {
+  jhu <- epidatasets::cases_deaths_subset %>%
+    filter(time_value > "2021-01-01", geo_value %in% c("ca", "ny")) %>%
+    select(geo_value, time_value, cases)
+  filtered_data <- jhu
+
+  # Get some lambda values
+  r <- epi_recipe(filtered_data) %>%
+    step_epi_YeoJohnson(cases) %>%
+    step_epi_lag(cases, lag = 0) %>%
+    step_epi_ahead(cases, ahead = 0, role = "outcome") %>%
+    step_epi_naomit()
+  tr <- r %>% prep(filtered_data)
+
+  # Check general lambda values tibble structure
+  expect_true(".lambda_cases" %in% names(tr$steps[[1]]$lambdas))
+  expect_true(is.numeric(tr$steps[[1]]$lambdas$.lambda_cases))
+  # Still works on a tibble
+  expect_equal(
+    tr %>% bake(filtered_data %>% as_tibble()),
+    tr %>% bake(filtered_data)
+  )
+
+  # Make sure that the inverse transformation works
+  f <- frosting() %>%
+    layer_predict() %>%
+    layer_epi_YeoJohnson(.pred)
+  wf <- epi_workflow(r, linear_reg()) %>%
+    fit(filtered_data) %>%
+    add_frosting(f)
+  out1 <- filtered_data %>%
+    as_tibble() %>%
+    slice_max(time_value, by = geo_value)
+  out2 <- forecast(wf) %>% rename(cases = .pred)
+  expect_equal(out1, out2)
+
+  # Make sure it works when there are multiple predictors and outcomes
+  jhu_multi <- epidatasets::covid_case_death_rates_extended %>%
+    filter(time_value > "2021-01-01", geo_value %in% c("ca", "ny")) %>%
+    select(geo_value, time_value, case_rate, death_rate)
+  filtered_data <- jhu_multi
+  r <- epi_recipe(filtered_data) %>%
+    step_epi_YeoJohnson(case_rate, death_rate) %>%
+    step_epi_lag(case_rate, death_rate, lag = 0) %>%
+    step_epi_ahead(case_rate, death_rate, ahead = 0, role = "outcome") %>%
+    step_epi_naomit()
+  tr <- r %>% prep(filtered_data)
+
+  # Check general lambda values tibble structure
+  expect_true(".lambda_case_rate" %in% names(tr$steps[[1]]$lambdas))
+  expect_true(".lambda_death_rate" %in% names(tr$steps[[1]]$lambdas))
+  expect_true(is.numeric(tr$steps[[1]]$lambdas$.lambda_case_rate))
+  expect_true(is.numeric(tr$steps[[1]]$lambdas$.lambda_death_rate))
+
+  # Make sure that the inverse transformation works
+  f <- frosting() %>%
+    layer_predict() %>%
+    layer_epi_YeoJohnson(.pred_ahead_0_case_rate, .pred_ahead_0_death_rate)
+  wf <- epi_workflow(r, linear_reg()) %>%
+    fit(filtered_data) %>%
+    add_frosting(f)
+  out1 <- filtered_data %>%
+    as_tibble() %>%
+    slice_max(time_value, by = geo_value)
+  # debugonce(slather.layer_epi_YeoJohnson)
+  out2 <- forecast(wf) %>% rename(case_rate = .pred_ahead_0_case_rate, death_rate = .pred_ahead_0_death_rate)
+  expect_equal(out1, out2)
+})
+
+test_that("Yeo-Johnson steps and layers invert each other when other_keys are present", {
+  # Small synthetic grad_employ_dataset version.
+  filtered_data <- tribble(
+    ~geo_value, ~age_group, ~edu_qual, ~time_value, ~med_income_2y,
+    "ca", "25-34", "bachelor", 2017, 50000,
+    "ca", "25-34", "bachelor", 2018, 50500,
+    "ca", "25-34", "bachelor", 2019, 51000,
+    "ca", "25-34", "bachelor", 2020, 51500,
+    "ca", "25-34", "bachelor", 2021, 52000,
+    "ca", "25-34", "bachelor", 2022, 52500,
+    "ca", "35-1000", "bachelor", 2017, 3e10,
+    "ca", "35-1000", "bachelor", 2018, 3e10 + 10,
+    "ca", "35-1000", "bachelor", 2019, 3e10 + 20,
+    "ca", "35-1000", "bachelor", 2020, 3e10 + 30,
+    "ca", "35-1000", "bachelor", 2021, 3e10 + 40,
+    "ca", "35-1000", "bachelor", 2022, 3e10 + 50,
+    "ca", "25-34", "master", 2017, 2 * 50000,
+    "ca", "25-34", "master", 2018, 2 * 50500,
+    "ca", "25-34", "master", 2019, 2 * 51000,
+    "ca", "25-34", "master", 2020, 2 * 51500,
+    "ca", "25-34", "master", 2021, 2 * 52000,
+    "ca", "25-34", "master", 2022, 2 * 52500,
+    "ca", "35-1000", "master", 2017, 2 * 3e10,
+    "ca", "35-1000", "master", 2018, 2 * (3e10 + 10),
+    "ca", "35-1000", "master", 2019, 2 * (3e10 + 20),
+    "ca", "35-1000", "master", 2020, 2 * (3e10 + 30),
+    "ca", "35-1000", "master", 2021, 2 * (3e10 + 40),
+    "ca", "35-1000", "master", 2022, 2 * (3e10 + 50)
+  ) %>% as_epi_df(other_keys = c("age_group", "edu_qual"))
+
+  # Get some lambda values
+  r <- epi_recipe(filtered_data) %>%
+    step_epi_YeoJohnson(med_income_2y) %>%
+    step_epi_lag(med_income_2y, lag = 0) %>%
+    step_epi_ahead(med_income_2y, ahead = 0, role = "outcome") %>%
+    step_epi_naomit()
+  tr <- r %>% prep(filtered_data)
+  expect_true(".lambda_med_income_2y" %in% names(tr$steps[[1]]$lambdas))
+  expect_true("geo_value" %in% names(tr$steps[[1]]$lambdas))
+  expect_true("age_group" %in% names(tr$steps[[1]]$lambdas))
+  expect_true("edu_qual" %in% names(tr$steps[[1]]$lambdas))
+  expect_true(is.numeric(tr$steps[[1]]$lambdas$.lambda_med_income_2y))
+
+  # Make sure that the inverse transformation works
+  f <- frosting() %>%
+    layer_predict() %>%
+    layer_epi_YeoJohnson(.pred)
+  wf <- epi_workflow(r, linear_reg()) %>%
+    fit(filtered_data) %>%
+    add_frosting(f)
+  out1 <- filtered_data %>%
+    as_tibble() %>%
+    slice_max(time_value, by = geo_value) %>%
+    select(geo_value, age_group, time_value, med_income_2y) %>%
+    arrange(geo_value, age_group, time_value)
+  out2 <- forecast(wf) %>%
+    rename(med_income_2y = .pred) %>%
+    select(geo_value, age_group, time_value, med_income_2y) %>%
+    arrange(geo_value, age_group, time_value)
+  expect_equal(out1, out2)
+})

From b4b06277e60de9fdd8ea1744018f5410466e44fa Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov <dshemetov@ucdavis.edu>
Date: Mon, 31 Mar 2025 13:50:27 -0700
Subject: [PATCH 2/7] feat: add air.toml

---
 .Rbuildignore | 1 +
 air.toml      | 2 ++
 2 files changed, 3 insertions(+)
 create mode 100644 air.toml

diff --git a/.Rbuildignore b/.Rbuildignore
index dc41e622..0bdb211f 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -22,3 +22,4 @@
 ^.lintr$
 ^.venv$
 ^inst/templates$
+^air\.toml$
\ No newline at end of file
diff --git a/air.toml b/air.toml
new file mode 100644
index 00000000..6cb579db
--- /dev/null
+++ b/air.toml
@@ -0,0 +1,2 @@
+[format]
+line-width = 120

From 2d05117e3a9a260a69fc803b78849b8c1026460a Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov <dshemetov@ucdavis.edu>
Date: Fri, 21 Mar 2025 13:42:08 -0700
Subject: [PATCH 3/7] doc: version bump + news

---
 NEWS.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index de698ee9..36b0198f 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -12,12 +12,12 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.0.x will indicat
   `data(<dataset name>, package = "epidatasets")`, `epidatasets::<dataset name>`
   or, after loading the package, the name of the dataset alone (#382).
 - `step_adjust_latency()` no longer allows empty column selection.
-- Addresses upstream breaking changes from cmu-delphi/epiprocess#595 (`growth_rate()`). 
+- Addresses upstream breaking changes from cmu-delphi/epiprocess#595 (`growth_rate()`).
   `step_growth_rate()` has lost its `additional_gr_args_list` argument and now
   has an `na_rm` argument.
 - Moves `epiprocess` out of depends (#440). No internals have changed, but downstream
   users may need to add `library(epiprocess)` to existing code.
-- Removes dependence on the `distributional` package, replacing the quantiles 
+- Removes dependence on the `distributional` package, replacing the quantiles
   with `hardhat::quantile_pred()`. Some associated functions are deprecated with
   `lifecycle` messages.
 - Rename `check_enough_train_data()` to `check_enough_data()`, and generalize it
@@ -38,6 +38,8 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.0.x will indicat
 - Replace `dist_quantiles()` with `hardhat::quantile_pred()`
 - Allow `quantile()` to threshold to an interval if desired (#434)
 - `arx_forecaster()` detects if there's enough data to predict
+- Add `step_epi_YeoJohnson()` to perform a Yeo-Johnson transformation on the outcome variable.
+- Add `layer_epi_YeoJohnson()` to undo a Yeo-Johnson transformation on the outcome variable in a forecast workflow.
 
 ## Bug fixes
 

From 95c50b5766613d0ffd083d304cad3b59c9325cfa Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov <dmtryshmtv@gmail.com>
Date: Mon, 31 Mar 2025 13:58:05 -0700
Subject: [PATCH 4/7] Update R/layer_yeo_johnson.R

Co-authored-by: Daniel McDonald <dajmcdon@gmail.com>
---
 R/layer_yeo_johnson.R | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/R/layer_yeo_johnson.R b/R/layer_yeo_johnson.R
index 826bd23d..30c62ff1 100644
--- a/R/layer_yeo_johnson.R
+++ b/R/layer_yeo_johnson.R
@@ -205,13 +205,14 @@ yj_inverse <- function(x, lambda, eps = 0.001) {
   is_neg <- ind_neg[["is"]]
 
   nn_inv_trans <- function(x, lambda) {
-    if (abs(lambda) < eps) {
-      # log(x + 1)
-      exp(x) - 1
-    } else {
-      # ((x + 1)^lambda - 1) / lambda
-      (lambda * x + 1)^(1 / lambda) - 1
-    }
+    out <- double(length(x))
+    sm_lambdas <- abs(lambda) < eps
+    out[sm_lambdas] <- exp(x[sm_lambdas]) - 1
+    x <- x[!sm_lambdas]
+    lambda <- lambda[!sm_lambdas]
+    out[!sm_lambdas] <- (lambda * x + 1)^(1 / lambda) - 1
+    out
+  }
   }
 
   ng_inv_trans <- function(x, lambda) {

From 6a247437e4d34f7ca7f98bf524d9e2906cb7e8e7 Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov <dshemetov@ucdavis.edu>
Date: Mon, 31 Mar 2025 18:31:06 -0700
Subject: [PATCH 5/7] fix: review tweaks * vectorize in lambda * inheritParams
 in docs * lambda -> yj_param in many places

---
 R/layer_yeo_johnson.R             | 110 ++++++++++----------
 R/step_adjust_latency.R           |   1 -
 R/step_yeo_johnson.R              | 167 ++++++++++++++++--------------
 man/layer_epi_YeoJohnson.Rd       |   4 +-
 man/step_adjust_latency.Rd        |  12 +--
 man/step_epi_YeoJohnson.Rd        |  15 +--
 tests/testthat/test-yeo-johnson.R |  54 +++++-----
 7 files changed, 191 insertions(+), 172 deletions(-)

diff --git a/R/layer_yeo_johnson.R b/R/layer_yeo_johnson.R
index 30c62ff1..d399717f 100644
--- a/R/layer_yeo_johnson.R
+++ b/R/layer_yeo_johnson.R
@@ -2,14 +2,10 @@
 #'
 #' Will undo a step_epi_YeoJohnson transformation.
 #'
-#' @param frosting a `frosting` postprocessor. The layer will be added to the
-#'   sequence of operations for this frosting.
-#' @param lambdas Internal. A data frame of lambda values to be used for
+#' @inheritParams layer_population_scaling
+#' @param yj_params Internal. A data frame of parameters to be used for
 #'   inverting the transformation.
-#' @param ... One or more selector functions to scale variables
-#'   for this step. See [recipes::selections()] for more details.
 #' @param by A (possibly named) character vector of variables to join by.
-#' @param id a random id string
 #'
 #' @return an updated `frosting` postprocessor
 #' @export
@@ -41,13 +37,13 @@
 #' # Compare to the original data.
 #' jhu %>% filter(time_value == "2021-12-31")
 #' forecast(wf)
-layer_epi_YeoJohnson <- function(frosting, ..., lambdas = NULL, by = NULL, id = rand_id("epi_YeoJohnson")) {
-  checkmate::assert_tibble(lambdas, min.rows = 1, null.ok = TRUE)
+layer_epi_YeoJohnson <- function(frosting, ..., yj_params = NULL, by = NULL, id = rand_id("epi_YeoJohnson")) {
+  checkmate::assert_tibble(yj_params, min.rows = 1, null.ok = TRUE)
 
   add_layer(
     frosting,
     layer_epi_YeoJohnson_new(
-      lambdas = lambdas,
+      yj_params = yj_params,
       by = by,
       terms = dplyr::enquos(...),
       id = id
@@ -55,8 +51,8 @@ layer_epi_YeoJohnson <- function(frosting, ..., lambdas = NULL, by = NULL, id =
   )
 }
 
-layer_epi_YeoJohnson_new <- function(lambdas, by, terms, id) {
-  layer("epi_YeoJohnson", lambdas = lambdas, by = by, terms = terms, id = id)
+layer_epi_YeoJohnson_new <- function(yj_params, by, terms, id) {
+  layer("epi_YeoJohnson", yj_params = yj_params, by = by, terms = terms, id = id)
 }
 
 #' @export
@@ -64,16 +60,18 @@ layer_epi_YeoJohnson_new <- function(lambdas, by, terms, id) {
 slather.layer_epi_YeoJohnson <- function(object, components, workflow, new_data, ...) {
   rlang::check_dots_empty()
 
-  # Get the lambdas from the layer or from the workflow.
-  lambdas <- object$lambdas %||% get_lambdas_in_layer(workflow)
+  # TODO: We will error if we don't have a workflow. Write a check later.
 
-  # If the by is not specified, try to infer it from the lambdas.
+  # Get the yj_params from the layer or from the workflow.
+  yj_params <- object$yj_params %||% get_yj_params_in_layer(workflow)
+
+  # If the by is not specified, try to infer it from the yj_params.
   if (is.null(object$by)) {
     # Assume `layer_predict` has calculated the prediction keys and other
     # layers don't change the prediction key colnames:
     prediction_key_colnames <- names(components$keys)
     lhs_potential_keys <- prediction_key_colnames
-    rhs_potential_keys <- colnames(select(lambdas, -starts_with("lambda_")))
+    rhs_potential_keys <- colnames(select(yj_params, -starts_with(".yj_param_")))
     object$by <- intersect(lhs_potential_keys, rhs_potential_keys)
     suggested_min_keys <- setdiff(lhs_potential_keys, "time_value")
     if (!all(suggested_min_keys %in% object$by)) {
@@ -95,16 +93,16 @@ slather.layer_epi_YeoJohnson <- function(object, components, workflow, new_data,
   object$by <- object$by %||%
     intersect(
       epi_keys_only(components$predictions),
-      colnames(select(lambdas, -starts_with(".lambda_")))
+      colnames(select(yj_params, -starts_with(".yj_param_")))
     )
   joinby <- list(x = names(object$by) %||% object$by, y = object$by)
   hardhat::validate_column_names(components$predictions, joinby$x)
-  hardhat::validate_column_names(lambdas, joinby$y)
+  hardhat::validate_column_names(yj_params, joinby$y)
 
-  # Join the lambdas.
+  # Join the yj_params.
   components$predictions <- inner_join(
     components$predictions,
-    lambdas,
+    yj_params,
     by = object$by,
     relationship = "many-to-one",
     unmatched = c("error", "drop")
@@ -115,7 +113,7 @@ slather.layer_epi_YeoJohnson <- function(object, components, workflow, new_data,
   col_names <- names(pos)
 
   # The `object$terms` is where the user specifies the columns they want to
-  # untransform. We need to match the outcomes with their lambda columns in our
+  # untransform. We need to match the outcomes with their yj_param columns in our
   # parameter table and then apply the inverse transformation.
   if (identical(col_names, ".pred")) {
     # In this case, we don't get a hint for the outcome column name, so we need
@@ -130,8 +128,7 @@ slather.layer_epi_YeoJohnson <- function(object, components, workflow, new_data,
       magrittr::extract(, 2)
 
     components$predictions <- components$predictions %>%
-      rowwise() %>%
-      mutate(.pred := yj_inverse(.pred, !!sym(paste0(".lambda_", outcome_cols))))
+      mutate(.pred := yj_inverse(.pred, !!sym(paste0(".yj_param_", outcome_cols))))
   } else if (identical(col_names, character(0))) {
     # Wish I could suggest `all_outcomes()` here, but currently it's the same as
     # not specifying any terms. I don't want to spend time with dealing with
@@ -146,10 +143,10 @@ slather.layer_epi_YeoJohnson <- function(object, components, workflow, new_data,
     )
   } else {
     # In this case, we assume that the user has specified the columns they want
-    # transformed here. We then need to determine the lambda columns for each of
+    # transformed here. We then need to determine the yj_param columns for each of
     # these columns. That is, we need to convert a vector of column names like
     # c(".pred_ahead_1_case_rate", ".pred_ahead_7_case_rate") to
-    # c("lambda_ahead_1_case_rate", "lambda_ahead_7_case_rate").
+    # c(".yj_param_ahead_1_case_rate", ".yj_param_ahead_7_case_rate").
     original_outcome_cols <- stringr::str_match(col_names, ".pred_ahead_\\d+_(.*)")[, 2]
     outcomes_wout_ahead <- stringr::str_match(names(components$mold$outcomes), "ahead_\\d+_(.*)")[, 2]
     if (any(original_outcome_cols %nin% outcomes_wout_ahead)) {
@@ -163,34 +160,37 @@ slather.layer_epi_YeoJohnson <- function(object, components, workflow, new_data,
 
     for (i in seq_along(col_names)) {
       col <- col_names[i]
-      lambda_col <- paste0(".lambda_", original_outcome_cols[i])
+      yj_param_col <- paste0(".yj_param_", original_outcome_cols[i])
       components$predictions <- components$predictions %>%
-        rowwise() %>%
-        mutate(!!sym(col) := yj_inverse(!!sym(col), !!sym(lambda_col)))
+        mutate(!!sym(col) := yj_inverse(!!sym(col), !!sym(yj_param_col)))
     }
   }
 
-  # Remove the lambda columns.
+  # Remove the yj_param columns.
   components$predictions <- components$predictions %>%
-    select(-any_of(starts_with(".lambda_"))) %>%
+    select(-any_of(starts_with(".yj_param_"))) %>%
     ungroup()
   components
 }
 
 #' @export
 print.layer_epi_YeoJohnson <- function(x, width = max(20, options()$width - 30), ...) {
-  title <- "Yeo-Johnson transformation (see `lambdas` object for values) on "
+  title <- "Yeo-Johnson transformation (see `yj_params` object for values) on "
   print_layer(x$terms, title = title, width = width)
 }
 
 # Inverse Yeo-Johnson transformation
 #
-# Inverse of `yj_transform` in step_yeo_johnson.R. Note that this function is
-# vectorized in x, but not in lambda.
+# Inverse of `yj_transform` in step_yeo_johnson.R.
 yj_inverse <- function(x, lambda, eps = 0.001) {
-  if (is.na(lambda)) {
+  if (any(is.na(lambda))) {
     return(x)
   }
+  if (length(x) > 1 && length(lambda) == 1) {
+    lambda <- rep(lambda, length(x))
+  } else if (length(x) != length(lambda)) {
+    cli::cli_abort("Length of `x` must be equal to length of `lambda`.", call = rlang::caller_fn())
+  }
   if (!inherits(x, "tbl_df") || is.data.frame(x)) {
     x <- unlist(x, use.names = FALSE)
   } else {
@@ -199,52 +199,58 @@ yj_inverse <- function(x, lambda, eps = 0.001) {
     }
   }
 
-  dat_neg <- x < 0
-  ind_neg <- list(is = which(dat_neg), not = which(!dat_neg))
-  not_neg <- ind_neg[["not"]]
-  is_neg <- ind_neg[["is"]]
-
   nn_inv_trans <- function(x, lambda) {
     out <- double(length(x))
     sm_lambdas <- abs(lambda) < eps
-    out[sm_lambdas] <- exp(x[sm_lambdas]) - 1
+    if (length(sm_lambdas) > 0) {
+      out[sm_lambdas] <- exp(x[sm_lambdas]) - 1
+    }
     x <- x[!sm_lambdas]
     lambda <- lambda[!sm_lambdas]
-    out[!sm_lambdas] <- (lambda * x + 1)^(1 / lambda) - 1
+    if (length(x) > 0) {
+      out[!sm_lambdas] <- (lambda * x + 1)^(1 / lambda) - 1
+    }
     out
   }
-  }
 
   ng_inv_trans <- function(x, lambda) {
-    if (abs(lambda - 2) < eps) {
-      # -log(-x + 1)
-      -(exp(-x) - 1)
-    } else {
-      # -((-x + 1)^(2 - lambda) - 1) / (2 - lambda)
-      -(((lambda - 2) * x + 1)^(1 / (2 - lambda)) - 1)
+    out <- double(length(x))
+    near2_lambdas <- abs(lambda - 2) < eps
+    if (length(near2_lambdas) > 0) {
+      out[near2_lambdas] <- -(exp(-x[near2_lambdas]) - 1)
+    }
+    x <- x[!near2_lambdas]
+    lambda <- lambda[!near2_lambdas]
+    if (length(x) > 0) {
+      out[!near2_lambdas] <- -(((lambda - 2) * x + 1)^(1 / (2 - lambda)) - 1)
     }
+    out
   }
 
+  dat_neg <- x < 0
+  not_neg <- which(!dat_neg)
+  is_neg <- which(dat_neg)
+
   if (length(not_neg) > 0) {
-    x[not_neg] <- nn_inv_trans(x[not_neg], lambda)
+    x[not_neg] <- nn_inv_trans(x[not_neg], lambda[not_neg])
   }
 
   if (length(is_neg) > 0) {
-    x[is_neg] <- ng_inv_trans(x[is_neg], lambda)
+    x[is_neg] <- ng_inv_trans(x[is_neg], lambda[is_neg])
   }
   x
 }
 
-get_lambdas_in_layer <- function(workflow) {
+get_yj_params_in_layer <- function(workflow) {
   this_recipe <- hardhat::extract_recipe(workflow)
   if (!(this_recipe %>% recipes::detect_step("epi_YeoJohnson"))) {
     cli_abort("`layer_epi_YeoJohnson` requires `step_epi_YeoJohnson` in the recipe.", call = rlang::caller_env())
   }
   for (step in this_recipe$steps) {
     if (inherits(step, "step_epi_YeoJohnson")) {
-      lambdas <- step$lambdas
+      yj_params <- step$yj_params
       break
     }
   }
-  lambdas
+  yj_params
 }
diff --git a/R/step_adjust_latency.R b/R/step_adjust_latency.R
index ae9db6ef..a0d59bc1 100644
--- a/R/step_adjust_latency.R
+++ b/R/step_adjust_latency.R
@@ -272,7 +272,6 @@ step_adjust_latency_new <-
 # lags introduces max(lags) NA's after the max_time_value.
 #' @export
 #' @importFrom glue glue
-#' @importFrom dplyr rowwise
 prep.step_adjust_latency <- function(x, training, info = NULL, ...) {
   latency <- x$latency
   col_names <- recipes::recipes_eval_select(x$terms, training, info)
diff --git a/R/step_yeo_johnson.R b/R/step_yeo_johnson.R
index 3a5fdce6..272b034a 100644
--- a/R/step_yeo_johnson.R
+++ b/R/step_yeo_johnson.R
@@ -4,28 +4,19 @@
 #' transform data using a Yeo-Johnson transformation. This fork works with panel
 #' data and is meant for epidata.
 #'
-#' @param recipe A recipe object. The step will be added to the
-#'  sequence of operations for this recipe.
-#' @param ... One or more selector functions to choose variables
-#'  for this step. See [recipes::selections()] for more details.
-#' @param role For model terms created by this step, what analysis role should
-#'  they be assigned? `lag` is default a predictor while `ahead` is an outcome.
+#' @inheritParams step_population_scaling
 #' @param trained A logical for whether the selectors in `...`
 #' have been resolved by [prep()].
-#' @param lambdas Internal. A numeric vector of transformation values. This
+#' @param yj_params Internal. A data frame of transformation parameters, with one
+#'  row per key group. This is `NULL` until computed by [prep()].
-#' @param na_lambda_fill A numeric value to fill in for any
-#'  geos where the lambda cannot be estimated.
-#' @param limits A length 2 numeric vector defining the range to
-#'  compute the transformation parameter lambda.
-#' @param num_unique An integer where data that have fewer than this
-#'  many unique values will not be evaluated for a transformation.
-#' @param na_rm A logical indicating whether missing values should be
-#'  removed.
-#' @param skip A logical. Should the step be skipped when the recipe is
-#'  baked by [bake()]. On the `training` data, the step will always be
-#'  conducted (even if `skip = TRUE`).
-#' @param id A unique identifier for the step
+#' @param na_fill A numeric value to fill in for any key groups where a
+#'  Yeo-Johnson parameter cannot be estimated.
+#' @param limits A length 2 numeric vector defining the range to compute the
+#'  transformation parameter.
+#' @param num_unique An integer where data that have fewer than this many unique
+#'  values will not be evaluated for a transformation.
+#' @param na_rm A logical indicating whether missing values should be removed
+#'  before estimating the transformation parameter.
 #' @template step-return
 #' @family individual transformation steps
 #' @export
@@ -73,8 +64,8 @@
 #' r
 #' # Fit the recipe
 #' tr <- r %>% prep(filtered_data)
-#' # View the lambda values
-#' tr$steps[[1]]$lambdas
+#' # View the parameter values
+#' tr$steps[[1]]$yj_params
 #' # View the transformed data
 #' df <- tr %>% bake(filtered_data)
 #' plot(density(df$cases))
@@ -84,8 +75,8 @@ step_epi_YeoJohnson <- function(
   ...,
   role = "predictor",
   trained = FALSE,
-  lambdas = NULL,
-  na_lambda_fill = 1 / 4,
+  yj_params = NULL,
+  na_fill = 1 / 4,
   limits = c(-5, 5),
   num_unique = 5,
   na_rm = TRUE,
@@ -93,7 +84,7 @@ step_epi_YeoJohnson <- function(
   id = rand_id("epi_YeoJohnson")
 ) {
   checkmate::assert_numeric(limits, len = 2)
-  checkmate::assert_numeric(na_lambda_fill, lower = min(limits), upper = max(limits), len = 1)
+  checkmate::assert_numeric(na_fill, lower = min(limits), upper = max(limits), len = 1)
   checkmate::assert_numeric(num_unique, lower = 2, upper = Inf, len = 1)
   checkmate::assert_logical(na_rm, len = 1)
   checkmate::assert_logical(skip, len = 1)
@@ -103,8 +94,8 @@ step_epi_YeoJohnson <- function(
       terms = enquos(...),
       role = role,
       trained = trained,
-      lambdas = lambdas,
-      na_lambda_fill = na_lambda_fill,
+      yj_params = yj_params,
+      na_fill = na_fill,
       limits = sort(limits)[1:2],
       num_unique = num_unique,
       na_rm = na_rm,
@@ -121,8 +112,8 @@ step_epi_YeoJohnson_new <- function(
   terms,
   role,
   trained,
-  lambdas,
-  na_lambda_fill,
+  yj_params,
+  na_fill,
   limits,
   num_unique,
   na_rm,
@@ -137,8 +128,8 @@ step_epi_YeoJohnson_new <- function(
     terms = terms,
     role = role,
     trained = trained,
-    lambdas = lambdas,
-    na_lambda_fill = na_lambda_fill,
+    yj_params = yj_params,
+    na_fill = na_fill,
     limits = limits,
     num_unique = num_unique,
     na_rm = na_rm,
@@ -156,12 +147,12 @@ prep.step_epi_YeoJohnson <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
   recipes::check_type(training[, col_names], types = c("double", "integer"))
 
-  lambdas <- get_lambdas_yj_table(
+  yj_params <- compute_yj_params(
     training,
     col_names,
     x$limits,
     x$num_unique,
-    x$na_lambda_fill,
+    x$na_fill,
     x$na_rm,
     key_colnames(training, exclude = "time_value")
   )
@@ -170,8 +161,8 @@ prep.step_epi_YeoJohnson <- function(x, training, info = NULL, ...) {
     terms = x$terms,
     role = x$role,
     trained = TRUE,
-    lambdas = lambdas,
-    na_lambda_fill = x$na_lambda_fill,
+    yj_params = yj_params,
+    na_fill = x$na_fill,
     limits = x$limits,
     num_unique = x$num_unique,
     na_rm = x$na_rm,
@@ -196,14 +187,10 @@ bake.step_epi_YeoJohnson <- function(object, new_data, ...) {
     )
     attr(new_data, "metadata") <- object$metadata
   }
-  # Check that the keys match.
-  keys <- key_colnames(new_data, exclude = "time_value")
-  old_keys <- object$lambdas %>%
-    select(-starts_with(".lambda_")) %>%
-    colnames()
-  if (!all(keys %in% old_keys)) {
+  # Check that the columns for transformation are present in new_data.
+  if (!all(object$columns %in% colnames(new_data))) {
     cli::cli_abort(
-      "The keys of the new data do not match the keys of the training data.",
+      "The columns for transformation are not present in the new data.",
       call = rlang::caller_fn()
     )
   }
@@ -211,68 +198,82 @@ bake.step_epi_YeoJohnson <- function(object, new_data, ...) {
   col_names <- object$columns
   check_new_data(col_names, object, new_data)
 
-  # Transform each column, using the appropriate lambda column per row.
-  # Note that yj_transform() is vectorized in x, but not in lambda.
-  new_data <- left_join(new_data, object$lambdas, by = keys)
+  # Check that the keys match.
+  check <- hardhat::check_column_names(new_data, object$yj_params %>% select(-starts_with(".yj_param_")) %>% colnames())
+  if (!check$ok) {
+    cli_abort(c(
+      "Some variables used for training are not available in {.arg x}.",
+      i = "The following required columns are missing: {check$missing_names}"
+    ), call = rlang::caller_fn())
+  }
+  # Transform each column, using the appropriate yj_param column per row.
+  new_data <- left_join(new_data, object$yj_params, by = key_colnames(new_data, exclude = "time_value"))
   for (col in col_names) {
     new_data <- new_data %>%
-      rowwise() %>%
-      mutate(!!col := yj_transform(!!sym(col), !!sym(paste0(".lambda_", col))))
+      mutate(!!col := yj_transform(!!sym(col), !!sym(paste0(".yj_param_", col))))
   }
-  # Remove the lambda columns.
+  # Remove the yj_param columns.
   new_data %>%
-    select(-starts_with(".lambda_")) %>%
+    select(-starts_with(".yj_param_")) %>%
     ungroup()
 }
 
 #' @export
 print.step_epi_YeoJohnson <- function(x, width = max(20, options()$width - 39), ...) {
-  title <- "Yeo-Johnson transformation (see `lambdas` object for values) on "
+  title <- "Yeo-Johnson transformation (see `yj_params` object for values) on "
   print_epi_step(x$terms, x$terms, title = title, width = width)
   invisible(x)
 }
 
-# Compute the lambda values per group for each column.
-get_lambdas_yj_table <- function(training, col_names, limits, num_unique, na_lambda_fill, na_rm, epi_keys_checked) {
-  # Estimate the lambda for each column, creating a lambda_ column for each.
-  # Note that estimate_yj() operates on a vector.
-  lambdas <- training %>%
+# Compute the yj_param values per group for each column.
+compute_yj_params <- function(training, col_names, limits, num_unique, na_fill, na_rm, epi_keys_checked) {
+  # Estimate the yj_param for each column, creating a .yj_param_<col> column for
+  # each. Note that estimate_yj() operates on each column.
+  yj_params <- training %>%
     summarise(
       across(all_of(col_names), ~ estimate_yj(.x, limits, num_unique, na_rm)),
       .by = all_of(epi_keys_checked)
     ) %>%
-    dplyr::rename_with(~ paste0(".lambda_", .x), -all_of(epi_keys_checked))
+    dplyr::rename_with(~ paste0(".yj_param_", .x), -all_of(epi_keys_checked))
 
-  # Check for NAs in any of the lambda_ columns.
+  # Check for NAs in any of the .yj_param_ columns.
   # EDIT: This warning was too noisy. Keeping code around, in case we want it.
   # for (col in col_names) {
-  #   if (any(is.na(values[[paste0("lambda_", col)]]))) {
+  #   if (any(is.na(values[[paste0(".yj_param_", col)]]))) {
   #     cli::cli_warn(
   #       c(
-  #         x = "Yeo-Johnson lambda could not be estimated for some geos for {col}.",
-  #         i = "Using lambda={x$na_lambda_fill} in these cases."
+  #         x = "Yeo-Johnson parameter could not be estimated for some geos for {col}.",
+  #         i = "Using parameter={x$na_fill} in these cases."
   #       ),
   #       call = rlang::caller_fn()
   #     )
   #   }
   # }
 
-  # Fill in NAs with the default lambda.
-  lambdas %>%
-    mutate(across(starts_with(".lambda_"), \(col) ifelse(is.na(col), na_lambda_fill, col)))
+  # Fill in NAs with the default yj_param.
+  yj_params %>%
+    mutate(across(starts_with(".yj_param_"), \(col) ifelse(is.na(col), na_fill, col)))
 }
 
 
 ### Code below taken from recipes::step_YeoJohnson.
+### We keep "lambda" here, but above we renamed it to "yj_param".
+### Modified yj_transform() to be vectorized in lambda.
 ### https://github.com/tidymodels/recipes/blob/v1.1.1/R/YeoJohnson.R#L172
 
 # Yeo-Johnson transformation
-#
-# Note that this function is vectorized in x, but not in lambda.
 yj_transform <- function(x, lambda, ind_neg = NULL, eps = 0.001) {
-  if (is.na(lambda)) {
+  if (any(is.na(lambda))) {
     return(x)
   }
+  if (length(x) > 1 && length(lambda) == 1) {
+    lambda <- rep(lambda, length(x))
+  } else if (length(x) != length(lambda)) {
+    cli::cli_abort(
+      "Length of `x` must be equal to length of `lambda` or lambda must be a scalar.",
+      call = rlang::caller_fn()
+    )
+  }
   if (!inherits(x, "tbl_df") || is.data.frame(x)) {
     x <- unlist(x, use.names = FALSE)
   } else {
@@ -289,27 +290,39 @@ yj_transform <- function(x, lambda, ind_neg = NULL, eps = 0.001) {
   is_neg <- ind_neg[["is"]]
 
   nn_trans <- function(x, lambda) {
-    if (abs(lambda) < eps) {
-      log(x + 1)
-    } else {
-      ((x + 1)^lambda - 1) / lambda
+    out <- double(length(x))
+    sm_lambdas <- abs(lambda) < eps
+    if (length(sm_lambdas) > 0) {
+      out[sm_lambdas] <- log(x[sm_lambdas] + 1)
     }
+    x <- x[!sm_lambdas]
+    lambda <- lambda[!sm_lambdas]
+    if (length(x) > 0) {
+      out[!sm_lambdas] <- ((x + 1)^lambda - 1) / lambda
+    }
+    out
   }
 
   ng_trans <- function(x, lambda) {
-    if (abs(lambda - 2) < eps) {
-      -log(-x + 1)
-    } else {
-      -((-x + 1)^(2 - lambda) - 1) / (2 - lambda)
+    out <- double(length(x))
+    near2_lambdas <- abs(lambda - 2) < eps
+    if (length(near2_lambdas) > 0) {
+      out[near2_lambdas] <- -log(-x[near2_lambdas] + 1)
+    }
+    x <- x[!near2_lambdas]
+    lambda <- lambda[!near2_lambdas]
+    if (length(x) > 0) {
+      out[!near2_lambdas] <- -((-x + 1)^(2 - lambda) - 1) / (2 - lambda)
     }
+    out
   }
 
   if (length(not_neg) > 0) {
-    x[not_neg] <- nn_trans(x[not_neg], lambda)
+    x[not_neg] <- nn_trans(x[not_neg], lambda[not_neg])
   }
 
   if (length(is_neg) > 0) {
-    x[is_neg] <- ng_trans(x[is_neg], lambda)
+    x[is_neg] <- ng_trans(x[is_neg], lambda[is_neg])
   }
   x
 }
@@ -382,8 +395,8 @@ estimate_yj <- function(dat, limits = c(-5, 5), num_unique = 5, na_rm = TRUE, ca
 tidy.step_epi_YeoJohnson <- function(x, ...) {
   if (is_trained(x)) {
     res <- tibble(
-      terms = names(x$lambdas),
-      value = unname(x$lambdas)
+      terms = names(x$yj_params),
+      value = unname(x$yj_params)
     )
   } else {
     term_names <- sel2char(x$terms)
diff --git a/man/layer_epi_YeoJohnson.Rd b/man/layer_epi_YeoJohnson.Rd
index 1ca4d9cc..53520b4e 100644
--- a/man/layer_epi_YeoJohnson.Rd
+++ b/man/layer_epi_YeoJohnson.Rd
@@ -7,7 +7,7 @@
 layer_epi_YeoJohnson(
   frosting,
   ...,
-  lambdas = NULL,
+  yj_params = NULL,
   by = NULL,
   id = rand_id("epi_YeoJohnson")
 )
@@ -19,7 +19,7 @@ sequence of operations for this frosting.}
 \item{...}{One or more selector functions to scale variables
 for this step. See \code{\link[recipes:selections]{recipes::selections()}} for more details.}
 
-\item{lambdas}{Internal. A data frame of lambda values to be used for
+\item{yj_params}{Internal. A data frame of parameters to be used for
 inverting the transformation.}
 
 \item{by}{A (possibly named) character vector of variables to join by.}
diff --git a/man/step_adjust_latency.Rd b/man/step_adjust_latency.Rd
index c904a5ad..258b72a6 100644
--- a/man/step_adjust_latency.Rd
+++ b/man/step_adjust_latency.Rd
@@ -140,7 +140,7 @@ toy_recipe \%>\%
 #> * geo_type  = state
 #> * time_type = day
 #> * as_of     = 2015-01-14
-#>
+#> 
 #> # A tibble: 8 x 4
 #>   geo_value time_value     a     b
 #>   <chr>     <date>     <dbl> <dbl>
@@ -176,7 +176,7 @@ toy_recipe \%>\%
 #> * geo_type  = state
 #> * time_type = day
 #> * as_of     = 2015-01-14
-#>
+#> 
 #> # A tibble: 21 x 7
 #>    geo_value time_value     a     b lag_3_a lag_4_b ahead_1_a
 #>    <chr>     <date>     <dbl> <dbl>   <dbl>   <dbl>     <dbl>
@@ -224,7 +224,7 @@ toy_recipe \%>\%
 #> * geo_type  = state
 #> * time_type = day
 #> * as_of     = 2015-01-14
-#>
+#> 
 #> # A tibble: 10 x 6
 #>    geo_value time_value     a     b lag_0_a ahead_3_a
 #>    <chr>     <date>     <dbl> <dbl>   <dbl>     <dbl>
@@ -267,8 +267,8 @@ while this will not:
 \if{html}{\out{<div class="sourceCode r">}}\preformatted{toy_recipe <- epi_recipe(toy_df) \%>\%
    step_epi_lag(a, lag=0) \%>\%
    step_adjust_latency(a, method = "extend_lags")
-#> Warning: If `method` is "extend_lags" or "locf", then the previous `step_epi_lag`s won't work with
-#> modified data.
+#> Warning: If `method` is "extend_lags" or "locf", then the previous
+#> `step_epi_lag`s won't work with modified data.
 }\if{html}{\out{</div>}}
 
 If you create columns that you then apply lags to (such as
@@ -296,7 +296,7 @@ rates_fit
 
 }
 \seealso{
-Other row operation steps:
+Other row operation steps: 
 \code{\link{step_epi_lag}()},
 \code{\link{step_growth_rate}()},
 \code{\link{step_lag_difference}()}
diff --git a/man/step_epi_YeoJohnson.Rd b/man/step_epi_YeoJohnson.Rd
index 1fa63761..ffde5579 100644
--- a/man/step_epi_YeoJohnson.Rd
+++ b/man/step_epi_YeoJohnson.Rd
@@ -9,7 +9,7 @@ step_epi_YeoJohnson(
   ...,
   role = "predictor",
   trained = FALSE,
-  lambdas = NULL,
+  yj_params = NULL,
   na_lambda_fill = 1/4,
   limits = c(-5, 5),
   num_unique = 5,
@@ -31,7 +31,7 @@ they be assigned? \code{lag} is default a predictor while \code{ahead} is an out
 \item{trained}{A logical for whether the selectors in \code{...}
 have been resolved by \code{\link[=prep]{prep()}}.}
 
-\item{lambdas}{Internal. A numeric vector of transformation values. This
+\item{yj_params}{Internal. A numeric vector of transformation values. This
 is \code{NULL} until computed by \code{\link[=prep]{prep()}}.}
 
 \item{na_lambda_fill}{A numeric value to fill in for any
@@ -46,9 +46,12 @@ many unique values will not be evaluated for a transformation.}
 \item{na_rm}{A logical indicating whether missing values should be
 removed.}
 
-\item{skip}{A logical. Should the step be skipped when the recipe is
-baked by \code{\link[=bake]{bake()}}. On the \code{training} data, the step will always be
-conducted (even if \code{skip = TRUE}).}
+\item{skip}{A logical. Should the step be skipped when the
+recipe is baked by \code{\link[=bake]{bake()}}? While all operations are baked
+when \code{\link[=prep]{prep()}} is run, some operations may not be able to be
+conducted on new data (e.g. processing the outcome variable(s)).
+Care should be taken when using \code{skip = TRUE} as it may affect
+the computations for subsequent operations.}
 
 \item{id}{A unique identifier for the step}
 }
@@ -105,7 +108,7 @@ r
 # Fit the recipe
 tr <- r \%>\% prep(filtered_data)
 # View the lambda values
-tr$steps[[1]]$lambdas
+tr$steps[[1]]$yj_params
 # View the transformed data
 df <- tr \%>\% bake(filtered_data)
 plot(density(df$cases))
diff --git a/tests/testthat/test-yeo-johnson.R b/tests/testthat/test-yeo-johnson.R
index 124f9648..48d28f10 100644
--- a/tests/testthat/test-yeo-johnson.R
+++ b/tests/testthat/test-yeo-johnson.R
@@ -1,11 +1,17 @@
 test_that("Yeo-Johnson transformation inverts correctly", {
+  # Vectorized x and scalar lambda work
+  lambdas <- seq(-5, 5, 0.1)
+  x <- seq(-10, 10, 0.1)
+  expect_true(
+    map_lgl(lambdas, \(lambda) sum(abs(yj_inverse(yj_transform(x, lambda), lambda) - x)) < 1e-5) %>%
+      all()
+  )
   # Note that the special lambda values of 0 and 2 are covered by the tests
   # below.
+  # Vectorized x and lambda both work
+  x <- seq(-5, 5, 0.1)
   expect_true(
-    map_lgl(seq(-5, 5, 0.1), function(lambda) {
-      map_lgl(seq(-10, 10, 0.1), \(x) abs(yj_inverse(yj_transform(x, lambda), lambda) - x) < 0.00001) %>% all()
-    }) %>%
-      all()
+    sum(abs(yj_inverse(yj_transform(x, lambda), lambda) - x)) < 1e-5
   )
 })
 
@@ -15,7 +21,7 @@ test_that("Yeo-Johnson steps and layers invert each other", {
     select(geo_value, time_value, cases)
   filtered_data <- jhu
 
-  # Get some lambda values
+  # Get some yj_param values
   r <- epi_recipe(filtered_data) %>%
     step_epi_YeoJohnson(cases) %>%
     step_epi_lag(cases, lag = 0) %>%
@@ -23,14 +29,9 @@ test_that("Yeo-Johnson steps and layers invert each other", {
     step_epi_naomit()
   tr <- r %>% prep(filtered_data)
 
-  # Check general lambda values tibble structure
-  expect_true(".lambda_cases" %in% names(tr$steps[[1]]$lambdas))
-  expect_true(is.numeric(tr$steps[[1]]$lambdas$.lambda_cases))
-  # Still works on a tibble
-  expect_equal(
-    tr %>% bake(filtered_data %>% as_tibble()),
-    tr %>% bake(filtered_data)
-  )
+  # Check general yj_param values tibble structure
+  expect_true(".yj_param_cases" %in% names(tr$steps[[1]]$yj_params))
+  expect_true(is.numeric(tr$steps[[1]]$yj_params$.yj_param_cases))
 
   # Make sure that the inverse transformation works
   f <- frosting() %>%
@@ -40,7 +41,6 @@ test_that("Yeo-Johnson steps and layers invert each other", {
     fit(filtered_data) %>%
     add_frosting(f)
   out1 <- filtered_data %>%
-    as_tibble() %>%
     slice_max(time_value, by = geo_value)
   out2 <- forecast(wf) %>% rename(cases = .pred)
   expect_equal(out1, out2)
@@ -57,11 +57,11 @@ test_that("Yeo-Johnson steps and layers invert each other", {
     step_epi_naomit()
   tr <- r %>% prep(filtered_data)
 
-  # Check general lambda values tibble structure
-  expect_true(".lambda_case_rate" %in% names(tr$steps[[1]]$lambdas))
-  expect_true(".lambda_death_rate" %in% names(tr$steps[[1]]$lambdas))
-  expect_true(is.numeric(tr$steps[[1]]$lambdas$.lambda_case_rate))
-  expect_true(is.numeric(tr$steps[[1]]$lambdas$.lambda_death_rate))
+  # Check general yj_param values tibble structure
+  expect_true(".yj_param_case_rate" %in% names(tr$steps[[1]]$yj_params))
+  expect_true(".yj_param_death_rate" %in% names(tr$steps[[1]]$yj_params))
+  expect_true(is.numeric(tr$steps[[1]]$yj_params$.yj_param_case_rate))
+  expect_true(is.numeric(tr$steps[[1]]$yj_params$.yj_param_death_rate))
 
   # Make sure that the inverse transformation works
   f <- frosting() %>%
@@ -71,15 +71,14 @@ test_that("Yeo-Johnson steps and layers invert each other", {
     fit(filtered_data) %>%
     add_frosting(f)
   out1 <- filtered_data %>%
-    as_tibble() %>%
     slice_max(time_value, by = geo_value)
-  # debugonce(slather.layer_epi_YeoJohnson)
   out2 <- forecast(wf) %>% rename(case_rate = .pred_ahead_0_case_rate, death_rate = .pred_ahead_0_death_rate)
   expect_equal(out1, out2)
 })
 
 test_that("Yeo-Johnson steps and layers invert each other when other_keys are present", {
   # Small synthetic grad_employ_dataset version.
+  # fmt: skip
   filtered_data <- tribble(
     ~geo_value, ~age_group, ~edu_qual, ~time_value, ~med_income_2y,
     "ca", "25-34", "bachelor", 2017, 50000,
@@ -108,18 +107,18 @@ test_that("Yeo-Johnson steps and layers invert each other when other_keys are pr
     "ca", "35-1000", "master", 2022, 2 * (3e10 + 50)
   ) %>% as_epi_df(other_keys = c("age_group", "edu_qual"))
 
-  # Get some lambda values
+  # Get some yj_param values
   r <- epi_recipe(filtered_data) %>%
     step_epi_YeoJohnson(med_income_2y) %>%
     step_epi_lag(med_income_2y, lag = 0) %>%
     step_epi_ahead(med_income_2y, ahead = 0, role = "outcome") %>%
     step_epi_naomit()
   tr <- r %>% prep(filtered_data)
-  expect_true(".lambda_med_income_2y" %in% names(tr$steps[[1]]$lambdas))
-  expect_true("geo_value" %in% names(tr$steps[[1]]$lambdas))
-  expect_true("age_group" %in% names(tr$steps[[1]]$lambdas))
-  expect_true("edu_qual" %in% names(tr$steps[[1]]$lambdas))
-  expect_true(is.numeric(tr$steps[[1]]$lambdas$.lambda_med_income_2y))
+  expect_true(".yj_param_med_income_2y" %in% names(tr$steps[[1]]$yj_params))
+  expect_true("geo_value" %in% names(tr$steps[[1]]$yj_params))
+  expect_true("age_group" %in% names(tr$steps[[1]]$yj_params))
+  expect_true("edu_qual" %in% names(tr$steps[[1]]$yj_params))
+  expect_true(is.numeric(tr$steps[[1]]$yj_params$.yj_param_med_income_2y))
 
   # Make sure that the inverse transformation works
   f <- frosting() %>%
@@ -129,7 +128,6 @@ test_that("Yeo-Johnson steps and layers invert each other when other_keys are pr
     fit(filtered_data) %>%
     add_frosting(f)
   out1 <- filtered_data %>%
-    as_tibble() %>%
     slice_max(time_value, by = geo_value) %>%
     select(geo_value, age_group, time_value, med_income_2y) %>%
     arrange(geo_value, age_group, time_value)

From fd76c3678abcdd60e85448b77d1d9741af858df9 Mon Sep 17 00:00:00 2001
From: dsweber2 <david.weber2@pm.me>
Date: Thu, 3 Apr 2025 18:05:24 -0500
Subject: [PATCH 6/7] fix lambda(s), styling

---
 R/step_yeo_johnson.R              | 50 +++++++++++++++----------------
 man/step_adjust_latency.Rd        |  4 +--
 man/step_epi_YeoJohnson.Rd        | 20 ++++++-------
 tests/testthat/test-yeo-johnson.R |  2 +-
 4 files changed, 37 insertions(+), 39 deletions(-)

diff --git a/R/step_yeo_johnson.R b/R/step_yeo_johnson.R
index 272b034a..4e429528 100644
--- a/R/step_yeo_johnson.R
+++ b/R/step_yeo_johnson.R
@@ -71,18 +71,17 @@
 #' plot(density(df$cases))
 #' plot(density(filtered_data$cases))
 step_epi_YeoJohnson <- function(
-  recipe,
-  ...,
-  role = "predictor",
-  trained = FALSE,
-  yj_params = NULL,
-  na_fill = 1 / 4,
-  limits = c(-5, 5),
-  num_unique = 5,
-  na_rm = TRUE,
-  skip = FALSE,
-  id = rand_id("epi_YeoJohnson")
-) {
+    recipe,
+    ...,
+    role = "predictor",
+    trained = FALSE,
+    yj_params = NULL,
+    na_fill = 1 / 4,
+    limits = c(-5, 5),
+    num_unique = 5,
+    na_rm = TRUE,
+    skip = FALSE,
+    id = rand_id("epi_YeoJohnson")) {
   checkmate::assert_numeric(limits, len = 2)
   checkmate::assert_numeric(na_fill, lower = min(limits), upper = max(limits), len = 1)
   checkmate::assert_numeric(num_unique, lower = 2, upper = Inf, len = 1)
@@ -109,20 +108,19 @@ step_epi_YeoJohnson <- function(
 }
 
 step_epi_YeoJohnson_new <- function(
-  terms,
-  role,
-  trained,
-  yj_params,
-  na_fill,
-  limits,
-  num_unique,
-  na_rm,
-  forecast_date,
-  metadata,
-  columns,
-  skip,
-  id
-) {
+    terms,
+    role,
+    trained,
+    yj_params,
+    na_fill,
+    limits,
+    num_unique,
+    na_rm,
+    forecast_date,
+    metadata,
+    columns,
+    skip,
+    id) {
   step(
     subclass = "epi_YeoJohnson",
     terms = terms,
diff --git a/man/step_adjust_latency.Rd b/man/step_adjust_latency.Rd
index 258b72a6..9e1bafbd 100644
--- a/man/step_adjust_latency.Rd
+++ b/man/step_adjust_latency.Rd
@@ -267,8 +267,8 @@ while this will not:
 \if{html}{\out{<div class="sourceCode r">}}\preformatted{toy_recipe <- epi_recipe(toy_df) \%>\%
    step_epi_lag(a, lag=0) \%>\%
    step_adjust_latency(a, method = "extend_lags")
-#> Warning: If `method` is "extend_lags" or "locf", then the previous
-#> `step_epi_lag`s won't work with modified data.
+#> Warning: If `method` is "extend_lags" or "locf", then the previous `step_epi_lag`s won't work with
+#> modified data.
 }\if{html}{\out{</div>}}
 
 If you create columns that you then apply lags to (such as
diff --git a/man/step_epi_YeoJohnson.Rd b/man/step_epi_YeoJohnson.Rd
index ffde5579..cfe85169 100644
--- a/man/step_epi_YeoJohnson.Rd
+++ b/man/step_epi_YeoJohnson.Rd
@@ -10,7 +10,7 @@ step_epi_YeoJohnson(
   role = "predictor",
   trained = FALSE,
   yj_params = NULL,
-  na_lambda_fill = 1/4,
+  na_fill = 1/4,
   limits = c(-5, 5),
   num_unique = 5,
   na_rm = TRUE,
@@ -34,17 +34,17 @@ have been resolved by \code{\link[=prep]{prep()}}.}
 \item{yj_params}{Internal. A numeric vector of transformation values. This
 is \code{NULL} until computed by \code{\link[=prep]{prep()}}.}
 
-\item{na_lambda_fill}{A numeric value to fill in for any
-geos where the lambda cannot be estimated.}
+\item{na_fill}{A numeric value to fill in for any geos where a Yeo-Johnson
+parameter cannot be estimated.}
 
-\item{limits}{A length 2 numeric vector defining the range to
-compute the transformation parameter lambda.}
+\item{limits}{A length 2 numeric vector defining the range to compute the
+transformation parameter.}
 
-\item{num_unique}{An integer where data that have fewer than this
-many unique values will not be evaluated for a transformation.}
+\item{num_unique}{An integer where data that have fewer than this many unique
+values will not be evaluated for a transformation.}
 
-\item{na_rm}{A logical indicating whether missing values should be
-removed.}
+\item{na_rm}{A logical indicating whether missing values should be removed
+before estimating the transformation parameter.}
 
 \item{skip}{A logical. Should the step be skipped when the
 recipe is baked by \code{\link[=bake]{bake()}}? While all operations are baked
@@ -107,7 +107,7 @@ r <- epi_recipe(filtered_data) \%>\%
 r
 # Fit the recipe
 tr <- r \%>\% prep(filtered_data)
-# View the lambda values
+# View the parameter values
 tr$steps[[1]]$yj_params
 # View the transformed data
 df <- tr \%>\% bake(filtered_data)
diff --git a/tests/testthat/test-yeo-johnson.R b/tests/testthat/test-yeo-johnson.R
index 48d28f10..9ae82151 100644
--- a/tests/testthat/test-yeo-johnson.R
+++ b/tests/testthat/test-yeo-johnson.R
@@ -11,7 +11,7 @@ test_that("Yeo-Johnson transformation inverts correctly", {
   # Vectorized x and lambda both work
   x <- seq(-5, 5, 0.1)
   expect_true(
-    sum(abs(yj_inverse(yj_transform(x, lambda), lambda) - x)) < 1e-5
+    sum(abs(yj_inverse(yj_transform(x, lambdas), lambdas) - x)) < 1e-5
   )
 })
 

From 47dce2f4aac64f0f6a1957c1f48119311eeccacc Mon Sep 17 00:00:00 2001
From: David Weber <david.weber2@pm.me>
Date: Thu, 10 Apr 2025 16:50:58 -0500
Subject: [PATCH 7/7] extend to quantile_dist, exclude multi-output (#458)

* extend to quantile_dist, exclude multi-output
* Drop by specification and infer from the epi_df
* lint+test: test coverage, handle na lambda case, lint
* fix: quantile_pred arithmetic
* fix: rlang calls

---------

Co-authored-by: Dmitry Shemetov <dshemetov@ucdavis.edu>
---
 NAMESPACE                            |   2 +
 R/layer_yeo_johnson.R                | 236 ++++++++++-----------------
 R/quantile_pred-methods.R            |  20 ++-
 R/step_yeo_johnson.R                 | 127 +++++++-------
 man/epipredict-vctrs.Rd              |  13 ++
 man/get_params_in_layer.Rd           |  23 +++
 man/layer_epi_YeoJohnson.Rd          |  20 ++-
 man/step_adjust_latency.Rd           |   4 +-
 tests/testthat/_snaps/yeo-johnson.md |  16 ++
 tests/testthat/test-quantile_pred.R  |  25 +++
 tests/testthat/test-yeo-johnson.R    |  70 +++++++-
 11 files changed, 324 insertions(+), 232 deletions(-)
 create mode 100644 man/epipredict-vctrs.Rd
 create mode 100644 man/get_params_in_layer.Rd
 create mode 100644 tests/testthat/_snaps/yeo-johnson.md

diff --git a/NAMESPACE b/NAMESPACE
index 351530de..053913c9 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -124,6 +124,7 @@ S3method(update,layer)
 S3method(vec_arith,quantile_pred)
 S3method(vec_arith.numeric,quantile_pred)
 S3method(vec_arith.quantile_pred,numeric)
+S3method(vec_arith.quantile_pred,quantile_pred)
 S3method(vec_math,quantile_pred)
 S3method(vec_proxy_equal,quantile_pred)
 S3method(weighted_interval_score,quantile_pred)
@@ -235,6 +236,7 @@ import(epidatasets)
 import(epiprocess)
 import(parsnip)
 import(recipes)
+import(vctrs)
 importFrom(checkmate,assert_class)
 importFrom(checkmate,assert_numeric)
 importFrom(checkmate,test_character)
diff --git a/R/layer_yeo_johnson.R b/R/layer_yeo_johnson.R
index d399717f..8d63b7f6 100644
--- a/R/layer_yeo_johnson.R
+++ b/R/layer_yeo_johnson.R
@@ -1,11 +1,21 @@
 #' Unormalizing transformation
 #'
-#' Will undo a step_epi_YeoJohnson transformation.
+#' Will undo a `step_epi_YeoJohnson()` transformation. For practical reasons,
+#' if the transformed column will eventually become the outcome variable, make
+#' sure that its original name is a substring of the outcome variable name.
+#' For example, if `cases` is transformed, an outcome named `ahead_7_cases`
+#' will work well, while one named `ahead_7` will not.
 #'
 #' @inheritParams layer_population_scaling
-#' @param yj_params Internal. A data frame of parameters to be used for
-#'   inverting the transformation.
-#' @param by A (possibly named) character vector of variables to join by.
+#' @param yj_params A data frame of parameters to be used for inverting the
+#'   transformation. Typically set automatically. If you have done multiple
+#'   transformations such that the outcome variable name no longer contains the
+#'   name of the column that `step_epi_YeoJohnson` transformed, then you should
+#'   manually set this to the parameters fit by that step. For example, if
+#'   your outcome is `ahead_7_cases` and `step_epi_YeoJohnson` transformed
+#'   `cases` (possibly with other columns), then you do not need to set this.
+#'   However, if you have renamed your outcome column to `diff_7`, then you
+#'   will need to extract the `yj_params` from the step and pass them here.
 #'
 #' @return an updated `frosting` postprocessor
 #' @export
@@ -37,22 +47,21 @@
 #' # Compare to the original data.
 #' jhu %>% filter(time_value == "2021-12-31")
 #' forecast(wf)
-layer_epi_YeoJohnson <- function(frosting, ..., yj_params = NULL, by = NULL, id = rand_id("epi_YeoJohnson")) {
+layer_epi_YeoJohnson <- function(frosting, ..., yj_params = NULL, id = rand_id("epi_YeoJohnson")) {
   checkmate::assert_tibble(yj_params, min.rows = 1, null.ok = TRUE)
 
   add_layer(
     frosting,
     layer_epi_YeoJohnson_new(
       yj_params = yj_params,
-      by = by,
       terms = dplyr::enquos(...),
       id = id
     )
   )
 }
 
-layer_epi_YeoJohnson_new <- function(yj_params, by, terms, id) {
-  layer("epi_YeoJohnson", yj_params = yj_params, by = by, terms = terms, id = id)
+layer_epi_YeoJohnson_new <- function(yj_params, terms, id) {
+  layer("epi_YeoJohnson", yj_params = yj_params, terms = terms, id = id)
 }
 
 #' @export
@@ -60,42 +69,14 @@ layer_epi_YeoJohnson_new <- function(yj_params, by, terms, id) {
 slather.layer_epi_YeoJohnson <- function(object, components, workflow, new_data, ...) {
   rlang::check_dots_empty()
 
-  # TODO: We will error if we don't have a workflow. Write a check later.
-
-  # Get the yj_params from the layer or from the workflow.
-  yj_params <- object$yj_params %||% get_yj_params_in_layer(workflow)
-
-  # If the by is not specified, try to infer it from the yj_params.
-  if (is.null(object$by)) {
-    # Assume `layer_predict` has calculated the prediction keys and other
-    # layers don't change the prediction key colnames:
-    prediction_key_colnames <- names(components$keys)
-    lhs_potential_keys <- prediction_key_colnames
-    rhs_potential_keys <- colnames(select(yj_params, -starts_with(".yj_param_")))
-    object$by <- intersect(lhs_potential_keys, rhs_potential_keys)
-    suggested_min_keys <- setdiff(lhs_potential_keys, "time_value")
-    if (!all(suggested_min_keys %in% object$by)) {
-      cli_warn(
-        c(
-          "{setdiff(suggested_min_keys, object$by)} {?was an/were} epikey column{?s} in the predictions,
-          but {?wasn't/weren't} found in the population `df`.",
-          "i" = "Defaulting to join by {object$by}",
-          ">" = "Double-check whether column names on the population `df` match those expected in your predictions",
-          ">" = "Consider using population data with breakdowns by {suggested_min_keys}",
-          ">" = "Manually specify `by =` to silence"
-        ),
-        class = "epipredict__layer_population_scaling__default_by_missing_suggested_keys"
-      )
-    }
-  }
+  # Use the yj_params given to the layer, else look them up in the workflow.
+  yj_params <-
+    object$yj_params %||%
+    get_params_in_layer(workflow, "epi_YeoJohnson", "yj_params")
 
   # Establish the join columns.
-  object$by <- object$by %||%
-    intersect(
-      epi_keys_only(components$predictions),
-      colnames(select(yj_params, -starts_with(".yj_param_")))
-    )
-  joinby <- list(x = names(object$by) %||% object$by, y = object$by)
+  join_by_columns <- key_colnames(new_data, exclude = "time_value") %>% sort()
+  joinby <- list(x = join_by_columns, y = join_by_columns)
   hardhat::validate_column_names(components$predictions, joinby$x)
   hardhat::validate_column_names(yj_params, joinby$y)
 
@@ -115,55 +96,15 @@ slather.layer_epi_YeoJohnson <- function(object, components, workflow, new_data,
   # The `object$terms` is where the user specifies the columns they want to
   # untransform. We need to match the outcomes with their yj_param columns in our
   # parameter table and then apply the inverse transformation.
-  if (identical(col_names, ".pred")) {
-    # In this case, we don't get a hint for the outcome column name, so we need
-    # to infer it from the mold.
-    if (length(components$mold$outcomes) > 1) {
-      cli_abort("Only one outcome is allowed when specifying `.pred`.", call = rlang::caller_env())
-    }
-    # `outcomes` is a vector of objects like ahead_1_cases, ahead_7_cases, etc.
-    # We want to extract the cases part.
-    outcome_cols <- names(components$mold$outcomes) %>%
-      stringr::str_match("ahead_\\d+_(.*)") %>%
-      magrittr::extract(, 2)
-
+  if (length(col_names) == 0) {
+    # not specified by the user, so just modify everything starting with `.pred`
     components$predictions <- components$predictions %>%
-      mutate(.pred := yj_inverse(.pred, !!sym(paste0(".yj_param_", outcome_cols))))
-  } else if (identical(col_names, character(0))) {
-    # Wish I could suggest `all_outcomes()` here, but currently it's the same as
-    # not specifying any terms. I don't want to spend time with dealing with
-    # this case until someone asks for it.
-    cli::cli_abort(
-      "Not specifying columns to layer Yeo-Johnson is not implemented.
-    If you had a single outcome, you can use `.pred` as a column name.
-    If you had multiple outcomes, you'll need to specify them like
-    `.pred_ahead_1_<outcome_col>`, `.pred_ahead_7_<outcome_col>`, etc.
-    ",
-      call = rlang::caller_env()
-    )
+      mutate(across(starts_with(".pred"), \(.pred) yj_inverse(.pred, .lambda))) %>%
+      select(-.lambda)
   } else {
-    # In this case, we assume that the user has specified the columns they want
-    # transformed here. We then need to determine the yj_param columns for each of
-    # these columns. That is, we need to convert a vector of column names like
-    # c(".pred_ahead_1_case_rate", ".pred_ahead_7_case_rate") to
-    # c(".yj_param_ahead_1_case_rate", ".yj_param_ahead_7_case_rate").
-    original_outcome_cols <- stringr::str_match(col_names, ".pred_ahead_\\d+_(.*)")[, 2]
-    outcomes_wout_ahead <- stringr::str_match(names(components$mold$outcomes), "ahead_\\d+_(.*)")[, 2]
-    if (any(original_outcome_cols %nin% outcomes_wout_ahead)) {
-      cli_abort(
-        "All columns specified in `...` must be outcome columns.
-      They must be of the form `.pred_ahead_1_<outcome_col>`, `.pred_ahead_7_<outcome_col>`, etc.
-      ",
-        call = rlang::caller_env()
-      )
-    }
-
-    for (i in seq_along(col_names)) {
-      col <- col_names[i]
-      yj_param_col <- paste0(".yj_param_", original_outcome_cols[i])
-      components$predictions <- components$predictions %>%
-        mutate(!!sym(col) := yj_inverse(!!sym(col), !!sym(yj_param_col)))
-    }
+    components$predictions <- components$predictions %>%
+      mutate(across(all_of(col_names), \(.pred) yj_inverse(.pred, .lambda))) %>%
+      select(-.lambda)
   }
 
   # Remove the yj_param columns.
@@ -182,75 +123,72 @@ print.layer_epi_YeoJohnson <- function(x, width = max(20, options()$width - 30),
 # Inverse Yeo-Johnson transformation
 #
 # Inverse of `yj_transform` in step_yeo_johnson.R.
-yj_inverse <- function(x, lambda, eps = 0.001) {
+yj_inverse <- function(x_in, lambda, eps = 0.001) {
   if (any(is.na(lambda))) {
-    return(x)
-  }
-  if (length(x) > 1 && length(lambda) == 1) {
-    lambda <- rep(lambda, length(x))
-  } else if (length(x) != length(lambda)) {
-    cli::cli_abort("Length of `x` must be equal to length of `lambda`.", call = rlang::caller_fn())
-  }
-  if (!inherits(x, "tbl_df") || is.data.frame(x)) {
-    x <- unlist(x, use.names = FALSE)
-  } else {
-    if (!is.vector(x)) {
-      x <- as.vector(x)
-    }
-  }
-
-  nn_inv_trans <- function(x, lambda) {
-    out <- double(length(x))
-    sm_lambdas <- abs(lambda) < eps
-    if (length(sm_lambdas) > 0) {
-      out[sm_lambdas] <- exp(x[sm_lambdas]) - 1
-    }
-    x <- x[!sm_lambdas]
-    lambda <- lambda[!sm_lambdas]
-    if (length(x) > 0) {
-      out[!sm_lambdas] <- (lambda * x + 1)^(1 / lambda) - 1
-    }
-    out
+    cli::cli_abort("`lambda` cannot be `NA`.", call = rlang::caller_call())
   }
-
-  ng_inv_trans <- function(x, lambda) {
-    out <- double(length(x))
-    near2_lambdas <- abs(lambda - 2) < eps
-    if (length(near2_lambdas) > 0) {
-      out[near2_lambdas] <- -(exp(-x[near2_lambdas]) - 1)
-    }
-    x <- x[!near2_lambdas]
-    lambda <- lambda[!near2_lambdas]
-    if (length(x) > 0) {
-      out[!near2_lambdas] <- -(((lambda - 2) * x + 1)^(1 / (2 - lambda)) - 1)
-    }
-    out
-  }
-
-  dat_neg <- x < 0
-  not_neg <- which(!dat_neg)
-  is_neg <- which(dat_neg)
-
-  if (length(not_neg) > 0) {
-    x[not_neg] <- nn_inv_trans(x[not_neg], lambda[not_neg])
-  }
-
-  if (length(is_neg) > 0) {
-    x[is_neg] <- ng_inv_trans(x[is_neg], lambda[is_neg])
+  x_lambda <- yj_input_type_management(x_in, lambda)
+  x <- x_lambda[[1]]
+  lambda <- x_lambda[[2]]
+  inv_x <- ifelse(
+    x < 0,
+    # negative values we test if lambda is ~2
+    ifelse(
+      abs(lambda - 2) < eps,
+      -(exp(-x) - 1),
+      -(((lambda - 2) * x + 1)^(1 / (2 - lambda)) - 1)
+    ),
+    # non-negative values we test if lambda is ~0
+    ifelse(
+      abs(lambda) < eps,
+      (exp(x) - 1),
+      (lambda * x + 1)^(1 / lambda) - 1
+    )
+  )
+  if (x_in %>% inherits("quantile_pred")) {
+    inv_x <- inv_x %>% quantile_pred(x_in %@% "quantile_levels")
   }
-  x
+  inv_x
 }
 
-get_yj_params_in_layer <- function(workflow) {
+
+#' get the parameters used in the initial step
+#'
+#' @param workflow the workflow to extract the parameters from
+#' @param step_name the name of the step to look for, as recognized by `detect_step`
+#' @param param_name the parameter to pull out of the step
+#' @keywords internal
+get_params_in_layer <- function(workflow, step_name = "epi_YeoJohnson", param_name = "yj_params") {
+  full_step_name <- glue::glue("step_{step_name}")
   this_recipe <- hardhat::extract_recipe(workflow)
-  if (!(this_recipe %>% recipes::detect_step("epi_YeoJohnson"))) {
-    cli_abort("`layer_epi_YeoJohnson` requires `step_epi_YeoJohnson` in the recipe.", call = rlang::caller_env())
+  if (!(this_recipe %>% recipes::detect_step(step_name))) {
+    cli_abort("`layer_{step_name}` requires `step_{step_name}` in the recipe.", call = rlang::caller_call())
+  }
+  outcomes <-
+    workflows::extract_recipe(workflow)$term_info %>%
+    filter(role == "outcome") %>%
+    pull(variable)
+  if (length(outcomes) > 1) {
+    cli_abort(
+      "`layer_{step_name}` doesn't support multiple output columns.
+      This workflow produces {outcomes} as output columns.",
+      call = rlang::caller_call(),
+      class = "epipredict__layer_yeo_johnson_multi_outcome_error"
+    )
   }
   for (step in this_recipe$steps) {
-    if (inherits(step, "step_epi_YeoJohnson")) {
-      yj_params <- step$yj_params
+    # Use the first `step_name` step that transforms a column whose name
+    # matches (as a `grepl()` pattern) part of the outcome column name.
+    is_outcome_subset <- map_lgl(step$columns, ~ grepl(.x, outcomes))
+    if (inherits(step, full_step_name) && any(is_outcome_subset)) {
+      params <- step[[param_name]] %>%
+        select(
+          key_colnames(workflow$original_data, exclude = "time_value"),
+          contains(step$columns[is_outcome_subset])
+        ) %>%
+        rename(.lambda = contains(step$columns))
       break
     }
   }
-  yj_params
+  params
 }
diff --git a/R/quantile_pred-methods.R b/R/quantile_pred-methods.R
index 293fad90..56e8fcf0 100644
--- a/R/quantile_pred-methods.R
+++ b/R/quantile_pred-methods.R
@@ -111,7 +111,6 @@ vec_proxy_equal.quantile_pred <- function(x, ...) {
     dplyr::select(-.row)
 }
 
-
 # quantiles by treating quantile_pred like a distribution -----------------
 
 
@@ -287,6 +286,12 @@ vec_math.quantile_pred <- function(.fn, .x, ...) {
   quantile_pred(.fn(.x), quantile_levels)
 }
 
+#' Internal vctrs methods
+#'
+#' @import vctrs
+#' @keywords internal
+#' @name epipredict-vctrs
+
 #' @importFrom vctrs vec_arith vec_arith.numeric
 #' @export
 #' @method vec_arith quantile_pred
@@ -294,6 +299,19 @@ vec_arith.quantile_pred <- function(op, x, y, ...) {
   UseMethod("vec_arith.quantile_pred", y)
 }
 
+
+#' @export
+#' @method vec_arith.quantile_pred quantile_pred
+vec_arith.quantile_pred.quantile_pred <- function(op, x, y, ...) {
+  all_quantiles <- unique(c(x %@% "quantile_levels", y %@% "quantile_levels"))
+  op_fn <- getExportedValue("base", op)
+  # Interpolate/extrapolate to the same quantiles
+  x <- quantile.quantile_pred(x, all_quantiles)
+  y <- quantile.quantile_pred(y, all_quantiles)
+  out <- op_fn(x, y, ...)
+  quantile_pred(out, all_quantiles)
+}
+
 #' @export
 #' @method vec_arith.quantile_pred numeric
 vec_arith.quantile_pred.numeric <- function(op, x, y, ...) {
diff --git a/R/step_yeo_johnson.R b/R/step_yeo_johnson.R
index 4e429528..ff4016cd 100644
--- a/R/step_yeo_johnson.R
+++ b/R/step_yeo_johnson.R
@@ -188,8 +188,7 @@ bake.step_epi_YeoJohnson <- function(object, new_data, ...) {
   # Check that the columns for transformation are present in new_data.
   if (!all(object$columns %in% colnames(new_data))) {
     cli::cli_abort(
-      "The columns for transformation are not present in the new data.",
-      call = rlang::caller_fn()
+      "The columns for transformation are not present in the new data."
     )
   }
   # Check that the columns for transformation are present in new_data.
@@ -202,7 +201,7 @@ bake.step_epi_YeoJohnson <- function(object, new_data, ...) {
     cli_abort(c(
       "Some variables used for training are not available in {.arg x}.",
       i = "The following required columns are missing: {check$missing_names}"
-    ), call = rlang::caller_fn())
+    ))
   }
   # Transform each column, using the appropriate yj_param column per row.
   new_data <- left_join(new_data, object$yj_params, by = key_colnames(new_data, exclude = "time_value"))
@@ -243,7 +242,7 @@ compute_yj_params <- function(training, col_names, limits, num_unique, na_fill,
   #         x = "Yeo-Johnson parameter could not be estimated for some geos for {col}.",
   #         i = "Using parameter={x$na_fill} in these cases."
   #       ),
-  #       call = rlang::caller_fn()
+  #       call = rlang::caller_call()
   #     )
   #   }
   # }
@@ -254,75 +253,71 @@ compute_yj_params <- function(training, col_names, limits, num_unique, na_fill,
 }
 
 
-### Code below taken from recipes::step_YeoJohnson.
-### We keep "lambda" here, but above we renamed it to "yj_param".
-### Modified yj_transform() to be vectorized in lambda.
-### https://github.com/tidymodels/recipes/blob/v1.1.1/R/YeoJohnson.R#L172
-
-# Yeo-Johnson transformation
-yj_transform <- function(x, lambda, ind_neg = NULL, eps = 0.001) {
-  if (any(is.na(lambda))) {
-    return(x)
-  }
-  if (length(x) > 1 && length(lambda) == 1) {
-    lambda <- rep(lambda, length(x))
-  } else if (length(x) != length(lambda)) {
-    cli::cli_abort(
-      "Length of `x` must be equal to length of `lambda` or lambda must be a scalar.",
-      call = rlang::caller_fn()
-    )
-  }
-  if (!inherits(x, "tbl_df") || is.data.frame(x)) {
-    x <- unlist(x, use.names = FALSE)
+yj_input_type_management <- function(x_in, lambda) {
+  if (x_in %>% inherits("quantile_pred")) {
+    x <- as.matrix(x_in)
+    if (length(lambda) == 1) {
+      lambda <- lambda %>%
+        rep(prod(dim(x))) %>%
+        matrix(dim(x))
+    } else if (length(x_in) == length(lambda)) {
+      lambda <- lambda %>%
+        rep(dim(x)[[2]]) %>%
+        matrix(dim(x))
+    } else if (length(x) != length(lambda)) {
+      cli::cli_abort("Length of `x` must be equal to length of `lambda`.", call = rlang::caller_call(n = 2))
+    }
+  } else if (!inherits(x_in, "tbl_df") || is.data.frame(x_in)) {
+    x <- unlist(x_in, use.names = FALSE)
   } else {
-    if (!is.vector(x)) {
-      x <- as.vector(x)
+    if (!is.vector(x_in)) {
+      x <- as.vector(x_in)
+    } else {
+      x <- x_in
     }
   }
-  # TODO case weights: can we use weights here?
-  if (is.null(ind_neg)) {
-    dat_neg <- x < 0
-    ind_neg <- list(is = which(dat_neg), not = which(!dat_neg))
-  }
-  not_neg <- ind_neg[["not"]]
-  is_neg <- ind_neg[["is"]]
 
-  nn_trans <- function(x, lambda) {
-    out <- double(length(x))
-    sm_lambdas <- abs(lambda) < eps
-    if (length(sm_lambdas) > 0) {
-      out[sm_lambdas] <- log(x[sm_lambdas] + 1)
-    }
-    x <- x[!sm_lambdas]
-    lambda <- lambda[!sm_lambdas]
-    if (length(x) > 0) {
-      out[!sm_lambdas] <- ((x + 1)^lambda - 1) / lambda
-    }
-    out
+  # Only non-quantile input needs this; quantile input was sized and checked above.
+  if (length(x) > 1 && length(lambda) == 1) {
+    lambda <- rep(lambda, length(x))
+  } else if (length(x) != length(lambda)) {
+    cli::cli_abort("Length of `x` must be equal to length of `lambda`.", call = rlang::caller_call(n = 2))
   }
-
-  ng_trans <- function(x, lambda) {
-    out <- double(length(x))
-    near2_lambdas <- abs(lambda - 2) < eps
-    if (length(near2_lambdas) > 0) {
-      out[near2_lambdas] <- -log(-x[near2_lambdas] + 1)
-    }
-    x <- x[!near2_lambdas]
-    lambda <- lambda[!near2_lambdas]
-    if (length(x) > 0) {
-      out[!near2_lambdas] <- -((-x + 1)^(2 - lambda) - 1) / (2 - lambda)
-    }
-    out
+  list(x, lambda)
+}
+### Code below taken from recipes::step_YeoJohnson.
+### We keep "lambda" here, but above we renamed it to "yj_param".
+### Modified yj_transform() to be vectorized in lambda. Also modified to work on distributions.
+### https://github.com/tidymodels/recipes/blob/v1.1.1/R/YeoJohnson.R#L172
+# Yeo-Johnson transformation. `ind_neg` is accepted but not used in the body.
+yj_transform <- function(x_in, lambda, ind_neg = NULL, eps = 0.001) {
+  if (any(is.na(lambda))) {
+    cli::cli_abort("`lambda` cannot be `NA`.", call = rlang::caller_call())
   }
+  x_lambda <- yj_input_type_management(x_in, lambda)
+  x <- x_lambda[[1]]
+  lambda <- x_lambda[[2]]
 
-  if (length(not_neg) > 0) {
-    x[not_neg] <- nn_trans(x[not_neg], lambda[not_neg])
-  }
+  transformed <- ifelse(
+    x < 0,
+    # for negative values we test if lambda is ~2
+    ifelse(
+      abs(lambda - 2) < eps,
+      -log(abs(x) + 1),
+      -((abs(x) + 1)^(2 - lambda) - 1) / (2 - lambda)
+    ),
+    # for non-negative values we test if lambda is ~0
+    ifelse(
+      abs(lambda) < eps,
+      log(abs(x) + 1),
+      ((abs(x) + 1)^lambda - 1) / lambda
+    )
+  )
 
-  if (length(is_neg) > 0) {
-    x[is_neg] <- ng_trans(x[is_neg], lambda[is_neg])
+  if (x_in %>% inherits("quantile_pred")) {
+    transformed <- transformed %>% quantile_pred(x_in %@% "quantile_levels")
   }
-  x
+  transformed
 }
 
 ## Helper for the log-likelihood calc for eq 3.1 of Yeo, I. K.,
@@ -344,7 +339,7 @@ yj_obj <- function(lam, dat, ind_neg, const) {
 }
 
 ## estimates the values
-estimate_yj <- function(dat, limits = c(-5, 5), num_unique = 5, na_rm = TRUE, call = caller_env(2)) {
+estimate_yj <- function(dat, limits = c(-5, 5), num_unique = 5, na_rm = TRUE) {
   na_rows <- which(is.na(dat))
   if (length(na_rows) > 0) {
     if (na_rm) {
@@ -355,7 +350,7 @@ estimate_yj <- function(dat, limits = c(-5, 5), num_unique = 5, na_rm = TRUE, ca
           x = "Missing values are not allowed for the YJ transformation.",
           i = "See {.arg na_rm} option."
         ),
-        call = call
+        call = rlang::caller_call(n = 2)
       )
     }
   }
diff --git a/man/epipredict-vctrs.Rd b/man/epipredict-vctrs.Rd
new file mode 100644
index 00000000..a4dabbfa
--- /dev/null
+++ b/man/epipredict-vctrs.Rd
@@ -0,0 +1,13 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/quantile_pred-methods.R
+\name{epipredict-vctrs}
+\alias{epipredict-vctrs}
+\alias{vec_arith.quantile_pred}
+\title{Internal vctrs methods}
+\usage{
+\method{vec_arith}{quantile_pred}(op, x, y, ...)
+}
+\description{
+Internal vctrs methods
+}
+\keyword{internal}
diff --git a/man/get_params_in_layer.Rd b/man/get_params_in_layer.Rd
new file mode 100644
index 00000000..1d6c98ef
--- /dev/null
+++ b/man/get_params_in_layer.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/layer_yeo_johnson.R
+\name{get_params_in_layer}
+\alias{get_params_in_layer}
+\title{get the parameters used in the initial step}
+\usage{
+get_params_in_layer(
+  workflow,
+  step_name = "epi_YeoJohnson",
+  param_name = "yj_params"
+)
+}
+\arguments{
+\item{workflow}{the workflow to extract the parameters from}
+
+\item{step_name}{the name of the step to look for, as recognized by \code{detect_step}}
+
+\item{param_name}{the parameter to pull out of the step}
+}
+\description{
+get the parameters used in the initial step
+}
+\keyword{internal}
diff --git a/man/layer_epi_YeoJohnson.Rd b/man/layer_epi_YeoJohnson.Rd
index 53520b4e..0c35ee74 100644
--- a/man/layer_epi_YeoJohnson.Rd
+++ b/man/layer_epi_YeoJohnson.Rd
@@ -8,7 +8,6 @@ layer_epi_YeoJohnson(
   frosting,
   ...,
   yj_params = NULL,
-  by = NULL,
   id = rand_id("epi_YeoJohnson")
 )
 }
@@ -19,10 +18,15 @@ sequence of operations for this frosting.}
 \item{...}{One or more selector functions to scale variables
 for this step. See \code{\link[recipes:selections]{recipes::selections()}} for more details.}
 
-\item{yj_params}{Internal. A data frame of parameters to be used for
-inverting the transformation.}
-
-\item{by}{A (possibly named) character vector of variables to join by.}
+\item{yj_params}{A data frame of parameters to be used for inverting the
+transformation. Typically set automatically. If you have done multiple
+transformations such that the outcome variable name no longer contains the
+column that this step transforms, then you should manually specify this to
+be the parameters fit in the corresponding \code{step_epi_YeoJohnson}. For an
+example where you wouldn't need to set this, if your output is
+\code{ahead_7_cases} and \code{step_epi_YeoJohnson} transformed cases (possibly with
+other columns), then you wouldn't need to set this. However if you have
+renamed your output column to \code{diff_7}, then you will need to extract the \code{yj_params} from the step.}
 
 \item{id}{a random id string}
 }
@@ -30,7 +34,11 @@ inverting the transformation.}
 an updated \code{frosting} postprocessor
 }
 \description{
-Will undo a step_epi_YeoJohnson transformation.
+Will undo a step_epi_YeoJohnson transformation. For practical reasons, if you
+are using this step on a column that will eventually become the outcome
+variable, you should make sure that the original name of that column is a
+subset of the outcome variable name. \code{ahead_7_cases} when \code{cases} is
+transformed will work well, while \code{ahead_7} will not.
 }
 \examples{
 library(dplyr)
diff --git a/man/step_adjust_latency.Rd b/man/step_adjust_latency.Rd
index 9e1bafbd..685f806b 100644
--- a/man/step_adjust_latency.Rd
+++ b/man/step_adjust_latency.Rd
@@ -267,8 +267,8 @@ while this will not:
 \if{html}{\out{<div class="sourceCode r">}}\preformatted{toy_recipe <- epi_recipe(toy_df) \%>\%
    step_epi_lag(a, lag=0) \%>\%
    step_adjust_latency(a, method = "extend_lags")
-#> Warning: If `method` is "extend_lags" or "locf", then the previous `step_epi_lag`s won't work with
-#> modified data.
+#> Warning: If `method` is "extend_lags" or "locf", then the previous `step_epi_lag`s won't work
+#> with modified data.
 }\if{html}{\out{</div>}}
 
 If you create columns that you then apply lags to (such as
diff --git a/tests/testthat/_snaps/yeo-johnson.md b/tests/testthat/_snaps/yeo-johnson.md
new file mode 100644
index 00000000..b3a42c24
--- /dev/null
+++ b/tests/testthat/_snaps/yeo-johnson.md
@@ -0,0 +1,16 @@
+# Yeo-Johnson transformation inverts correctly
+
+    Code
+      yj_transform(x, c(1, 2, 3))
+    Condition
+      Error:
+      ! Length of `x` must be equal to length of `lambda`.
+
+---
+
+    Code
+      yj_transform(list(1, 2), c(1, 2, 3))
+    Condition
+      Error:
+      ! Length of `x` must be equal to length of `lambda`.
+
diff --git a/tests/testthat/test-quantile_pred.R b/tests/testthat/test-quantile_pred.R
index 70d7c71a..f5097c1d 100644
--- a/tests/testthat/test-quantile_pred.R
+++ b/tests/testthat/test-quantile_pred.R
@@ -81,6 +81,7 @@ test_that("unary math works on quantiles", {
 })
 
 test_that("arithmetic works on quantiles", {
+  # Quantile and numeric arithmetic works
   dstn <- hardhat::quantile_pred(
     matrix(c(1:4, 8:11), nrow = 2, byrow = TRUE),
     1:4 / 5
@@ -100,4 +101,28 @@ test_that("arithmetic works on quantiles", {
   expect_identical((1 / 4) * dstn, dstn2)
 
   expect_snapshot(error = TRUE, sum(dstn))
+
+  # Quantile and quantile arithmetic works
+  val <- c(1:4, 8:11)
+  dstn3 <- hardhat::quantile_pred(
+    matrix(val, nrow = 2, byrow = TRUE),
+    1:4 / 5
+  )
+  dstn4 <- hardhat::quantile_pred(
+    matrix(val + 2 * val, nrow = 2, byrow = TRUE),
+    1:4 / 5
+  )
+  expect_identical(dstn3 + (2 * dstn3), dstn4)
+
+  # Extrapolate when quantile_levels are not the same
+  val <- c(1:4, 8:11)
+  dstn5 <- hardhat::quantile_pred(
+    matrix(val, nrow = 2, byrow = TRUE),
+    c(0.1, 0.25, 0.5, 0.75)
+  )
+  dstn6 <- hardhat::quantile_pred(
+    matrix(val, nrow = 2, byrow = TRUE),
+    c(0.25, 0.5, 0.75, 0.9)
+  )
+  expect_identical((dstn5 + dstn6) %@% "quantile_levels", c(0.1, 0.25, 0.5, 0.75, 0.9))
 })
diff --git a/tests/testthat/test-yeo-johnson.R b/tests/testthat/test-yeo-johnson.R
index 9ae82151..c0bd12bd 100644
--- a/tests/testthat/test-yeo-johnson.R
+++ b/tests/testthat/test-yeo-johnson.R
@@ -13,6 +13,29 @@ test_that("Yeo-Johnson transformation inverts correctly", {
   expect_true(
     sum(abs(yj_inverse(yj_transform(x, lambdas), lambdas) - x)) < 1e-5
   )
+
+  # also works on quantile distributions
+  x <- quantile_pred(matrix(c(-5, 1, 3, 0, 0.1, 0.5), nrow = 2, byrow = TRUE), c(0.01, 0.5, 0.7))
+  x_back <- map(
+      lambdas,
+      \(lambda) mean(abs(yj_inverse(yj_transform(x, lambda), lambda) - x)) < 1e-5
+    )
+  expect_true(all(unlist(x_back)))
+
+  # Get coverage on yj_input_type_management
+  # Breaks on bad length of lambda
+  expect_snapshot(error = TRUE,
+    yj_transform(x, c(1, 2, 3))
+  )
+  expect_snapshot(error = TRUE,
+    yj_transform(list(1, 2), c(1, 2, 3))
+  )
+  expect_true(
+    identical(
+      yj_input_type_management(list(1, 2, 3), c(1, 2, 3)),
+      list(c(1, 2, 3), c(1, 2, 3))
+    )
+  )
 })
 
 test_that("Yeo-Johnson steps and layers invert each other", {
@@ -36,16 +59,16 @@ test_that("Yeo-Johnson steps and layers invert each other", {
   # Make sure that the inverse transformation works
   f <- frosting() %>%
     layer_predict() %>%
-    layer_epi_YeoJohnson(.pred)
+    layer_epi_YeoJohnson()
   wf <- epi_workflow(r, linear_reg()) %>%
     fit(filtered_data) %>%
     add_frosting(f)
   out1 <- filtered_data %>%
-    slice_max(time_value, by = geo_value)
+    dplyr::slice_max(time_value, by = geo_value)
   out2 <- forecast(wf) %>% rename(cases = .pred)
   expect_equal(out1, out2)
 
-  # Make sure it works when there are multiple predictors and outcomes
+  # Make sure it works when there are multiple predictors
   jhu_multi <- epidatasets::covid_case_death_rates_extended %>%
     filter(time_value > "2021-01-01", geo_value %in% c("ca", "ny")) %>%
     select(geo_value, time_value, case_rate, death_rate)
@@ -53,7 +76,7 @@ test_that("Yeo-Johnson steps and layers invert each other", {
   r <- epi_recipe(filtered_data) %>%
     step_epi_YeoJohnson(case_rate, death_rate) %>%
     step_epi_lag(case_rate, death_rate, lag = 0) %>%
-    step_epi_ahead(case_rate, death_rate, ahead = 0, role = "outcome") %>%
+    step_epi_ahead(case_rate, ahead = 0, role = "outcome") %>%
     step_epi_naomit()
   tr <- r %>% prep(filtered_data)
 
@@ -66,13 +89,43 @@ test_that("Yeo-Johnson steps and layers invert each other", {
   # Make sure that the inverse transformation works
   f <- frosting() %>%
     layer_predict() %>%
-    layer_epi_YeoJohnson(.pred_ahead_0_case_rate, .pred_ahead_0_death_rate)
+    layer_epi_YeoJohnson()
+  wf <- epi_workflow(r, linear_reg()) %>%
+    fit(filtered_data) %>%
+    add_frosting(f)
+  out1 <- filtered_data %>%
+    select(-death_rate) %>%
+    dplyr::slice_max(time_value, by = geo_value)
+  out2 <- forecast(wf) %>% rename(case_rate = .pred)
+  expect_equal(out1, out2)
+})
+
+test_that("Yeo-Johnson layers work on quantiles", {
+  jhu <- epidatasets::cases_deaths_subset %>%
+    filter(time_value > "2021-01-01", geo_value %in% c("ca", "ny")) %>%
+    select(geo_value, time_value, cases)
+  filtered_data <- jhu
+
+  r <- epi_recipe(filtered_data) %>%
+    step_epi_YeoJohnson(cases) %>%
+    step_epi_lag(cases, lag = 0) %>%
+    step_epi_ahead(cases, ahead = 0, role = "outcome") %>%
+    step_epi_naomit()
+
+  f <- frosting() %>%
+    layer_predict() %>%
+    layer_residual_quantiles() %>%
+    layer_epi_YeoJohnson()
   wf <- epi_workflow(r, linear_reg()) %>%
     fit(filtered_data) %>%
     add_frosting(f)
   out1 <- filtered_data %>%
-    slice_max(time_value, by = geo_value)
-  out2 <- forecast(wf) %>% rename(case_rate = .pred_ahead_0_case_rate, death_rate = .pred_ahead_0_death_rate)
+    dplyr::slice_max(time_value, by = geo_value) %>%
+    rename(.pred = cases) %>%
+    tidyr::expand_grid(.pred_distn_quantile_level = c(0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95)) %>%
+    mutate(.pred_distn_value = .pred) %>%
+    select(geo_value, time_value, .pred, .pred_distn_value, .pred_distn_quantile_level)
+  out2 <- forecast(wf) %>% pivot_quantiles_longer(.pred_distn) %>% as_tibble()
   expect_equal(out1, out2)
 })
 
@@ -123,12 +176,13 @@ test_that("Yeo-Johnson steps and layers invert each other when other_keys are pr
   # Make sure that the inverse transformation works
   f <- frosting() %>%
     layer_predict() %>%
+    layer_residual_quantiles() %>%
     layer_epi_YeoJohnson(.pred)
   wf <- epi_workflow(r, linear_reg()) %>%
     fit(filtered_data) %>%
     add_frosting(f)
   out1 <- filtered_data %>%
-    slice_max(time_value, by = geo_value) %>%
+    dplyr::slice_max(time_value, by = geo_value) %>%
     select(geo_value, age_group, time_value, med_income_2y) %>%
     arrange(geo_value, age_group, time_value)
   out2 <- forecast(wf) %>%