From 90799292bf619b5fb31fd41cd9a622c9332afdc4 Mon Sep 17 00:00:00 2001
From: dsweber2 <david.weber2@pm.me>
Date: Mon, 5 Aug 2024 10:39:34 -0500
Subject: [PATCH 1/2] slide_mean and slide_sum

---
 NAMESPACE                                 |  10 ++
 R/step_epi_slide_mean.R                   | 188 ++++++++++++++++++++++
 R/step_epi_slide_sum.R                    | 160 ++++++++++++++++++
 man/step_epi_slide_mean.Rd                |  78 +++++++++
 man/step_epi_slide_sum.Rd                 |  77 +++++++++
 tests/testthat/test-step_epi_slide_mean.R |  88 ++++++++++
 tests/testthat/test-step_epi_slide_sum.R  |  56 +++++++
 7 files changed, 657 insertions(+)
 create mode 100644 R/step_epi_slide_mean.R
 create mode 100644 R/step_epi_slide_sum.R
 create mode 100644 man/step_epi_slide_mean.Rd
 create mode 100644 man/step_epi_slide_sum.Rd
 create mode 100644 tests/testthat/test-step_epi_slide_mean.R
 create mode 100644 tests/testthat/test-step_epi_slide_sum.R

diff --git a/NAMESPACE b/NAMESPACE
index 3a1fed508..823e78225 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -23,6 +23,8 @@ S3method(bake,step_adjust_latency)
 S3method(bake,step_epi_ahead)
 S3method(bake,step_epi_lag)
 S3method(bake,step_epi_slide)
+S3method(bake,step_epi_slide_mean)
+S3method(bake,step_epi_slide_sum)
 S3method(bake,step_growth_rate)
 S3method(bake,step_lag_difference)
 S3method(bake,step_population_scaling)
@@ -62,6 +64,8 @@ S3method(prep,step_adjust_latency)
 S3method(prep,step_epi_ahead)
 S3method(prep,step_epi_lag)
 S3method(prep,step_epi_slide)
+S3method(prep,step_epi_slide_mean)
+S3method(prep,step_epi_slide_sum)
 S3method(prep,step_growth_rate)
 S3method(prep,step_lag_difference)
 S3method(prep,step_population_scaling)
@@ -92,6 +96,8 @@ S3method(print,step_adjust_latency)
 S3method(print,step_epi_ahead)
 S3method(print,step_epi_lag)
 S3method(print,step_epi_slide)
+S3method(print,step_epi_slide_mean)
+S3method(print,step_epi_slide_sum)
 S3method(print,step_growth_rate)
 S3method(print,step_lag_difference)
 S3method(print,step_naomit)
@@ -202,6 +208,8 @@ export(step_epi_ahead)
 export(step_epi_lag)
 export(step_epi_naomit)
 export(step_epi_slide)
+export(step_epi_slide_mean)
+export(step_epi_slide_sum)
 export(step_growth_rate)
 export(step_lag_difference)
 export(step_population_scaling)
@@ -272,6 +280,8 @@ importFrom(glue,glue)
 importFrom(hardhat,extract_recipe)
 importFrom(hardhat,refresh_blueprint)
 importFrom(hardhat,run_mold)
+importFrom(lubridate,is.period)
+importFrom(lubridate,time_length)
 importFrom(magrittr,"%>%")
 importFrom(magrittr,extract2)
 importFrom(recipes,bake)
diff --git a/R/step_epi_slide_mean.R b/R/step_epi_slide_mean.R
new file mode 100644
index 000000000..a95a8826b
--- /dev/null
+++ b/R/step_epi_slide_mean.R
@@ -0,0 +1,188 @@
+#' Calculate a rolling mean
+#'
+#' `step_epi_slide_mean()` creates a *specification* of a recipe step that will
+#'   generate one or more new columns of derived data by computing a sliding
+#'   mean along existing data.
+#'
+#'
+#' @inheritParams step_epi_lag
+#' @param before,after non-negative integers.
+#'   How far `before` and `after` each `time_value` should
+#'   the sliding window extend? Any value provided for either
+#'   argument must be a single, non-`NA`, non-negative,
+#'   [integer-compatible][vctrs::vec_cast] number of time steps. Endpoints of
+#'   the window are inclusive. Common settings:
+#'   * For trailing/right-aligned windows from `time_value - time_step(k)` to
+#'   `time_value`, use `before=k, after=0`. This is the most likely use case
+#'   for the purposes of forecasting.
+#'   * For center-aligned windows from `time_value - time_step(k)` to
+#'   `time_value + time_step(k)`, use `before=k, after=k`.
+#'   * For leading/left-aligned windows from `time_value` to
+#'   `time_value + time_step(k)`, use `before=0, after=k`.
+#'
+#'   You may also pass a [lubridate::period], like `lubridate::weeks(1)` or a
+#'   character string that is coercible to a [lubridate::period], like
+#'   `"2 weeks"`.
+#' @template step-return
+#'
+#' @export
+#' @examples
+#' library(dplyr)
+#' jhu <- case_death_rate_subset %>%
+#'   filter(time_value >= as.Date("2021-01-01"), geo_value %in% c("ca", "ny"))
+#' rec <- epi_recipe(jhu) %>%
+#'   step_epi_slide_mean(
+#'     case_rate, death_rate,
+#'     before = 6L
+#'   )
+#' bake(prep(rec, jhu), new_data = NULL)
+step_epi_slide_mean <- function(recipe,
+                                ...,
+                                before = 0L,
+                                after = 0L,
+                                role = "predictor",
+                                prefix = "epi_slide_mean_",
+                                skip = FALSE,
+                                id = rand_id("epi_slide_mean")) {
+  # This step is only defined for epi_recipes.
+  if (!is_epi_recipe(recipe)) {
+    rlang::abort("This recipe step can only operate on an `epi_recipe`.")
+  }
+  # Check the window bounds are scalars, then pass them through `try_period()`.
+  arg_is_scalar(before, after)
+  window_before <- try_period(before)
+  window_after <- try_period(after)
+  arg_is_chr_scalar(role, prefix, id)
+  arg_is_lgl_scalar(skip)
+  # Build the untrained step; `columns` stays `NULL` until `prep()` sets it.
+  untrained_step <- step_epi_slide_mean_new(
+    terms = enquos(...),
+    before = window_before,
+    after = window_after,
+    role = role,
+    trained = FALSE,
+    prefix = prefix,
+    keys = epi_keys(recipe),
+    columns = NULL,
+    skip = skip,
+    id = id
+  )
+  add_step(recipe, untrained_step)
+}
+
+
+# Low-level constructor for the `epi_slide_mean` step. It performs no
+# validation of its own and forwards every field to `step()` unchanged:
+# * `terms`: quosures captured from `...` by `step_epi_slide_mean()`
+# * `before`, `after`: extents of the sliding window
+# * `trained`: `FALSE` when called from `step_epi_slide_mean()`, `TRUE` when
+#   called from `prep()`
+# * `columns`: `NULL` at specification time, the selected columns after
+#   `prep()`
+# * `role`, `prefix`, `keys`, `skip`, `id`: stored as given
+step_epi_slide_mean_new <- function(terms, before, after, role, trained,
+                                    prefix, keys, columns, skip, id) {
+  step(
+    subclass = "epi_slide_mean",
+    terms = terms,
+    before = before,
+    after = after,
+    role = role,
+    trained = trained,
+    prefix = prefix,
+    keys = keys,
+    columns = columns,
+    skip = skip,
+    id = id
+  )
+}
+
+
+
+#' @export
+prep.step_epi_slide_mean <- function(x, training, info = NULL, ...) {
+  # Resolve the selectors against the training data and require the selected
+  # columns to be double or integer.
+  selected <- recipes::recipes_eval_select(x$terms, data = training, info = info)
+  check_type(training[, selected], types = c("double", "integer"))
+  # Period-valued window bounds are converted using the data's time type.
+  data_time_type <- attributes(training)$metadata$time_type
+  step_epi_slide_mean_new(
+    terms = x$terms,
+    before = lubridate_period_to_integer(x$before, data_time_type),
+    after = lubridate_period_to_integer(x$after, data_time_type),
+    role = x$role,
+    trained = TRUE,
+    prefix = x$prefix,
+    keys = x$keys,
+    columns = selected,
+    skip = x$skip,
+    id = x$id
+  )
+}
+
+#' lubridate converts to seconds by default, and as.integer doesn't throw errors if it isn't actually an integer
+#' @importFrom lubridate time_length is.period
+#' @keywords internal
+lubridate_period_to_integer <- function(value, time_type) {
+  # Anything that is not a lubridate period is handed back untouched.
+  if (!is.period(value)) {
+    return(value)
+  }
+  if (time_type %in% c("day", "week")) {
+    n_steps <- time_length(value, unit = time_type)
+  } else {
+    cli_abort(
+      "unsupported time type of {time_type}. Use integer instead of lubridate period.",
+      class = "epipredict__step_epi_slide_mean__unsupported_error"
+    )
+  }
+  # `as.integer()` would silently truncate, so reject fractional step counts.
+  if (n_steps %% 1 != 0) {
+    cli_abort(
+      "Converted `before`/`after` value of {n_steps} is not an integer.",
+      class = "epipredict__step_epi_slide_mean__unsupported_error"
+    )
+  }
+  as.integer(n_steps)
+}
+
+
+#' @export
+bake.step_epi_slide_mean <- function(object, new_data, ...) {
+  # Slide a mean over each selected column; results are renamed to `<prefix><column>`.
+  recipes::check_new_data(names(object$columns), object, new_data)
+  col_names <- as.vector(object$columns)
+  new_names <- glue::glue("{object$prefix}{col_names}")
+  ## ensure no name clashes
+  new_data_names <- colnames(new_data)
+  intersection <- new_data_names %in% new_names
+  if (any(intersection)) {
+    nms <- new_data_names[intersection]
+    cli_abort(
+      c("In `step_epi_slide_mean()` a name collision occurred. The following variable names already exist:",
+        `*` = "{.var {nms}}"
+      ),
+      call = caller_env(),
+      class = "epipredict__step__name_collision_error"
+    )
+  }
+  ## `rename()` wants `new = old`: values are the `slide_value_*` columns, names the prefixed ones
+  renaming <- glue::glue("slide_value_{col_names}")
+  names(renaming) <- new_names
+  new_data %>%
+    group_by(across(all_of(object$keys[-1]))) %>%
+    epi_slide_mean(col_names, before = object$before, after = object$after) %>%
+    rename(all_of(renaming))
+}
+
+
+#' @export
+print.step_epi_slide_mean <- function(x, width = max(20, options()$width - 30), ...) {
+  # One-line step summary. NOTE(review): the constructor sets no `f_name`, so `extra_text` is presumably NULL.
+  print_epi_step(
+    x$columns, x$terms, x$trained,
+    title = "Calculating epi_slide_mean for ", conjunction = "with", extra_text = x$f_name
+  )
+  invisible(x)
+}
diff --git a/R/step_epi_slide_sum.R b/R/step_epi_slide_sum.R
new file mode 100644
index 000000000..394e59b15
--- /dev/null
+++ b/R/step_epi_slide_sum.R
@@ -0,0 +1,160 @@
+#' Calculate a rolling sum
+#'
+#' `step_epi_slide_sum()` creates a *specification* of a recipe step that will
+#'   generate one or more new columns of derived data by computing a sliding
+#'   sum along existing data.
+#'
+#'
+#' @inheritParams step_epi_lag
+#' @param before,after non-negative integers.
+#'   How far `before` and `after` each `time_value` should
+#'   the sliding window extend? Any value provided for either
+#'   argument must be a single, non-`NA`, non-negative,
+#'   [integer-compatible][vctrs::vec_cast] number of time steps. Endpoints of
+#'   the window are inclusive. Common settings:
+#'   * For trailing/right-aligned windows from `time_value - time_step(k)` to
+#'   `time_value`, use `before=k, after=0`. This is the most likely use case
+#'   for the purposes of forecasting.
+#'   * For center-aligned windows from `time_value - time_step(k)` to
+#'   `time_value + time_step(k)`, use `before=k, after=k`.
+#'   * For leading/left-aligned windows from `time_value` to
+#'   `time_value + time_step(k)`, use `before=0, after=k`.
+#'
+#'   You may also pass a [lubridate::period], like `lubridate::weeks(1)` or a
+#'   character string that is coercible to a [lubridate::period], like
+#'   `"2 weeks"`.
+#' @template step-return
+#'
+#' @export
+#' @examples
+#' library(dplyr)
+#' jhu <- case_death_rate_subset %>%
+#'   filter(time_value >= as.Date("2021-01-01"), geo_value %in% c("ca", "ny"))
+#' rec <- epi_recipe(jhu) %>%
+#'   step_epi_slide_sum(case_rate, death_rate,
+#'     before = 6L
+#'   )
+#' bake(prep(rec, jhu), new_data = NULL)
+step_epi_slide_sum <- function(recipe,
+                               ...,
+                               before = 0L,
+                               after = 0L,
+                               role = "predictor",
+                               prefix = "epi_slide_sum_",
+                               skip = FALSE,
+                               id = rand_id("epi_slide_sum")) {
+  # This step is only defined for epi_recipes.
+  if (!is_epi_recipe(recipe)) {
+    rlang::abort("This recipe step can only operate on an `epi_recipe`.")
+  }
+  # Check the window bounds are scalars, then pass them through `try_period()`.
+  arg_is_scalar(before, after)
+  window_before <- try_period(before)
+  window_after <- try_period(after)
+  arg_is_chr_scalar(role, prefix, id)
+  arg_is_lgl_scalar(skip)
+  # Build the untrained step; `columns` stays `NULL` until `prep()` sets it.
+  untrained_step <- step_epi_slide_sum_new(
+    terms = enquos(...),
+    before = window_before,
+    after = window_after,
+    role = role,
+    trained = FALSE,
+    prefix = prefix,
+    keys = epi_keys(recipe),
+    columns = NULL,
+    skip = skip,
+    id = id
+  )
+  add_step(recipe, untrained_step)
+}
+
+
+# Low-level constructor for the `epi_slide_sum` step. It performs no
+# validation of its own and forwards every field to `step()` unchanged:
+# * `terms`: quosures captured from `...` by `step_epi_slide_sum()`
+# * `before`, `after`: extents of the sliding window
+# * `trained`: `FALSE` when called from `step_epi_slide_sum()`, `TRUE` when
+#   called from `prep()`
+# * `columns`: `NULL` at specification time, the selected columns after
+#   `prep()`
+# * `role`, `prefix`, `keys`, `skip`, `id`: stored as given
+step_epi_slide_sum_new <- function(terms, before, after, role, trained,
+                                   prefix, keys, columns, skip, id) {
+  step(
+    subclass = "epi_slide_sum",
+    terms = terms,
+    before = before,
+    after = after,
+    role = role,
+    trained = trained,
+    prefix = prefix,
+    keys = keys,
+    columns = columns,
+    skip = skip,
+    id = id
+  )
+}
+
+
+
+#' @export
+prep.step_epi_slide_sum <- function(x, training, info = NULL, ...) {
+  # Resolve the selectors against the training data and require the selected
+  # columns to be double or integer.
+  selected <- recipes::recipes_eval_select(x$terms, data = training, info = info)
+  check_type(training[, selected], types = c("double", "integer"))
+  # Period-valued window bounds are converted using the data's time type.
+  data_time_type <- attributes(training)$metadata$time_type
+  step_epi_slide_sum_new(
+    terms = x$terms,
+    before = lubridate_period_to_integer(x$before, data_time_type),
+    after = lubridate_period_to_integer(x$after, data_time_type),
+    role = x$role,
+    trained = TRUE,
+    prefix = x$prefix,
+    keys = x$keys,
+    columns = selected,
+    skip = x$skip,
+    id = x$id
+  )
+}
+
+#' @export
+bake.step_epi_slide_sum <- function(object, new_data, ...) {
+  # Slide a sum over each selected column; results are renamed to `<prefix><column>`.
+  recipes::check_new_data(names(object$columns), object, new_data)
+  col_names <- as.vector(object$columns)
+  new_names <- glue::glue("{object$prefix}{col_names}")
+  ## ensure no name clashes
+  new_data_names <- colnames(new_data)
+  intersection <- new_data_names %in% new_names
+  if (any(intersection)) {
+    nms <- new_data_names[intersection]
+    cli_abort(
+      c("In `step_epi_slide_sum()` a name collision occurred. The following variable names already exist:",
+        `*` = "{.var {nms}}"
+      ),
+      call = caller_env(),
+      class = "epipredict__step__name_collision_error"
+    )
+  }
+  ## `rename()` wants `new = old`: values are the `slide_value_*` columns, names the prefixed ones
+  renaming <- glue::glue("slide_value_{col_names}")
+  names(renaming) <- new_names
+  new_data %>%
+    group_by(across(all_of(object$keys[-1]))) %>%
+    epi_slide_sum(col_names, before = object$before, after = object$after) %>%
+    rename(all_of(renaming))
+}
+
+
+#' @export
+print.step_epi_slide_sum <- function(x, width = max(20, options()$width - 30), ...) {
+  # One-line step summary. NOTE(review): the constructor sets no `f_name`, so `extra_text` is presumably NULL.
+  print_epi_step(
+    x$columns, x$terms, x$trained,
+    title = "Calculating epi_slide_sum for ", conjunction = "with", extra_text = x$f_name
+  )
+  invisible(x)
+}
diff --git a/man/step_epi_slide_mean.Rd b/man/step_epi_slide_mean.Rd
new file mode 100644
index 000000000..538f9e28d
--- /dev/null
+++ b/man/step_epi_slide_mean.Rd
@@ -0,0 +1,78 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/step_epi_slide_mean.R
+\name{step_epi_slide_mean}
+\alias{step_epi_slide_mean}
+\title{Calculate a rolling mean}
+\usage{
+step_epi_slide_mean(
+  recipe,
+  ...,
+  before = 0L,
+  after = 0L,
+  role = "predictor",
+  prefix = "epi_slide_mean_",
+  skip = FALSE,
+  id = rand_id("epi_slide_mean")
+)
+}
+\arguments{
+\item{recipe}{A recipe object. The step will be added to the
+sequence of operations for this recipe.}
+
+\item{...}{One or more selector functions to choose variables
+for this step. See \code{\link[recipes:selections]{recipes::selections()}} for more details.}
+
+\item{before, after}{non-negative integers.
+How far \code{before} and \code{after} each \code{time_value} should
+the sliding window extend? Any value provided for either
+argument must be a single, non-\code{NA}, non-negative,
+\link[vctrs:vec_cast]{integer-compatible} number of time steps. Endpoints of
+the window are inclusive. Common settings:
+\itemize{
+\item For trailing/right-aligned windows from \code{time_value - time_step(k)} to
+\code{time_value}, use \verb{before=k, after=0}. This is the most likely use case
+for the purposes of forecasting.
+\item For center-aligned windows from \code{time_value - time_step(k)} to
+\code{time_value + time_step(k)}, use \verb{before=k, after=k}.
+\item For leading/left-aligned windows from \code{time_value} to
+\code{time_value + time_step(k)}, use \verb{before=0, after=k}.
+}
+
+You may also pass a \link[lubridate:period]{lubridate::period}, like \code{lubridate::weeks(1)} or a
+character string that is coercible to a \link[lubridate:period]{lubridate::period}, like
+\code{"2 weeks"}.}
+
+\item{role}{For model terms created by this step, what analysis role should
+they be assigned? \code{lag} is default a predictor while \code{ahead} is an outcome.}
+
+\item{prefix}{A prefix to indicate what type of variable this is}
+
+\item{skip}{A logical. Should the step be skipped when the
+recipe is baked by \code{\link[=bake]{bake()}}? While all operations are baked
+when \code{\link[=prep]{prep()}} is run, some operations may not be able to be
+conducted on new data (e.g. processing the outcome variable(s)).
+Care should be taken when using \code{skip = TRUE} as it may affect
+the computations for subsequent operations.}
+
+\item{id}{A unique identifier for the step}
+}
+\value{
+An updated version of \code{recipe} with the new step added to the
+sequence of any existing operations.
+}
+\description{
+\code{step_epi_slide_mean()} creates a \emph{specification} of a recipe step that will
+generate one or more new columns of derived data by computing a sliding
+mean along existing data.
+}
+\examples{
+library(dplyr)
+jhu <- case_death_rate_subset \%>\%
+  filter(time_value >= as.Date("2021-01-01"), geo_value \%in\% c("ca", "ny"))
+rec <- epi_recipe(jhu) \%>\%
+  step_epi_slide_mean(
+    case_rate, death_rate,
+    before = 6L
+  )
+bake(prep(rec, jhu), new_data = NULL)
+}
diff --git a/man/step_epi_slide_sum.Rd b/man/step_epi_slide_sum.Rd
new file mode 100644
index 000000000..d17a9990c
--- /dev/null
+++ b/man/step_epi_slide_sum.Rd
@@ -0,0 +1,77 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/step_epi_slide_sum.R
+\name{step_epi_slide_sum}
+\alias{step_epi_slide_sum}
+\title{Calculate a rolling sum}
+\usage{
+step_epi_slide_sum(
+  recipe,
+  ...,
+  before = 0L,
+  after = 0L,
+  role = "predictor",
+  prefix = "epi_slide_sum_",
+  skip = FALSE,
+  id = rand_id("epi_slide_sum")
+)
+}
+\arguments{
+\item{recipe}{A recipe object. The step will be added to the
+sequence of operations for this recipe.}
+
+\item{...}{One or more selector functions to choose variables
+for this step. See \code{\link[recipes:selections]{recipes::selections()}} for more details.}
+
+\item{before, after}{non-negative integers.
+How far \code{before} and \code{after} each \code{time_value} should
+the sliding window extend? Any value provided for either
+argument must be a single, non-\code{NA}, non-negative,
+\link[vctrs:vec_cast]{integer-compatible} number of time steps. Endpoints of
+the window are inclusive. Common settings:
+\itemize{
+\item For trailing/right-aligned windows from \code{time_value - time_step(k)} to
+\code{time_value}, use \verb{before=k, after=0}. This is the most likely use case
+for the purposes of forecasting.
+\item For center-aligned windows from \code{time_value - time_step(k)} to
+\code{time_value + time_step(k)}, use \verb{before=k, after=k}.
+\item For leading/left-aligned windows from \code{time_value} to
+\code{time_value + time_step(k)}, use \verb{before=0, after=k}.
+}
+
+You may also pass a \link[lubridate:period]{lubridate::period}, like \code{lubridate::weeks(1)} or a
+character string that is coercible to a \link[lubridate:period]{lubridate::period}, like
+\code{"2 weeks"}.}
+
+\item{role}{For model terms created by this step, what analysis role should
+they be assigned? \code{lag} is default a predictor while \code{ahead} is an outcome.}
+
+\item{prefix}{A prefix to indicate what type of variable this is}
+
+\item{skip}{A logical. Should the step be skipped when the
+recipe is baked by \code{\link[=bake]{bake()}}? While all operations are baked
+when \code{\link[=prep]{prep()}} is run, some operations may not be able to be
+conducted on new data (e.g. processing the outcome variable(s)).
+Care should be taken when using \code{skip = TRUE} as it may affect
+the computations for subsequent operations.}
+
+\item{id}{A unique identifier for the step}
+}
+\value{
+An updated version of \code{recipe} with the new step added to the
+sequence of any existing operations.
+}
+\description{
+\code{step_epi_slide_sum()} creates a \emph{specification} of a recipe step that will
+generate one or more new columns of derived data by computing a sliding
+sum along existing data.
+}
+\examples{
+library(dplyr)
+jhu <- case_death_rate_subset \%>\%
+  filter(time_value >= as.Date("2021-01-01"), geo_value \%in\% c("ca", "ny"))
+rec <- epi_recipe(jhu) \%>\%
+  step_epi_slide_sum(case_rate, death_rate,
+    before = 6L
+  )
+bake(prep(rec, jhu), new_data = NULL)
+}
diff --git a/tests/testthat/test-step_epi_slide_mean.R b/tests/testthat/test-step_epi_slide_mean.R
new file mode 100644
index 000000000..a27c3f141
--- /dev/null
+++ b/tests/testthat/test-step_epi_slide_mean.R
@@ -0,0 +1,88 @@
+tt <- seq(as.Date("2022-01-01"), by = "1 day", length.out = 20) # 20 consecutive daily dates
+edf <- data.frame(
+  time_value = c(tt, tt),
+  geo_value = rep(c("ca", "ny"), each = 20L),
+  value = c(2:21, 3:22),
+  value2 = c(5+(2:21),-1 + (3:22))
+) %>%
+  as_epi_df()
+# Untrained recipe shared by all tests in this file.
+r <- epi_recipe(edf)
+# Reference results computed by calling the sliding functions directly.
+library(dplyr)
+rolled_before_epi_slide <- edf %>%
+  group_by(geo_value) %>%
+  epi_slide(epi_slide_mean_value = mean(value), before = 3L)
+rolled_before <- edf %>%
+  group_by(geo_value) %>%
+  epi_slide_mean("value", before = 3L) %>%
+  rename(epi_slide_mean_value = "slide_value_value") %>%
+  ungroup()
+rolled_after <- edf %>%
+  group_by(geo_value) %>%
+  epi_slide_mean("value", after = 3L) %>%
+  rename(epi_slide_mean_value = "slide_value_value") %>%
+  ungroup()
+
+test_that("epi_slide handles classed before/after", {
+  int_before <- r %>%
+    step_epi_slide_mean(value, before = 3L) %>%
+    prep(edf) %>%
+    bake(new_data = NULL)
+  expect_equal(int_before, rolled_before)
+  # a lubridate period and its string form must match the integer spelling
+  period_before <- r %>%
+    step_epi_slide_mean(value, before = lubridate::period("3 days")) %>%
+    prep(edf) %>%
+    bake(new_data = NULL)
+  string_before <- r %>%
+    step_epi_slide_mean(value, before = "3 days") %>%
+    prep(edf) %>%
+    bake(new_data = NULL)
+  expect_equal(int_before, period_before)
+  expect_equal(int_before, string_before)
+  # the same three spellings, now for a leading window
+  int_after <- r %>%
+    step_epi_slide_mean(value, after = 3L) %>%
+    prep(edf) %>%
+    bake(new_data = NULL)
+  expect_equal(int_after, rolled_after)
+  period_after <- r %>%
+    step_epi_slide_mean(value, after = lubridate::period("3 days")) %>%
+    prep(edf) %>%
+    bake(new_data = NULL)
+  string_after <- r %>%
+    step_epi_slide_mean(value, after = "3 days") %>%
+    prep(edf) %>%
+    bake(new_data = NULL)
+  expect_equal(int_after, period_after)
+  expect_equal(int_after, string_after)
+})
+
+test_that("epi_slide_mean has different edge behavior than epi_slide", {
+  baked <- r %>%
+    step_epi_slide_mean(value, before = 3L) %>%
+    prep(edf) %>%
+    bake(new_data = NULL)
+  # `epi_slide()` still returns a value when the window is short, while the
+  # `epi_slide_mean()`-based step yields `NA` there, so the two results are
+  # not identical and `all()` over the comparison evaluates to `NA`.
+  expect_true(is.na(all(rolled_before_epi_slide == baked)))
+})
+
+rolled_before_2 <- edf %>%
+  group_by(geo_value) %>%
+  epi_slide_mean(c("value", "value2"), before = 3L) %>%
+  ungroup() %>%
+  rename(
+    epi_slide_mean_value = "slide_value_value",
+    epi_slide_mean_value2 = "slide_value_value2"
+  )
+# the step's `starts_with("value")` selector should cover `value` and `value2`
+test_that("handles multiple columns correctly", {
+  baked <- r %>%
+    step_epi_slide_mean(starts_with("value"), before = 3L) %>%
+    prep(edf) %>%
+    bake(new_data = NULL)
+  expect_equal(baked, rolled_before_2)
+})
diff --git a/tests/testthat/test-step_epi_slide_sum.R b/tests/testthat/test-step_epi_slide_sum.R
new file mode 100644
index 000000000..cfd0fe2be
--- /dev/null
+++ b/tests/testthat/test-step_epi_slide_sum.R
@@ -0,0 +1,56 @@
+tt <- seq(as.Date("2022-01-01"), by = "1 day", length.out = 20) # 20 consecutive daily dates
+edf <- data.frame(
+  time_value = c(tt, tt),
+  geo_value = rep(c("ca", "ny"), each = 20L),
+  value = c(2:21, 3:22),
+  value2 = c(5+(2:21),-1 + (3:22))
+) %>%
+  as_epi_df(as_of=Sys.Date())
+# Untrained recipe shared by all tests in this file.
+r <- epi_recipe(edf)
+# Reference results computed by calling the sliding functions directly.
+library(dplyr)
+rolled_before_epi_slide <- edf %>%
+  group_by(geo_value) %>%
+  epi_slide(epi_slide_sum_value = sum(value), before = 3L)
+rolled_before <- edf %>%
+  group_by(geo_value) %>%
+  epi_slide_sum("value", before = 3L) %>%
+  ungroup() %>%
+  rename(epi_slide_sum_value = "slide_value_value")
+rolled_after <- edf %>% # NOTE(review): not referenced by any test visible in this file
+  group_by(geo_value) %>%
+  epi_slide_sum("value", after = 3L) %>%
+  pull(slide_value_value)
+
+# most shared behaviour is already covered by the `epi_slide_mean` tests and is not repeated here
+test_that("epi_slide_sum works correctly", {
+  baked <- r %>%
+    step_epi_slide_sum(value, before = 3L) %>%
+    prep(edf) %>%
+    bake(new_data = NULL)
+  expect_equal(baked, rolled_before)
+  # `epi_slide()` still returns a value when the window is short, while the
+  # `epi_slide_sum()`-based step yields `NA` there, so the two results are
+  # not identical and `all()` over the comparison evaluates to `NA`.
+  expect_true(is.na(all(rolled_before_epi_slide == baked)))
+})
+
+rolled_before_2 <- edf %>%
+  group_by(geo_value) %>%
+  epi_slide_sum(c("value", "value2"), before = 3L) %>%
+  ungroup() %>%
+  rename(
+    epi_slide_sum_value = "slide_value_value",
+    epi_slide_sum_value2 = "slide_value_value2"
+  )
+# the step's `starts_with("value")` selector should cover `value` and `value2`
+test_that("handles multiple columns correctly", {
+  # the baked recipe must equal the result of calling `epi_slide_sum()`
+  # directly on both columns and renaming with the step's prefix
+  stepped <- r %>%
+    step_epi_slide_sum(starts_with("value"), before = 3L) %>%
+    prep(edf) %>%
+    bake(new_data = NULL)
+  expect_equal(stepped, rolled_before_2)
+})

From 06310d53b12ebc301a4704d6a1ce2a3025b279cc Mon Sep 17 00:00:00 2001
From: dsweber2 <david.weber2@pm.me>
Date: Mon, 5 Aug 2024 10:51:13 -0500
Subject: [PATCH 2/2] missed a man page

---
 man/lubridate_period_to_integer.Rd | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 create mode 100644 man/lubridate_period_to_integer.Rd

diff --git a/man/lubridate_period_to_integer.Rd b/man/lubridate_period_to_integer.Rd
new file mode 100644
index 000000000..6324fabd4
--- /dev/null
+++ b/man/lubridate_period_to_integer.Rd
@@ -0,0 +1,12 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/step_epi_slide_mean.R
+\name{lubridate_period_to_integer}
+\alias{lubridate_period_to_integer}
+\title{lubridate converts to seconds by default, and as.integer doesn't throw errors if it isn't actually an integer}
+\usage{
+lubridate_period_to_integer(value, time_type)
+}
+\description{
+lubridate converts to seconds by default, and as.integer doesn't throw errors if it isn't actually an integer
+}
+\keyword{internal}