Skip to content

Commit 9079929

Browse files
committed
slide_mean and slide_sum
1 parent ba0a94f commit 9079929

7 files changed

+657
-0
lines changed

NAMESPACE

+10
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ S3method(bake,step_adjust_latency)
2323
S3method(bake,step_epi_ahead)
2424
S3method(bake,step_epi_lag)
2525
S3method(bake,step_epi_slide)
26+
S3method(bake,step_epi_slide_mean)
27+
S3method(bake,step_epi_slide_sum)
2628
S3method(bake,step_growth_rate)
2729
S3method(bake,step_lag_difference)
2830
S3method(bake,step_population_scaling)
@@ -62,6 +64,8 @@ S3method(prep,step_adjust_latency)
6264
S3method(prep,step_epi_ahead)
6365
S3method(prep,step_epi_lag)
6466
S3method(prep,step_epi_slide)
67+
S3method(prep,step_epi_slide_mean)
68+
S3method(prep,step_epi_slide_sum)
6569
S3method(prep,step_growth_rate)
6670
S3method(prep,step_lag_difference)
6771
S3method(prep,step_population_scaling)
@@ -92,6 +96,8 @@ S3method(print,step_adjust_latency)
9296
S3method(print,step_epi_ahead)
9397
S3method(print,step_epi_lag)
9498
S3method(print,step_epi_slide)
99+
S3method(print,step_epi_slide_mean)
100+
S3method(print,step_epi_slide_sum)
95101
S3method(print,step_growth_rate)
96102
S3method(print,step_lag_difference)
97103
S3method(print,step_naomit)
@@ -202,6 +208,8 @@ export(step_epi_ahead)
202208
export(step_epi_lag)
203209
export(step_epi_naomit)
204210
export(step_epi_slide)
211+
export(step_epi_slide_mean)
212+
export(step_epi_slide_sum)
205213
export(step_growth_rate)
206214
export(step_lag_difference)
207215
export(step_population_scaling)
@@ -272,6 +280,8 @@ importFrom(glue,glue)
272280
importFrom(hardhat,extract_recipe)
273281
importFrom(hardhat,refresh_blueprint)
274282
importFrom(hardhat,run_mold)
283+
importFrom(lubridate,is.period)
284+
importFrom(lubridate,time_length)
275285
importFrom(magrittr,"%>%")
276286
importFrom(magrittr,extract2)
277287
importFrom(recipes,bake)

R/step_epi_slide_mean.R

+188
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
#' Calculate a rolling mean
2+
#'
3+
#' `step_epi_slide_mean()` creates a *specification* of a recipe step that will
4+
#' generate one or more new columns of derived data by computing a sliding
5+
#' mean along existing data.
6+
#'
7+
#'
8+
#' @inheritParams step_epi_lag
9+
#' @param before,after non-negative integers.
10+
#' How far `before` and `after` each `time_value` should
11+
#' the sliding window extend? Any value provided for either
12+
#' argument must be a single, non-`NA`, non-negative,
13+
#' [integer-compatible][vctrs::vec_cast] number of time steps. Endpoints of
14+
#' the window are inclusive. Common settings:
15+
#' * For trailing/right-aligned windows from `time_value - time_step(k)` to
16+
#' `time_value`, use `before=k, after=0`. This is the most likely use case
17+
#' for the purposes of forecasting.
18+
#' * For center-aligned windows from `time_value - time_step(k)` to
19+
#' `time_value + time_step(k)`, use `before=k, after=k`.
20+
#' * For leading/left-aligned windows from `time_value` to
21+
#' `time_value + time_step(k)`, use `before=0, after=k`.
22+
#'
23+
#' You may also pass a [lubridate::period], like `lubridate::weeks(1)` or a
24+
#' character string that is coercible to a [lubridate::period], like
25+
#' `"2 weeks"`.
26+
#' @template step-return
27+
#'
28+
#' @export
29+
#' @examples
30+
#' library(dplyr)
31+
#' jhu <- case_death_rate_subset %>%
32+
#' filter(time_value >= as.Date("2021-01-01"), geo_value %in% c("ca", "ny"))
33+
#' rec <- epi_recipe(jhu) %>%
34+
#' step_epi_slide_mean(case_rate, death_rate,
35+
#' before = 6L
37+
#' )
38+
#' bake(prep(rec, jhu), new_data = NULL)
39+
step_epi_slide_mean <- function(recipe,
                                ...,
                                before = 0L,
                                after = 0L,
                                role = "predictor",
                                prefix = "epi_slide_mean_",
                                skip = FALSE,
                                id = rand_id("epi_slide_mean")) {
  # This step is only defined for `epi_recipe` objects; reject anything else.
  if (!is_epi_recipe(recipe)) {
    rlang::abort("This recipe step can only operate on an `epi_recipe`.")
  }

  # Validate the window arguments, then hand them to `try_period()`
  # (presumably this coerces period-like strings -- confirm against helper).
  arg_is_scalar(before, after)
  window_before <- try_period(before)
  window_after <- try_period(after)

  arg_is_chr_scalar(role, prefix, id)
  arg_is_lgl_scalar(skip)

  # Build the untrained step; `columns` is filled in later by `prep()`.
  untrained_step <- step_epi_slide_mean_new(
    terms = enquos(...),
    before = window_before,
    after = window_after,
    role = role,
    trained = FALSE,
    prefix = prefix,
    keys = epi_keys(recipe),
    columns = NULL,
    skip = skip,
    id = id
  )
  add_step(recipe, untrained_step)
}
72+
73+
74+
# Low-level constructor: packs every field into a step object with
# subclass "epi_slide_mean". Used both for the untrained specification and
# for the trained step returned by `prep()`.
step_epi_slide_mean_new <- function(terms, before, after, role, trained,
                                    prefix, keys, columns, skip, id) {
  step(
    subclass = "epi_slide_mean",
    terms = terms, before = before, after = after,
    role = role, trained = trained,
    prefix = prefix, keys = keys, columns = columns,
    skip = skip, id = id
  )
}
99+
100+
101+
102+
# Train the step: resolve the selected columns against `training`, check that
# they are numeric, and convert any lubridate-period window sizes into integer
# counts of the data's `time_type`.
#' @export
prep.step_epi_slide_mean <- function(x, training, info = NULL, ...) {
  selected <- recipes::recipes_eval_select(
    x$terms,
    data = training, info = info
  )
  check_type(training[, selected], types = c("double", "integer"))

  # The time type is read from the metadata attribute attached to `training`.
  time_type <- attributes(training)$metadata$time_type

  step_epi_slide_mean_new(
    terms = x$terms,
    before = lubridate_period_to_integer(x$before, time_type),
    after = lubridate_period_to_integer(x$after, time_type),
    role = x$role,
    trained = TRUE,
    prefix = x$prefix,
    keys = x$keys,
    columns = selected,
    skip = x$skip,
    id = x$id
  )
}
123+
124+
#' Convert a lubridate period into an integer number of time steps
#'
#' lubridate converts to seconds by default, and `as.integer()` doesn't throw
#' errors if the value isn't actually an integer, so the conversion is done
#' explicitly in units of `time_type` and checked for a fractional remainder.
#'
#' @param value Either a [lubridate::period] or any other value. Non-period
#'   values are returned unchanged.
#' @param time_type The time type of the data. Only `"day"` and `"week"` are
#'   supported when `value` is a period.
#' @return `value` unchanged if it is not a period; otherwise the length of
#'   the period in `time_type` units, as an integer.
#' @importFrom lubridate time_length is.period
#' @keywords internal
lubridate_period_to_integer <- function(value, time_type) {
  # Anything that is not a period passes through untouched.
  if (!is.period(value)) {
    return(value)
  }

  # `isTRUE()` also covers a missing (NULL) time type, which would otherwise
  # make the `if` condition fail with an uninformative error.
  if (!isTRUE(time_type %in% c("day", "week"))) {
    cli_abort(
      "unsupported time type of {time_type}. Use integer instead of lubridate period.",
      class = "epipredict__step_epi_slide_mean__unsupported_error"
    )
  }

  n_steps <- time_length(value, unit = time_type)
  if (n_steps %% 1 != 0) {
    # Only names defined in this function may be interpolated here; the
    # function is used for both `before` and `after`, so the message stays
    # generic.
    cli_abort(
      "Converted period of {n_steps} {time_type}(s) is not an integer number of time steps.",
      class = "epipredict__step_epi_slide_mean__unsupported_error"
    )
  }
  as.integer(n_steps)
}
149+
150+
151+
# Apply the trained step: compute the sliding mean of each selected column and
# attach the results under `<prefix><column>` names. Aborts if any of those
# output names already exists in `new_data`.
#' @export
bake.step_epi_slide_mean <- function(object, new_data, ...) {
  recipes::check_new_data(names(object$columns), object, new_data)
  col_names <- as.vector(object$columns)
  name_prefix <- object$prefix
  new_names <- glue::glue("{name_prefix}{col_names}")
  ## ensure no name clashes
  new_data_names <- colnames(new_data)
  intersection <- new_data_names %in% new_names
  if (any(intersection)) {
    nms <- new_data_names[intersection]
    cli_abort(
      c("In `step_epi_slide_mean()` a name collision occurred. The following variable names already exist:",
        `*` = "{.var {nms}}"
      ),
      call = caller_env(),
      class = "epipredict__step__name_collision_error"
    )
  }
  # Map new name -> the `slide_value_<column>` name the slide output is
  # renamed from.
  renaming <- glue::glue("slide_value_{col_names}")
  names(renaming) <- new_names
  # Group by every key except the first before sliding.
  # NOTE(review): the result is returned still grouped -- confirm downstream
  # steps expect that.
  new_data %>%
    group_by(across(all_of(object$keys[-1]))) %>%
    epi_slide_mean(col_names, before = object$before, after = object$after) %>%
    # `all_of()` marks `renaming` as an external vector, so it cannot be
    # mistaken for a column of that name; this matches the sum step.
    rename(all_of(renaming))
}
178+
179+
180+
# Print a one-line summary of the step. The step has no `f_name` field (that
# belongs to `step_epi_slide()`), so the window bounds are shown instead.
# `width` is kept in the signature for compatibility but is not used here.
#' @export
print.step_epi_slide_mean <- function(x, width = max(20, options()$width - 30), ...) {
  # `format()` handles both integer windows and not-yet-converted periods.
  window_text <- paste0(
    "before = ", format(x$before),
    ", after = ", format(x$after)
  )
  print_epi_step(
    x$columns, x$terms, x$trained,
    title = "Calculating epi_slide_mean for ",
    conjunction = "with", extra_text = window_text
  )
  invisible(x)
}

R/step_epi_slide_sum.R

+160
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
#' Calculate a rolling sum
2+
#'
3+
#' `step_epi_slide_sum()` creates a *specification* of a recipe step that will
4+
#' generate one or more new columns of derived data by computing a sliding
5+
#' sum along existing data.
6+
#'
7+
#'
8+
#' @inheritParams step_epi_lag
9+
#' @param before,after non-negative integers.
10+
#' How far `before` and `after` each `time_value` should
11+
#' the sliding window extend? Any value provided for either
12+
#' argument must be a single, non-`NA`, non-negative,
13+
#' [integer-compatible][vctrs::vec_cast] number of time steps. Endpoints of
14+
#' the window are inclusive. Common settings:
15+
#' * For trailing/right-aligned windows from `time_value - time_step(k)` to
16+
#' `time_value`, use `before=k, after=0`. This is the most likely use case
17+
#' for the purposes of forecasting.
18+
#' * For center-aligned windows from `time_value - time_step(k)` to
19+
#' `time_value + time_step(k)`, use `before=k, after=k`.
20+
#' * For leading/left-aligned windows from `time_value` to
21+
#' `time_value + time_step(k)`, use `before=0, after=k`.
22+
#'
23+
#' You may also pass a [lubridate::period], like `lubridate::weeks(1)` or a
24+
#' character string that is coercible to a [lubridate::period], like
25+
#' `"2 weeks"`.
26+
#' @template step-return
27+
#'
28+
#' @export
29+
#' @examples
30+
#' library(dplyr)
31+
#' jhu <- case_death_rate_subset %>%
32+
#' filter(time_value >= as.Date("2021-01-01"), geo_value %in% c("ca", "ny"))
33+
#' rec <- epi_recipe(jhu) %>%
34+
#' step_epi_slide_sum(case_rate, death_rate,
35+
#' before = 6L
36+
#' )
37+
#' bake(prep(rec, jhu), new_data = NULL)
38+
step_epi_slide_sum <- function(recipe,
                               ...,
                               before = 0L,
                               after = 0L,
                               role = "predictor",
                               prefix = "epi_slide_sum_",
                               skip = FALSE,
                               id = rand_id("epi_slide_sum")) {
  # This step is only defined for `epi_recipe` objects; reject anything else.
  if (!is_epi_recipe(recipe)) {
    rlang::abort("This recipe step can only operate on an `epi_recipe`.")
  }

  # Validate the window arguments, then hand them to `try_period()`
  # (presumably this coerces period-like strings -- confirm against helper).
  arg_is_scalar(before, after)
  window_before <- try_period(before)
  window_after <- try_period(after)

  arg_is_chr_scalar(role, prefix, id)
  arg_is_lgl_scalar(skip)

  # Build the untrained step; `columns` is filled in later by `prep()`.
  untrained_step <- step_epi_slide_sum_new(
    terms = enquos(...),
    before = window_before,
    after = window_after,
    role = role,
    trained = FALSE,
    prefix = prefix,
    keys = epi_keys(recipe),
    columns = NULL,
    skip = skip,
    id = id
  )
  add_step(recipe, untrained_step)
}
71+
72+
73+
# Low-level constructor: packs every field into a step object with
# subclass "epi_slide_sum". Used both for the untrained specification and
# for the trained step returned by `prep()`.
step_epi_slide_sum_new <- function(terms, before, after, role, trained,
                                   prefix, keys, columns, skip, id) {
  step(
    subclass = "epi_slide_sum",
    terms = terms, before = before, after = after,
    role = role, trained = trained,
    prefix = prefix, keys = keys, columns = columns,
    skip = skip, id = id
  )
}
98+
99+
100+
101+
# Train the step: resolve the selected columns against `training`, check that
# they are numeric, and convert any lubridate-period window sizes into integer
# counts of the data's `time_type`.
#' @export
prep.step_epi_slide_sum <- function(x, training, info = NULL, ...) {
  selected <- recipes::recipes_eval_select(
    x$terms,
    data = training, info = info
  )
  check_type(training[, selected], types = c("double", "integer"))

  # The time type is read from the metadata attribute attached to `training`.
  time_type <- attributes(training)$metadata$time_type

  step_epi_slide_sum_new(
    terms = x$terms,
    before = lubridate_period_to_integer(x$before, time_type),
    after = lubridate_period_to_integer(x$after, time_type),
    role = x$role,
    trained = TRUE,
    prefix = x$prefix,
    keys = x$keys,
    columns = selected,
    skip = x$skip,
    id = x$id
  )
}
122+
123+
# Apply the trained step: compute the sliding sum of each selected column and
# attach the results under `<prefix><column>` names. Aborts if any of those
# output names already exists in `new_data`.
#' @export
bake.step_epi_slide_sum <- function(object, new_data, ...) {
  recipes::check_new_data(names(object$columns), object, new_data)
  col_names <- as.vector(object$columns)
  name_prefix <- object$prefix
  new_names <- glue::glue("{name_prefix}{col_names}")
  ## ensure no name clashes
  new_data_names <- colnames(new_data)
  intersection <- new_data_names %in% new_names
  if (any(intersection)) {
    nms <- new_data_names[intersection]
    cli_abort(
      c("In `step_epi_slide_sum()` a name collision occurred. The following variable names already exist:",
        `*` = "{.var {nms}}"
      ),
      call = caller_env(),
      class = "epipredict__step__name_collision_error"
    )
  }
  # Map new name -> the `slide_value_<column>` name the slide output is
  # renamed from.
  renaming <- glue::glue("slide_value_{col_names}")
  names(renaming) <- new_names
  # Group by every key except the first before sliding.
  # NOTE(review): the result is returned still grouped -- confirm downstream
  # steps expect that.
  new_data %>%
    group_by(across(all_of(object$keys[-1]))) %>%
    epi_slide_sum(col_names, before = object$before, after = object$after) %>%
    rename(all_of(renaming))
}
150+
151+
152+
# Print a one-line summary of the step. The step has no `f_name` field (that
# belongs to `step_epi_slide()`), so the window bounds are shown instead.
# `width` is kept in the signature for compatibility but is not used here.
#' @export
print.step_epi_slide_sum <- function(x, width = max(20, options()$width - 30), ...) {
  # `format()` handles both integer windows and not-yet-converted periods.
  window_text <- paste0(
    "before = ", format(x$before),
    ", after = ", format(x$after)
  )
  print_epi_step(
    x$columns, x$terms, x$trained,
    title = "Calculating epi_slide_sum for ",
    conjunction = "with", extra_text = window_text
  )
  invisible(x)
}

0 commit comments

Comments
 (0)