-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathepi_slide_opt.Rd
234 lines (206 loc) · 9.49 KB
/
epi_slide_opt.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/epi_slide_opt_edf.R
\name{epi_slide_opt}
\alias{epi_slide_opt}
\alias{epi_slide_mean}
\alias{epi_slide_sum}
\title{Optimized slide functions for common cases}
\usage{
epi_slide_opt(
.x,
.col_names,
.f,
...,
.window_size = NULL,
.align = c("right", "center", "left"),
.prefix = NULL,
.suffix = NULL,
.new_col_names = NULL,
.ref_time_values = NULL,
.all_rows = FALSE
)
epi_slide_mean(
.x,
.col_names,
...,
.window_size = NULL,
.align = c("right", "center", "left"),
.prefix = NULL,
.suffix = NULL,
.new_col_names = NULL,
.ref_time_values = NULL,
.all_rows = FALSE
)
epi_slide_sum(
.x,
.col_names,
...,
.window_size = NULL,
.align = c("right", "center", "left"),
.prefix = NULL,
.suffix = NULL,
.new_col_names = NULL,
.ref_time_values = NULL,
.all_rows = FALSE
)
}
\arguments{
\item{.x}{An \code{epi_df} object. If ungrouped, we temporarily group by \code{geo_value}
and any columns in \code{other_keys}. If grouped, we make sure the grouping is by
\code{geo_value} and \code{other_keys}.}
\item{.col_names}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> An unquoted column
name (e.g., \code{cases}), multiple column names (e.g., \code{c(cases, deaths)}),
\link[tidyselect:language]{other tidy-select expression}, or a vector of
characters (e.g. \code{c("cases", "deaths")}). Variable names can be used as if
they were positions in the data frame, so expressions like \code{x:y} can be
used to select a range of variables.
The tidy-selection renaming interface is not supported, and cannot be used
to provide output column names; if you want to customize the output column
names, use \code{\link[dplyr:rename]{dplyr::rename}} after the slide.}
\item{.f}{Function; together with \code{...} specifies the computation to slide.
\code{.f} must be one of \code{data.table}'s rolling functions
(\code{frollmean}, \code{frollsum}, \code{frollapply}. See \link[data.table:froll]{data.table::froll}) or one
of \code{slider}'s specialized sliding functions (\code{slide_mean}, \code{slide_sum},
etc. See \link[slider:summary-slide]{slider::summary-slide}).
The optimized \code{data.table} and \code{slider} functions can't be directly passed
as the computation function in \code{epi_slide} without careful handling to make
sure each computation group is made up of \code{.window_size} dates rather
than \code{.window_size} points. \code{epi_slide_opt} (and wrapper functions
\code{epi_slide_mean} and \code{epi_slide_sum}) take care of window completion
automatically to prevent associated errors.}
\item{...}{Additional arguments to pass to the slide computation \code{.f}, for
example, \code{algo} or \code{na.rm} in data.table functions. You don't need to
specify \code{.x}, \code{.window_size}, or \code{.align} (or \code{before}/\code{after} for slider
functions).}
\item{.window_size}{The size of the sliding window. The accepted values
depend on the type of the \code{time_value} column in \code{.x}:
\itemize{
\item if time type is \code{Date} and the cadence is daily, then \code{.window_size} can
be an integer (which will be interpreted in units of days) or a difftime
with units "days"
\item if time type is \code{Date} and the cadence is weekly, then \code{.window_size} must
be a \code{difftime} with units "weeks"
\item if time type is a \code{yearmonth} or an integer, then \code{.window_size} must be an
integer
}}
\item{.align}{The alignment of the sliding window.
\itemize{
\item If "right" (default), then the window has its end at the reference time.
This is likely the most common use case, e.g. \code{.window_size=7} and
\code{.align="right"} slides over the past week of data.
\item If "left", then the window has its start at the reference time.
\item If "center", then the window is centered at the reference time. If the
window size is odd, then the window will have floor(window_size/2) points
before and after the reference time; if the window size is even, then the
window will be asymmetric and have one more value before the reference time
than after.
}}
\item{.prefix}{Optional \code{\link[glue:glue]{glue::glue}} format string; name the slide result
column(s) by attaching this prefix to the corresponding input column(s).
Some shorthand is supported for basing the output names on \code{.window_size}
or other arguments; see "Prefix and suffix shorthand" below.}
\item{.suffix}{Optional \code{\link[glue:glue]{glue::glue}} format string; like \code{.prefix}. The
default naming behavior is equivalent to \code{.suffix = "_{.n}{.time_unit_abbr}{.align_abbr}{.f_abbr}"}. Can be used in combination
with \code{.prefix}.}
\item{.new_col_names}{Optional character vector with length matching the
number of input columns from \code{.col_names}; name the slide result column(s)
with these names. Cannot be used in combination with \code{.prefix} and/or
\code{.suffix}.}
\item{.ref_time_values}{The time values at which to compute the slide
values. By default, this is all the unique time values in \code{.x}.}
\item{.all_rows}{If \code{.all_rows = FALSE}, the default, then the output
\code{epi_df} will have only the rows that had a \code{time_value} in
\code{.ref_time_values}. Otherwise, all the rows from \code{.x} are included, with
a missing value marker (typically NA, but more technically the result of
\code{vctrs::vec_cast}-ing \code{NA} to the type of the slide computation output)
in the slide computation column(s) for rows whose \code{time_value} is not in
\code{.ref_time_values}.}
}
\value{
An \code{epi_df} object with one or more new slide computation columns
added. It will be ungrouped if \code{.x} was ungrouped, and have the same groups
as \code{.x} if \code{.x} was grouped.
}
\description{
\code{epi_slide_opt} calculates n-time-step rolling means&sums,
cumulative/"running" means&sums, or other operations supported by
\code{\link[data.table:froll]{data.table::froll}} or \code{\link[slider:summary-slide]{slider::summary-slide}} functions.
\itemize{
\item On \code{epi_df}s, it will take care of looping over \code{geo_value}s, temporarily
filling in time gaps with \code{NA}s and other work needed to ensure there are
exactly n consecutive time steps per computation, and has some other
convenience features. See \code{vignette("epi_df")} for more examples.
\item On \code{epi_archive}s, it will calculate the version history for these slide
computations and combine it with the version history for the rest of the
columns.
}
This function tends to be much faster than using \code{epi_slide()} and
\code{epix_slide()} directly.
\code{epi_slide_mean} is a wrapper around \code{epi_slide_opt} with \code{.f = data.table::frollmean}.
\code{epi_slide_sum} is a wrapper around \code{epi_slide_opt} with \code{.f = data.table::frollsum}.
}
\section{Prefix and suffix shorthand}{
\code{\link[glue:glue]{glue::glue}} format strings specially interpret content within curly
braces. E.g., \code{glue::glue("ABC{2 + 2}")} evaluates to \code{"ABC4"}. For \code{.prefix}
and \code{.suffix}, we provide \code{glue} with some additional variable bindings:
\itemize{
\item \code{{.n}} will be the number of time steps in the computation
corresponding to the \code{.window_size}.
\item \code{{.time_unit_abbr}} will be a lower-case letter corresponding to the
\code{time_type} of \code{.x}
\item \code{{.align_abbr}} will be \code{""} if \code{.align} is the default of \code{"right"};
otherwise, it will be the first letter of \code{.align}
\item \code{{.f_abbr}} will be a character vector containing a short abbreviation
for \code{.f} factoring in the input column type(s) for \code{.col_names}
}
}
\examples{
library(dplyr)
# Add a column (`cases_7dsum`) containing a 7-day trailing sum on `cases`:
cases_deaths_subset \%>\%
select(geo_value, time_value, cases) \%>\%
epi_slide_sum(cases, .window_size = 7)
# Add a column (`case_rate_7dav`) containing a 7-day trailing average on `case_rate`:
covid_case_death_rates_extended \%>\%
epi_slide_mean(case_rate, .window_size = 7)
# Use a less common specialized slide function:
cases_deaths_subset \%>\%
epi_slide_opt(cases, slider::slide_min, .window_size = 7)
# Specify output column names and/or a naming scheme:
cases_deaths_subset \%>\%
select(geo_value, time_value, cases) \%>\%
group_by(geo_value) \%>\%
epi_slide_sum(cases, .window_size = 7, .new_col_names = "case_sum") \%>\%
ungroup()
cases_deaths_subset \%>\%
select(geo_value, time_value, cases) \%>\%
group_by(geo_value) \%>\%
epi_slide_sum(cases, .window_size = 7, .prefix = "sum_") \%>\%
ungroup()
# Additional settings can be sent to the {data.table} and {slider} functions
# via `...`. This example passes some settings to `frollmean` for speed,
# accuracy, and to allow partially-missing windows:
covid_case_death_rates_extended \%>\%
epi_slide_mean(
case_rate,
.window_size = 7,
na.rm = TRUE, algo = "exact", hasNA = TRUE
)
# If the more specialized possibilities for `.f` don't cover your needs, you
# can use `epi_slide_opt` with `.f = data.table::frollapply` to apply a
# custom function at the cost of more computation time. See also `epi_slide`
# if you need something even more general.
cases_deaths_subset \%>\%
select(geo_value, time_value, case_rate_7d_av, death_rate_7d_av) \%>\%
epi_slide_opt(c(case_rate_7d_av, death_rate_7d_av),
data.table::frollapply,
FUN = median, .window_size = 28,
.suffix = "_{.n}{.time_unit_abbr}_median"
) \%>\%
print(n = 40)
# You can calculate entire version histories for the derived signals by
# calling `epi_slide_opt()` on an `epi_archive`:
case_death_rate_archive \%>\%
epi_slide_mean(case_rate, .window_size = 14)
}
\seealso{
\code{\link{epi_slide}} for the more general slide function
}