Skip to content

Commit 7f8c4a7

Browse files
authored
Merge pull request #504 from cmu-delphi/ds/inf
fix: `before=Inf` in opt functions now actually works
2 parents 1d7b46f + 1fdafb2 commit 7f8c4a7

8 files changed

+245
-90
lines changed

DESCRIPTION

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Type: Package
22
Package: epiprocess
33
Title: Tools for basic signal processing in epidemiology
4-
Version: 0.8.1
4+
Version: 0.8.2
55
Authors@R: c(
66
person("Jacob", "Bien", role = "ctb"),
77
person("Logan", "Brooks", email = "[email protected]", role = c("aut", "cre")),
@@ -32,6 +32,7 @@ Imports:
3232
dplyr (>= 1.0.0),
3333
genlasso,
3434
ggplot2,
35+
glue,
3536
lifecycle (>= 1.0.1),
3637
lubridate,
3738
magrittr,

NEWS.md

+6
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat
99
- Added `complete.epi_df`, which fills in missing values in an `epi_df` with
1010
`NA`s. Uses `tidyr::complete` underneath and preserves `epi_df` metadata.
1111

12+
## Bug fixes
13+
14+
- Fix `epi_slide_opt` (and related functions) to correctly handle `before=Inf`.
15+
- Disallow `after=Inf` in slide functions, since it doesn't seem like a likely
16+
use case and complicates code.
17+
1218
# epiprocess 0.8
1319

1420
## Breaking changes

R/methods-epi_df.R

+6-1
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,12 @@ group_modify.epi_df <- function(.data, .f, ..., .keep = FALSE) {
286286
#' ) %>%
287287
#' as_epi_df(as_of = start_date + 3)
288288
#' daily_edf %>%
289-
#' complete(geo_value, time_value = full_seq(time_value, period = 1), fill = list(value = 0), explicit = FALSE)
289+
#' complete(
290+
#' geo_value,
291+
#' time_value = full_seq(time_value, period = 1),
292+
#' fill = list(value = 0),
293+
#' explicit = FALSE
294+
#' )
290295
#' # Complete works for weekly data and can take a fill value
291296
#' # No grouping
292297
#' weekly_edf <- tibble::tribble(

R/slide.R

+67-54
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,8 @@ epi_slide <- function(x, f, ..., before = NULL, after = NULL, ref_time_values =
112112
}
113113
ref_time_values <- sort(ref_time_values)
114114

115+
# Handle defaults for before/after
116+
time_type <- attr(x, "metadata")$time_type
115117
if (is.null(before) && !is.null(after)) {
116118
if (inherits(after, "difftime")) {
117119
before <- as.difftime(0, units = units(after))
@@ -123,11 +125,15 @@ epi_slide <- function(x, f, ..., before = NULL, after = NULL, ref_time_values =
123125
if (inherits(before, "difftime")) {
124126
after <- as.difftime(0, units = units(before))
125127
} else {
126-
after <- 0
128+
if (before == Inf && time_type %in% c("day", "week")) {
129+
after <- as.difftime(0, units = glue::glue("{time_type}s"))
130+
} else {
131+
after <- 0
132+
}
127133
}
128134
}
129-
validate_slide_window_arg(before, attr(x, "metadata")$time_type)
130-
validate_slide_window_arg(after, attr(x, "metadata")$time_type)
135+
validate_slide_window_arg(before, time_type)
136+
validate_slide_window_arg(after, time_type, allow_inf = FALSE)
131137

132138
# Arrange by increasing time_value
133139
x <- arrange(x, .data$time_value)
@@ -462,6 +468,8 @@ epi_slide_opt <- function(x, col_names, f, ..., before = NULL, after = NULL, ref
462468
}
463469
ref_time_values <- sort(ref_time_values)
464470

471+
# Handle defaults for before/after
472+
time_type <- attr(x, "metadata")$time_type
465473
if (is.null(before) && !is.null(after)) {
466474
if (inherits(after, "difftime")) {
467475
before <- as.difftime(0, units = units(after))
@@ -473,22 +481,22 @@ epi_slide_opt <- function(x, col_names, f, ..., before = NULL, after = NULL, ref
473481
if (inherits(before, "difftime")) {
474482
after <- as.difftime(0, units = units(before))
475483
} else {
476-
after <- 0
484+
if (before == Inf && time_type %in% c("day", "week")) {
485+
after <- as.difftime(0, units = glue::glue("{time_type}s"))
486+
} else {
487+
after <- 0
488+
}
477489
}
478490
}
479-
validate_slide_window_arg(before, attr(x, "metadata")$time_type)
480-
validate_slide_window_arg(after, attr(x, "metadata")$time_type)
491+
validate_slide_window_arg(before, time_type)
492+
validate_slide_window_arg(after, time_type, allow_inf = FALSE)
481493

482494
# Make a complete date sequence between min(x$time_value) and max(x$time_value).
483-
date_seq_list <- full_date_seq(x, before, after, attr(x, "metadata")$time_type)
495+
date_seq_list <- full_date_seq(x, before, after, time_type)
484496
all_dates <- date_seq_list$all_dates
485497
pad_early_dates <- date_seq_list$pad_early_dates
486498
pad_late_dates <- date_seq_list$pad_late_dates
487499

488-
# `frollmean` is 1-indexed, so create a new window width based on our
489-
# `before` and `after` params.
490-
window_size <- before + after + 1L
491-
492500
# The position of a given column can be differ between input `x` and
493501
# `.data_group` since the grouping step by default drops grouping columns.
494502
# To avoid rerunning `eval_select` for every `.data_group`, convert
@@ -501,7 +509,6 @@ epi_slide_opt <- function(x, col_names, f, ..., before = NULL, after = NULL, ref
501509
result_col_names <- paste0("slide_value_", col_names_chr)
502510
slide_one_grp <- function(.data_group, .group_key, ...) {
503511
missing_times <- all_dates[!(all_dates %in% .data_group$time_value)]
504-
505512
# `frollmean` requires a full window to compute a result. Add NA values
506513
# to beginning and end of the group so that we get results for the
507514
# first `before` and last `after` elements.
@@ -511,55 +518,61 @@ epi_slide_opt <- function(x, col_names, f, ..., before = NULL, after = NULL, ref
511518
) %>%
512519
arrange(.data$time_value)
513520

514-
# If a group contains duplicate time values, `frollmean` will still only
515-
# use the last `k` obs. It isn't looking at dates, it just goes in row
516-
# order. So if the computation is aggregating across multiple obs for the
517-
# same date, `epi_slide_opt` and derivates will produce incorrect
518-
# results; `epi_slide` should be used instead.
519-
if (anyDuplicated(.data_group$time_value) != 0L) {
520-
cli_abort(
521-
c(
522-
"group contains duplicate time values. Using `epi_slide_[opt/mean/sum]` on this
523-
group will result in incorrect results",
524-
"i" = "Please change the grouping structure of the input data so that
525-
each group has non-duplicate time values (e.g. `x %>% group_by(geo_value)
526-
%>% epi_slide_opt(f = frollmean)`)",
527-
"i" = "Use `epi_slide` to aggregate across groups"
528-
),
529-
class = "epiprocess__epi_slide_opt__duplicate_time_values",
530-
epiprocess__data_group = .data_group,
531-
epiprocess__group_key = .group_key
532-
)
533-
}
534-
if (nrow(.data_group) != length(c(all_dates, pad_early_dates, pad_late_dates))) {
535-
cli_abort(
536-
c(
537-
"group contains an unexpected number of rows",
538-
"i" = c("Input data may contain `time_values` closer together than the
539-
expected `time_step` size")
540-
),
541-
class = "epiprocess__epi_slide_opt__unexpected_row_number",
542-
epiprocess__data_group = .data_group,
543-
epiprocess__group_key = .group_key
544-
)
545-
}
546-
547521
if (f_from_package == "data.table") {
548-
roll_output <- f(
549-
x = .data_group[, col_names_chr], n = window_size, align = "right", ...
550-
)
522+
# If a group contains duplicate time values, `frollmean` will still only
523+
# use the last `k` obs. It isn't looking at dates, it just goes in row
524+
# order. So if the computation is aggregating across multiple obs for the
525+
# same date, `epi_slide_opt` and derivates will produce incorrect results;
526+
# `epi_slide` should be used instead.
527+
if (anyDuplicated(.data_group$time_value) != 0L) {
528+
cli_abort(
529+
c(
530+
"group contains duplicate time values. Using `epi_slide_[opt/mean/sum]` on this
531+
group will result in incorrect results",
532+
"i" = "Please change the grouping structure of the input data so that
533+
each group has non-duplicate time values (e.g. `x %>% group_by(geo_value)
534+
%>% epi_slide_opt(f = frollmean)`)",
535+
"i" = "Use `epi_slide` to aggregate across groups"
536+
),
537+
class = "epiprocess__epi_slide_opt__duplicate_time_values",
538+
epiprocess__data_group = .data_group,
539+
epiprocess__group_key = .group_key
540+
)
541+
}
542+
543+
if (nrow(.data_group) != length(c(all_dates, pad_early_dates, pad_late_dates))) {
544+
cli_abort(
545+
c(
546+
"group contains an unexpected number of rows",
547+
"i" = c("Input data may contain `time_values` closer together than the
548+
expected `time_step` size")
549+
),
550+
class = "epiprocess__epi_slide_opt__unexpected_row_number",
551+
epiprocess__data_group = .data_group,
552+
epiprocess__group_key = .group_key
553+
)
554+
}
551555

556+
# `frollmean` is 1-indexed, so create a new window width based on our
557+
# `before` and `after` params. Right-aligned `frollmean` results'
558+
# `ref_time_value`s will be `after` timesteps ahead of where they should
559+
# be; shift results to the left by `after` timesteps.
560+
if (before != Inf) {
561+
window_size <- before + after + 1L
562+
roll_output <- f(x = .data_group[, col_names_chr], n = window_size, ...)
563+
} else {
564+
window_size <- list(seq_along(.data_group$time_value))
565+
roll_output <- f(x = .data_group[, col_names_chr], n = window_size, adaptive = TRUE, ...)
566+
}
552567
if (after >= 1) {
553-
# Right-aligned `frollmean` results' `ref_time_value`s will be `after`
554-
# timesteps ahead of where they should be. Shift results to the left by
555-
# `after` timesteps.
556568
.data_group[, result_col_names] <- purrr::map(roll_output, function(.x) {
557569
c(.x[(after + 1L):length(.x)], rep(NA, after))
558570
})
559571
} else {
560572
.data_group[, result_col_names] <- roll_output
561573
}
562-
} else if (f_from_package == "slider") {
574+
}
575+
if (f_from_package == "slider") {
563576
for (i in seq_along(col_names_chr)) {
564577
.data_group[, result_col_names[i]] <- f(
565578
x = .data_group[[col_names_chr[i]]],
@@ -746,7 +759,7 @@ full_date_seq <- function(x, before, after, time_type) {
746759
if (time_type %in% c("yearmonth", "integer")) {
747760
all_dates <- seq(min(x$time_value), max(x$time_value), by = 1L)
748761

749-
if (before != 0) {
762+
if (before != 0 && before != Inf) {
750763
pad_early_dates <- all_dates[1L] - before:1
751764
}
752765
if (after != 0) {
@@ -759,7 +772,7 @@ full_date_seq <- function(x, before, after, time_type) {
759772
)
760773

761774
all_dates <- seq(min(x$time_value), max(x$time_value), by = by)
762-
if (before != 0) {
775+
if (before != 0 && before != Inf) {
763776
# The behavior is analogous to the branch with tsibble types above. For
764777
# more detail, note that the function `seq.Date(from, ..., length.out =
765778
# n)` returns `from + 0:n`. Since we want `from + 1:n`, we drop the first

R/utils.R

+16-9
Original file line numberDiff line numberDiff line change
@@ -803,40 +803,47 @@ guess_period.POSIXt <- function(time_values, time_values_arg = rlang::caller_arg
803803
as.numeric(NextMethod(), units = "secs")
804804
}
805805

806-
807-
validate_slide_window_arg <- function(arg, time_type, arg_name = rlang::caller_arg(arg)) {
806+
validate_slide_window_arg <- function(arg, time_type, allow_inf = TRUE, arg_name = rlang::caller_arg(arg)) {
808807
if (is.null(arg)) {
809-
cli_abort("`{arg_name}` is a required argument.")
808+
cli_abort("`{arg_name}` is a required argument for slide functions.")
810809
}
811810

812811
if (!checkmate::test_scalar(arg)) {
813-
cli_abort("Expected `{arg_name}` to be a scalar value.")
812+
cli_abort("Slide function expected `{arg_name}` to be a scalar value.")
814813
}
815814

816815
if (time_type == "custom") {
817816
cli_abort("Unsure how to interpret slide units with a custom time type. Consider converting your time
818817
column to a Date, yearmonth, or integer type.")
819818
}
820819

820+
msg <- ""
821821
if (!identical(arg, Inf)) {
822822
if (time_type == "day") {
823823
if (!test_int(arg, lower = 0L) && !(inherits(arg, "difftime") && units(arg) == "days")) {
824-
cli_abort("Expected `{arg_name}` to be a difftime with units in days or a non-negative integer.")
824+
msg <- glue::glue_collapse(c("difftime with units in days", "non-negative integer", "Inf"), " or ")
825825
}
826826
} else if (time_type == "week") {
827827
if (!(inherits(arg, "difftime") && units(arg) == "weeks")) {
828-
cli_abort("Expected `{arg_name}` to be a difftime with units in weeks.")
828+
msg <- glue::glue_collapse(c("difftime with units in weeks", "Inf"), " or ")
829829
}
830830
} else if (time_type == "yearmonth") {
831831
if (!test_int(arg, lower = 0L) || inherits(arg, "difftime")) {
832-
cli_abort("Expected `{arg_name}` to be a non-negative integer.")
832+
msg <- glue::glue_collapse(c("non-negative integer", "Inf"), " or ")
833833
}
834834
} else if (time_type == "integer") {
835835
if (!test_int(arg, lower = 0L) || inherits(arg, "difftime")) {
836-
cli_abort("Expected `{arg_name}` to be a non-negative integer.")
836+
msg <- glue::glue_collapse(c("non-negative integer", "Inf"), " or ")
837837
}
838838
} else {
839-
cli_abort("Expected `{arg_name}` to be Inf, an appropriate a difftime, or a non-negative integer.")
839+
msg <- glue::glue_collapse(c("difftime", "non-negative integer", "Inf"), " or ")
840+
}
841+
} else {
842+
if (!allow_inf) {
843+
msg <- glue::glue_collapse(c("a difftime", "a non-negative integer"), " or ")
840844
}
841845
}
846+
if (msg != "") {
847+
cli_abort("Slide function expected `{arg_name}` to be a {msg}.")
848+
}
842849
}

man/complete.epi_df.Rd

+6-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)