@@ -112,6 +112,8 @@ epi_slide <- function(x, f, ..., before = NULL, after = NULL, ref_time_values =
112
112
}
113
113
ref_time_values <- sort(ref_time_values )
114
114
115
+ # Handle defaults for before/after
116
+ time_type <- attr(x , " metadata" )$ time_type
115
117
if (is.null(before ) && ! is.null(after )) {
116
118
if (inherits(after , " difftime" )) {
117
119
before <- as.difftime(0 , units = units(after ))
@@ -123,11 +125,15 @@ epi_slide <- function(x, f, ..., before = NULL, after = NULL, ref_time_values =
123
125
if (inherits(before , " difftime" )) {
124
126
after <- as.difftime(0 , units = units(before ))
125
127
} else {
126
- after <- 0
128
+ if (before == Inf && time_type %in% c(" day" , " week" )) {
129
+ after <- as.difftime(0 , units = glue :: glue(" {time_type}s" ))
130
+ } else {
131
+ after <- 0
132
+ }
127
133
}
128
134
}
129
- validate_slide_window_arg(before , attr( x , " metadata " ) $ time_type )
130
- validate_slide_window_arg(after , attr( x , " metadata " ) $ time_type )
135
+ validate_slide_window_arg(before , time_type )
136
+ validate_slide_window_arg(after , time_type , allow_inf = FALSE )
131
137
132
138
# Arrange by increasing time_value
133
139
x <- arrange(x , .data $ time_value )
@@ -462,6 +468,8 @@ epi_slide_opt <- function(x, col_names, f, ..., before = NULL, after = NULL, ref
462
468
}
463
469
ref_time_values <- sort(ref_time_values )
464
470
471
+ # Handle defaults for before/after
472
+ time_type <- attr(x , " metadata" )$ time_type
465
473
if (is.null(before ) && ! is.null(after )) {
466
474
if (inherits(after , " difftime" )) {
467
475
before <- as.difftime(0 , units = units(after ))
@@ -473,22 +481,22 @@ epi_slide_opt <- function(x, col_names, f, ..., before = NULL, after = NULL, ref
473
481
if (inherits(before , " difftime" )) {
474
482
after <- as.difftime(0 , units = units(before ))
475
483
} else {
476
- after <- 0
484
+ if (before == Inf && time_type %in% c(" day" , " week" )) {
485
+ after <- as.difftime(0 , units = glue :: glue(" {time_type}s" ))
486
+ } else {
487
+ after <- 0
488
+ }
477
489
}
478
490
}
479
- validate_slide_window_arg(before , attr( x , " metadata " ) $ time_type )
480
- validate_slide_window_arg(after , attr( x , " metadata " ) $ time_type )
491
+ validate_slide_window_arg(before , time_type )
492
+ validate_slide_window_arg(after , time_type , allow_inf = FALSE )
481
493
482
494
# Make a complete date sequence between min(x$time_value) and max(x$time_value).
483
- date_seq_list <- full_date_seq(x , before , after , attr( x , " metadata " ) $ time_type )
495
+ date_seq_list <- full_date_seq(x , before , after , time_type )
484
496
all_dates <- date_seq_list $ all_dates
485
497
pad_early_dates <- date_seq_list $ pad_early_dates
486
498
pad_late_dates <- date_seq_list $ pad_late_dates
487
499
488
- # `frollmean` is 1-indexed, so create a new window width based on our
489
- # `before` and `after` params.
490
- window_size <- before + after + 1L
491
-
492
500
# The position of a given column can be differ between input `x` and
493
501
# `.data_group` since the grouping step by default drops grouping columns.
494
502
# To avoid rerunning `eval_select` for every `.data_group`, convert
@@ -501,7 +509,6 @@ epi_slide_opt <- function(x, col_names, f, ..., before = NULL, after = NULL, ref
501
509
result_col_names <- paste0(" slide_value_" , col_names_chr )
502
510
slide_one_grp <- function (.data_group , .group_key , ... ) {
503
511
missing_times <- all_dates [! (all_dates %in% .data_group $ time_value )]
504
-
505
512
# `frollmean` requires a full window to compute a result. Add NA values
506
513
# to beginning and end of the group so that we get results for the
507
514
# first `before` and last `after` elements.
@@ -511,55 +518,61 @@ epi_slide_opt <- function(x, col_names, f, ..., before = NULL, after = NULL, ref
511
518
) %> %
512
519
arrange(.data $ time_value )
513
520
514
- # If a group contains duplicate time values, `frollmean` will still only
515
- # use the last `k` obs. It isn't looking at dates, it just goes in row
516
- # order. So if the computation is aggregating across multiple obs for the
517
- # same date, `epi_slide_opt` and derivates will produce incorrect
518
- # results; `epi_slide` should be used instead.
519
- if (anyDuplicated(.data_group $ time_value ) != 0L ) {
520
- cli_abort(
521
- c(
522
- " group contains duplicate time values. Using `epi_slide_[opt/mean/sum]` on this
523
- group will result in incorrect results" ,
524
- " i" = " Please change the grouping structure of the input data so that
525
- each group has non-duplicate time values (e.g. `x %>% group_by(geo_value)
526
- %>% epi_slide_opt(f = frollmean)`)" ,
527
- " i" = " Use `epi_slide` to aggregate across groups"
528
- ),
529
- class = " epiprocess__epi_slide_opt__duplicate_time_values" ,
530
- epiprocess__data_group = .data_group ,
531
- epiprocess__group_key = .group_key
532
- )
533
- }
534
- if (nrow(.data_group ) != length(c(all_dates , pad_early_dates , pad_late_dates ))) {
535
- cli_abort(
536
- c(
537
- " group contains an unexpected number of rows" ,
538
- " i" = c(" Input data may contain `time_values` closer together than the
539
- expected `time_step` size" )
540
- ),
541
- class = " epiprocess__epi_slide_opt__unexpected_row_number" ,
542
- epiprocess__data_group = .data_group ,
543
- epiprocess__group_key = .group_key
544
- )
545
- }
546
-
547
521
if (f_from_package == " data.table" ) {
548
- roll_output <- f(
549
- x = .data_group [, col_names_chr ], n = window_size , align = " right" , ...
550
- )
522
+ # If a group contains duplicate time values, `frollmean` will still only
523
+ # use the last `k` obs. It isn't looking at dates, it just goes in row
524
+ # order. So if the computation is aggregating across multiple obs for the
525
+ # same date, `epi_slide_opt` and derivates will produce incorrect results;
526
+ # `epi_slide` should be used instead.
527
+ if (anyDuplicated(.data_group $ time_value ) != 0L ) {
528
+ cli_abort(
529
+ c(
530
+ " group contains duplicate time values. Using `epi_slide_[opt/mean/sum]` on this
531
+ group will result in incorrect results" ,
532
+ " i" = " Please change the grouping structure of the input data so that
533
+ each group has non-duplicate time values (e.g. `x %>% group_by(geo_value)
534
+ %>% epi_slide_opt(f = frollmean)`)" ,
535
+ " i" = " Use `epi_slide` to aggregate across groups"
536
+ ),
537
+ class = " epiprocess__epi_slide_opt__duplicate_time_values" ,
538
+ epiprocess__data_group = .data_group ,
539
+ epiprocess__group_key = .group_key
540
+ )
541
+ }
542
+
543
+ if (nrow(.data_group ) != length(c(all_dates , pad_early_dates , pad_late_dates ))) {
544
+ cli_abort(
545
+ c(
546
+ " group contains an unexpected number of rows" ,
547
+ " i" = c(" Input data may contain `time_values` closer together than the
548
+ expected `time_step` size" )
549
+ ),
550
+ class = " epiprocess__epi_slide_opt__unexpected_row_number" ,
551
+ epiprocess__data_group = .data_group ,
552
+ epiprocess__group_key = .group_key
553
+ )
554
+ }
551
555
556
+ # `frollmean` is 1-indexed, so create a new window width based on our
557
+ # `before` and `after` params. Right-aligned `frollmean` results'
558
+ # `ref_time_value`s will be `after` timesteps ahead of where they should
559
+ # be; shift results to the left by `after` timesteps.
560
+ if (before != Inf ) {
561
+ window_size <- before + after + 1L
562
+ roll_output <- f(x = .data_group [, col_names_chr ], n = window_size , ... )
563
+ } else {
564
+ window_size <- list (seq_along(.data_group $ time_value ))
565
+ roll_output <- f(x = .data_group [, col_names_chr ], n = window_size , adaptive = TRUE , ... )
566
+ }
552
567
if (after > = 1 ) {
553
- # Right-aligned `frollmean` results' `ref_time_value`s will be `after`
554
- # timesteps ahead of where they should be. Shift results to the left by
555
- # `after` timesteps.
556
568
.data_group [, result_col_names ] <- purrr :: map(roll_output , function (.x ) {
557
569
c(.x [(after + 1L ): length(.x )], rep(NA , after ))
558
570
})
559
571
} else {
560
572
.data_group [, result_col_names ] <- roll_output
561
573
}
562
- } else if (f_from_package == " slider" ) {
574
+ }
575
+ if (f_from_package == " slider" ) {
563
576
for (i in seq_along(col_names_chr )) {
564
577
.data_group [, result_col_names [i ]] <- f(
565
578
x = .data_group [[col_names_chr [i ]]],
@@ -746,7 +759,7 @@ full_date_seq <- function(x, before, after, time_type) {
746
759
if (time_type %in% c(" yearmonth" , " integer" )) {
747
760
all_dates <- seq(min(x $ time_value ), max(x $ time_value ), by = 1L )
748
761
749
- if (before != 0 ) {
762
+ if (before != 0 && before != Inf ) {
750
763
pad_early_dates <- all_dates [1L ] - before : 1
751
764
}
752
765
if (after != 0 ) {
@@ -759,7 +772,7 @@ full_date_seq <- function(x, before, after, time_type) {
759
772
)
760
773
761
774
all_dates <- seq(min(x $ time_value ), max(x $ time_value ), by = by )
762
- if (before != 0 ) {
775
+ if (before != 0 && before != Inf ) {
763
776
# The behavior is analogous to the branch with tsibble types above. For
764
777
# more detail, note that the function `seq.Date(from, ..., length.out =
765
778
# n)` returns `from + 0:n`. Since we want `from + 1:n`, we drop the first
0 commit comments