Skip to content

Commit 661cece

Browse files
committed
feat: WIP epix_epi_slide_opt
1 parent ad1f753 commit 661cece

7 files changed

+470
-0
lines changed

DESCRIPTION

+2
Original file line numberDiff line numberDiff line change
@@ -97,13 +97,15 @@ Collate:
9797
'epi_df.R'
9898
'epi_df_forbidden_methods.R'
9999
'epiprocess-package.R'
100+
'epix_epi_slide_opt.R'
100101
'group_by_epi_df_methods.R'
101102
'methods-epi_archive.R'
102103
'grouped_epi_archive.R'
103104
'growth_rate.R'
104105
'key_colnames.R'
105106
'methods-epi_df.R'
106107
'outliers.R'
108+
'patch.R'
107109
'reexports.R'
108110
'revision_analysis.R'
109111
'slide.R'

NAMESPACE

+12
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ S3method(dplyr_col_modify,col_modify_recorder_df)
2727
S3method(dplyr_col_modify,epi_df)
2828
S3method(dplyr_reconstruct,epi_df)
2929
S3method(dplyr_row_slice,epi_df)
30+
S3method(epix_epi_slide_opt,epi_archive)
31+
S3method(epix_epi_slide_opt,grouped_epi_archive)
3032
S3method(epix_slide,epi_archive)
3133
S3method(epix_slide,grouped_epi_archive)
3234
S3method(epix_truncate_versions_after,epi_archive)
@@ -72,6 +74,7 @@ export(epi_slide_mean)
7274
export(epi_slide_opt)
7375
export(epi_slide_sum)
7476
export(epix_as_of)
77+
export(epix_epi_slide_opt)
7578
export(epix_fill_through_version)
7679
export(epix_merge)
7780
export(epix_slide)
@@ -140,6 +143,7 @@ importFrom(data.table,address)
140143
importFrom(data.table,as.data.table)
141144
importFrom(data.table,between)
142145
importFrom(data.table,copy)
146+
importFrom(data.table,fifelse)
143147
importFrom(data.table,frollapply)
144148
importFrom(data.table,frollmean)
145149
importFrom(data.table,frollsum)
@@ -188,6 +192,7 @@ importFrom(lubridate,as.period)
188192
importFrom(lubridate,days)
189193
importFrom(lubridate,weeks)
190194
importFrom(magrittr,"%>%")
195+
importFrom(purrr,list_rbind)
191196
importFrom(purrr,map)
192197
importFrom(purrr,map_lgl)
193198
importFrom(rlang,"!!!")
@@ -196,6 +201,7 @@ importFrom(rlang,"%||%")
196201
importFrom(rlang,.data)
197202
importFrom(rlang,.env)
198203
importFrom(rlang,arg_match)
204+
importFrom(rlang,arg_match0)
199205
importFrom(rlang,caller_arg)
200206
importFrom(rlang,caller_env)
201207
importFrom(rlang,check_dots_empty)
@@ -231,10 +237,12 @@ importFrom(slider,slide_sum)
231237
importFrom(stats,cor)
232238
importFrom(stats,median)
233239
importFrom(tibble,as_tibble)
240+
importFrom(tibble,is_tibble)
234241
importFrom(tibble,new_tibble)
235242
importFrom(tibble,validate_tibble)
236243
importFrom(tidyr,complete)
237244
importFrom(tidyr,full_seq)
245+
importFrom(tidyr,nest)
238246
importFrom(tidyr,unnest)
239247
importFrom(tidyselect,any_of)
240248
importFrom(tidyselect,eval_select)
@@ -248,12 +256,16 @@ importFrom(vctrs,vec_cast)
248256
importFrom(vctrs,vec_cbind)
249257
importFrom(vctrs,vec_data)
250258
importFrom(vctrs,vec_duplicate_any)
259+
importFrom(vctrs,vec_duplicate_id)
251260
importFrom(vctrs,vec_equal)
252261
importFrom(vctrs,vec_in)
262+
importFrom(vctrs,vec_match)
253263
importFrom(vctrs,vec_order)
254264
importFrom(vctrs,vec_rbind)
255265
importFrom(vctrs,vec_recycle_common)
256266
importFrom(vctrs,vec_rep)
267+
importFrom(vctrs,vec_rep_each)
268+
importFrom(vctrs,vec_seq_along)
257269
importFrom(vctrs,vec_size)
258270
importFrom(vctrs,vec_slice)
259271
importFrom(vctrs,vec_sort)

R/epiprocess-package.R

+9
Original file line numberDiff line numberDiff line change
@@ -17,25 +17,34 @@
1717
#' @importFrom cli pluralize
1818
#' @importFrom cli qty
1919
#' @importFrom data.table as.data.table
20+
#' @importFrom data.table fifelse
2021
#' @importFrom data.table key
2122
#' @importFrom data.table setkeyv
2223
#' @importFrom dplyr arrange
2324
#' @importFrom dplyr grouped_df
2425
#' @importFrom dplyr is_grouped_df
2526
#' @importFrom dplyr select
2627
#' @importFrom lifecycle deprecated
28+
#' @importFrom purrr list_rbind
2729
#' @importFrom rlang %||%
30+
#' @importFrom rlang arg_match0
2831
#' @importFrom rlang is_bare_integerish
32+
#' @importFrom tibble is_tibble
33+
#' @importFrom tidyr nest
2934
#' @importFrom tools toTitleCase
3035
#' @importFrom vctrs vec_cast
3136
#' @importFrom vctrs vec_cbind
3237
#' @importFrom vctrs vec_data
38+
#' @importFrom vctrs vec_duplicate_id
3339
#' @importFrom vctrs vec_equal
3440
#' @importFrom vctrs vec_in
41+
#' @importFrom vctrs vec_match
3542
#' @importFrom vctrs vec_order
3643
#' @importFrom vctrs vec_rbind
3744
#' @importFrom vctrs vec_recycle_common
3845
#' @importFrom vctrs vec_rep
46+
#' @importFrom vctrs vec_rep_each
47+
#' @importFrom vctrs vec_seq_along
3948
#' @importFrom vctrs vec_slice
4049
#' @importFrom vctrs vec_slice<-
4150
#' @importFrom vctrs vec_sort

R/epix_epi_slide_opt.R

+141
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
epix_epi_slide_opt_one_epikey <- function(updates, in_colnames, f, f_from_package, before, after, time_type, out_colnames) {
2+
if (before == Inf) {
3+
cli_abort("epiprocess internal error: epix_epi_slide_opt_one_epikey() called with before == Inf")
4+
}
5+
unit_step <- epiprocess:::unit_time_delta(time_type)
6+
prev_inp_snapshot <- NULL
7+
prev_out_snapshot <- NULL
8+
result <- map(seq_len(nrow(updates)), function(update_i) {
9+
version <- updates$version[[update_i]]
10+
inp_update <- updates$subtbl[[update_i]] # TODO decide whether DT
11+
## setDF(inp_update)
12+
## inp_update <- new_tibble(inp_update, nrow = nrow(inp_update))
13+
inp_snapshot <- tbl_patch(prev_inp_snapshot, inp_update, "time_value")
14+
inp_update_min_t <- min(inp_update$time_value) # TODO check efficiency
15+
inp_update_max_t <- max(inp_update$time_value)
16+
# If the input had updates in the range t1..t2, this could produce changes
17+
# in slide outputs in the range t1-after..t2+before, and to compute those
18+
# slide values, we need to look at the input snapshot from
19+
# t1-after-before..t2+before+after.
20+
slide_min_t <- inp_update_min_t - (before + after) * unit_step
21+
slide_max_t <- inp_update_max_t + (before + after) * unit_step
22+
slide_n <- time_delta_to_n_steps(slide_max_t - slide_min_t, time_type) + 1L
23+
slide_time_values <- slide_min_t + 0L:(slide_n - 1L) * unit_step
24+
slide_inp_backrefs <- vec_match(slide_time_values, inp_snapshot$time_value)
25+
slide <- inp_snapshot[slide_inp_backrefs, ] # TODO vs. DT key index vs ....
26+
slide$time_value <- slide_time_values
27+
# TODO ensure before & after as integers?
28+
# TODO parameterize naming, slide function, options, ...
29+
if (f_from_package == "data.table") {
30+
for (col_i in seq_along(in_colnames)) {
31+
# FIXME other arg forwarding
32+
slide[[out_colnames[[col_i]]]] <- f(slide[[in_colnames[[col_i]]]], before + after + 1L)
33+
}
34+
} else if (f_from_package == "slider") {
35+
for (col_i in seq_along(in_colnames)) {
36+
# with adaptive tails that incorporate fewer inputs:
37+
# FIXME other arg forwarding
38+
out_col <- f(slide[[in_colnames[[col_i]]]], before = before, after = after)
39+
# XXX is this actually required or being done at the right time? we are
40+
# already chopping off a good amount that might include this?
41+
#
42+
# FIXME can this generate an error on very short series?
43+
vec_slice(out_col, seq_len(before)) <- NA
44+
vec_slice(out_col, slide_n - after + seq_len(after)) <- NA
45+
slide[[out_colnames[[col_i]]]] <- out_col
46+
}
47+
} else {
48+
cli_abort("epiprocess internal error: `f_from_package` was {format_chr_deparse(f_from_package)}")
49+
}
50+
out_update <- slide[
51+
# Get back to t1-after..t2+before; times outside this range were included
52+
# only so those inside would have enough context for their slide
53+
# computations, but these "context" rows may contain invalid slide
54+
# computation outputs:
55+
vec_rep_each(c(FALSE, TRUE, FALSE), c(before, slide_n - before - after, after)) &
56+
# Only include time_values that appeared in the input snapshot:
57+
!is.na(slide_inp_backrefs),
58+
]
59+
out_diff <- tbl_diff2(prev_out_snapshot, out_update, "time_value", "update")
60+
out_snapshot <- tbl_patch(prev_out_snapshot, out_diff)
61+
prev_inp_snapshot <<- inp_snapshot
62+
prev_out_snapshot <<- out_snapshot # TODO avoid need to patch twice?
63+
out_diff$version <- version
64+
out_diff
65+
})
66+
result
67+
}
68+
69+
#' @export
70+
epix_epi_slide_opt <-
71+
function(.x, .col_names, .f, ...,
72+
.window_size = NULL, .align = c("right", "center", "left"),
73+
.prefix = NULL, .suffix = NULL, .new_col_names = NULL # ,
74+
## .ref_time_values = NULL, .all_rows = FALSE
75+
) {
76+
UseMethod("epix_epi_slide_opt")
77+
}
78+
79+
#' @method epix_epi_slide_opt grouped_epi_archive
80+
#' @export
81+
epix_epi_slide_opt.grouped_epi_archive <- function(.x, ...) {
82+
assert_set_equal(
83+
group_vars(.x),
84+
key_colnames(.x, exclude = c("time_value", "version"))
85+
)
86+
orig_group_vars <- group_vars(.x)
87+
orig_drop <- .x$private$drop
88+
.x %>%
89+
ungroup() %>%
90+
epix_epi_slide_opt(...) %>%
91+
group_by(pick(all_of(orig_group_vars)), .drop = orig_drop)
92+
}
93+
94+
#' @method epix_epi_slide_opt epi_archive
95+
#' @export
96+
epix_epi_slide_opt.epi_archive <-
97+
function(.x, .col_names, .f, ...,
98+
.window_size = NULL, .align = c("right", "center", "left"),
99+
.prefix = NULL, .suffix = NULL, .new_col_names = NULL,
100+
## , .ref_time_values = NULL, .all_rows = FALSE
101+
.progress = FALSE) {
102+
# Extract metadata:
103+
time_type <- .x$time_type
104+
epikey_names <- key_colnames(.x, exclude = c("time_value", "version"))
105+
# Validation & pre-processing:
106+
.align <- arg_match(.align)
107+
f_info <- upstream_slide_f_info(.f)
108+
col_names_quo <- enquo(.col_names)
109+
names_info <- across_ish_names_info(.x$DT, time_type, col_names_quo, f_info$namer, .window_size, .align, .prefix, .suffix, .new_col_names)
110+
window_args <- get_before_after_from_window(.window_size, .align, time_type)
111+
assert(
112+
checkmate::check_logical(.progress, any.missing = FALSE, len = 1L, names = "unnamed"),
113+
checkmate::check_string(.progress)
114+
)
115+
if (isTRUE(.progress)) {
116+
.progress <- "Time series processed:"
117+
}
118+
use_progress <- !isFALSE(.progress)
119+
# Perform the slide:
120+
updates_grouped <- .x$DT %>%
121+
as.data.frame() %>%
122+
as_tibble(.name_repair = "minimal") %>%
123+
# 0 rows input -> 0 rows output, so we can just say drop = TRUE:
124+
grouped_df(epikey_names, TRUE)
125+
if (use_progress) progress_bar_id <- cli::cli_progress_bar(.progress, total = n_groups(updates_grouped))
126+
result <- updates_grouped %>%
127+
group_modify(function(group_values, group_key) {
128+
group_updates <- group_values %>% nest(.by = version, .key = "subtbl") %>% arrange(version)
129+
res <- epix_epi_slide_opt_one_epikey(group_updates, names_info$input_col_names, .f, f_info$from_package, window_args$before, window_args$after, time_type, names_info$output_col_names) %>%
130+
list_rbind()
131+
if (use_progress) cli::cli_progress_update(id = progress_bar_id)
132+
res
133+
}) %>%
134+
ungroup() %>%
135+
new_epi_archive(
136+
.x$geo_type, .x$time_type, .x$other_keys,
137+
.x$clobberable_versions_start, .x$versions_end
138+
)
139+
if (use_progress) cli::cli_progress_done(id = progress_bar_id)
140+
result
141+
}

0 commit comments

Comments
 (0)