9
9
# ' includes `NA`'s)
10
10
# ' 3. `max_lag`: the amount of time until the final (new) version (same caveat
11
11
# ' for `drop_nas=FALSE`, though it is far less likely to matter)
12
- # ' 4. `spread`: the difference between the smallest and largest values (this
12
+ # ' 4. `min_value`: the minimum value across revisions
13
+ # ' 5. `max_value`: the maximum value across revisions
14
+ # ' 6. `median_value`: the median value across revisions
15
+ # ' 7. `spread`: the difference between the smallest and largest values (this
13
16
# ' always excludes `NA` values)
14
- # ' 5 . `rel_spread`: `spread` divided by the largest value (so it will
17
+ # ' 8. `rel_spread`: `spread` divided by the largest value (so it will
15
18
# ' always be less than 1). Note that this need not be the final value. It will
16
19
# ' be `NA` whenever `spread` is 0.
17
- # ' 6 . `time_near_latest`: This gives the lag when the value is within
20
+ # ' 9. `time_near_latest`: This gives the lag when the value is within
18
21
# ' `within_latest` (default 20%) of the value at the latest time. For example,
19
22
# ' consider the series (0, 20, 99, 150, 102, 100); then `time_near_latest` is
20
23
# ' the 5th index, since even though 99 is within 20%, it is outside the window
21
24
# ' afterwards at 150.
22
25
# ' @param epi_arch an epi_archive to be analyzed
23
- # ' @param ... <[`tidyselect`][dplyr_tidy_select]>, used to choose the column to summarize. If empty, it
24
- # ' chooses the first. Currently only implemented for one column at a time.
26
+ # ' @param ... <[`tidyselect`][dplyr_tidy_select]>, used to choose the column to
27
+ # ' summarize. If empty, it chooses the first. Currently only implemented for
28
+ # ' one column at a time.
25
29
# ' @param drop_nas bool, drop any `NA` values from the archive? After dropping
26
30
# ' `NA`'s compactify is run again to make sure there are no duplicate values
27
31
# ' from occasions when the signal is revised to `NA`, and then back to its
28
32
# ' immediately-preceding value.
29
33
# ' @param print_inform bool, determines whether to print summary information, or
30
34
# ' only return the full summary tibble
35
+ # ' @param min_waiting_period `difftime`, integer or `NULL`. Sets a cutoff: any
36
+ # ' time_values not earlier than `min_waiting_period` before `versions_end` are
37
+ # ' removed. `min_waiting_period` should characterize the typical time during
38
+ # ' which revisions occur. The default of 60 days corresponds to a typical
39
+ # ' final value for case counts as reported in the context of insurance. To
40
+ # ' avoid this filtering, either set to `NULL` or 0.
31
41
# ' @param within_latest double between 0 and 1. Determines the threshold
32
42
# ' used for the `time_near_latest` column
33
43
# ' @param quick_revision difftime or integer (integer is treated as days), for
@@ -60,6 +70,7 @@ revision_summary <- function(epi_arch,
60
70
... ,
61
71
drop_nas = TRUE ,
62
72
print_inform = TRUE ,
73
+ min_waiting_period = as.difftime(60 , units = " days" ),
63
74
within_latest = 0.2 ,
64
75
quick_revision = as.difftime(3 , units = " days" ),
65
76
few_revisions = 3 ,
@@ -92,6 +103,11 @@ revision_summary <- function(epi_arch,
92
103
revision_behavior <-
93
104
epi_arch $ DT %> %
94
105
select(c(geo_value , time_value , all_of(keys ), version , !! arg ))
106
+ if (! is.null(min_waiting_period )) {
107
+ revision_behavior <- revision_behavior %> %
108
+ filter(abs(time_value - as.Date(epi_arch $ versions_end )) > = min_waiting_period )
109
+ }
110
+
95
111
if (drop_nas ) {
96
112
# if we're dropping NA's, we should recompactify
97
113
revision_behavior <-
@@ -113,18 +129,22 @@ revision_summary <- function(epi_arch,
113
129
n_revisions = dplyr :: n() - 1 ,
114
130
min_lag = min(lag ), # nolint: object_usage_linter
115
131
max_lag = max(lag ), # nolint: object_usage_linter
116
- spread = spread_vec(pick(!! arg )),
117
- rel_spread = spread / max_no_na(pick(!! arg )), # nolint: object_usage_linter
132
+ min_value = f_no_na(min , pick(!! arg )),
133
+ max_value = f_no_na(max , pick(!! arg )),
134
+ median_value = f_no_na(median , pick(!! arg )),
118
135
time_to = time_within_x_latest(lag , pick(!! arg ), prop = within_latest ), # nolint: object_usage_linter
119
136
.groups = " drop"
120
137
) %> %
121
138
mutate(
139
+ spread = max_value - min_value , # nolint: object_usage_linter
140
+ rel_spread = spread / max_value , # nolint: object_usage_linter
122
141
# TODO the units here may be a problem
123
142
min_lag = as.difftime(min_lag , units = " days" ), # nolint: object_usage_linter
124
143
max_lag = as.difftime(max_lag , units = " days" ), # nolint: object_usage_linter
125
144
time_near_latest = as.difftime(time_to , units = " days" ) # nolint: object_usage_linter
126
145
) %> %
127
- select(- time_to )
146
+ select(- time_to ) %> %
147
+ relocate(time_value , geo_value , all_of(keys ), n_revisions , min_lag , max_lag , time_near_latest , spread , rel_spread , min_value , max_value , median_value )
128
148
if (print_inform ) {
129
149
cli_inform(" Min lag (time to first version):" )
130
150
difftime_summary(revision_behavior $ min_lag ) %> % print()
@@ -203,31 +223,17 @@ get_last_run <- function(bool_vec, values_from) {
203
223
values_from [[length(bool_vec ) - tail(runs $ lengths , n = 1 ) + 1 ]]
204
224
}
205
225
206
- # ' the default behavior returns a warning on empty lists, which we do not want,
207
- # ' and there is no super clean way of preventing this
226
#' Apply a summary function after dropping `NA`s, guarding against empty input
#'
#' Base summaries such as `min()` and `max()` emit a warning when handed a
#' zero-length vector, which we do not want, and there is no super clean way of
#' preventing this. This helper removes `NA`s first and, if nothing is left,
#' returns `empty` instead of calling `f` at all.
#'
#' @param f summary function taking a single vector (e.g. `min`, `max`,
#'   `median`).
#' @param x values to summarize; `NA` entries are removed before `f` is applied.
#' @param empty value returned when no non-`NA` values remain. The default of
#'   `Inf` preserves this helper's historical behavior; pass e.g. `-Inf` for
#'   `max` or `NA` for `median` when a function-appropriate sentinel is wanted.
#' @return `f` evaluated on the non-`NA` values of `x`, or `empty` if there are
#'   none.
#' @keywords internal
f_no_na <- function(f, x, empty = Inf) {
  x <- x[!is.na(x)]
  if (length(x) == 0) {
    # skip `f` entirely so that e.g. `min()`/`max()` never see an empty vector
    return(empty)
  }
  f(x)
}
217
- # ' the default behavior returns a warning on empty lists, which we do not want
218
- # ' @keywords internal
219
- spread_vec <- function (x ) {
220
- x <- x [! is.na(x )]
221
- if (length(x ) == 0 ) {
222
- return (- Inf )
223
- } else {
224
- res <- x %> %
225
- range(na.rm = TRUE ) %> %
226
- diff(na.rm = TRUE )
227
- return (res )
228
- }
229
- }
230
-
231
237
232
238
233
239
# ' simple util for printing a fraction and its percent
0 commit comments