Skip to content

Commit 4f44223

Browse files
authored
Merge pull request #472 from cmu-delphi/ds/before-default
refactor(epi_slide, epix_slide): remove `time_step`, remove `time_type`, unify time handling, require difftimes in `before` and `after`
2 parents a2c5154 + 7a50d9d commit 4f44223

40 files changed

+1327
-1798
lines changed

.github/workflows/R-CMD-check.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ jobs:
1919
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
2020
R_KEEP_PKG_SOURCE: yes
2121
steps:
22-
- uses: actions/checkout@v3
22+
- uses: actions/checkout@v4
2323

2424
- uses: r-lib/actions/setup-r@v2
2525
with:

NAMESPACE

+1
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ importFrom(checkmate,check_atomic)
102102
importFrom(checkmate,check_data_frame)
103103
importFrom(checkmate,check_names)
104104
importFrom(checkmate,expect_class)
105+
importFrom(checkmate,test_int)
105106
importFrom(checkmate,test_set_equal)
106107
importFrom(checkmate,test_subset)
107108
importFrom(checkmate,vname)

NEWS.md

+14-5
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,10 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat
55
# epiprocess 0.8
66

77
## Breaking changes
8+
89
- `detect_outlr_stl(seasonal_period = NULL)` is no longer accepted. Use
910
`detect_outlr_stl(seasonal_period = <value>, seasonal_as_residual = TRUE)`
10-
instead. See `?detect_outlr_stl` for more details.
11+
instead. See `?detect_outlr_stl` for more details.
1112

1213
## Improvements
1314

@@ -49,15 +50,23 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat
4950
output a huge number of `ref_time_values` spaced apart by mere seconds.
5051

5152
## Cleanup
52-
- Resolved some linting messages in package checks (#468).
5353

54-
## Cleanup
54+
- Resolved some linting messages in package checks (#468).
5555
- Added optional `decay_to_tibble` attribute controlling `as_tibble()` behavior
5656
of `epi_df`s to let `{epipredict}` work more easily with other libraries (#471).
57-
58-
## Cleanup
5957
- Removed some external package dependencies.
6058

59+
## Breaking changes
60+
61+
- `epi_df`s are now stricter about what types they allow in the time column.
62+
Namely, we are explicit about only supporting `Date` at the daily and weekly
63+
cadence and generic integer types (for yearly cadence).
64+
- `epi_slide` `before` and `after` arguments now require the user to
65+
specify time units in certain cases. The `time_step` argument has been
66+
removed.
67+
- `epix_slide` `before` argument now defaults to `Inf`, and requires the user to
68+
specify units in some cases. The `time_step` argument has been removed.
69+
6170
# epiprocess 0.7.0
6271

6372
## Breaking changes:

R/archive.R

+54-60
Original file line numberDiff line numberDiff line change
@@ -170,11 +170,8 @@ NULL
170170
#' The data table `DT` has key variables `geo_value`, `time_value`, `version`,
171171
#' as well as any others (these can be specified when instantiating the
172172
#' `epi_archive` object via the `other_keys` argument, and/or set by operating
173-
#' on `DT` directly). Refer to the documentation for `as_epi_archive()` for
174-
#' information and examples of relevant parameter names for an `epi_archive`
175-
#' object. Note that there can only be a single row per unique combination of
176-
#' key variables, and thus the key variables are critical for figuring out how
177-
#' to generate a snapshot of data from the archive, as of a given version.
173+
#' on `DT` directly). Note that there can only be a single row per unique
174+
#' combination of key variables.
178175
#'
179176
#' @section Metadata:
180177
#' The following pieces of metadata are included as fields in an `epi_archive`
@@ -184,18 +181,15 @@ NULL
184181
#' * `time_type`: the type for the time values.
185182
#' * `additional_metadata`: list of additional metadata for the data archive.
186183
#'
187-
#' Unlike an `epi_df` object, metadata for an `epi_archive` object `x` can be
188-
#' accessed (and altered) directly, as in `x$geo_type` or `x$time_type`,
189-
#' etc. Like an `epi_df` object, the `geo_type` and `time_type` fields in the
190-
#' metadata of an `epi_archive` object are not currently used by any
191-
#' downstream functions in the `epiprocess` package, and serve only as useful
192-
#' bits of information to convey about the data set at hand.
184+
#' While this metadata is not protected, it is generally recommended to treat it
185+
#' as read-only, and to use the `epi_archive` methods to interact with the data
186+
#' archive. Unexpected behavior may result from modifying the metadata
187+
#' directly.
193188
#'
194189
#' @section Generating Snapshots:
195190
#' An `epi_archive` object can be used to generate a snapshot of the data in
196-
#' `epi_df` format, which represents the most up-to-date values of the signal
197-
#' variables, as of the specified version. This is accomplished by calling
198-
#' `epix_as_of()`.
191+
#' `epi_df` format, which represents the most up-to-date time series values up
192+
#' to a point in time. This is accomplished by calling `epix_as_of()`.
199193
#'
200194
#' @section Sliding Computations:
201195
#' We can run a sliding computation over an `epi_archive` object, much like
@@ -208,19 +202,18 @@ NULL
208202
#'
209203
#' @param x A data.frame, data.table, or tibble, with columns `geo_value`,
210204
#' `time_value`, `version`, and then any additional number of columns.
211-
#' @param geo_type Type for the geo values. If missing, then the function will
212-
#' attempt to infer it from the geo values present; if this fails, then it
213-
#' will be set to "custom".
214-
#' @param time_type Type for the time values. If missing, then the function will
215-
#' attempt to infer it from the time values present; if this fails, then it
216-
#' will be set to "custom".
205+
#' @param geo_type DEPRECATED Has no effect. Geo value type is inferred from the
206+
#' location column and set to "custom" if not recognized.
207+
#' @param time_type DEPRECATED Has no effect. Time value type is inferred from the time
208+
#' column and set to "custom" if not recognized. Unpredictable behavior may result
209+
#' if the time type is not recognized.
217210
#' @param other_keys Character vector specifying the names of variables in `x`
218211
#' that should be considered key variables (in the language of `data.table`)
219212
#' apart from "geo_value", "time_value", and "version".
220213
#' @param additional_metadata List of additional metadata to attach to the
221-
#' `epi_archive` object. The metadata will have `geo_type` and `time_type`
222-
#' fields; named entries from the passed list or will be included as well.
223-
#' @param compactify Optional; Boolean or `NULL`. `TRUE` will remove some
214+
#' `epi_archive` object. The metadata will have the `geo_type` field; named
215+
#' entries from the passed list will be included as well.
216+
#' @param compactify Optional; Boolean or `NULL`. `TRUE` will remove some
224217
#' redundant rows, `FALSE` will not, and missing or `NULL` will remove
225218
#' redundant rows, but issue a warning. See more information at `compactify`.
226219
#' @param clobberable_versions_start Optional; `length`-1; either a value of the
@@ -269,10 +262,7 @@ NULL
269262
#' value = rnorm(10, mean = 2, sd = 1)
270263
#' )
271264
#'
272-
#' toy_epi_archive <- tib %>% as_epi_archive(
273-
#' geo_type = "state",
274-
#' time_type = "day"
275-
#' )
265+
#' toy_epi_archive <- tib %>% as_epi_archive()
276266
#' toy_epi_archive
277267
#'
278268
#' # Ex. with an additional key for county
@@ -295,21 +285,17 @@ NULL
295285
#' cases_rate = c(0.01, 0.02, 0.01, 0.05)
296286
#' )
297287
#'
298-
#' x <- df %>% as_epi_archive(
299-
#' geo_type = "state",
300-
#' time_type = "day",
301-
#' other_keys = "county"
302-
#' )
288+
#' x <- df %>% as_epi_archive(other_keys = "county")
303289
#'
304290
new_epi_archive <- function(
305291
x,
306-
geo_type = NULL,
307-
time_type = NULL,
308-
other_keys = NULL,
309-
additional_metadata = NULL,
310-
compactify = NULL,
311-
clobberable_versions_start = NULL,
312-
versions_end = NULL) {
292+
geo_type,
293+
time_type,
294+
other_keys,
295+
additional_metadata,
296+
compactify,
297+
clobberable_versions_start,
298+
versions_end) {
313299
# Create the data table; if x was an un-keyed data.table itself,
314300
# then the call to as.data.table() will fail to set keys, so we
315301
# need to check this, then do it manually if needed
@@ -398,13 +384,11 @@ new_epi_archive <- function(
398384
#' @export
399385
validate_epi_archive <- function(
400386
x,
401-
geo_type = NULL,
402-
time_type = NULL,
403-
other_keys = NULL,
404-
additional_metadata = NULL,
405-
compactify = NULL,
406-
clobberable_versions_start = NULL,
407-
versions_end = NULL) {
387+
other_keys,
388+
additional_metadata,
389+
compactify,
390+
clobberable_versions_start,
391+
versions_end) {
408392
# Finish off with small checks on keys variables and metadata
409393
if (!test_subset(other_keys, names(x))) {
410394
cli_abort("`other_keys` must be contained in the column names of `x`.")
@@ -413,12 +397,20 @@ validate_epi_archive <- function(
413397
cli_abort("`other_keys` cannot contain \"geo_value\", \"time_value\", or \"version\".")
414398
}
415399
if (any(names(additional_metadata) %in% c("geo_type", "time_type"))) {
416-
cli_warn("`additional_metadata` names overlap with existing metadata fields \"geo_type\", \"time_type\".")
400+
cli_warn("`additional_metadata` names overlap with existing metadata fields \"geo_type\" or \"time_type\".")
417401
}
418402

419403
# Conduct checks and apply defaults for `compactify`
420404
assert_logical(compactify, len = 1, any.missing = FALSE, null.ok = TRUE)
421405

406+
# Make sure `time_value` and `version` have the same time type
407+
if (!identical(class(x[["time_value"]]), class(x[["version"]]))) {
408+
cli_abort(
409+
"`time_value` and `version` must have the same class.",
410+
class = "epiprocess__time_value_version_mismatch"
411+
)
412+
}
413+
422414
# Apply defaults and conduct checks for
423415
# `clobberable_versions_start`, `versions_end`:
424416
validate_version_bound(clobberable_versions_start, x, na_ok = TRUE)
@@ -453,13 +445,13 @@ validate_epi_archive <- function(
453445
#' @export
454446
as_epi_archive <- function(
455447
x,
456-
geo_type = NULL,
457-
time_type = NULL,
458-
other_keys = NULL,
459-
additional_metadata = NULL,
448+
geo_type = deprecated(),
449+
time_type = deprecated(),
450+
other_keys = character(0L),
451+
additional_metadata = list(),
460452
compactify = NULL,
461-
clobberable_versions_start = NULL,
462-
.versions_end = NULL, ...,
453+
clobberable_versions_start = NA,
454+
.versions_end = max_version_with_row_in(x), ...,
463455
versions_end = .versions_end) {
464456
assert_data_frame(x)
465457
x <- rename(x, ...)
@@ -477,16 +469,18 @@ as_epi_archive <- function(
477469
if (anyMissing(x$version)) {
478470
cli_abort("Column `version` must not contain missing values.")
479471
}
472+
if (lifecycle::is_present(geo_type)) {
473+
cli_warn("epi_archive constructor argument `geo_type` is now ignored. Consider removing.")
474+
}
475+
if (lifecycle::is_present(time_type)) {
476+
cli_warn("epi_archive constructor argument `time_type` is now ignored. Consider removing.")
477+
}
480478

481-
geo_type <- geo_type %||% guess_geo_type(x$geo_value)
482-
time_type <- time_type %||% guess_time_type(x$time_value)
483-
other_keys <- other_keys %||% character(0L)
484-
additional_metadata <- additional_metadata %||% list()
485-
clobberable_versions_start <- clobberable_versions_start %||% NA
486-
versions_end <- versions_end %||% max_version_with_row_in(x)
479+
geo_type <- guess_geo_type(x$geo_value)
480+
time_type <- guess_time_type(x$time_value)
487481

488482
validate_epi_archive(
489-
x, geo_type, time_type, other_keys, additional_metadata,
483+
x, other_keys, additional_metadata,
490484
compactify, clobberable_versions_start, versions_end
491485
)
492486
new_epi_archive(

0 commit comments

Comments
 (0)