9
9
10
10
# ' Validate a version bound arg
11
11
# '
12
- # ' Expected to be used on `clobberable_versions_start`, `versions_end`,
13
- # ' and similar arguments. Some additional context-specific checks may be needed.
12
+ # ' Expected to be used on `clobberable_versions_start`, `versions_end`, and
13
+ # ' similar arguments. Some additional context-specific checks may be needed.
14
+ # ' Side effects: raises an error if version bound appears invalid.
14
15
# '
15
16
# ' @param version_bound the version bound to validate
16
17
# ' @param x a data frame containing a version column with which to check
20
21
# ' @param version_bound_arg optional string; what to call the version bound in
21
22
# ' error messages
22
23
# '
23
- # ' @section Side effects: raises an error if version bound appears invalid
24
- # '
25
- # ' @noRd
24
+ # ' @keywords internal
26
25
validate_version_bound <- function (version_bound , x , na_ok = FALSE ,
27
26
version_bound_arg = rlang :: caller_arg(version_bound ),
28
27
x_arg = rlang :: caller_arg(x )) {
@@ -75,9 +74,7 @@ validate_version_bound <- function(version_bound, x, na_ok = FALSE,
75
74
# ' @return `max(x$version)` if it has any rows; raises error if it has 0 rows or
76
75
# ' an `NA` version value
77
76
# '
78
- # ' @importFrom checkmate check_names
79
- # '
80
- # ' @export
77
+ # ' @keywords internal
81
78
max_version_with_row_in <- function (x ) {
82
79
if (nrow(x ) == 0L ) {
83
80
cli_abort(
@@ -108,72 +105,71 @@ max_version_with_row_in <- function(x) {
108
105
# ' @param x the starting "value"(s)
109
106
# ' @return same class, typeof, and length as `x`
110
107
# '
111
- # ' @export
108
+ # ' @keywords internal
112
109
next_after <- function(x) {
  # S3 generic: dispatch on the class of `x` to the matching `next_after.*`
  # method. The generic name must match the method prefix exactly (no stray
  # whitespace), otherwise no method is ever found.
  UseMethod("next_after")
}
113
110
114
111
115
- # ' @export
112
+ # ' @keywords internal
116
113
next_after.integer <- function(x) {
  # Step forward by one; adding the integer literal `1L` keeps an integer
  # input as an integer.
  x + 1L
}
117
114
118
115
119
- # ' @export
116
+ # ' @keywords internal
120
117
next_after.Date <- function(x) {
  # Adding one to a Date advances it by one day.
  x + 1L
}
121
118
122
119
123
- # ' Compactify
124
- # '
125
- # ' This section describes the internals of how compactification works in an
126
- # ' `epi_archive()`. Compactification can potentially improve code speed or
127
- # ' memory usage, depending on your data.
128
- # '
129
- # ' In general, the last version of each observation is carried forward (LOCF) to
130
- # ' fill in data between recorded versions, and between the last recorded
131
- # ' update and the `versions_end`. One consequence is that the `DT` doesn't
132
- # ' have to contain a full snapshot of every version (although this generally
133
- # ' works), but can instead contain only the rows that are new or changed from
134
- # ' the previous version (see `compactify`, which does this automatically).
135
- # ' Currently, deletions must be represented as revising a row to a special
136
- # ' state (e.g., making the entries `NA` or including a special column that
137
- # ' flags the data as removed and performing some kind of post-processing), and
138
- # ' the archive is unaware of what this state is. Note that `NA`s *can* be
139
- # ' introduced by `epi_archive` methods for other reasons, e.g., in
140
- # ' [`epix_fill_through_version`] and [`epix_merge`], if requested, to
141
- # ' represent potential update data that we do not yet have access to; or in
142
- # ' [`epix_merge`] to represent the "value" of an observation before the
143
- # ' version in which it was first released, or if no version of that
144
- # ' observation appears in the archive data at all.
120
+ # ' `epi_archive` object
145
121
# '
146
- # ' @name compactify
147
- NULL
148
-
149
-
150
- # ' Epi Archive
151
- # '
152
- # ' @title `epi_archive` object
122
+ # ' @description The second main data structure for storing time series in
123
+ # ' `epiprocess`. Like `epi_df`, it is fundamentally a table with
124
+ # ' a few required columns that stores epidemiological time series data. An
125
+ # ' `epi_archive` requires a `geo_value`, `time_value`, and `version` column (and
126
+ # ' possibly other key columns) along with measurement values. In brief, an
127
+ # ' `epi_archive` is a history of the time series data, where the `version`
128
+ # ' column tracks the time at which the data was available. This allows for
129
+ # ' version-aware forecasting.
153
130
# '
154
- # ' @description An `epi_archive` is an S3 class which contains a data table
155
- # ' along with several relevant pieces of metadata. The data table can be seen
156
- # ' as the full archive (version history) for some signal variables of
157
- # ' interest.
131
+ # ' `new_epi_archive` is the constructor for `epi_archive` objects that assumes
132
+ # ' all arguments have been validated. Most users should use `as_epi_archive`.
158
133
# '
159
- # ' @details An `epi_archive` contains a data table `DT`, of class `data.table`
160
- # ' from the ` data.table` package, with (at least) the following columns:
134
+ # ' @details An `epi_archive` contains a `data.table` object `DT` (from the
135
+ # ' `{data.table}` package), with (at least) the following columns:
161
136
# '
162
- # ' * `geo_value`: the geographic value associated with each row of measurements.
163
- # ' * `time_value`: the time value associated with each row of measurements.
137
+ # ' * `geo_value`: the geographic value associated with each row of measurements,
138
+ # ' * `time_value`: the time value associated with each row of measurements,
164
139
# ' * `version`: the time value specifying the version for each row of
165
140
# ' measurements. For example, if in a given row the `version` is January 15,
166
141
# ' 2022 and `time_value` is January 14, 2022, then this row contains the
167
142
# ' measurements of the data for January 14, 2022 that were available one day
168
143
# ' later.
169
144
# '
170
- # ' The data table `DT` has key variables `geo_value`, `time_value`, `version`,
171
- # ' as well as any others (these can be specified when instantiating the
172
- # ' `epi_archive` object via the `other_keys` argument, and/or set by operating
173
- # ' on `DT` directly). Note that there can only be a single row per unique
174
- # ' combination of key variables.
145
+ # ' The variables `geo_value`, `time_value`, `version` serve as key variables for
146
+ # ' the data table (in addition to any other keys specified in the metadata).
147
+ # ' There can only be a single row per unique combination of key variables. The
148
+ # ' keys for an `epi_archive` can be viewed with `key(epi_archive$DT)`.
149
+ # '
150
+ # ' ## Compactification
151
+ # '
152
+ # ' By default, an `epi_archive` will compactify the data table to remove
153
+ # ' redundant rows. This is done by not storing rows that have the same value,
154
+ # ' except for the `version` column (this is essentially a last observation
155
+ # ' carried forward, but along the version index). This is done to save space and
156
+ # ' improve performance. If you do not want to compactify the data, you can set
157
+ # ' `compactify = FALSE` in `as_epi_archive()`.
158
+ # '
159
+ # ' Note that in some data scenarios, LOCF may not be appropriate. For instance,
160
+ # ' if you expected data to be updated on a given day, but your data source did
161
+ # ' not update, then it could be reasonable to code the data as `NA` for that
162
+ # ' day, instead of assuming LOCF.
163
+ # '
164
+ # ' `NA`s *can* be introduced by `epi_archive` methods for other
165
+ # ' reasons, e.g., in [`epix_fill_through_version`] and [`epix_merge`], if
166
+ # ' requested, to represent potential update data that we do not yet have access
167
+ # ' to; or in [`epix_merge`] to represent the "value" of an observation before
168
+ # ' the version in which it was first released, or if no version of that
169
+ # ' observation appears in the archive data at all.
170
+ # '
171
+ # ' ## Metadata
175
172
# '
176
- # ' @section Metadata:
177
173
# ' The following pieces of metadata are included as fields in an `epi_archive`
178
174
# ' object:
179
175
# '
@@ -187,20 +183,6 @@ NULL
187
183
# ' archive. Unexpected behavior may result from modifying the metadata
188
184
# ' directly.
189
185
# '
190
- # ' @section Generating Snapshots:
191
- # ' An `epi_archive` object can be used to generate a snapshot of the data in
192
- # ' `epi_df` format, which represents the most up-to-date time series values up
193
- # ' to a point in time. This is accomplished by calling `epix_as_of()`.
194
- # '
195
- # ' @section Sliding Computations:
196
- # ' We can run a sliding computation over an `epi_archive` object, much like
197
- # ' `epi_slide()` does for an `epi_df` object. This is accomplished by calling
198
- # ' the `slide()` method for an `epi_archive` object, which works similarly to
199
- # ' the way `epi_slide()` works for an `epi_df` object, but with one key
200
- # ' difference: it is version-aware. That is, for an `epi_archive` object, the
201
- # ' sliding computation at any given reference time point t is performed on
202
- # ' **data that would have been available as of t**.
203
- # '
204
186
# ' @param x A data.frame, data.table, or tibble, with columns `geo_value`,
205
187
# ' `time_value`, `version`, and then any additional number of columns.
206
188
# ' @param geo_type DEPRECATED Has no effect. Geo value type is inferred from the
@@ -239,10 +221,11 @@ NULL
239
221
# ' value of `clobberable_versions_start` does not fully trust these empty
240
222
# ' updates, and assumes that any version `>= max(x$version)` could be
241
223
# ' clobbered.) If `nrow(x) == 0`, then this argument is mandatory.
242
- # ' @param compactify_tol double. the tolerance used to detect approximate equality for compactification
224
+ # ' @param compactify_tol double. the tolerance used to detect approximate
225
+ # ' equality for compactification
243
226
# ' @return An `epi_archive` object.
244
227
# '
245
- # ' @importFrom data.table as.data.table key setkeyv
228
+ # ' @seealso [`epix_as_of`] [`epix_merge`] [`epix_slide`]
246
229
# ' @importFrom dplyr if_any if_all everything
247
230
# ' @importFrom utils capture.output
248
231
# '
@@ -356,12 +339,13 @@ new_epi_archive <- function(
356
339
)
357
340
}
358
341
359
- # ' given a tibble as would be found in an epi_archive, remove duplicate entries.
360
- # ' @description
361
- # ' works by shifting all rows except the version, then comparing values to see
342
+ # ' Given a tibble as would be found in an epi_archive, remove duplicate entries.
343
+ # '
344
+ # ' Works by shifting all rows except the version, then comparing values to see
362
345
# ' if they've changed. We need to arrange in descending order, but note that
363
346
# ' we don't need to group, since at least one column other than version has
364
347
# ' changed, and so is kept.
348
+ # '
365
349
# ' @keywords internal
366
350
# ' @importFrom dplyr filter
367
351
apply_compactify <- function (df , keys , tolerance = .Machine $ double.eps ^ .5 ) {
@@ -466,6 +450,7 @@ validate_epi_archive <- function(
466
450
467
451
# ' `as_epi_archive` converts a data frame, data table, or tibble into an
468
452
# ' `epi_archive` object.
453
+ # '
469
454
# ' @param ... used for specifying column names, as in [`dplyr::rename`]. For
470
455
# ' example `version = release_date`
471
456
# ' @param .versions_end location based versions_end, used to avoid prefix
0 commit comments