@@ -216,11 +216,24 @@ epi_archive <-
216
216
classname = " epi_archive" ,
217
217
# ####
218
218
public = list (
219
+ # ' @field DT (`data.table`)\cr
220
+ # ' the (optionally compactified) datatable
219
221
DT = NULL ,
222
+ # ' @field geo_type (string)\cr
223
+ # ' the resolution of the geographic label (e.g. state)
220
224
geo_type = NULL ,
225
+ # ' @field time_type (string)\cr
226
+ # ' the resolution of the time column (e.g. day)
221
227
time_type = NULL ,
228
+ # ' @field additional_metadata (named list)\cr
229
+ # ' any extra fields, such as `other_keys`
222
230
additional_metadata = NULL ,
231
+ # ' @field clobberable_versions_start (length-1 of same type&class as `version` column, or `NA`)\cr
232
+ # ' the earliest version number that might be rewritten in the future without assigning a new version
233
+ # ' date/number, or `NA` if this won't happen
223
234
clobberable_versions_start = NULL ,
235
+ # ' @field versions_end (length-1 of same type&class as `version` column)\cr
236
+ # ' the latest version observed
224
237
versions_end = NULL ,
225
238
# ' @description Creates a new `epi_archive` object.
226
239
# ' @param x A data frame, data table, or tibble, with columns `geo_value`,
@@ -426,6 +439,10 @@ epi_archive <-
426
439
self $ clobberable_versions_start <- clobberable_versions_start
427
440
self $ versions_end <- versions_end
428
441
},
442
+ # ' @description Print information about an archive
443
+ # ' @param class Boolean; whether to print the class label header
444
+ # ' @param methods Boolean; whether to print all available methods of
445
+ # ' the archive
429
446
print = function (class = TRUE , methods = TRUE ) {
430
447
if (class ) cat(" An `epi_archive` object, with metadata:\n " )
431
448
cat(sprintf(" * %-9s = %s\n " , " geo_type" , self $ geo_type ))
@@ -487,7 +504,23 @@ epi_archive <-
487
504
},
488
505
# ####
489
506
# ' @description Generates a snapshot in `epi_df` format as of a given version.
490
- # ' See the documentation for the wrapper function [`epix_as_of()`] for details.
507
+ # ' See the documentation for the wrapper function [`epix_as_of()`] for
508
+ # ' details. The parameter descriptions below are copied from there.
509
+ # ' @param x An `epi_archive` object
510
+ # ' @param max_version Version specifying the max version to permit in the
511
+ # ' snapshot. That is, the snapshot will comprise the unique rows of the
512
+ # ' current archive data that represent the most up-to-date signal values, as
513
+ # ' of the specified `max_version` (and whose `time_value`s are at least
514
+ # ' `min_time_value`).
515
+ # ' @param min_time_value Time value specifying the min `time_value` to permit in
516
+ # ' the snapshot. Default is `-Inf`, which effectively means that there is no
517
+ # ' minimum considered.
518
+ # ' @param all_versions Boolean; If `all_versions = TRUE`, then the output will be in
519
+ # ' `epi_archive` format, and contain rows in the specified `time_value` range
520
+ # ' having `version <= max_version`. The resulting object will cover a
521
+ # ' potentially narrower `version` and `time_value` range than `x`, depending
522
+ # ' on user-provided arguments. Otherwise, there will be one row in the output
523
+ # ' for the `max_version` of each `time_value`. Default is `FALSE`.
491
524
# ' @importFrom data.table between key
492
525
as_of = function (max_version , min_time_value = - Inf , all_versions = FALSE ) {
493
526
# Self max version and other keys
@@ -679,15 +712,94 @@ epi_archive <-
679
712
680
713
return (invisible (self ))
681
714
},
682
- # ####
715
+ # ' Group an `epi_archive`
716
+ # ' @description
717
+ # ' Groups an `epi_archive` by the given variables or computations.
718
+ # ' @param ... variables or computations to group by. Computations are always
719
+ # ' done on the ungrouped data frame. To perform computations on the grouped
720
+ # ' data, you need to use a separate [`mutate()`] step before the
721
+ # ' [`group_by()`]
722
+ # ' @param .add When `FALSE`, the default, [`group_by()`] will override existing
723
+ # ' groups. To add to the existing groups, use `.add = TRUE`.
724
+ # ' @param .drop Drop groups formed by factor levels that don't appear in the
725
+ # ' data. The default is `TRUE` except when `.data` has been previously grouped
726
+ # ' with `.drop = FALSE`. See [`group_by_drop_default()`] for details.
683
727
group_by = function (... , .add = FALSE , .drop = dplyr :: group_by_drop_default(self )) {
684
728
# Thin wrapper: forward this object (`self`), the grouping expressions in
# `...`, and the `.add`/`.drop` settings unchanged to `group_by.epi_archive()`.
group_by.epi_archive(self , ... , .add = .add , .drop = .drop )
685
729
},
686
730
# ' @description Slides a given function over variables in an `epi_archive`
687
731
# ' object. See the documentation for the wrapper function [`epix_slide()`] for
688
- # ' details.
732
+ # ' details. The parameter descriptions below are copied from there.
689
733
# ' @importFrom data.table key
690
734
# ' @importFrom rlang !! !!! enquo quo_is_missing enquos is_quosure sym syms
735
+ # ' @param f Function, formula, or missing; together with `...` specifies the
736
+ # ' computation to slide. To "slide" means to apply a computation over a
737
+ # ' sliding (a.k.a. "rolling") time window for each data group. The window is
738
+ # ' determined by the `before` parameter described below. One time step is
739
+ # ' typically one day or one week; see [`epi_slide`] details for more
740
+ # ' explanation. If a function, `f` must take an `epi_df` with the same
741
+ # ' column names as the archive's `DT`, minus the `version` column; followed
742
+ # ' by a one-row tibble containing the values of the grouping variables for
743
+ # ' the associated group; followed by a reference time value, usually as a
744
+ # ' `Date` object; followed by any number of named arguments. If a formula,
745
+ # ' `f` can operate directly on columns accessed via `.x$var` or `.$var`, as
746
+ # ' in `~ mean (.x$var)` to compute a mean of a column `var` for each
747
+ # ' group-`ref_time_value` combination. The group key can be accessed via
748
+ # ' `.y` or `.group_key`, and the reference time value can be accessed via
749
+ # ' `.z` or `.ref_time_value`. If `f` is missing, then `...` will specify the
750
+ # ' computation.
751
+ # ' @param ... Additional arguments to pass to the function or formula specified
752
+ # ' via `f`. Alternatively, if `f` is missing, then `...` is interpreted as an
753
+ # ' expression for tidy evaluation; in addition to referring to columns
754
+ # ' directly by name, the expression has access to `.data` and `.env` pronouns
755
+ # ' as in `dplyr` verbs, and can also refer to the `.group_key` and
756
+ # ' `.ref_time_value`. See details of [`epi_slide`].
757
+ # ' @param before How far `before` each `ref_time_value` should the sliding
758
+ # ' window extend? If provided, should be a single, non-NA,
759
+ # ' [integer-compatible][vctrs::vec_cast] number of time steps. This window
760
+ # ' endpoint is inclusive. For example, if `before = 7`, and one time step is
761
+ # ' one day, then to produce a value for a `ref_time_value` of January 8, we
762
+ # ' apply the given function or formula to data (for each group present) with
763
+ # ' `time_value`s from January 1 onward, as they were reported on January 8.
764
+ # ' For typical disease surveillance sources, this will not include any data
765
+ # ' with a `time_value` of January 8, and, depending on the amount of reporting
766
+ # ' latency, may not include January 7 or even earlier `time_value`s. (If
767
+ # ' instead the archive were to hold nowcasts instead of regular surveillance
768
+ # ' data, then we would indeed expect data for `time_value` January 8. If it
769
+ # ' were to hold forecasts, then we would expect data for `time_value`s after
770
+ # ' January 8, and the sliding window would extend as far after each
771
+ # ' `ref_time_value` as needed to include all such `time_value`s.)
772
+ # ' @param ref_time_values Reference time values / versions for sliding
773
+ # ' computations; each element of this vector serves both as the anchor point
774
+ # ' for the `time_value` window for the computation and the `max_version`
775
+ # ' `as_of` which we fetch data in this window. If missing, then this will be set
776
+ # ' to a regularly-spaced sequence of values set to cover the range of
777
+ # ' `version`s in the `DT` plus the `versions_end`; the spacing of values will
778
+ # ' be guessed (using the GCD of the skips between values).
779
+ # ' @param time_step Optional function used to define the meaning of one time
780
+ # ' step, which if specified, overrides the default choice based on the
781
+ # ' `time_value` column. This function must take a positive integer and return
782
+ # ' an object of class `lubridate::period`. For example, we can use `time_step
783
+ # ' = lubridate::hours` in order to set the time step to be one hour (this
784
+ # ' would only be meaningful if `time_value` is of class `POSIXct`).
785
+ # ' @param new_col_name String indicating the name of the new column that will
786
+ # ' contain the slide values. Default is "slide_value"; note that setting
787
+ # ' `new_col_name` equal to an existing column name will overwrite this column.
788
+ # ' @param as_list_col Should the slide results be held in a list column, or be
789
+ # ' [unchopped][tidyr::unchop]/[unnested][tidyr::unnest]? Default is `FALSE`,
790
+ # ' in which case a list object returned by `f` would be unnested (using
791
+ # ' [`tidyr::unnest()`]), and, if the slide computations output data frames,
792
+ # ' the names of the resulting columns are given by prepending `new_col_name`
793
+ # ' to the names of the list elements.
794
+ # ' @param names_sep String specifying the separator to use in `tidyr::unnest()`
795
+ # ' when `as_list_col = FALSE`. Default is "_". Using `NULL` drops the prefix
796
+ # ' from `new_col_name` entirely.
797
+ # ' @param all_versions (Not the same as `all_rows` parameter of `epi_slide`.) If
798
+ # ' `all_versions = TRUE`, then `f` will be passed the version history (all
799
+ # ' `version <= ref_time_value`) for rows having `time_value` between
800
+ # ' `ref_time_value - before` and `ref_time_value`. Otherwise, `f` will be
801
+ # ' passed only the most recent `version` for every unique `time_value`.
802
+ # ' Default is `FALSE`.
691
803
slide = function (f , ... , before , ref_time_values ,
692
804
time_step , new_col_name = " slide_value" ,
693
805
as_list_col = FALSE , names_sep = " _" ,
@@ -717,7 +829,7 @@ epi_archive <-
717
829
# ' Converts a data frame, data table, or tibble into an `epi_archive`
718
830
# ' object. See the [archive
719
831
# ' vignette](https://cmu-delphi.github.io/epiprocess/articles/archive.html) for
720
- # ' examples.
832
+ # ' examples. The parameter descriptions below are copied from there.
721
833
# '
722
834
# ' @param x A data frame, data table, or tibble, with columns `geo_value`,
723
835
# ' `time_value`, `version`, and then any additional number of columns.
0 commit comments