Skip to content

Commit a2c5154

Browse files
authored
Merge pull request #484 from cmu-delphi/lcb/fix-guess-period-datetimes
Fix guess_period on datetimes, make it more precise + generic
2 parents 69ea5e4 + 8948868 commit a2c5154

File tree

6 files changed

+122
-37
lines changed

6 files changed

+122
-37
lines changed

DESCRIPTION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Type: Package
22
Package: epiprocess
33
Title: Tools for basic signal processing in epidemiology
4-
Version: 0.7.13
4+
Version: 0.7.14
55
Authors@R: c(
66
person("Jacob", "Bien", role = "ctb"),
77
person("Logan", "Brooks", email = "[email protected]", role = c("aut", "cre")),

NAMESPACE

+4
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ S3method(group_by,grouped_epi_archive)
2525
S3method(group_by_drop_default,grouped_epi_archive)
2626
S3method(group_modify,epi_df)
2727
S3method(groups,grouped_epi_archive)
28+
S3method(guess_period,Date)
29+
S3method(guess_period,POSIXt)
30+
S3method(guess_period,default)
2831
S3method(key_colnames,data.frame)
2932
S3method(key_colnames,default)
3033
S3method(key_colnames,epi_archive)
@@ -65,6 +68,7 @@ export(geo_column_names)
6568
export(group_by)
6669
export(group_modify)
6770
export(growth_rate)
71+
export(guess_period)
6872
export(is_epi_df)
6973
export(is_grouped_epi_archive)
7074
export(key_colnames)

NEWS.md

+2
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat
4545
of default conversions, see `time_column_names` for a list of columns that
4646
will automatically be recognized and converted to `time_value` column (there
4747
are similar functions for `geo` and `version`).
48+
- Fixed bug where `epix_slide_ref_time_values_default()` on datetimes would
49+
output a huge number of `ref_time_values` spaced apart by mere seconds.
4850

4951
## Cleanup
5052
- Resolved some linting messages in package checks (#468).

R/utils.R

+49-23
Original file line numberDiff line numberDiff line change
@@ -769,28 +769,54 @@ gcd_num <- function(dividends, ..., rrtol = 1e-6, pqlim = 1e6, irtol = 1e-6) {
769769
vctrs::vec_cast(numeric_gcd, dividends)
770770
}
771771

772-
#' Use max valid period as guess for `period` of `ref_time_values`
773-
#'
774-
#' @param ref_time_values Vector containing time-interval-like or time-like
775-
#' data, with at least two distinct values, [`diff`]-able (e.g., a
776-
#' `time_value` or `version` column), and should have a sensible result from
777-
#' adding `is.numeric` versions of its `diff` result (via `as.integer` if its
778-
#' `typeof` is `"integer"`, otherwise via `as.numeric`).
779-
#' @param ref_time_values_arg Optional, string; name to give `ref_time_values`
780-
#' in error messages. Defaults to quoting the expression the caller fed into
781-
#' the `ref_time_values` argument.
782-
#' @return `is.numeric`, length 1; attempts to match `typeof(ref_time_values)`
783-
guess_period <- function(ref_time_values, ref_time_values_arg = rlang::caller_arg(ref_time_values)) {
784-
sorted_distinct_ref_time_values <- sort(unique(ref_time_values))
785-
if (length(sorted_distinct_ref_time_values) < 2L) {
786-
cli_abort("Not enough distinct values in {.code {ref_time_values_arg}} to guess the period.", ref_time_values_arg)
772+
#' Use max valid period as guess for `period` of `time_values`
773+
#'
774+
#' `r lifecycle::badge("experimental")`
775+
#'
776+
#' @param time_values Vector containing time-interval-like or time-point-like
777+
#' data, with at least two distinct values.
778+
#' @param time_values_arg Optional, string; name to give `time_values` in error
779+
#' messages. Defaults to quoting the expression the caller fed into the
780+
#' `time_values` argument.
781+
#' @param ... Should be empty, there to satisfy the S3 generic.
782+
#' @return length-1 vector; `r lifecycle::badge("experimental")` class will
783+
#' either be the same class as [`base::diff()`] on such time values, an
784+
#' integer, or a double, such that all `time_values` can be exactly obtained
785+
#' by adding `k * result` for an integer k, and such that there is no smaller
786+
#' `result` that can achieve this.
787+
#'
788+
#' @export
789+
guess_period <- function(time_values, time_values_arg = rlang::caller_arg(time_values), ...) {
790+
UseMethod("guess_period")
791+
}
792+
793+
#' @export
794+
guess_period.default <- function(time_values, time_values_arg = rlang::caller_arg(time_values), ...) {
795+
rlang::check_dots_empty()
796+
sorted_distinct_time_values <- sort(unique(time_values))
797+
if (length(sorted_distinct_time_values) < 2L) {
798+
cli_abort("Not enough distinct values in {.code {time_values_arg}} to guess the period.",
799+
class = "epiprocess__guess_period__not_enough_times",
800+
time_values = time_values
801+
)
787802
}
788-
skips <- diff(sorted_distinct_ref_time_values)
789-
decayed_skips <-
790-
if (typeof(skips) == "integer") {
791-
as.integer(skips)
792-
} else {
793-
as.numeric(skips)
794-
}
795-
gcd_num(decayed_skips)
803+
skips <- diff(sorted_distinct_time_values)
804+
# Certain diff results have special classes or attributes; use vctrs to try to
805+
# appropriately destructure for gcd_num, then restore to their original class
806+
# & attributes.
807+
skips_data <- vctrs::vec_data(skips)
808+
period_data <- gcd_num(skips_data, rrtol = 0)
809+
vctrs::vec_restore(period_data, skips)
810+
}
811+
812+
# `full_seq()` doesn't like difftimes, so convert to the natural units of some time types:
813+
814+
#' @export
815+
guess_period.Date <- function(time_values, time_values_arg = rlang::caller_arg(time_values), ...) {
816+
as.numeric(NextMethod(), units = "days")
817+
}
818+
819+
#' @export
820+
guess_period.POSIXt <- function(time_values, time_values_arg = rlang::caller_arg(time_values), ...) {
821+
as.numeric(NextMethod(), units = "secs")
796822
}

man/guess_period.Rd

+17-13
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-utils.R

+49
Original file line numberDiff line numberDiff line change
@@ -231,3 +231,52 @@ test_that("as_slide_computation raises errors as expected", {
231231
class = "epiprocess__as_slide_computation__cant_convert_catchall"
232232
)
233233
})
234+
235+
test_that("guess_period works", {
236+
# Error cases:
237+
expect_error(guess_period(numeric(0L)), class = "epiprocess__guess_period__not_enough_times")
238+
expect_error(guess_period(c(1)), class = "epiprocess__guess_period__not_enough_times")
239+
# Different numeric classes and cases:
240+
expect_identical(guess_period(c(1, 8)), 7)
241+
expect_identical(guess_period(c(1, 8, 15)), 7)
242+
expect_identical(guess_period(c(1L, 8L, 15L)), 7L)
243+
expect_identical(guess_period(c(0, 7, 14, 15)), 1)
244+
# We currently allow the guessed frequency to not appear in the diffs, but
245+
# this might not be a good idea as it likely indicates an issue with the data
246+
# (#485).
247+
expect_identical(guess_period(c(0, 2, 5)), 1)
248+
expect_identical(guess_period(c(0, 4, 10)), 2)
249+
# On Dates:
250+
daily_dates <- seq(as.Date("2020-01-01"), as.Date("2020-01-15"), by = "day")
251+
weekly_dates <- seq(as.Date("2020-01-01"), as.Date("2020-01-15"), by = "week")
252+
expect_identical(
253+
daily_dates[[1L]] + guess_period(daily_dates) * (seq_along(daily_dates) - 1L),
254+
daily_dates
255+
)
256+
expect_identical(
257+
weekly_dates[[1L]] + guess_period(weekly_dates) * (seq_along(weekly_dates) - 1L),
258+
weekly_dates
259+
)
260+
# On POSIXcts:
261+
daily_posixcts <- as.POSIXct(daily_dates, tz = "ET") + 3600
262+
weekly_posixcts <- as.POSIXct(weekly_dates, tz = "ET") + 3600
263+
expect_identical(
264+
daily_posixcts[[1L]] + guess_period(daily_posixcts) * (seq_along(daily_posixcts) - 1L),
265+
daily_posixcts
266+
)
267+
expect_identical(
268+
weekly_posixcts[[1L]] + guess_period(weekly_posixcts) * (seq_along(weekly_posixcts) - 1L),
269+
weekly_posixcts
270+
)
271+
# On POSIXlts:
272+
daily_posixlts <- as.POSIXlt(daily_dates, tz = "ET") + 3600
273+
weekly_posixlts <- as.POSIXlt(weekly_dates, tz = "ET") + 3600
274+
expect_identical(
275+
daily_posixlts[[1L]] + guess_period(daily_posixlts) * (seq_along(daily_posixlts) - 1L),
276+
daily_posixlts
277+
)
278+
expect_identical(
279+
weekly_posixlts[[1L]] + guess_period(weekly_posixlts) * (seq_along(weekly_posixlts) - 1L),
280+
weekly_posixlts
281+
)
282+
})

0 commit comments

Comments
 (0)