Skip to content

Commit dd84924

Browse files
committed
tests(epi_slide_opt): on example data sets
1 parent 0435460 commit dd84924

File tree

1 file changed

+71
-21
lines changed

1 file changed

+71
-21
lines changed

tests/testthat/test-epi_slide_opt_archive.R

+71-21
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ test_that("epi_slide_opt_archive_one_epikey works as expected", {
3535
),
3636
tibble(
3737
version = 13, time_value = 8:10, value = c(9, 9, 10),
38-
slide_value = frollmean(c(6, 7, 9, 9, 10), 3, algo = "exact")[-(1:2)]
38+
slide_value = frollmean(c(6, 7, 9, 9, 10), 3, algo = "exact")[-(1:2)]
3939
),
4040
tibble(
4141
version = 14, time_value = 11:13, value = c(NA, 12, 13), slide_value = rep(NA_real_, 3L)
@@ -89,7 +89,6 @@ test_that("epi_slide_opt.epi_archive is not confused by unique(DT$version) unsor
8989
})
9090

9191
test_that("epi_slide_opt.epi_archive is not confused by unique(DT$time_value) unsorted", {
92-
9392
start_date <- as.Date("2020-01-01")
9493
tibble(
9594
geo_value = c(1, 1, 2, 2),
@@ -109,26 +108,77 @@ test_that("epi_slide_opt.epi_archive is not confused by unique(DT$time_value) un
109108
) %>%
110109
as_epi_archive()
111110
)
112-
113111
})
114112

115-
test_that("epi_slide_opt.epi_archive is equivalent to epix_slide reconversion on example data", {
116-
117-
case_death_rate_archive %>%
118-
epi_slide_opt(case_rate, frollmean, .window_size = 7
119-
# , algo = "exact"
120-
) %>%
121-
.$DT %>%
122-
as.data.frame() %>%
123-
as_tibble() %>%
124-
filter(!approx_equal(case_rate_7dav, case_rate_7d_av, 1e-6, TRUE)) %>%
125-
dplyr::transmute(version, geo_value, time_value, case_rate_7dav, case_rate_7d_av,
126-
abs_diff = abs(case_rate_7dav - case_rate_7d_av)) %>%
127-
{}
128-
129-
# TODO finish tests on example data sets
130-
131-
})
132-
113+
test_that("epi_slide_opt.epi_archive gives expected results on example data", {
114+
# vs. built-in case_rate_7d_av column.
115+
#
116+
# If we were to compare the keyset vs.
117+
# the original, it changes, as the original contains some tiny deviations in
118+
# values that don't seem achievable with available sliding functions. E.g., in
119+
# the recomputed result, geo "ak" version "2020-11-01" changes time 2020-03-13
120+
# from 0 to 0.138 and time 2020-03-14 from a slightly different value of 0.138
121+
# to 0, while nearby times remained stable; in the original, this resulted in
122+
# a tiny update to the 7d_av for 2020-03-14 but not following times somehow,
123+
# while in the recomputation there are also minute updates to 2020-03-15 and
124+
# 2020-03-16; 2020-03-17 onward have other case_rate changes factoring in.
125+
# Compactifying and comparing with tolerances would help account for some of
126+
# these differences, but only through writing this was it realized that both
127+
# archives would need the recompactification with tolerance; it's not just
128+
# epi_slide_opt.epi_archive's very rigid compactification that's the cause.
129+
# (Side note: allowing configurable compactification tolerance in
130+
# epi_slide_opt.epi_archive wasn't included due to either feeling strange
131+
# applying the compactification tolerance to all columns rather than just
132+
# computed columns, and a slowdown when using one approach to compactify just
133+
# the new columns + also awkward not matching what's possible with just
134+
# construction functions.)
135+
#
136+
# --> just compare essentially an epix_merge of the original & the recomputation:
137+
case_death_rate_archive_time <- system.time(
138+
case_death_rate_archive_result <- case_death_rate_archive %>%
139+
epi_slide_opt(case_rate, frollmean, algo = "exact", .window_size = 7)
140+
)
141+
expect_equal(
142+
case_death_rate_archive_result$DT$case_rate_7dav,
143+
case_death_rate_archive_result$DT$case_rate_7d_av
144+
)
145+
146+
# vs. computing via epix_slide:
147+
148+
mini_case_death_rate_archive <- case_death_rate_archive %>%
149+
{
150+
as_tibble(as.data.frame(.$DT))
151+
} %>%
152+
filter(geo_value %in% head(unique(geo_value), 4L)) %>%
153+
as_epi_archive()
154+
155+
mini_case_death_rate_archive_time_opt <- system.time(
156+
mini_case_death_rate_archive_result <- mini_case_death_rate_archive %>%
157+
epi_slide_opt(case_rate, frollmean, .window_size = 7)
158+
)
159+
160+
mini_case_death_rate_archive_time_gen <- system.time(
161+
mini_case_death_rate_archive_expected <- mini_case_death_rate_archive %>%
162+
epix_slide(~ .x %>% epi_slide_opt(case_rate, frollmean, .window_size = 7)) %>%
163+
select(names(mini_case_death_rate_archive$DT), everything()) %>%
164+
as_epi_archive()
165+
)
166+
167+
expect_equal(mini_case_death_rate_archive_result, mini_case_death_rate_archive_expected)
168+
169+
archive_cases_dv_subset_time_opt <- system.time(
170+
archive_cases_dv_subset_result <- archive_cases_dv_subset %>%
171+
epi_slide_opt(percent_cli, frollmean, .window_size = 7)
172+
)
173+
174+
archive_cases_dv_subset_time_gen <- system.time(
175+
archive_cases_dv_subset_expected <- archive_cases_dv_subset %>%
176+
epix_slide(~ .x %>% epi_slide_opt(percent_cli, frollmean, .window_size = 7)) %>%
177+
select(geo_value, time_value, version, everything()) %>%
178+
as_epi_archive()
179+
)
180+
181+
expect_equal(archive_cases_dv_subset_result, archive_cases_dv_subset_expected)
182+
})
133183

134184
# TODO grouped behavior checks

0 commit comments

Comments
 (0)