Skip to content

Commit 71e11f7

Browse files
authored
Merge pull request #390 from cmu-delphi/fixSelect
other keys defaulting to `character(0)` and fix `select` on grouped `epi_df`s
2 parents e40d02a + ff3dfae commit 71e11f7

File tree

7 files changed

+148
-19
lines changed

7 files changed

+148
-19
lines changed

DESCRIPTION

+1
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ Collate:
7676
'data.R'
7777
'epi_df.R'
7878
'epiprocess.R'
79+
'group_by_epi_df_methods.R'
7980
'methods-epi_archive.R'
8081
'grouped_epi_archive.R'
8182
'growth_rate.R'

NAMESPACE

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ S3method(groups,grouped_epi_archive)
2323
S3method(next_after,Date)
2424
S3method(next_after,integer)
2525
S3method(print,epi_df)
26+
S3method(select,epi_df)
2627
S3method(summary,epi_df)
2728
S3method(ungroup,epi_df)
2829
S3method(ungroup,grouped_epi_archive)

R/epi_df.R

+4-1
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,9 @@ new_epi_df = function(x = tibble::tibble(), geo_type, time_type, as_of,
122122
if (!is.list(additional_metadata)) {
123123
Abort("`additional_metadata` must be a list type.")
124124
}
125+
if (is.null(additional_metadata[["other_keys"]])) {
126+
additional_metadata[["other_keys"]] <- character(0L)
127+
}
125128

126129
# If geo type is missing, then try to guess it
127130
if (missing(geo_type)) {
@@ -334,4 +337,4 @@ as_epi_df.tbl_ts = function(x, geo_type, time_type, as_of,
334337
#' @export
335338
is_epi_df = function(x) {
336339
inherits(x, "epi_df")
337-
}
340+
}

R/group_by_epi_df_methods.R

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# These methods (and maybe some others in methods-epi_df.R) are here to augment
2+
# `?dplyr_extending` implementations to get the correct behavior on grouped
3+
# `epi_df`s. It would be nice if there were a way to replace these with a
4+
# generic core that automatically fixed all misbehaving methods; see
5+
# brainstorming within Issue #223.
6+
7+
#' @importFrom dplyr select
8+
#' @export
9+
select.epi_df <- function(.data, ...) {
10+
selected <- NextMethod(.data)
11+
might_decay <- reclass(selected, attr(selected, "metadata"))
12+
return(dplyr_reconstruct(might_decay, might_decay))
13+
}
14+
15+
# others to consider:
16+
# - arrange
17+
# -

R/methods-epi_df.R

+8-4
Original file line numberDiff line numberDiff line change
@@ -179,10 +179,14 @@ dplyr_row_slice.epi_df = function(data, i, ...) {
179179

180180
#' @export
181181
`names<-.epi_df` = function(x, value) {
182-
old_names = names(x)
183-
old_other_keys = attributes(x)$metadata$other_keys
184-
result = NextMethod()
185-
attributes(x)$metadata$other_keys <- value[match(old_other_keys, old_names)]
182+
old_names <- names(x)
183+
old_metadata <- attr(x, "metadata")
184+
old_other_keys <- old_metadata[["other_keys"]]
185+
new_other_keys <- value[match(old_other_keys, old_names)]
186+
new_metadata <- old_metadata
187+
new_metadata[["other_keys"]] <- new_other_keys
188+
result <- reclass(NextMethod(), new_metadata)
189+
# decay to non-`epi_df` if needed:
186190
dplyr::dplyr_reconstruct(result, result)
187191
}
188192

tests/testthat/test-epi_df.R

+80-14
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,27 @@
11
test_that("new_epi_df works as intended", {
22
# Empty tibble
3-
wmsg = capture_warnings(a <- new_epi_df())
4-
expect_match(wmsg[1],
5-
"Unknown or uninitialised column: `geo_value`.")
6-
expect_match(wmsg[2],
7-
"Unknown or uninitialised column: `time_value`.")
3+
wmsg <- capture_warnings(a <- new_epi_df())
4+
expect_match(
5+
wmsg[1],
6+
"Unknown or uninitialised column: `geo_value`."
7+
)
8+
expect_match(
9+
wmsg[2],
10+
"Unknown or uninitialised column: `time_value`."
11+
)
812
expect_true(is_epi_df(a))
913
expect_identical(attributes(a)$metadata$geo_type, "custom")
1014
expect_identical(attributes(a)$metadata$time_type, "custom")
1115
expect_true(lubridate::is.POSIXt(attributes(a)$metadata$as_of))
12-
16+
1317
# Simple non-empty tibble with geo_value and time_value cols
1418
tib <- tibble::tibble(
1519
x = 1:10, y = 1:10,
1620
time_value = rep(seq(as.Date("2020-01-01"), by = 1, length.out = 5), times = 2),
1721
geo_value = rep(c("ca", "hi"), each = 5)
1822
)
19-
20-
epi_tib = new_epi_df(tib)
23+
24+
epi_tib <- new_epi_df(tib)
2125
expect_true(is_epi_df(epi_tib))
2226
expect_length(epi_tib, 4L)
2327
expect_identical(attributes(epi_tib)$metadata$geo_type, "state")
@@ -32,10 +36,72 @@ test_that("as_epi_df errors when additional_metadata is not a list", {
3236
dplyr::slice_tail(n = 6) %>%
3337
tsibble::as_tsibble() %>%
3438
dplyr::mutate(
35-
state = rep("MA",6),
36-
pol = rep(c("blue", "swing", "swing"), each = 2))
37-
39+
state = rep("MA", 6),
40+
pol = rep(c("blue", "swing", "swing"), each = 2)
41+
)
42+
3843
expect_error(
39-
as_epi_df(ex_input, additional_metadata = c(other_keys = "state", "pol")),
40-
"`additional_metadata` must be a list type.")
41-
})
44+
as_epi_df(ex_input, additional_metadata = c(other_keys = "state", "pol")),
45+
"`additional_metadata` must be a list type."
46+
)
47+
})
48+
49+
# select fixes
50+
51+
tib <- tibble::tibble(
52+
x = 1:10, y = 1:10,
53+
time_value = rep(seq(as.Date("2020-01-01"),
54+
by = 1, length.out = 5
55+
), times = 2),
56+
geo_value = rep(c("ca", "hi"), each = 5)
57+
)
58+
epi_tib <- epiprocess::new_epi_df(tib)
59+
test_that("grouped epi_df maintains type for select", {
60+
grouped_epi <- epi_tib %>% group_by(geo_value)
61+
selected_df <- grouped_epi %>% select(-y)
62+
expect_true(inherits(selected_df, "epi_df"))
63+
# make sure that the attributes are right
64+
epi_attr <- attributes(selected_df)
65+
expect_identical(epi_attr$names, c("geo_value", "time_value", "x"))
66+
expect_identical(epi_attr$row.names, seq(1, 10))
67+
expect_identical(epi_attr$groups, attributes(grouped_epi)$groups)
68+
expect_identical(epi_attr$metadata, attributes(epi_tib)$metadata)
69+
expect_identical(selected_df, epi_tib %>% select(-y) %>% group_by(geo_value))
70+
})
71+
72+
test_that("grouped epi_df drops type when dropping keys", {
73+
grouped_epi <- epi_tib %>% group_by(geo_value)
74+
selected_df <- grouped_epi %>% select(geo_value)
75+
expect_true(!inherits(selected_df, "epi_df"))
76+
})
77+
78+
test_that("grouped epi_df handles extra keys correctly", {
79+
tib <- tibble::tibble(
80+
x = 1:10, y = 1:10,
81+
time_value = rep(seq(as.Date("2020-01-01"),
82+
by = 1, length.out = 5
83+
), times = 2),
84+
geo_value = rep(c("ca", "hi"), each = 5),
85+
extra_key = rep(seq(as.Date("2020-01-01"),
86+
by = 1, length.out = 5
87+
), times = 2)
88+
)
89+
epi_tib <- epiprocess::new_epi_df(tib,
90+
additional_metadata = list(other_keys = "extra_key")
91+
)
92+
attributes(epi_tib)
93+
grouped_epi <- epi_tib %>% group_by(geo_value)
94+
selected_df <- grouped_epi %>% select(-extra_key)
95+
expect_true(inherits(selected_df, "epi_df"))
96+
# make sure that the attributes are right
97+
old_attr <- attributes(epi_tib)
98+
epi_attr <- attributes(selected_df)
99+
expect_identical(epi_attr$names, c("geo_value", "time_value", "x", "y"))
100+
expect_identical(epi_attr$row.names, seq(1, 10))
101+
expect_identical(epi_attr$groups, attributes(grouped_epi)$groups)
102+
expect_identical(epi_attr$metadata, list(
103+
geo_type = "state", time_type = "day",
104+
as_of = old_attr$metadata$as_of,
105+
other_keys = character(0)
106+
))
107+
})

tests/testthat/test-methods-epi_df.R

+37
Original file line numberDiff line numberDiff line change
@@ -124,3 +124,40 @@ test_that("Metadata and grouping are dropped by `as_tibble`", {
124124
!any(c("metadata", "groups") %in% names(attributes(grouped_converted)))
125125
)
126126
})
127+
128+
test_that("Renaming columns gives appropriate colnames and metadata", {
129+
edf <- tibble::tibble(geo_value = 1, time_value = 1, age = 1, value = 1) %>%
130+
as_epi_df(additional_metadata = list(other_keys = "age"))
131+
# renaming using base R
132+
renamed_edf1 <- edf %>%
133+
`[`(c("geo_value", "time_value", "age", "value")) %>%
134+
`names<-`(c("geo_value", "time_value", "age_group", "value"))
135+
expect_identical(names(renamed_edf1), c("geo_value", "time_value", "age_group", "value"))
136+
expect_identical(attr(renamed_edf1, "metadata")$other_keys, c("age_group"))
137+
# renaming using select
138+
renamed_edf2 <- edf %>%
139+
as_epi_df(additional_metadata = list(other_keys = "age")) %>%
140+
select(geo_value, time_value, age_group = age, value)
141+
expect_identical(renamed_edf1, renamed_edf2)
142+
})
143+
144+
test_that("Renaming columns while grouped gives appropriate colnames and metadata", {
145+
gedf <- tibble::tibble(geo_value = 1, time_value = 1, age = 1, value = 1) %>%
146+
as_epi_df(additional_metadata = list(other_keys = "age")) %>%
147+
group_by(geo_value)
148+
# renaming using base R
149+
renamed_gedf1 <- gedf %>%
150+
`[`(c("geo_value", "time_value", "age", "value")) %>%
151+
`names<-`(c("geo_value", "time_value", "age_group", "value"))
152+
# tets type preservation
153+
expect_true(inherits(renamed_gedf1, "epi_df"))
154+
expect_true(inherits(renamed_gedf1, "grouped_df"))
155+
# the names are right
156+
expect_identical(names(renamed_gedf1), c("geo_value", "time_value", "age_group", "value"))
157+
expect_identical(attr(renamed_gedf1, "metadata")$other_keys, c("age_group"))
158+
# renaming using select
159+
renamed_gedf2 <- gedf %>%
160+
as_epi_df(additional_metadata = list(other_keys = "age")) %>%
161+
select(geo_value, time_value, age_group = age, value)
162+
expect_identical(renamed_gedf1, renamed_gedf2)
163+
})

0 commit comments

Comments
 (0)