Skip to content

Commit 540549d

Browse files
committed
fix!: as_epi_archive(tibble) key setting; + distrust key if data.table
1 parent e2fb79b commit 540549d

File tree

4 files changed

+45
-28
lines changed

4 files changed

+45
-28
lines changed

NAMESPACE

+2
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ importFrom(checkmate,assert_scalar)
124124
importFrom(checkmate,assert_string)
125125
importFrom(checkmate,assert_subset)
126126
importFrom(checkmate,assert_tibble)
127+
importFrom(checkmate,assert_true)
127128
importFrom(checkmate,checkInt)
128129
importFrom(checkmate,check_atomic)
129130
importFrom(checkmate,check_character)
@@ -160,6 +161,7 @@ importFrom(data.table,key)
160161
importFrom(data.table,rbindlist)
161162
importFrom(data.table,set)
162163
importFrom(data.table,setDF)
164+
importFrom(data.table,setDT)
163165
importFrom(data.table,setcolorder)
164166
importFrom(data.table,setkeyv)
165167
importFrom(dplyr,"%>%")

NEWS.md

+12
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,18 @@
22

33
Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicate PR's.
44

5+
# epiprocess 0.12
6+
7+
## Breaking changes
8+
9+
- `new_epi_archive()`'s `x` argument has been replaced with a `data_table`
10+
argument, which must be a `data.table` with the key already set appropriately.
11+
12+
## Bug fixes
13+
14+
- `as_epi_archive()` no longer has issues setting its `DT`'s `key` on some
15+
versions of `{data.table}` when `x` is a tibble.
16+
517
# epiprocess 0.11
618

719
## Breaking changes

R/archive.R

+29-28
Original file line numberDiff line numberDiff line change
@@ -278,41 +278,22 @@ next_after.Date <- function(x) x + 1L
278278
#' x <- df %>% as_epi_archive(other_keys = "county")
279279
#'
280280
new_epi_archive <- function(
281-
x,
281+
data_table,
282282
geo_type,
283283
time_type,
284284
other_keys,
285285
clobberable_versions_start,
286286
versions_end) {
287-
assert_data_frame(x)
287+
assert_class(data_table, "data.table")
288288
assert_string(geo_type)
289289
assert_string(time_type)
290290
assert_character(other_keys, any.missing = FALSE)
291291
if (any(c("geo_value", "time_value", "version") %in% other_keys)) {
292292
cli_abort("`other_keys` cannot contain \"geo_value\", \"time_value\", or \"version\".")
293293
}
294-
validate_version_bound(clobberable_versions_start, x, na_ok = TRUE)
295-
validate_version_bound(versions_end, x, na_ok = FALSE)
296-
297-
key_vars <- c("geo_value", "time_value", other_keys, "version")
298-
if (!all(key_vars %in% names(x))) {
299-
# Give a more tailored error message than as.data.table would:
300-
cli_abort(c(
301-
"`x` is missing the following expected columns:
302-
{format_varnames(setdiff(key_vars, names(x)))}.",
303-
">" = "You might need to `dplyr::rename()` beforehand
304-
or use `as_epi_archive()`'s renaming feature.",
305-
">" = if (!all(other_keys %in% names(x))) {
306-
"Check also for typos in `other_keys`."
307-
}
308-
))
309-
}
310-
311-
# Create the data table; if x was an un-keyed data.table itself,
312-
# then the call to as.data.table() will fail to set keys, so we
313-
# need to check this, then do it manually if needed
314-
data_table <- as.data.table(x, key = key_vars)
315-
if (!identical(key_vars, key(data_table))) setkeyv(data_table, cols = key_vars)
294+
assert_true(identical(key(data_table), c("geo_value", "time_value", other_keys, "version")))
295+
validate_version_bound(clobberable_versions_start, data_table, na_ok = TRUE)
296+
validate_version_bound(versions_end, data_table, na_ok = FALSE)
316297

317298
structure(
318299
list(
@@ -523,11 +504,32 @@ as_epi_archive <- function(
523504
.versions_end = max_version_with_row_in(x), ...,
524505
versions_end = .versions_end) {
525506
assert_data_frame(x)
507+
# Convert first to data.frame to guard against data.table#6859 and potentially
508+
# other things epiprocess#618:
509+
x_already_copied <- identical(class(x), c("data.table", "data.frame"))
510+
x <- as.data.frame(x)
526511
x <- rename(x, ...)
527-
x <- guess_column_name(x, "time_value", time_column_names())
528512
x <- guess_column_name(x, "geo_value", geo_column_names())
513+
if (!all(other_keys %in% names(x))) {
514+
# Give a more tailored error message than as.data.table would:
515+
cli_abort(c(
516+
"`x` is missing the following expected columns:
517+
{format_varnames(setdiff(other_keys, names(x)))}.",
518+
">" = "You might need to `dplyr::rename()` beforehand
519+
or using `as_epi_archive()`'s renaming feature."
520+
))
521+
}
522+
x <- guess_column_name(x, "time_value", time_column_names())
529523
x <- guess_column_name(x, "version", version_column_names())
530524

525+
# Convert to data.table:
526+
key_vars <- c("geo_value", "time_value", other_keys, "version")
527+
if (x_already_copied) {
528+
setDT(x, key = key_vars)
529+
} else {
530+
x <- as.data.table(x, key = key_vars)
531+
}
532+
531533
if (lifecycle::is_present(geo_type)) {
532534
cli_warn("epi_archive constructor argument `geo_type` is now ignored. Consider removing.")
533535
}
@@ -548,11 +550,10 @@ as_epi_archive <- function(
548550
cli_abort('`compactify` must be `TRUE`, `FALSE`, or `"message"`')
549551
}
550552

551-
data_table <- result$DT
552-
key_vars <- key(data_table)
553+
data_table <- result$DT # probably just `x`, but take no chances
553554

554555
nrow_before_compactify <- nrow(data_table)
555-
# Runs compactify on data frame
556+
# Runs compactify on data_table
556557
if (identical(compactify, TRUE) || identical(compactify, "message")) {
557558
compactified <- apply_compactify(data_table, key_vars, compactify_abs_tol)
558559
} else {

R/epiprocess-package.R

+2
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#' @importFrom checkmate assert_string
1212
#' @importFrom checkmate assert_subset
1313
#' @importFrom checkmate assert_tibble
14+
#' @importFrom checkmate assert_true
1415
#' @importFrom checkmate check_atomic check_data_frame expect_class test_int
1516
#' @importFrom checkmate check_character
1617
#' @importFrom checkmate check_logical
@@ -25,6 +26,7 @@
2526
#' @importFrom data.table fifelse
2627
#' @importFrom data.table key
2728
#' @importFrom data.table setcolorder
29+
#' @importFrom data.table setDT
2830
#' @importFrom data.table setkeyv
2931
#' @importFrom dplyr arrange
3032
#' @importFrom dplyr grouped_df

0 commit comments

Comments
 (0)