240
240
# ' value of `clobberable_versions_start` does not fully trust these empty
241
241
# ' updates, and assumes that any version `>= max(x$version)` could be
242
242
# ' clobbered.) If `nrow(x) == 0`, then this argument is mandatory.
243
+ # ' @param compactify_tol double. the tolerance used to detect approximate equality for compactification
243
244
# ' @return An `epi_archive` object.
244
245
# '
245
246
# ' @importFrom data.table as.data.table key setkeyv
@@ -295,15 +296,16 @@ new_epi_archive <- function(
295
296
additional_metadata ,
296
297
compactify ,
297
298
clobberable_versions_start ,
298
- versions_end ) {
299
+ versions_end ,
300
+ compactify_tol = .Machine $ double.eps ^ 0.5 ) {
299
301
# Create the data table; if x was an un-keyed data.table itself,
300
302
# then the call to as.data.table() will fail to set keys, so we
301
303
# need to check this, then do it manually if needed
302
304
key_vars <- c(" geo_value" , " time_value" , other_keys , " version" )
303
- DT <- as.data.table(x , key = key_vars ) # nolint: object_name_linter
304
- if (! identical(key_vars , key(DT ))) setkeyv(DT , cols = key_vars )
305
+ data_table <- as.data.table(x , key = key_vars ) # nolint: object_name_linter
306
+ if (! identical(key_vars , key(data_table ))) setkeyv(data_table , cols = key_vars )
305
307
306
- if (anyDuplicated(DT , by = key(DT )) != 0L ) {
308
+ if (anyDuplicated(data_table , by = key(data_table )) != 0L ) {
307
309
cli_abort(" `x` must have one row per unique combination of the key variables. If you
308
310
have additional key variables other than `geo_value`, `time_value`, and
309
311
`version`, such as an age group column, please specify them in `other_keys`.
@@ -313,38 +315,17 @@ new_epi_archive <- function(
313
315
)
314
316
}
315
317
316
- # Checks to see if a value in a vector is LOCF
317
- is_locf <- function (vec ) { # nolint: object_usage_linter
318
- dplyr :: if_else(! is.na(vec ) & ! is.na(dplyr :: lag(vec )),
319
- vec == dplyr :: lag(vec ),
320
- is.na(vec ) & is.na(dplyr :: lag(vec ))
321
- )
322
- }
323
-
324
- # LOCF is defined by a row where all values except for the version
325
- # differ from their respective lag values
326
-
327
- # Checks for LOCF's in a data frame
328
- rm_locf <- function (df ) {
329
- dplyr :: filter(df , if_any(c(everything(), - version ), ~ ! is_locf(. ))) # nolint: object_usage_linter
330
- }
331
-
332
- # Keeps LOCF values, such as to be printed
333
- keep_locf <- function (df ) {
334
- dplyr :: filter(df , if_all(c(everything(), - version ), ~ is_locf(. ))) # nolint: object_usage_linter
335
- }
336
-
318
+ nrow_before_compactify <- nrow(data_table )
337
319
# Runs compactify on data frame
338
320
if (is.null(compactify ) || compactify == TRUE ) {
339
- elim <- keep_locf(DT )
340
- DT <- rm_locf(DT ) # nolint: object_name_linter
321
+ compactified <- apply_compactify(data_table , key_vars , compactify_tol )
341
322
} else {
342
- # Create empty data frame for nrow(elim) to be 0
343
- elim <- tibble :: tibble()
323
+ compactified <- data_table
344
324
}
345
-
346
- # Warns about redundant rows
347
- if (is.null(compactify ) && nrow(elim ) > 0 ) {
325
+ # Warns about redundant rows if the number of rows decreased, and we didn't
326
+ # explicitly say to compactify
327
+ if (is.null(compactify ) && nrow(compactified ) < nrow_before_compactify ) {
328
+ elim <- removed_by_compactify(data_table , key_vars , compactify_tol )
348
329
warning_intro <- cli :: format_inline(
349
330
" Found rows that appear redundant based on
350
331
last (version of each) observation carried forward;
@@ -366,7 +347,7 @@ new_epi_archive <- function(
366
347
367
348
structure(
368
349
list (
369
- DT = DT ,
350
+ DT = compactified ,
370
351
geo_type = geo_type ,
371
352
time_type = time_type ,
372
353
additional_metadata = additional_metadata ,
@@ -377,6 +358,63 @@ new_epi_archive <- function(
377
358
)
378
359
}
379
360
361
+ # ' given a tibble as would be found in an epi_archive, remove duplicate entries.
362
+ # ' @description
363
+ # ' works by shifting all rows except the version, then comparing values to see
364
+ # ' if they've changed. We need to arrange in descending order, but note that
365
+ # ' we don't need to group, since at least one column other than version has
366
+ # ' changed, and so is kept.
367
+ # ' @keywords internal
368
+ # ' @importFrom dplyr filter
369
+ apply_compactify <- function (df , keys , tolerance = .Machine $ double.eps ^ .5 ) {
370
+ df %> %
371
+ arrange(!!! keys ) %> %
372
+ filter(if_any(
373
+ c(everything(), - version ), # all non-version columns
374
+ ~ ! is_locf(. , tolerance )
375
+ ))
376
+ }
377
+
378
+ # ' get the entries that `compactify` would remove
379
+ # ' @keywords internal
380
+ # ' @importFrom dplyr filter if_all everything
381
+ removed_by_compactify <- function (df , keys , tolerance ) {
382
+ df %> %
383
+ arrange(!!! keys ) %> %
384
+ filter(if_all(
385
+ c(everything(), - version ),
386
+ ~ is_locf(. , tolerance )
387
+ )) # nolint: object_usage_linter
388
+ }
389
+
390
+ # ' Checks to see if a value in a vector is LOCF
391
+ # ' @description
392
+ # ' LOCF meaning last observation carried forward. lags the vector by 1, then
393
+ # ' compares with itself. For doubles it uses float comparison via
394
+ # ' [`dplyr::near`], otherwise it uses equality. `NA`'s and `NaN`'s are
395
+ # ' considered equal to themselves and each other.
396
+ # ' @importFrom dplyr lag if_else near
397
+ # ' @keywords internal
398
+ is_locf <- function (vec , tolerance ) { # nolint: object_usage_linter
399
+ lag_vec <- dplyr :: lag(vec )
400
+ if (typeof(vec ) == " double" ) {
401
+ res <- if_else(
402
+ ! is.na(vec ) & ! is.na(lag_vec ),
403
+ near(vec , lag_vec , tol = tolerance ),
404
+ is.na(vec ) & is.na(lag_vec )
405
+ )
406
+ return (res )
407
+ } else {
408
+ res <- if_else(
409
+ ! is.na(vec ) & ! is.na(lag_vec ),
410
+ vec == lag_vec ,
411
+ is.na(vec ) & is.na(lag_vec )
412
+ )
413
+ return (res )
414
+ }
415
+ }
416
+
417
+
380
418
# ' `validate_epi_archive` ensures correctness of arguments fed to `as_epi_archive`.
381
419
# '
382
420
# ' @rdname epi_archive
0 commit comments