Skip to content

Commit 08b1783

Browse files
committed
approx_equal: make "abs_tol=" mandatory, +validation, +docs
1 parent ab82b09 commit 08b1783

File tree

7 files changed

+154
-34
lines changed

7 files changed

+154
-34
lines changed

NAMESPACE

+6
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ S3method(ungroup,epi_df)
5858
S3method(ungroup,grouped_epi_archive)
5959
S3method(unnest,epi_df)
6060
export("%>%")
61+
export(approx_equal)
6162
export(arrange)
6263
export(arrange_canonical)
6364
export(as_epi_archive)
@@ -125,8 +126,12 @@ importFrom(checkmate,assert_subset)
125126
importFrom(checkmate,assert_tibble)
126127
importFrom(checkmate,checkInt)
127128
importFrom(checkmate,check_atomic)
129+
importFrom(checkmate,check_character)
128130
importFrom(checkmate,check_data_frame)
131+
importFrom(checkmate,check_logical)
129132
importFrom(checkmate,check_names)
133+
importFrom(checkmate,check_null)
134+
importFrom(checkmate,check_numeric)
130135
importFrom(checkmate,expect_class)
131136
importFrom(checkmate,test_int)
132137
importFrom(checkmate,test_set_equal)
@@ -256,6 +261,7 @@ importFrom(tsibble,as_tsibble)
256261
importFrom(utils,capture.output)
257262
importFrom(utils,tail)
258263
importFrom(vctrs,"vec_slice<-")
264+
importFrom(vctrs,obj_is_vector)
259265
importFrom(vctrs,vec_cast)
260266
importFrom(vctrs,vec_cast_common)
261267
importFrom(vctrs,vec_data)

R/archive.R

+3-3
Original file line numberDiff line numberDiff line change
@@ -489,12 +489,12 @@ update_is_locf <- function(arranged_updates_df, ukey_names, abs_tol) {
489489
FALSE, # first observation is not LOCF
490490
approx_equal0(ekts_tbl,
491491
inds1 = inds1, ekts_tbl, inds2 = inds2,
492-
# check ekt cols without tolerance:
493-
abs_tol = 0, na_equal = TRUE
492+
# check ekt (key) cols with 0 tolerance:
493+
na_equal = TRUE, abs_tol = 0
494494
) &
495495
approx_equal0(vals_tbl,
496496
inds1 = inds1, vals_tbl, inds2 = inds2,
497-
abs_tol = abs_tol, na_equal = TRUE
497+
na_equal = TRUE, abs_tol = abs_tol
498498
)
499499
)
500500
}

R/epiprocess-package.R

+5
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,11 @@
1212
#' @importFrom checkmate assert_subset
1313
#' @importFrom checkmate assert_tibble
1414
#' @importFrom checkmate check_atomic check_data_frame expect_class test_int
15+
#' @importFrom checkmate check_character
16+
#' @importFrom checkmate check_logical
1517
#' @importFrom checkmate check_names
18+
#' @importFrom checkmate check_null
19+
#' @importFrom checkmate check_numeric
1620
#' @importFrom checkmate test_subset test_set_equal vname
1721
#' @importFrom cli cli_abort cli_warn
1822
#' @importFrom cli pluralize
@@ -36,6 +40,7 @@
3640
#' @importFrom tibble is_tibble
3741
#' @importFrom tidyr nest
3842
#' @importFrom tools toTitleCase
43+
#' @importFrom vctrs obj_is_vector
3944
#' @importFrom vctrs vec_cast
4045
#' @importFrom vctrs vec_cast_common
4146
#' @importFrom vctrs vec_data

R/patch.R

+82-16
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,88 @@
11
#' Test two vctrs vectors for equality with some tolerance in some cases
22
#'
3-
#' @param vec1,vec2 vctrs vectors (includes data frames)
4-
#' @param abs_tol tolerance; will be used for bare numeric `vec1`, `vec2`, or
5-
#' any such columns within `vec1`, `vec2` if they are data frames
3+
#' Similar to [`vctrs::vec_equal`]. Behavior may differ from `vec_equal` with
4+
#' non-`NA` `NaN`s involved, or for bare lists that contain named vectors, and
5+
#' the precise behavior in these cases may change and should not be relied upon.
6+
#'
7+
#' @param vec1,vec2 vctrs vectors (includes data frames). Take care when using
8+
#' on named vectors or "keyed" data frames; [`vec_names()`] are largely
9+
#' ignored, and key columns are treated as normal value columns (when they
10+
#' should probably generate an error if they are not lined up correctly, or be
11+
#' tested for exact rather than approximate equality).
612
#' @param na_equal should `NA`s be considered equal to each other? (In
713
#' epiprocess, we usually want this to be `TRUE`, but that doesn't match the
814
#' [`vctrs::vec_equal()`] default, so this is mandatory.)
9-
#' @param .ptype as in [`vctrs::vec_equal()`]
10-
#' @param inds1,inds2 optional (row) indices into vec1 and vec2; output should
11-
#' be consistent with `vec_slice`-ing to these indices beforehand, but can
12-
#' give faster computation if `vec1` and `vec2` are data frames.
15+
#' @param abs_tol absolute tolerance; will be used for bare numeric `vec1`,
16+
#' `vec2`, or any such columns within `vec1`, `vec2` if they are data frames.
17+
#' @param .ptype as in [`vctrs::vec_equal()`].
18+
#' @param inds1,inds2 optional (row) indices into vec1 and vec2 compatible with
19+
#' [`vctrs::vec_slice()`]; output should be consistent with `vec_slice`-ing to
20+
#' these indices beforehand, but can give faster computation if `vec1` and
21+
#' `vec2` are data frames.
1322
#'
1423
#' @return logical vector, with length matching the result of recycling `vec1`
1524
#' (at `inds1` if provided) and `vec2` (at `inds2` if provided); entries
16-
#' should all be `TRUE` or `FALSE` if `na_equal = TRUE`. Behavior may differ
17-
#' from `vec_equal` with non-`NA` `NaN`s involved, or for bare lists that
18-
#' contain named vectors.
19-
approx_equal <- function(vec1, vec2, abs_tol, na_equal, .ptype = NULL, inds1 = NULL, inds2 = NULL) {
25+
#' should all be `TRUE` or `FALSE` if `na_equal = TRUE`.
26+
#'
27+
#' @examples
28+
#'
29+
#' # On numeric vectors:
30+
#' approx_equal(
31+
#' c(1, 2, 3, NA),
32+
#' c(1, 2 + 1e-10, NA, NA),
33+
#' na_equal = TRUE,
34+
#' abs_tol = 1e-8
35+
#' )
36+
#'
37+
#' # On tibbles:
38+
#' tbl1 <- tibble(
39+
#' a = 1:5,
40+
#' b = list(1:5, 1:4, 1:3, 1:2, 1:1) %>% lapply(as.numeric),
41+
#' c = tibble(
42+
#' c1 = 1:5
43+
#' ),
44+
#' d = matrix(1:10, 5, 2)
45+
#' )
46+
#' tbl2 <- tbl1
47+
#' tbl2$a[[2]] <- tbl1$a[[2]] + 1e-10
48+
#' tbl2$b[[3]][[1]] <- tbl1$b[[3]][[1]] + 1e-10
49+
#' tbl2$c$c1[[4]] <- tbl1$c$c1[[4]] + 1e-10
50+
#' tbl2$d[[5, 2]] <- tbl1$d[[5, 2]] + 1e-10
51+
#' vctrs::vec_equal(tbl1, tbl2, na_equal = TRUE)
52+
#' approx_equal(tbl1, tbl2, na_equal = TRUE, abs_tol = 1e-12)
53+
#' approx_equal(tbl1, tbl2, na_equal = TRUE, abs_tol = 1e-8)
54+
#'
55+
#'
56+
#'
57+
#'
58+
#'
59+
#' # Type comparison within lists is stricter, matching vctrs:
60+
#' vctrs::vec_equal(list(1:2), list(as.numeric(1:2)))
61+
#' approx_equal(list(1:2), list(as.numeric(1:2)), FALSE, abs_tol = 0)
62+
#'
63+
#' @export
64+
approx_equal <- function(vec1, vec2, na_equal, .ptype = NULL, ..., abs_tol, inds1 = NULL, inds2 = NULL) {
65+
if (!obj_is_vector(vec1)) cli_abort("`vec1` must be recognized by vctrs as a vector")
66+
if (!obj_is_vector(vec2)) cli_abort("`vec2` must be recognized by vctrs as a vector")
67+
# Leave vec size checking to vctrs recycling ops.
68+
assert_logical(na_equal, any.missing = FALSE, len = 1L)
69+
# Leave .ptype checks to cast operation.
70+
check_dots_empty()
71+
assert_numeric(abs_tol, lower = 0, len = 1L)
72+
assert(
73+
check_null(inds1),
74+
check_numeric(inds1),
75+
check_logical(inds1),
76+
check_character(inds1)
77+
)
78+
assert(
79+
check_null(inds2),
80+
check_numeric(inds2),
81+
check_logical(inds2),
82+
check_character(inds2)
83+
)
84+
# Leave heavier index validation to the vctrs recycling & indexing ops.
85+
2086
# Recycle inds if provided; vecs if not:
2187
common_size <- vec_size_common(
2288
if (is.null(inds1)) vec1 else inds1,
@@ -33,13 +99,13 @@ approx_equal <- function(vec1, vec2, abs_tol, na_equal, .ptype = NULL, inds1 = N
3399
inds2 <- vec_recycle(inds2, common_size)
34100
}
35101
vecs <- vec_cast_common(vec1, vec2, .to = .ptype)
36-
approx_equal0(vecs[[1]], vecs[[2]], abs_tol, na_equal, inds1, inds2)
102+
approx_equal0(vecs[[1]], vecs[[2]], na_equal, abs_tol, inds1, inds2)
37103
}
38104

39105
#' Helper for [`approx_equal`] for vecs guaranteed to have the same ptype and size
40106
#'
41107
#' @keywords internal
42-
approx_equal0 <- function(vec1, vec2, abs_tol, na_equal, inds1 = NULL, inds2 = NULL) {
108+
approx_equal0 <- function(vec1, vec2, na_equal, abs_tol, inds1 = NULL, inds2 = NULL) {
43109
if (is_bare_numeric(vec1) && abs_tol != 0) {
44110
# perf: since we're working with bare numerics and logicals: we can use `[`
45111
# and `fifelse`. Matching vec_equal, we ignore names and other attributes.
@@ -66,7 +132,7 @@ approx_equal0 <- function(vec1, vec2, abs_tol, na_equal, inds1 = NULL, inds2 = N
66132
rep(TRUE, nrow(vec1))
67133
} else {
68134
Reduce(`&`, lapply(seq_len(ncol(vec1)), function(col_i) {
69-
approx_equal0(vec1[[col_i]], vec2[[col_i]], abs_tol, na_equal, inds1, inds2)
135+
approx_equal0(vec1[[col_i]], vec2[[col_i]], na_equal, abs_tol, inds1, inds2)
70136
}))
71137
}
72138
} else if (is_bare_list(vec1)) {
@@ -78,7 +144,7 @@ approx_equal0 <- function(vec1, vec2, abs_tol, na_equal, inds1 = NULL, inds2 = N
78144
# consistently inconsistent, we avoid dispatching to vec_equal for bare
79145
# lists even with abs_tol = 0:
80146
identical(vec_ptype(entry1), vec_ptype(entry2)) &&
81-
all(approx_equal0(entry1, entry2, abs_tol, na_equal))
147+
all(approx_equal0(entry1, entry2, na_equal, abs_tol))
82148
}, logical(1L))
83149
} else {
84150
# XXX No special handling for any other types/situations. Makes sense for
@@ -201,8 +267,8 @@ tbl_diff2 <- function(earlier_snapshot, later_tbl,
201267
combined_compactify_away[combined_ukey_is_repeat] <-
202268
approx_equal0(combined_vals,
203269
combined_vals,
204-
abs_tol = compactify_abs_tol,
205270
na_equal = TRUE,
271+
abs_tol = compactify_abs_tol,
206272
inds1 = combined_ukey_is_repeat,
207273
inds2 = ukey_repeat_first_i
208274
)

man/approx_equal.Rd

+56-13
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/approx_equal0.Rd

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-archive.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -220,5 +220,5 @@ test_that("`epi_archive` rejects dataframes where time_value and version columns
220220
test_that("is_locf replacement works as expected", {
221221
vec <- c(1, 1, 1e-10, 1.1e-10, NA, NA, NaN, NaN)
222222
is_repeated <- c(0, 1, 0, 1, 0, 1, 1, 1)
223-
expect_equal(c(FALSE, approx_equal(head(vec, -1L), tail(vec, -1L), .Machine$double.eps^0.5, na_equal = TRUE)), as.logical(is_repeated))
223+
expect_equal(c(FALSE, approx_equal(head(vec, -1L), tail(vec, -1L), na_equal = TRUE, abs_tol = .Machine$double.eps^0.5)), as.logical(is_repeated))
224224
})

0 commit comments

Comments
 (0)