@@ -199,20 +199,39 @@ vec_approx_equal0 <- function(vec1, vec2, na_equal, abs_tol, inds1 = NULL, inds2
199
199
# '
200
200
# ' @keywords internal
201
201
tbl_fast_anti_join <- function(x, y, ukey_names, val_names, abs_tol = 0) {
  # Return the rows of `x` (with all of its columns) that do not have a
  # counterpart in `y`. Rows are paired up by the `ukey_names` columns and
  # compared on the `val_names` columns, exactly when `abs_tol == 0` and via
  # `vec_approx_equal` with `abs_tol` otherwise.
  #
  # NOTE(review): both branches rely on duplicate detection over the row-bound
  # `x` and `y`, which presumably assumes the ukeys are unique within `x` and
  # within `y` -- confirm against callers.

  # Only the key and value columns take part in the comparison; any other
  # columns of `x` are carried through untouched by the final `vec_slice`.
  x_keyvals <- x[c(ukey_names, val_names)]
  y_keyvals <- y[c(ukey_names, val_names)]
  xy_keyvals <- vec_rbind(x_keyvals, y_keyvals)
  if (abs_tol == 0) {
    # perf: 0 tolerance is just like a normal `anti_join` by both ukey_names and
    # val_names together. We can do that more quickly than `anti_join` with
    # `vctrs` by checking for keyvals of `x` that are not duplicated in `y`.
    # (`vec_duplicate_detect` will mark those, unlike `duplicated`.)
    x_exclude <- vec_duplicate_detect(xy_keyvals)
    # The rows of `x` come first in `xy_keyvals`; keep only their flags.
    x_exclude <- vec_slice(x_exclude, seq_len(nrow(x)))
  } else {
    xy_ukeys <- xy_keyvals[ukey_names]
    # Locate ukeys in `y` that match ukeys in `x` and where in `x` they map back
    # to. It's faster to do this with `vec_duplicate_id` on `xy_ukeys` than to
    # perform an `inner_join`.
    xy_ukey_dup_ids <- vec_duplicate_id(xy_ukeys)
    xy_ukey_dup_inds2 <- which(xy_ukey_dup_ids != seq_along(xy_ukey_dup_ids))
    # ^ these should point to rows from y that had a ukey match in x
    xy_ukey_dup_inds1 <- xy_ukey_dup_ids[xy_ukey_dup_inds2]
    # ^ these should point to the respectively corresponding rows from x

    # Anything in `x` without a ukey match in `y` should be kept; start off with
    # `FALSE` for everything and just fill in `TRUE`/`FALSE` results for the
    # ukeys with matches in `y`:
    x_exclude <- rep(FALSE, nrow(x))
    xy_vals <- xy_keyvals[val_names]
    x_exclude[xy_ukey_dup_inds1] <- vec_approx_equal(
      xy_vals,
      inds1 = xy_ukey_dup_inds2,
      xy_vals,
      inds2 = xy_ukey_dup_inds1,
      na_equal = TRUE, abs_tol = abs_tol
    )
  }
  # `x` itself is never reassigned above, so this keeps all original columns.
  vec_slice(x, !x_exclude)
}
@@ -269,7 +288,7 @@ tbl_diff2 <- function(earlier_snapshot, later_tbl,
269
288
}
270
289
later_format <- arg_match0(later_format , c(" snapshot" , " update" ))
271
290
if (! (is.vector(compactify_abs_tol , mode = " numeric" ) &&
272
- length(compactify_abs_tol ) == 1L && # nolint:indentation_linter
291
+ length(compactify_abs_tol ) == 1L && # nolint: indentation_linter
273
292
compactify_abs_tol > = 0 )) {
274
293
# Give a specific message:
275
294
assert_numeric(compactify_abs_tol , lower = 0 , any.missing = FALSE , len = 1L )
0 commit comments