@@ -196,7 +196,10 @@ vec_approx_equal0 <- function(vec1, vec2, na_equal, abs_tol, inds1 = NULL, inds2
196
196
# ' @return rows from `x` that either (a) don't have a (0-tolerance) matching
197
197
# ' ukey in `y`, or (b) have a matching ukey in `y`, but don't have
198
198
# ' approximately equal value column values
199
+ # '
200
+ # ' @keywords internal
199
201
tbl_fast_anti_join <- function (x , y , ukey_names , val_names , abs_tol = 0 ) {
202
+ x_orig <- x
200
203
x <- x [c(ukey_names , val_names )]
201
204
y <- y [c(ukey_names , val_names )]
202
205
xy <- vec_rbind(x , y )
@@ -211,7 +214,7 @@ tbl_fast_anti_join <- function(x, y, ukey_names, val_names, abs_tol = 0) {
211
214
xy_vals <- xy [val_names ]
212
215
x_exclude [xy_dup_inds1 ] <- vec_approx_equal(xy_vals , inds1 = xy_dup_inds2 , xy_vals , inds2 = xy_dup_inds1 , na_equal = TRUE , abs_tol = abs_tol )
213
216
}
214
- vec_slice(x , ! x_exclude )
217
+ vec_slice(x_orig , ! x_exclude )
215
218
}
216
219
217
220
# ' Calculate compact patch to move from one snapshot/update to another
@@ -244,41 +247,53 @@ tbl_diff2 <- function(earlier_snapshot, later_tbl,
244
247
# Most input validation + handle NULL earlier_snapshot. This is a small function so
245
248
# use faster validation variants:
246
249
if (! is_tibble(later_tbl )) {
247
- cli_abort(" `later_tbl` must be a tibble" ,
248
- class = " epiprocess__tbl_diff2__later_tbl_invalid"
249
- )
250
+ cli_abort(
251
+ " `later_tbl` must be a tibble" ,
252
+ class = " epiprocess__tbl_diff2__later_tbl_invalid"
253
+ )
250
254
}
251
255
if (is.null(earlier_snapshot )) {
252
256
return (later_tbl )
253
257
}
254
258
if (! is_tibble(earlier_snapshot )) {
255
- cli_abort(" `earlier_snapshot` must be a tibble or `NULL`" ,
256
- class = " epiprocess__tbl_diff2__earlier_tbl_class_invalid"
257
- )
259
+ cli_abort(
260
+ " `earlier_snapshot` must be a tibble or `NULL`" ,
261
+ class = " epiprocess__tbl_diff2__earlier_tbl_class_invalid"
262
+ )
258
263
}
259
264
if (! is.character(ukey_names ) || ! all(ukey_names %in% names(earlier_snapshot ))) {
260
- cli_abort(" `ukey_names` must be a subset of column names" ,
261
- class = " epiprocess__tbl_diff2__ukey_names_class_invalid"
262
- )
265
+ cli_abort(
266
+ " `ukey_names` must be a subset of column names" ,
267
+ class = " epiprocess__tbl_diff2__ukey_names_class_invalid"
268
+ )
263
269
}
264
270
later_format <- arg_match0(later_format , c(" snapshot" , " update" ))
265
271
if (! (is.vector(compactify_abs_tol , mode = " numeric" ) &&
266
- length(compactify_abs_tol ) == 1L && # nolint:indentation_linter
267
- compactify_abs_tol > = 0 )) {
272
+ length(compactify_abs_tol ) == 1L && # nolint:indentation_linter
273
+ compactify_abs_tol > = 0 )) {
268
274
# Give a specific message:
269
275
assert_numeric(compactify_abs_tol , lower = 0 , any.missing = FALSE , len = 1L )
270
276
# Fallback e.g. for invalid classes not caught by assert_numeric:
271
- cli_abort(" `compactify_abs_tol` must be a length-1 double/integer >= 0" ,
272
- class = " epiprocess__tbl_diff2__compactify_abs_tol_invalid" )
277
+ cli_abort(
278
+ " `compactify_abs_tol` must be a length-1 double/integer >= 0" ,
279
+ class = " epiprocess__tbl_diff2__compactify_abs_tol_invalid"
280
+ )
273
281
}
274
282
275
283
all_names <- names(later_tbl )
276
- val_names <- all_names [! (all_names %in% ukey_names )]
284
+ val_names <- all_names [! (all_names %in% ukey_names )]
277
285
updates <- tbl_fast_anti_join(later_tbl , earlier_snapshot , ukey_names , val_names , compactify_abs_tol )
278
286
if (later_format == " snapshot" ) {
287
+ # Interpret `later_tbl` as a full snapshot, rather than a diff / sparse
288
+ # update. That means that any ukeys in `earlier_snapshot` that don't appear
289
+ # in `later_tbl` were deleted in the later snapshot.
279
290
deletions <- tbl_fast_anti_join(earlier_snapshot [ukey_names ], later_tbl [ukey_names ], ukey_names , character (), 0 )
280
- updates <- vec_rbind(updates , deletions ) # fills vals with NAs
291
+ updates <- vec_rbind(updates , deletions ) # fills val cols with NAs
281
292
}
293
+ # If `later_format == "update"`, we don't need to do anything special about
294
+ # the above ukeys. The full snapshot for the later version would include the
295
+ # corresponding rows unchanged, and the diff for these unchanged rows would be
296
+ # empty.
282
297
updates
283
298
}
284
299
@@ -299,20 +314,23 @@ tbl_patch <- function(snapshot, update, ukey_names) {
299
314
# Most input validation. This is a small function so use faster validation
300
315
# variants:
301
316
if (! is_tibble(update )) {
302
- cli_abort(" `update` must be a tibble" ,
317
+ cli_abort(
318
+ " `update` must be a tibble" ,
303
319
class = " epiprocess__tbl_patch__update_class_invalid"
304
320
)
305
321
}
306
322
if (is.null(snapshot )) {
307
323
return (update )
308
324
}
309
325
if (! is_tibble(snapshot )) {
310
- cli_abort(" `snapshot` must be a tibble" ,
326
+ cli_abort(
327
+ " `snapshot` must be a tibble" ,
311
328
class = " epiprocess__tbl_patch__snapshot_class_invalid"
312
329
)
313
330
}
314
331
if (! is.character(ukey_names ) || ! all(ukey_names %in% names(snapshot ))) {
315
- cli_abort(" `ukey_names` must be a subset of column names" ,
332
+ cli_abort(
333
+ " `ukey_names` must be a subset of column names" ,
316
334
class = " epiprocess__tbl_patch__ukey_names_invalid"
317
335
)
318
336
}
@@ -333,8 +351,8 @@ tbl_patch <- function(snapshot, update, ukey_names) {
333
351
# This is like `!duplicated()` but faster, and like `vec_unique_loc()` but guaranteeing
334
352
# that we get the first appearance since `vec_duplicate_id()` guarantees that
335
353
# it points to the first appearance.
336
- not_overwritten <- dup_ids == vec_seq_along(result_tbl )
337
- result_tbl <- result_tbl [not_overwritten , ]
354
+ is_only_or_favored_appearance <- dup_ids == vec_seq_along(result_tbl )
355
+ result_tbl <- result_tbl [is_only_or_favored_appearance , ]
338
356
339
357
result_tbl
340
358
}
0 commit comments