From afbf251931a75abd118d01c96a00b94fecd227d2 Mon Sep 17 00:00:00 2001 From: Nils Mueller Date: Sat, 30 Jan 2021 22:19:24 +0200 Subject: [PATCH] Fix duplicated rows Querying the oplog with '$gte' will lead to duplicated rows if the last oplog entry is for a collection that is supposed to be replicated, because the entry whose 'ts' equals oplog_ts is matched again. Because of the change done in efd5e0fd769b749f247266a88f617a0e617d380a, this does not happen on every sync, as it is quite likely that the last oplog entry references a modification in a different collection. Query with '$gt' instead. --- tap_mongodb/sync_strategies/oplog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_mongodb/sync_strategies/oplog.py b/tap_mongodb/sync_strategies/oplog.py index 79bf9d2..ccc6cd8 100644 --- a/tap_mongodb/sync_strategies/oplog.py +++ b/tap_mongodb/sync_strategies/oplog.py @@ -129,7 +129,7 @@ def sync_collection(client, stream, state, stream_projection): start_time = time.time() oplog_query = { - 'ts': {'$gte': oplog_ts} + 'ts': {'$gt': oplog_ts} } projection = transform_projection(stream_projection)