From 2a65ff059cebf867b3a7865b21f2d1df5d0659c5 Mon Sep 17 00:00:00 2001 From: julius-heitkoetter <97237339+julius-heitkoetter@users.noreply.github.com> Date: Fri, 13 Oct 2023 11:38:49 -0400 Subject: [PATCH] checked for conflicts in file hashes --- A2rchi/utils/data_manager.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/A2rchi/utils/data_manager.py b/A2rchi/utils/data_manager.py index 4245f20..c36bb12 100644 --- a/A2rchi/utils/data_manager.py +++ b/A2rchi/utils/data_manager.py @@ -225,6 +225,9 @@ def _add_to_vectorstore(self, collection, files_to_add, sources={}): time_identifier = hashlib.md5() time_identifier.update(str(time.time()).encode('utf-8')) time_hash = str(int(identifier.hexdigest(),16))[0:6] + while str(filehash) + str(chunk_hash) + str(time_hash) in ids: + print("INFO: Found conflict with hash: " + str(filehash) + str(chunk_hash) + str(time_hash) + ". Trying again") + time_hash += 1 ids.append(str(filehash) + str(chunk_hash) + str(time_hash)) print("Ids: ",ids)