Skip to content

Commit

Permalink
Fix handling of lowercase hashes
Browse files: browse the repository at this point in the history
  • Loading branch information
kam193 committed Oct 2, 2024
1 parent 9d6474b commit 01a2081
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 8 deletions.
2 changes: 1 addition & 1 deletion file-similarity/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
9
10
16 changes: 9 additions & 7 deletions file-similarity/service/updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ def _update_badlist(self):
)

hashes = set()
errors = 0
self.push_status("UPDATING", "Pulling currently badlisted files..")
# TODO: streaming results and configurable limit
results: Iterable[Badlist] = self.datastore.badlist.search(
Expand All @@ -94,16 +95,17 @@ def _update_badlist(self):

t = tlsh.Tlsh()
try:
t.fromTlshStr(result.hashes.tlsh)
except ValueError:
# self.log.warning(
# "Invalid TLSH hash found in Badlist [%s]", result.hashes.tlsh, exc_info=True
# )
t.fromTlshStr(result.hashes.tlsh.upper())
except Exception:
self.log.warning(
"Invalid TLSH hash found in Badlist [%s]", result.hashes.tlsh, exc_info=True
)
errors += 1
continue
if result.hashes.tlsh in hashes:
continue

hashes.add(result.hashes.tlsh)
hashes.add(result.hashes.tlsh.upper())
sources = self._safe_get(result, "sources") or []
reference = (
f"Marked by {', '.join(self._describe_source(source) for source in sources)}"
Expand Down Expand Up @@ -138,7 +140,7 @@ def _update_badlist(self):
f"{self.latest_updates_dir}/{self._current_source}",
dirs_exist_ok=True,
)
self.push_status("DONE", f"Imported {len(hashes)} hashes")
self.push_status("DONE", f"Imported {len(hashes)} hashes, {errors} errors")

def do_source_update(self, service: Service) -> None:
sources_to_update = []
Expand Down

0 comments on commit 01a2081

Please sign in to comment.