Skip to content

Commit

Permalink
Extended support for attributions
Browse files: browse the repository at this point in the history
  • Loading branch information
kam193 committed Sep 18, 2024
1 parent 282ff1e commit 602d67e
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 19 deletions.
2 changes: 1 addition & 1 deletion file-similarity/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
8
9
23 changes: 18 additions & 5 deletions file-similarity/service/al_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,15 @@ def _load_tlsh_data_from_csv(self, path: str):
for row in reader:
t = tlsh.Tlsh()
t.fromTlshStr(row["tlsh"])
self.tlsh_data[row["file_type"]].add(TLSHData(t, row["reference"], row.get("attribution.campaign", "")))
self.tlsh_data[row["file_type"]].add(
TLSHData(
t,
row["reference"],
row.get("attribution.campaign", "").split(","),
row.get("attribution.family", "").split(","),
row.get("attribution.actor", "").split(","),
)
)
hashes_count += 1
self.log.info(f"Loaded {hashes_count} TLSH hashes for {len(self.tlsh_data)} extensions")

Expand Down Expand Up @@ -151,12 +159,17 @@ def execute(self, request: ServiceRequest) -> None:
for similar in similars:
similar: TLSHResult
main_section.add_line(f"({similar.distance}) {similar.similar_to.hash.hexdigest()}")
main_section.add_line(f" {similar.similar_to.reference}")
if similar.similar_to.campaigns:
main_section.add_tag("attribution.campaign", similar.similar_to.campaigns)
main_section.add_line(f" {similar.similar_to.reference}")
for campaign in similar.similar_to.campaigns or []:
main_section.add_tag("attribution.campaign", campaign)
for family in similar.similar_to.families or []:
main_section.add_tag("attribution.family", family)
for actor in similar.similar_to.actors or []:
main_section.add_tag("attribution.actor", actor)

main_section.set_heuristic(
HEURISTIC_BY_SEVERITY[severity],
signature=f"similarity/tlsh/{severity.value}",
signature=f"file-similarity.{severity.value}",
)
result.add_section(main_section)

Expand Down
4 changes: 3 additions & 1 deletion file-similarity/service/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
class TLSHData:
hash: tlsh.Tlsh
reference: str
campaigns: str = None
campaigns: list[str] = None
families: list[str] = None
actors: list[str] = None

def get_distance(self, hash: tlsh.Tlsh):
return self.hash.diff(hash)
Expand Down
40 changes: 28 additions & 12 deletions file-similarity/service/updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,14 @@

BADLIST_NAME = "Badlist"
BADLIST_QUERY = "hashes.tlsh:* AND enabled:true"
HEADERS = ["tlsh", "file_type", "reference", "attribution.campaign"]
HEADERS = [
"tlsh",
"file_type",
"reference",
"attribution.campaign",
"attribution.family",
"attribution.actor",
]
HASH_FILE_NAME = "hashes.csv"


Expand Down Expand Up @@ -50,9 +57,10 @@ def _load_hashes_set(self, file_path: str) -> set[str]:
return hashes

def _describe_source(self, source: Source) -> str:
reason = (
f" ({self._safe_get(source, 'reason')})" if self._safe_get(source, "reason") else ""
)
reason = ", ".join(self._safe_get(source, "reason") or [])
if reason:
reason = f" ({reason})"

return f"{self._safe_get(source, 'name') or ''}{reason}"

def _update_badlist(self):
Expand All @@ -69,9 +77,9 @@ def _update_badlist(self):
hashes = set()
self.push_status("UPDATING", "Pulling currently badlisted files..")
# TODO: streaming results and configurable limit
results: Iterable[Badlist] = self.datastore.badlist.search(BADLIST_QUERY, rows=10000).get(
"items", []
)
results: Iterable[Badlist] = self.datastore.badlist.search(
BADLIST_QUERY, fl="*", rows=10000
).get("items", [])

with tempfile.TemporaryDirectory() as tmpdir, open(f"{tmpdir}/{HASH_FILE_NAME}", "w+") as f:
writer = csv.DictWriter(f, fieldnames=HEADERS)
Expand All @@ -88,23 +96,31 @@ def _update_badlist(self):
try:
t.fromTlshStr(result.hashes.tlsh)
except ValueError:
self.log.warning(
"Invalid TLSH hash found in Badlist [%s]", result.hashes.tlsh, exc_info=True
)
# self.log.warning(
# "Invalid TLSH hash found in Badlist [%s]", result.hashes.tlsh, exc_info=True
# )
continue
if result.hashes.tlsh in hashes:
continue

hashes.add(result.hashes.tlsh)
sources = self._safe_get(result, "sources") or []
reference = f"Marked by ({len(sources)}: {', '.join(self._describe_source(source) for source in sources)})"
reference = (
f"Marked by {', '.join(self._describe_source(source) for source in sources)}"
)
self.log.info(self._safe_get(result, "attribution"))
self.log.info(result.as_primitives())
campaigns = self._safe_get(result, "attribution.campaign")
family = self._safe_get(result, "attribution.family")
actor = self._safe_get(result, "attribution.actor")
writer.writerow(
{
"tlsh": result.hashes.tlsh,
"file_type": type_,
"reference": reference,
"attribution.campaign": campaigns,
"attribution.campaign": ",".join(campaigns) if campaigns else None,
"attribution.family": ",".join(family) if family else None,
"attribution.actor": ",".join(actor) if actor else None,
}
)
self.log.info(f"Loaded {len(hashes)} TLSH hashes")
Expand Down

0 comments on commit 602d67e

Please sign in to comment.