Skip to content

Commit

Permalink
reimporter: make it clear that on reimport we work with matched findi…
Browse files Browse the repository at this point in the history
…ngs which are not marked as duplicate
  • Loading branch information
pna-nca committed Oct 15, 2024
1 parent 3bdb6f3 commit 6d73725
Showing 1 changed file with 25 additions and 2 deletions.
27 changes: 25 additions & 2 deletions dojo/importers/default_reimporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,10 +202,13 @@ def process_findings(
unsaved_finding.hash_code = self.calculate_unsaved_finding_hash_code(unsaved_finding)
deduplicationLogger.debug(f"unsaved finding's hash_code: {unsaved_finding.hash_code}")
# Match any findings to this new one coming in
matched_findings = self.match_new_finding_to_existing_finding(unsaved_finding)
deduplicationLogger.debug(f"found {len(matched_findings)} findings matching with current new finding")
matched_findings = self.match_new_finding_to_existing_finding_nonduplicate(unsaved_finding)
deduplicationLogger.debug(f"found {len(matched_findings)} original (non-duplicate) findings matching with current new finding")
# Determine how to proceed based on whether matches were found or not
if matched_findings:
# We take the first finding because we expect at most one match.
# More than one matching finding should never happen; if it does,
# it indicates that the stored data is inconsistent.
existing_finding = matched_findings[0]
finding, force_continue = self.process_matched_finding(
unsaved_finding,
Expand Down Expand Up @@ -425,6 +428,26 @@ def match_new_finding_to_existing_finding(
logger.error(f'Internal error: unexpected deduplication_algorithm: "{self.deduplication_algorithm}"')
return None

def match_new_finding_to_existing_finding_nonduplicate(
    self,
    unsaved_finding: Finding,
) -> List[Finding]:
    """
    Return the existing findings that match a new finding and are not
    marked as duplicate.

    The matching itself is delegated to
    ``match_new_finding_to_existing_finding``; this method only drops the
    matches whose ``duplicate`` flag is set. Normally this should end up in
    a single finding. If more than one is returned, the internal state is
    inconsistent and hardly usable.

    Args:
        unsaved_finding: The new, not yet saved finding to match.

    Returns:
        The matching findings that are not duplicates, in the order the
        matcher produced them. Empty when nothing matched or when the
        matcher returned ``None``.
    """
    # The same result could be obtained with a dedicated database query,
    # but that would mean copy-pasting a large chunk of the matcher.
    matched_findings = self.match_new_finding_to_existing_finding(unsaved_finding)
    if matched_findings is None:
        # The matcher logs an error and returns None when the configured
        # deduplication algorithm is unexpected; treat that as "no
        # matches" instead of failing with a TypeError on iteration.
        return []
    return [finding for finding in matched_findings if not finding.duplicate]

def process_matched_finding(
self,
unsaved_finding: Finding,
Expand Down

0 comments on commit 6d73725

Please sign in to comment.