Skip to content

Commit

Permalink
V2 anonymizer improvements (microsoft#547)
Browse files Browse the repository at this point in the history
* Add readme file for image redactor
Fix anonymizer readme

* Fix PR comments

* Improve the run time of the analyzer results loop.

* Add note and some more tests.

Co-authored-by: Nava Vaisman Levy <[email protected]>
  • Loading branch information
shiranr and navalev authored Mar 1, 2021
1 parent 0f853ca commit 6baee6e
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,15 @@ def _remove_conflicts(self):
:return: List
"""
unique_elements = []
# This list holds every element that a single result must be checked against.
# When a result is dropped, it can be removed from this list as well, because it
# intersects with another result and that other result is the one that was kept.
other_elements = AnalyzerResults(self)
for result_a in self:
other_elements = AnalyzerResults(self)
other_elements.remove(result_a)
if not any([result_a.has_conflict(other_element) for other_element in
other_elements]):
other_elements.append(result_a)
unique_elements.append(result_a)
else:
self.logger.debug(
Expand Down
17 changes: 11 additions & 6 deletions presidio-anonymizer/tests/test_analyzer_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,21 @@ def test_given_conflicting_analyzer_results_then_none_conflicting_results_return
analyze_results = AnonymizerRequest.handle_analyzer_results_json(payload)
assert len(analyze_results) == len(payload.get("analyzer_results"))
sorted_results = analyze_results.to_sorted_unique_results()
assert len(sorted_results) == 2
assert list(sorted_results)[0].start < list(sorted_results)[1].start
assert list(sorted_results)[0].end < list(sorted_results)[1].end
assert len(sorted_results) == 4
for index in range(len(sorted_results) - 1):
assert list(sorted_results)[index].start < list(sorted_results)[index + 1].start
assert list(sorted_results)[index].end < list(sorted_results)[index + 1].end


def test_given_conflict_analyzer_results_then_reversed_none_conflict_list_returned():
payload = get_dup_payload()
analyze_results = AnonymizerRequest.handle_analyzer_results_json(payload)
assert len(analyze_results) == len(payload.get("analyzer_results"))
sorted_results = analyze_results.to_sorted_unique_results(True)
assert len(sorted_results) == 2
assert list(sorted_results)[1].start < list(sorted_results)[0].start
assert list(sorted_results)[1].end < list(sorted_results)[0].end
assert len(sorted_results) == 4
for index in range(len(sorted_results) - 1):
assert list(sorted_results)[index].start > list(sorted_results)[index + 1].start
assert list(sorted_results)[index].end > list(sorted_results)[index + 1].end


def get_dup_payload():
Expand All @@ -35,5 +37,8 @@ def get_dup_payload():
{"start": 24, "end": 28, "score": 0.9, "entity_type": "FIRST_NAME"},
{"start": 29, "end": 32, "score": 0.6, "entity_type": "LAST_NAME"},
{"start": 24, "end": 30, "score": 0.8, "entity_type": "NAME"},
{"start": 18, "end": 32, "score": 0.8, "entity_type": "BLA"},
{"start": 23, "end": 35, "score": 0.8, "entity_type": "BLA"},
{"start": 28, "end": 36, "score": 0.8, "entity_type": "BLA"},
],
}

0 comments on commit 6baee6e

Please sign in to comment.