diff --git a/presidio-analyzer/presidio_analyzer/analyzer_engine.py b/presidio-analyzer/presidio_analyzer/analyzer_engine.py index 97d929568..d8bf35d66 100644 --- a/presidio-analyzer/presidio_analyzer/analyzer_engine.py +++ b/presidio-analyzer/presidio_analyzer/analyzer_engine.py @@ -228,7 +228,7 @@ def analyze( if self.log_decision_process: self.app_tracer.trace( correlation_id, - json.dumps([str(result.to_dict()) for result in results]), + json.dumps([str(result.to_dict()) for result in results],indent=4), ) # Remove duplicates or low score results diff --git a/presidio-analyzer/presidio_analyzer/pattern_recognizer.py b/presidio-analyzer/presidio_analyzer/pattern_recognizer.py index 3aec751af..956118564 100644 --- a/presidio-analyzer/presidio_analyzer/pattern_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/pattern_recognizer.py @@ -218,6 +218,7 @@ def __analyze_patterns( ) pattern_result = RecognizerResult( entity_type=self.supported_entities[0], + entity_text=text[start:end], start=start, end=end, score=score, diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/azure_ai_language.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/azure_ai_language.py index d57a897f0..947828eae 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/azure_ai_language.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/azure_ai_language.py @@ -138,6 +138,7 @@ def analyze( recognizer_results.append( RecognizerResult( entity_type=entity.category, + entity_text=text[entity.offset:entity.offset + entity.length], start=entity.offset, end=entity.offset + entity.length, score=entity.confidence_score, diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/iban_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/iban_recognizer.py index ca508ae15..0bdc47bf4 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/iban_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/iban_recognizer.py @@ -158,6 +158,7 @@ def __analyze_patterns(self, text: str, flags: int = None): ) pattern_result = RecognizerResult( entity_type=self.supported_entities[0], + entity_text=text[start:end], start=start, end=end, score=score, diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/phone_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/phone_recognizer.py index 6cdf5507c..44f0a7a52 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/phone_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/phone_recognizer.py @@ -74,6 +74,7 @@ def analyze( def _get_recognizer_result(self, match, text, region, nlp_artifacts): result = RecognizerResult( entity_type="PHONE_NUMBER", + entity_text = text[match.start:match.end], start=match.start, end=match.end, score=self.SCORE, diff --git a/presidio-analyzer/presidio_analyzer/predefined_recognizers/spacy_recognizer.py b/presidio-analyzer/presidio_analyzer/predefined_recognizers/spacy_recognizer.py index f16f623eb..977c20253 100644 --- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/spacy_recognizer.py +++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/spacy_recognizer.py @@ -115,6 +115,7 @@ def analyze(self, text: str, entities, nlp_artifacts=None): # noqa D102 explanation = self.build_explanation(ner_score, textual_explanation) spacy_result = RecognizerResult( entity_type=ner_entity.label_, + entity_text=text[ner_entity.start_char: ner_entity.end_char], start=ner_entity.start_char, end=ner_entity.end_char, score=ner_score, diff --git a/presidio-analyzer/presidio_analyzer/recognizer_result.py b/presidio-analyzer/presidio_analyzer/recognizer_result.py index d4d67ff19..2297a55e5 100644 --- a/presidio-analyzer/presidio_analyzer/recognizer_result.py +++ b/presidio-analyzer/presidio_analyzer/recognizer_result.py @@ -34,6 +34,7 @@ class RecognizerResult: def __init__( self, entity_type: str, + entity_text:str, start: int, end: int, score: float, @@ -42,6 +43,7 @@ def __init__( ): self.entity_type = entity_type + self.entity_text = entity_text self.start = start self.end = end self.score = score diff --git a/presidio-image-redactor/presidio_image_redactor/entities/image_recognizer_result.py b/presidio-image-redactor/presidio_image_redactor/entities/image_recognizer_result.py index 32b93af2e..938bacea4 100644 --- a/presidio-image-redactor/presidio_image_redactor/entities/image_recognizer_result.py +++ b/presidio-image-redactor/presidio_image_redactor/entities/image_recognizer_result.py @@ -18,6 +18,7 @@ class ImageRecognizerResult(RecognizerResult): def __init__( self, entity_type: str, + entity_text: str, start: int, end: int, score: float, @@ -27,7 +28,7 @@ def __init__( height: int, ): - super().__init__(entity_type, start, end, score) + super().__init__(entity_type,entity_text, start, end, score) self.left = left self.top = top self.width = width @@ -52,6 +53,7 @@ def __str__(self) -> str: """Return a string representation of the instance.""" return ( f"type: {self.entity_type}, " + f"type: {self.entity_text}, " f"start: {self.start}, " f"end: {self.end}, " f"score: {self.score}, " diff --git a/presidio-image-redactor/presidio_image_redactor/image_analyzer_engine.py b/presidio-image-redactor/presidio_image_redactor/image_analyzer_engine.py index 8266b7c27..6cd167531 100644 --- a/presidio-image-redactor/presidio_image_redactor/image_analyzer_engine.py +++ b/presidio-image-redactor/presidio_image_redactor/image_analyzer_engine.py @@ -178,6 +178,7 @@ def map_analyzer_results_to_bounding_boxes( bboxes.append( ImageRecognizerResult( element.entity_type, + text_element, element.start, element.end, element.score, @@ -204,6 +205,7 @@ def map_analyzer_results_to_bounding_boxes( bboxes.append( ImageRecognizerResult( element.entity_type, + text_element, element.start, element.end, element.score,