Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Analysis builder improvements #1295

Merged
merged 8 commits on Feb 15, 2024
10 changes: 6 additions & 4 deletions presidio-structured/presidio_structured/analysis_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ def generate_analysis(
def _remove_low_scores(
self,
key_recognizer_result_map: Dict[str, RecognizerResult],
score_threshold: float = None,
) -> List[RecognizerResult]:
score_threshold: Optional[float] = None,
) -> Dict[str, RecognizerResult]:
"""
Remove results for which the confidence is lower than the threshold.

Expand Down Expand Up @@ -127,13 +127,15 @@ def _generate_analysis_from_results_json(
for result in analyzer_results:
current_key = prefix + result.key

if isinstance(result.value, dict):
if isinstance(result.value, dict) and isinstance(
result.recognizer_results, Iterator
):
nested_mappings = self._generate_analysis_from_results_json(
result.recognizer_results, prefix=current_key + "."
)
key_recognizer_result_map.update(nested_mappings)
first_recognizer_result = next(iter(result.recognizer_results), None)
if first_recognizer_result is not None:
if isinstance(first_recognizer_result, RecognizerResult):
logger.debug(
f"Found result with entity {first_recognizer_result.entity_type} \
in {current_key}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ class JsonDataProcessor(DataProcessorBase):
"""JSON Data Processor, Supports arbitrary nesting of dictionaries and lists."""

@staticmethod
def _get_nested_value(data: Union[Dict, List], path: List[str]) -> Any:
def _get_nested_value(data: Union[Dict, List, None], path: List[str]) -> Any:
"""
Recursively retrieves the value from nested data using a given path.

Expand Down
4 changes: 2 additions & 2 deletions presidio-structured/presidio_structured/structured_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def anonymize(
self,
data: Union[Dict, DataFrame],
structured_analysis: StructuredAnalysis,
operators: Dict[str, OperatorConfig] = None,
operators: Union[Dict[str, OperatorConfig], None] = None,
) -> Union[Dict, DataFrame]:
"""
Anonymize the given data using the given configuration.
Expand All @@ -49,7 +49,7 @@ def anonymize(
return self.data_processor.operate(data, structured_analysis, operators)

def __check_or_add_default_operator(
self, operators: Dict[str, OperatorConfig]
self, operators: Union[Dict[str, OperatorConfig], None]
) -> Dict[str, OperatorConfig]:
"""
Check if the provided operators dictionary has a default operator. \
Expand Down
Loading