Skip to content

Commit

Permalink
Remove ignored labels from supported entities (#1454)
Browse files Browse the repository at this point in the history
  • Loading branch information
omri374 authored Sep 22, 2024
1 parent 0721e36 commit 9321e14
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 4 deletions.
14 changes: 10 additions & 4 deletions presidio-analyzer/presidio_analyzer/nlp_engine/spacy_nlp_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,15 @@ def get_supported_entities(self) -> List[str]:
raise ValueError(
"model_to_presidio_entity_mapping is missing from model configuration"
)
return list(
entities_from_mapping = list(
set(self.ner_model_configuration.model_to_presidio_entity_mapping.values())
)
entities = [
ent
for ent in entities_from_mapping
if ent not in self.ner_model_configuration.labels_to_ignore
]
return entities

def get_supported_languages(self) -> List[str]:
"""Return the supported languages for this NLP engine."""
Expand Down Expand Up @@ -121,9 +127,9 @@ def process_batch(
raise ValueError("NLP engine is not loaded. Consider calling .load()")

texts = (str(text) for text in texts)
docs = self.nlp[language].pipe(texts,
as_tuples=as_tuples,
batch_size=batch_size)
docs = self.nlp[language].pipe(
texts, as_tuples=as_tuples, batch_size=batch_size
)
for doc in docs:
yield doc.text, self._doc_to_nlp_artifact(doc, language)

Expand Down
13 changes: 13 additions & 0 deletions presidio-analyzer/tests/test_spacy_nlp_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,16 @@ def test_default_configuration_correct():
)

assert actual_config_json == expected_config_json


def test_get_supported_entities_doesnt_include_ignored():
    """Check that entities listed in labels_to_ignore are not reported as supported."""
    label_mapping = {"A": "A", "B": "B", "C": "C"}
    configuration = NerModelConfiguration(
        labels_to_ignore=["A", "B"],
        model_to_presidio_entity_mapping=label_mapping,
    )
    engine = SpacyNlpEngine(ner_model_configuration=configuration)

    supported = engine.get_supported_entities()

    # "A" and "B" are ignored labels and must be absent; "C" is mapped
    # and not ignored, so it must still be reported.
    for ignored in ("A", "B"):
        assert ignored not in supported
    assert "C" in supported

0 comments on commit 9321e14

Please sign in to comment.