Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix NER check #1667

Merged
merged 3 commits into from
Oct 3, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 19 additions & 16 deletions backend/addcorpus/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
import warnings

from django.contrib import admin
from django.contrib.auth.models import Group
from django.contrib.postgres.fields import ArrayField
from django.core.exceptions import ValidationError
from django.db import models
from django.db.models.constraints import UniqueConstraint

from addcorpus.constants import CATEGORIES, MappingType, VisualizationType
from addcorpus.validation.creation import (
validate_es_mapping, validate_field_language, validate_implication, validate_language_code,
Expand All @@ -12,15 +19,10 @@
)
from addcorpus.validation.indexing import (validate_essential_fields,
validate_has_configuration, validate_language_field, validate_has_data_directory)
from addcorpus.validation.publishing import (validate_default_sort,
validate_ngram_has_date_field)
from django.contrib import admin
from django.contrib.auth.models import Group
from django.contrib.postgres.fields import ArrayField
from django.core.exceptions import ValidationError
from django.db import models
from django.db.models.constraints import UniqueConstraint

from addcorpus.validation.publishing import (
validate_default_sort,
validate_ngram_has_date_field,
)
from ianalyzer.elasticsearch import elasticsearch

MAX_LENGTH_NAME = 126
Expand Down Expand Up @@ -264,14 +266,15 @@ def clean(self):

@property
def has_named_entities(self):
client = elasticsearch(self.es_index)
from es.search import total_hits
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason this line isn't at the top of the file?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, otherwise it's a circular import, as the es.search module imports addcorpus.models.CorpusConfiguration.


client = elasticsearch(self.corpus.name)
try:
mapping = client.indices.get_mapping(
index=self.es_index)
# in production, the index name can be different from the object's es_index value
index_name = list(mapping.keys())[0]
fields = mapping[index_name].get('mappings', {}).get('properties', {}).keys()
if any(field.endswith(':ner') for field in fields):
# we check if any fields exist for filtering named entities
ner_exists = client.search(
index=self.es_index, query={"exists": {"field": "ner:*"}}, size=0
)
if total_hits(ner_exists):
return True
except:
return False
Expand Down