From e4b04b3b9406bfc03f08ebec2d0b107256ea1023 Mon Sep 17 00:00:00 2001 From: BeritJanssen Date: Fri, 6 Oct 2023 13:13:39 +0200 Subject: [PATCH] add settings dutchannualreports --- backend/corpora/dutchannualreports/dutchannualreports.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/backend/corpora/dutchannualreports/dutchannualreports.py b/backend/corpora/dutchannualreports/dutchannualreports.py index d4f4c7038..6a7c89168 100644 --- a/backend/corpora/dutchannualreports/dutchannualreports.py +++ b/backend/corpora/dutchannualreports/dutchannualreports.py @@ -12,8 +12,8 @@ from addcorpus.corpus import XMLCorpusDefinition, FieldDefinition from media.image_processing import get_pdf_info, retrieve_pdf, pdf_pages, build_partial_pdf from addcorpus.load_corpus import corpus_dir - from addcorpus.es_mappings import keyword_mapping, main_content_mapping +from addcorpus.es_settings import es_settings from media.media_url import media_url @@ -48,6 +48,10 @@ class DutchAnnualReports(XMLCorpusDefinition): dutchannualreports_map = {} + @property + def es_settings(self): + return es_settings(self.languages[0], stopword_analyzer=True, stemming_analyzer=True) + with open(op.join(corpus_dir('dutchannualreports'), 'dutchannualreports_mapping.csv')) as f: reader = csv.DictReader(f) for line in reader: