diff --git a/backend/corpora/parliament/conftest.py b/backend/corpora/parliament/conftest.py index c47ea183c..06d048c30 100644 --- a/backend/corpora/parliament/conftest.py +++ b/backend/corpora/parliament/conftest.py @@ -17,6 +17,7 @@ def parliament_corpora_settings(settings): 'parliament-denmark': os.path.join(here, 'denmark.py'), 'parliament-denmark-new': os.path.join(here, 'denmark-new.py'), 'parliament-finland': os.path.join(here, 'finland.py'), + 'parliament-finland-old': os.path.join(here, 'finland-old.py'), 'parliament-norway': os.path.join(here, 'norway.py'), 'parliament-norway-new': os.path.join(here, 'norway-new.py'), 'parliament-ireland': os.path.join(here, 'ireland.py') @@ -34,6 +35,7 @@ def parliament_corpora_settings(settings): settings.PP_SWEDEN_DATA = os.path.join(here, 'tests', 'data', 'sweden') settings.PP_SWEDEN_OLD_DATA = os.path.join(here, 'tests', 'data', 'sweden-old') settings.PP_FINLAND_DATA = os.path.join(here, 'tests', 'data', 'finland') + settings.PP_FINLAND_OLD_DATA = os.path.join(here, 'tests', 'data', 'finland-old') settings.PP_NORWAY_DATA = os.path.join(here, 'tests', 'data', 'norway') settings.PP_NORWAY_NEW_DATA = os.path.join(here, 'tests', 'data', 'norway-new') settings.PP_DENMARK_DATA = os.path.join(here, 'tests', 'data', 'denmark') diff --git a/backend/corpora/parliament/finland-old.py b/backend/corpora/parliament/finland-old.py index ee6f6574c..50ed0d445 100644 --- a/backend/corpora/parliament/finland-old.py +++ b/backend/corpora/parliament/finland-old.py @@ -28,6 +28,9 @@ def sources(self, start, end): document_context = document_context() + chamber = field_defaults.chamber() + chamber.extractor = CSV(field='estate') + country = field_defaults.country() country.extractor = Constant('Finland') @@ -48,9 +51,6 @@ def sources(self, start, end): date_latest.search_filter.lower = min_date date_latest.search_filter.upper = max_date - era = field_defaults.era(include_aggregations=True) - era.extractor = CSV(field='estate') - language = field_defaults.language() language.extractor = CSV(field='language') @@ -76,6 +76,7 @@ def sources(self, start, end): def __init__(self): self.fields = [ + self.chamber, self.country, self.date_earliest, self.date_latest, diff --git a/backend/corpora/parliament/images/finland-old.jpg b/backend/corpora/parliament/images/finland-old.jpg new file mode 100644 index 000000000..9254fe014 Binary files /dev/null and b/backend/corpora/parliament/images/finland-old.jpg differ diff --git a/backend/corpora/parliament/tests/test_import.py b/backend/corpora/parliament/tests/test_import.py index 0914cd850..20d6b6696 100644 --- a/backend/corpora/parliament/tests/test_import.py +++ b/backend/corpora/parliament/tests/test_import.py @@ -481,13 +481,27 @@ 'docs': [ { 'country': 'Finland', - 'speech': 'FÖUDT HOS FINLANDS RIDDERSKAP OCR ADEL VID LANDTDAGEN ÅR 1877.TREDJE HÄFTET. Från den 1 till den 31 Oktober. FINSKA LITTERATUR-SÄLLBKAPETS TRYCKERl, 1878.', + 'speech': """FÖUDT HOS + +FINLANDS RIDDERSKAP OCR ADEL + +VID + +LANDTDAGEN ÅR 1877. + +TREDJE HÄFTET. + +Från den 1 till den 31 Oktober. + +FINSKA LITTERATUR-SÄLLBKAPETS TRYCKERl, + +1878.""", 'id': 'Adeln_Prot_1877_III.pdf_3_0', + 'chamber': 'nobility', 'date_earliest': '1877-01-01', 'date_latest': '1877-12-31', - 'page': 0, + 'page': '0', 'language': 'swe', - 'era': 'nobility', 'source_archive': 'Adeln_Prot_1877_III.pdf' }, ],