From e7aa229ccb7c4e0cbeda6f2e2542fb5a9b3e3656 Mon Sep 17 00:00:00 2001 From: mjanez <96422458+mjanez@users.noreply.github.com> Date: Fri, 26 Jul 2024 17:57:28 +0200 Subject: [PATCH 1/8] First approach --- ckanext/schemingdcat/config.py | 545 ++-- ckanext/schemingdcat/harvesters/base.py | 199 +- ckanext/schemingdcat/harvesters/ckan.py | 116 +- ckanext/schemingdcat/helpers.py | 2706 ++++++++++---------- ckanext/schemingdcat/lib/field_mapping.py | 4 +- ckanext/schemingdcat/package_controller.py | 316 +-- ckanext/schemingdcat/utils.py | 581 +++-- 7 files changed, 2308 insertions(+), 2159 deletions(-) diff --git a/ckanext/schemingdcat/config.py b/ckanext/schemingdcat/config.py index 3752efc3..90f555ba 100644 --- a/ckanext/schemingdcat/config.py +++ b/ckanext/schemingdcat/config.py @@ -1,267 +1,280 @@ -import typing -import re - -# Default values -default_facet_operator = 'OR' -icons_dir = 'images/icons' -default_locale = 'en' -organization_custom_facets = False -group_custom_facets = False -debug = False -linkeddata_links = None -geometadata_links = None -endpoints = None -endpoints_yaml = 'endpoints.yaml' -facet_list_limit = 6 -default_package_item_icon = 'theme' -default_package_item_show_spatial = True -show_metadata_templates_toolbar = True -metadata_templates_search_identifier = 'schemingdcat_xls-template' -mimetype_base_uri = 'http://www.iana.org/assignments/media-types' - -# Default DCAT metadata configuration -OGC2CKAN_HARVESTER_MD_CONFIG = { - 'access_rights': 'http://inspire.ec.europa.eu/metadata-codelist/LimitationsOnPublicAccess/noLimitations', - 'conformance': [ - 'http://inspire.ec.europa.eu/documents/inspire-metadata-regulation','http://inspire.ec.europa.eu/documents/commission-regulation-eu-no-13122014-10-december-2014-amending-regulation-eu-no-10892010-0' - ], - 'author': 'ckanext-schemingdcat', - 'author_email': 'admin@{ckan_instance}', - 'author_url': '{ckan_instance}/organization/test', - 'author_uri': '{ckan_instance}/organization/test', - 
'contact_name': 'ckanext-schemingdcat', - 'contact_email': 'admin@{ckan_instance}', - 'contact_url': '{ckan_instance}/organization/test', - 'contact_uri': '{ckan_instance}/organization/test', - 'dcat_type': { - 'series': 'http://inspire.ec.europa.eu/metadata-codelist/ResourceType/series', - 'dataset': 'http://inspire.ec.europa.eu/metadata-codelist/ResourceType/dataset', - 'spatial_data_service': 'http://inspire.ec.europa.eu/metadata-codelist/ResourceType/service', - 'default': 'http://inspire.ec.europa.eu/metadata-codelist/ResourceType/dataset', - 'collection': 'http://purl.org/dc/dcmitype/Collection', - 'event': 'http://purl.org/dc/dcmitype/Event', - 'image': 'http://purl.org/dc/dcmitype/Image', - 'still_image': 'http://purl.org/dc/dcmitype/StillImage', - 'moving_image': 'http://purl.org/dc/dcmitype/MovingImage', - 'physical_object': 'http://purl.org/dc/dcmitype/PhysicalObject', - 'interactive_resource': 'http://purl.org/dc/dcmitype/InteractiveResource', - 'service': 'http://purl.org/dc/dcmitype/Service', - 'sound': 'http://purl.org/dc/dcmitype/Sound', - 'software': 'http://purl.org/dc/dcmitype/Software', - 'text': 'http://purl.org/dc/dcmitype/Text', - }, - 'encoding': 'UTF-8', - 'frequency' : 'http://publications.europa.eu/resource/authority/frequency/UNKNOWN', - 'inspireid_theme': 'HB', - 'language': 'http://publications.europa.eu/resource/authority/language/ENG', - 'license': 'http://creativecommons.org/licenses/by/4.0/', - 'license_id': 'cc-by', - 'lineage_process_steps': 'ckanext-schemingdcat lineage process steps.', - 'maintainer': 'ckanext-schemingdcat', - 'maintainer_email': 'admin@{ckan_instance}', - 'maintainer_url': '{ckan_instance}/organization/test', - 'maintainer_uri': '{ckan_instance}/organization/test', - 'metadata_profile': [ - "http://semiceu.github.io/GeoDCAT-AP/releases/2.0.0","http://inspire.ec.europa.eu/document-tags/metadata" - ], - 'provenance': 'ckanext-schemingdcat provenance statement.', - 'publisher_name': 'ckanext-schemingdcat', - 
'publisher_email': 'admin@{ckan_instance}', - 'publisher_url': '{ckan_instance}/organization/test', - 'publisher_identifier': '{ckan_instance}/organization/test', - 'publisher_uri': '{ckan_instance}/organization/test', - 'publisher_type': 'http://purl.org/adms/publishertype/NonProfitOrganisation', - 'reference_system': 'http://www.opengis.net/def/crs/EPSG/0/4258', - 'representation_type': { - 'wfs': 'http://inspire.ec.europa.eu/metadata-codelist/SpatialRepresentationType/vector', - 'wcs': 'http://inspire.ec.europa.eu/metadata-codelist/SpatialRepresentationType/grid', - 'default': 'http://inspire.ec.europa.eu/metadata-codelist/SpatialRepresentationType/vector', - 'grid': 'http://inspire.ec.europa.eu/metadata-codelist/SpatialRepresentationType/grid', - 'vector': 'http://inspire.ec.europa.eu/metadata-codelist/SpatialRepresentationType/vector', - 'textTable': 'http://inspire.ec.europa.eu/metadata-codelist/SpatialRepresentationType/textTable', - 'tin': 'http://inspire.ec.europa.eu/metadata-codelist/SpatialRepresentationType/tin', - 'stereoModel': 'http://inspire.ec.europa.eu/metadata-codelist/SpatialRepresentationType/stereoModel', - 'video': 'http://inspire.ec.europa.eu/metadata-codelist/SpatialRepresentationType/video', - }, - 'resources': { - 'availability': 'http://publications.europa.eu/resource/authority/planned-availability/AVAILABLE', - 'name': { - 'es': 'Distribución {format}', - 'en': 'Distribution {format}' - }, - }, - 'rights': 'http://inspire.ec.europa.eu/metadata-codelist/LimitationsOnPublicAccess/noLimitations', - 'spatial': None, - 'spatial_uri': 'http://datos.gob.es/recurso/sector-publico/territorio/Pais/España', - 'status': 'http://purl.org/adms/status/UnderDevelopment', - 'temporal_start': None, - 'temporal_end': None, - 'theme': 'http://inspire.ec.europa.eu/theme/hb', - 'theme_es': 'http://datos.gob.es/kos/sector-publico/sector/medio-ambiente', - 'theme_eu': 'http://publications.europa.eu/resource/authority/data-theme/ENVI', - 'topic': 
'http://inspire.ec.europa.eu/metadata-codelist/TopicCategory/biota', - 'valid': None -} - -OGC2CKAN_MD_FORMATS = { - 'api': ('API', 'http://www.iana.org/assignments/media-types/application/vnd.api+json', None, 'Application Programming Interface'), - 'api feature': ('OGCFeat', 'http://www.opengis.net/def/interface/ogcapi-features', 'http://www.opengeospatial.org/standards/features', 'OGC API - Features'), - 'wms': ('WMS', 'http://www.opengis.net/def/serviceType/ogc/wms', 'http://www.opengeospatial.org/standards/wms', 'Web Map Service'), - 'zip': ('ZIP', 'http://www.iana.org/assignments/media-types/application/zip', 'http://www.iso.org/standard/60101.html', 'ZIP File'), - 'rar': ('RAR', 'http://www.iana.org/assignments/media-types/application/vnd.rar', 'http://www.rarlab.com/technote.htm', 'RAR File'), - 'wfs': ('WFS', 'http://www.opengis.net/def/serviceType/ogc/wfs', 'http://www.opengeospatial.org/standards/wfs', 'Web Feature Service'), - 'wcs': ('WCS', 'http://www.opengis.net/def/serviceType/ogc/wcs', 'http://www.opengeospatial.org/standards/wcs', 'Web Coverage Service'), - 'tms': ('TMS', 'http://wiki.osgeo.org/wiki/Tile_Map_Service_Specification', 'http://www.opengeospatial.org/standards/tms', 'Tile Map Service'), - 'wmts': ('WMTS', 'http://www.opengis.net/def/serviceType/ogc/wmts', 'http://www.opengeospatial.org/standards/wmts', 'Web Map Tile Service'), - 'kml': ('KML', 'http://www.iana.org/assignments/media-types/application/vnd.google-earth.kml+xml', 'http://www.opengeospatial.org/standards/kml', 'Keyhole Markup Language'), - 'kmz': ('KMZ', 'http://www.iana.org/assignments/media-types/application/vnd.google-earth.kmz+xml', 'http://www.opengeospatial.org/standards/kml', 'Compressed Keyhole Markup Language'), - 'gml': ('GML', 'http://www.iana.org/assignments/media-types/application/gml+xml', 'http://www.opengeospatial.org/standards/gml', 'Geography Markup Language'), - 'geojson': ('GeoJSON', 'http://www.iana.org/assignments/media-types/application/geo+json', 
'http://www.rfc-editor.org/rfc/rfc7946', 'GeoJSON'), - 'json': ('JSON', 'http://www.iana.org/assignments/media-types/application/json', 'http://www.ecma-international.org/publications/standards/Ecma-404.htm', 'JavaScript Object Notation'), - 'atom': ('ATOM', 'http://www.iana.org/assignments/media-types/application/atom+xml', 'http://validator.w3.org/feed/docs/atom.html', 'Atom Syndication Format'), - 'xml': ('XML', 'http://www.iana.org/assignments/media-types/application/xml', 'http://www.w3.org/TR/REC-xml/', 'Extensible Markup Language'), - 'arcgis_rest': ('ESRI Rest', None, None, 'ESRI Rest Service'), - 'shp': ('SHP', 'http://www.iana.org/assignments/media-types/application/vnd.shp', 'http://www.esri.com/library/whitepapers/pdfs/shapefile.pdf', 'ESRI Shapefile'), - 'shapefile': ('SHP', 'http://www.iana.org/assignments/media-types/application/vnd.shp', 'http://www.esri.com/library/whitepapers/pdfs/shapefile.pdf', 'ESRI Shapefile'), - 'esri': ('SHP', 'http://www.iana.org/assignments/media-types/application/vnd.shp', 'http://www.esri.com/library/whitepapers/pdfs/shapefile.pdf', 'ESRI Shapefile'), - 'html': ('HTML', 'http://www.iana.org/assignments/media-types/text/html', 'http://www.w3.org/TR/2011/WD-html5-20110405/', 'HyperText Markup Language'), - 'html5': ('HTML', 'http://www.iana.org/assignments/media-types/text/html', 'http://www.w3.org/TR/2011/WD-html5-20110405/', 'HyperText Markup Language'), - 'visor': ('HTML', 'http://www.iana.org/assignments/media-types/text/html', 'http://www.w3.org/TR/2011/WD-html5-20110405/', 'Map Viewer'), - 'enlace': ('HTML', 'http://www.iana.org/assignments/media-types/text/html', 'http://www.w3.org/TR/2011/WD-html5-20110405/', 'Map Viewer'), - 'pdf': ('PDF', 'http://www.iana.org/assignments/media-types/application/pdf', 'http://www.iso.org/standard/75839.html', 'Portable Document Format'), - 'csv': ('CSV', 'http://www.iana.org/assignments/media-types/text/csv', 'http://www.rfc-editor.org/rfc/rfc4180', 'Comma-Separated Values'), - 
'netcdf': ('NetCDF', 'http://www.iana.org/assignments/media-types/text/csv', 'http://www.opengeospatial.org/standards/netcdf', 'Network Common Data Form'), - 'csw': ('CSW', 'http://www.opengis.net/def/serviceType/ogc/csw', 'http://www.opengeospatial.org/standards/cat', 'Catalog Service for the Web'), - 'geodcatap': ('RDF', 'http://www.iana.org/assignments/media-types/application/rdf+xml', 'http://semiceu.github.io/GeoDCAT-AP/releases/2.0.0/', 'GeoDCAT-AP 2.0 Metadata') - , - 'inspire': ('XML', 'http://www.iana.org/assignments/media-types/application/xml', ['http://inspire.ec.europa.eu/documents/inspire-metadata-regulation','http://inspire.ec.europa.eu/documents/commission-regulation-eu-no-13122014-10-december-2014-amending-regulation-eu-no-10892010-0', 'http://www.isotc211.org/2005/gmd/'], 'INSPIRE ISO 19139 Metadata') -} - -OGC2CKAN_ISO_MD_ELEMENTS = { - 'lineage_source': 'gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:lineage/gmd:LI_Lineage/gmd:source/gmd:LI_Source/gmd:description/gco:CharacterString', - 'lineage_process_steps': 'gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:lineage/gmd:LI_Lineage/gmd:processStep' -} - -# loose definition of BCP47-like strings -BCP_47_LANGUAGE = u'^[a-z]{2,8}(-[0-9a-zA-Z]{1,8})*$' - -DATE_FIELDS = [ - {'field_name': 'created', 'fallback': 'issued', 'default_value': None, 'override': True, 'dtype': str}, - {'field_name': 'issued', 'fallback': None, 'default_value': None, 'override': True, 'dtype': str}, - {'field_name': 'modified', 'fallback': 'issued', 'default_value': None, 'override': True, 'dtype': str}, - {'field_name': 'valid', 'fallback': None, 'default_value': None, 'override': True, 'dtype': str}, - {'field_name': 'temporal_start', 'fallback': None, 'default_value': None, 'override': True, 'dtype': str}, - {'field_name': 'temporal_end', 'fallback': None, 'default_value': None, 'override': True, 'dtype': str} -] - -DATASET_DEFAULT_FIELDS = [ - {'field_name': 'id', 'fallback': None, 'default_value': None, 'override': False, 
'dtype': str}, - {'field_name': 'name', 'fallback': None, 'default_value': None, 'override': False, 'dtype': str}, - {'field_name': 'title', 'fallback': None, 'default_value': None, 'override': False, 'dtype': str}, - {'field_name': 'notes', 'fallback': None, 'default_value': None, 'override': False, 'dtype': str}, - {'field_name': 'description', 'fallback': None, 'default_value': None, 'override': False, 'dtype': str}, - {'field_name': 'access_rights', 'fallback': None, 'default_value': OGC2CKAN_HARVESTER_MD_CONFIG['access_rights'], 'override': True, 'dtype': str}, - {'field_name': 'license', 'fallback': None, 'default_value': OGC2CKAN_HARVESTER_MD_CONFIG['license'], 'override': True, 'dtype': str}, - {'field_name': 'license_id', 'fallback': None, 'default_value': OGC2CKAN_HARVESTER_MD_CONFIG['license_id'], 'override': True, 'dtype': str}, - {'field_name': 'topic', 'fallback': None, 'default_value': OGC2CKAN_HARVESTER_MD_CONFIG['topic'], 'override': True, 'dtype': str}, - {'field_name': 'theme', 'fallback': None, 'default_value': OGC2CKAN_HARVESTER_MD_CONFIG['theme'], 'override': True, 'dtype': str}, - {'field_name': 'theme_eu', 'fallback': None, 'default_value': OGC2CKAN_HARVESTER_MD_CONFIG['theme_eu'], 'override': True, 'dtype': str}, - {'field_name': 'status', 'fallback': None, 'default_value': OGC2CKAN_HARVESTER_MD_CONFIG['status'], 'override': True, 'dtype': str}, - {'field_name': 'hvd_category', 'fallback': None, 'default_value': None, 'override': False, 'dtype': str}, -] - -RESOURCE_DEFAULT_FIELDS = [ - {'field_name': 'url', 'fallback': None, 'default_value': "", 'override': False, 'dtype': str}, - {'field_name': 'name', 'fallback': None, 'default_value': None, 'override': False, 'dtype': str}, - {'field_name': 'format', 'fallback': None, 'default_value': None, 'override': False, 'dtype': str}, - {'field_name': 'protocol', 'fallback': None, 'default_value': None, 'override': False, 'dtype': str}, - {'field_name': 'mimetype', 'fallback': None, 
'default_value': None, 'override': False, 'dtype': str}, - {'field_name': 'description', 'fallback': None, 'default_value': None, 'override': False, 'dtype': str}, - {'field_name': 'license', 'fallback': None, 'default_value': OGC2CKAN_HARVESTER_MD_CONFIG['license'], 'override': True, 'dtype': str}, - {'field_name': 'license_id', 'fallback': None, 'default_value': OGC2CKAN_HARVESTER_MD_CONFIG['license_id'], 'override': True, 'dtype': str}, - {'field_name': 'rights', 'fallback': None, 'default_value': OGC2CKAN_HARVESTER_MD_CONFIG['rights'], 'override': True, 'dtype': str}, - {'field_name': 'language', 'fallback': None, 'default_value': OGC2CKAN_HARVESTER_MD_CONFIG['language'], 'override': False, 'dtype': str}, - {'field_name': 'conforms_to', 'fallback': None, 'default_value': None, 'override': False, 'dtype': str}, - {'field_name': 'size', 'fallback': None, 'default_value': 0, 'override': True, 'dtype': int}, -] - -# Custom rules for harvesters.base._update_custom_format() -CUSTOM_FORMAT_RULES = [ - { - 'format_strings': ['esri', 'arcgis'], - 'url_string': 'viewer.html?url=', - 'format': 'HTML', - 'mimetype': 'https://www.iana.org/assignments/media-types/text/html' - }, - { - 'format_strings': ['html', 'html5'], - 'url_string': None, - 'format': 'HTML', - 'mimetype': 'https://www.iana.org/assignments/media-types/text/html' - }, - { - 'format_strings': None, - 'url_string': 'getrecordbyid', - 'format': 'XML', - 'mimetype': 'https://www.iana.org/assignments/media-types/application/xml' - } - # Add more rules here as needed -] - -DATADICTIONARY_DEFAULT_SCHEMA = [ - 'id', - 'type', - 'label', - 'notes', - 'type_override' - ] - -# Common date formats for parsing. 
https://docs.python.org/es/3/library/datetime.html#strftime-and-strptime-format-codes -COMMON_DATE_FORMATS = [ - '%Y-%m-%d', - '%d-%m-%Y', - '%m-%d-%Y', - '%Y/%m/%d', - '%d/%m/%Y', - '%m/%d/%Y', - '%Y-%m-%d %H:%M:%S', # Date with time - '%d-%m-%Y %H:%M:%S', # Date with time - '%m-%d-%Y %H:%M:%S', # Date with time - '%Y/%m/%d %H:%M:%S', # Date with time - '%d/%m/%Y %H:%M:%S', # Date with time - '%m/%d/%Y %H:%M:%S', # Date with time - '%Y-%m-%dT%H:%M:%S', # ISO 8601 format - '%Y-%m-%dT%H:%M:%SZ', # ISO 8601 format with Zulu time indicator -] -# Vocabs -SCHEMINGDCAT_DEFAULT_DATASET_SCHEMA_NAME: typing.Final[str] = "dataset" -SCHEMINGDCAT_INSPIRE_THEMES_VOCAB: typing.Final[str] = "theme" -SCHEMINGDCAT_DCAT_THEMES_VOCAB: typing.Final[list] = ["theme_es", "theme_eu"] -SCHEMINGDCAT_ISO19115_TOPICS_VOCAB: typing.Final[list] = "topic" - - -# Clean ckan names -URL_REGEX = re.compile( - r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+' -) - -# Compile the regular expression -INVALID_CHARS = re.compile(r"[^a-zñ0-9_.-]") - -# Define a dictionary to map accented characters to their unaccented equivalents except ñ -ACCENT_MAP = str.maketrans({ - "á": "a", "à": "a", "ä": "a", "â": "a", "ã": "a", - "é": "e", "è": "e", "ë": "e", "ê": "e", - "í": "i", "ì": "i", "ï": "i", "î": "i", - "ó": "o", "ò": "o", "ö": "o", "ô": "o", "õ": "o", - "ú": "u", "ù": "u", "ü": "u", "û": "u", - "ñ": "ñ", -}) - -URL_FIELD_NAMES = { - 'dataset': - ['dcat_type', 'theme_es', 'language', 'topic', 'maintainer_url', 'tag_uri', 'contact_uri', 'contact_url', 'publisher_identifier', 'publisher_uri', 'publisher_url', 'publisher_type', 'maintainer_uri', 'maintainer_url', 'author_uri', 'author_url', 'conforms_to', 'theme', 'reference_system', 'spatial_uri', 'representation_type', 'license_id', 'access_rights', 'graphic_overview', 'frequency', 'hvd_category'], - 'resource': - ['url', 'availability', 'mimetype', 'status', 'resource_relation', 'license', 'rights', 'conforms_to', 
'reference_system'] - } +import typing +import re + +# Default values +default_facet_operator = 'OR' +icons_dir = 'images/icons' +default_locale = 'en' +organization_custom_facets = False +group_custom_facets = False +debug = False +linkeddata_links = None +geometadata_links = None +endpoints = None +endpoints_yaml = 'endpoints.yaml' +facet_list_limit = 6 +default_package_item_icon = 'theme' +default_package_item_show_spatial = True +show_metadata_templates_toolbar = True +metadata_templates_search_identifier = 'schemingdcat_xls-template' +mimetype_base_uri = 'http://www.iana.org/assignments/media-types' +slugify_pat = re.compile('[^a-zA-Z0-9]') + +# Default DCAT metadata configuration +OGC2CKAN_HARVESTER_MD_CONFIG = { + 'access_rights': 'http://inspire.ec.europa.eu/metadata-codelist/LimitationsOnPublicAccess/noLimitations', + 'conformance': [ + 'http://inspire.ec.europa.eu/documents/inspire-metadata-regulation','http://inspire.ec.europa.eu/documents/commission-regulation-eu-no-13122014-10-december-2014-amending-regulation-eu-no-10892010-0' + ], + 'author': 'ckanext-schemingdcat', + 'author_email': 'admin@{ckan_instance}', + 'author_url': '{ckan_instance}/organization/test', + 'author_uri': '{ckan_instance}/organization/test', + 'contact_name': 'ckanext-schemingdcat', + 'contact_email': 'admin@{ckan_instance}', + 'contact_url': '{ckan_instance}/organization/test', + 'contact_uri': '{ckan_instance}/organization/test', + 'dcat_type': { + 'series': 'http://inspire.ec.europa.eu/metadata-codelist/ResourceType/series', + 'dataset': 'http://inspire.ec.europa.eu/metadata-codelist/ResourceType/dataset', + 'spatial_data_service': 'http://inspire.ec.europa.eu/metadata-codelist/ResourceType/service', + 'default': 'http://inspire.ec.europa.eu/metadata-codelist/ResourceType/dataset', + 'collection': 'http://purl.org/dc/dcmitype/Collection', + 'event': 'http://purl.org/dc/dcmitype/Event', + 'image': 'http://purl.org/dc/dcmitype/Image', + 'still_image': 
'http://purl.org/dc/dcmitype/StillImage', + 'moving_image': 'http://purl.org/dc/dcmitype/MovingImage', + 'physical_object': 'http://purl.org/dc/dcmitype/PhysicalObject', + 'interactive_resource': 'http://purl.org/dc/dcmitype/InteractiveResource', + 'service': 'http://purl.org/dc/dcmitype/Service', + 'sound': 'http://purl.org/dc/dcmitype/Sound', + 'software': 'http://purl.org/dc/dcmitype/Software', + 'text': 'http://purl.org/dc/dcmitype/Text', + }, + 'encoding': 'UTF-8', + 'frequency' : 'http://publications.europa.eu/resource/authority/frequency/UNKNOWN', + 'inspireid_theme': 'HB', + 'language': 'http://publications.europa.eu/resource/authority/language/ENG', + 'license': 'http://creativecommons.org/licenses/by/4.0/', + 'license_id': 'cc-by', + 'lineage_process_steps': 'ckanext-schemingdcat lineage process steps.', + 'maintainer': 'ckanext-schemingdcat', + 'maintainer_email': 'admin@{ckan_instance}', + 'maintainer_url': '{ckan_instance}/organization/test', + 'maintainer_uri': '{ckan_instance}/organization/test', + 'metadata_profile': [ + "http://semiceu.github.io/GeoDCAT-AP/releases/2.0.0","http://inspire.ec.europa.eu/document-tags/metadata" + ], + 'provenance': 'ckanext-schemingdcat provenance statement.', + 'publisher_name': 'ckanext-schemingdcat', + 'publisher_email': 'admin@{ckan_instance}', + 'publisher_url': '{ckan_instance}/organization/test', + 'publisher_identifier': '{ckan_instance}/organization/test', + 'publisher_uri': '{ckan_instance}/organization/test', + 'publisher_type': 'http://purl.org/adms/publishertype/NonProfitOrganisation', + 'reference_system': 'http://www.opengis.net/def/crs/EPSG/0/4258', + 'representation_type': { + 'wfs': 'http://inspire.ec.europa.eu/metadata-codelist/SpatialRepresentationType/vector', + 'wcs': 'http://inspire.ec.europa.eu/metadata-codelist/SpatialRepresentationType/grid', + 'default': 'http://inspire.ec.europa.eu/metadata-codelist/SpatialRepresentationType/vector', + 'grid': 
'http://inspire.ec.europa.eu/metadata-codelist/SpatialRepresentationType/grid', + 'vector': 'http://inspire.ec.europa.eu/metadata-codelist/SpatialRepresentationType/vector', + 'textTable': 'http://inspire.ec.europa.eu/metadata-codelist/SpatialRepresentationType/textTable', + 'tin': 'http://inspire.ec.europa.eu/metadata-codelist/SpatialRepresentationType/tin', + 'stereoModel': 'http://inspire.ec.europa.eu/metadata-codelist/SpatialRepresentationType/stereoModel', + 'video': 'http://inspire.ec.europa.eu/metadata-codelist/SpatialRepresentationType/video', + }, + 'resources': { + 'availability': 'http://publications.europa.eu/resource/authority/planned-availability/AVAILABLE', + 'name': { + 'es': 'Distribución {format}', + 'en': 'Distribution {format}' + }, + }, + 'rights': 'http://inspire.ec.europa.eu/metadata-codelist/LimitationsOnPublicAccess/noLimitations', + 'spatial': None, + 'spatial_uri': 'http://datos.gob.es/recurso/sector-publico/territorio/Pais/España', + 'status': 'http://purl.org/adms/status/UnderDevelopment', + 'temporal_start': None, + 'temporal_end': None, + 'theme': 'http://inspire.ec.europa.eu/theme/hb', + 'theme_es': 'http://datos.gob.es/kos/sector-publico/sector/medio-ambiente', + 'theme_eu': 'http://publications.europa.eu/resource/authority/data-theme/ENVI', + 'topic': 'http://inspire.ec.europa.eu/metadata-codelist/TopicCategory/biota', + 'valid': None +} + +OGC2CKAN_MD_FORMATS = { + 'api': ('API', 'http://www.iana.org/assignments/media-types/application/vnd.api+json', None, 'Application Programming Interface'), + 'api feature': ('OGCFeat', 'http://www.opengis.net/def/interface/ogcapi-features', 'http://www.opengeospatial.org/standards/features', 'OGC API - Features'), + 'wms': ('WMS', 'http://www.opengis.net/def/serviceType/ogc/wms', 'http://www.opengeospatial.org/standards/wms', 'Web Map Service'), + 'zip': ('ZIP', 'http://www.iana.org/assignments/media-types/application/zip', 'http://www.iso.org/standard/60101.html', 'ZIP File'), + 'rar': ('RAR', 
'http://www.iana.org/assignments/media-types/application/vnd.rar', 'http://www.rarlab.com/technote.htm', 'RAR File'), + 'wfs': ('WFS', 'http://www.opengis.net/def/serviceType/ogc/wfs', 'http://www.opengeospatial.org/standards/wfs', 'Web Feature Service'), + 'wcs': ('WCS', 'http://www.opengis.net/def/serviceType/ogc/wcs', 'http://www.opengeospatial.org/standards/wcs', 'Web Coverage Service'), + 'tms': ('TMS', 'http://wiki.osgeo.org/wiki/Tile_Map_Service_Specification', 'http://www.opengeospatial.org/standards/tms', 'Tile Map Service'), + 'wmts': ('WMTS', 'http://www.opengis.net/def/serviceType/ogc/wmts', 'http://www.opengeospatial.org/standards/wmts', 'Web Map Tile Service'), + 'kml': ('KML', 'http://www.iana.org/assignments/media-types/application/vnd.google-earth.kml+xml', 'http://www.opengeospatial.org/standards/kml', 'Keyhole Markup Language'), + 'kmz': ('KMZ', 'http://www.iana.org/assignments/media-types/application/vnd.google-earth.kmz+xml', 'http://www.opengeospatial.org/standards/kml', 'Compressed Keyhole Markup Language'), + 'gml': ('GML', 'http://www.iana.org/assignments/media-types/application/gml+xml', 'http://www.opengeospatial.org/standards/gml', 'Geography Markup Language'), + 'geojson': ('GeoJSON', 'http://www.iana.org/assignments/media-types/application/geo+json', 'http://www.rfc-editor.org/rfc/rfc7946', 'GeoJSON'), + 'json': ('JSON', 'http://www.iana.org/assignments/media-types/application/json', 'http://www.ecma-international.org/publications/standards/Ecma-404.htm', 'JavaScript Object Notation'), + 'atom': ('ATOM', 'http://www.iana.org/assignments/media-types/application/atom+xml', 'http://validator.w3.org/feed/docs/atom.html', 'Atom Syndication Format'), + 'xml': ('XML', 'http://www.iana.org/assignments/media-types/application/xml', 'http://www.w3.org/TR/REC-xml/', 'Extensible Markup Language'), + 'arcgis_rest': ('ESRI Rest', None, None, 'ESRI Rest Service'), + 'shp': ('SHP', 'http://www.iana.org/assignments/media-types/application/vnd.shp', 
'http://www.esri.com/library/whitepapers/pdfs/shapefile.pdf', 'ESRI Shapefile'), + 'shapefile': ('SHP', 'http://www.iana.org/assignments/media-types/application/vnd.shp', 'http://www.esri.com/library/whitepapers/pdfs/shapefile.pdf', 'ESRI Shapefile'), + 'esri': ('SHP', 'http://www.iana.org/assignments/media-types/application/vnd.shp', 'http://www.esri.com/library/whitepapers/pdfs/shapefile.pdf', 'ESRI Shapefile'), + 'html': ('HTML', 'http://www.iana.org/assignments/media-types/text/html', 'http://www.w3.org/TR/2011/WD-html5-20110405/', 'HyperText Markup Language'), + 'html5': ('HTML', 'http://www.iana.org/assignments/media-types/text/html', 'http://www.w3.org/TR/2011/WD-html5-20110405/', 'HyperText Markup Language'), + 'visor': ('HTML', 'http://www.iana.org/assignments/media-types/text/html', 'http://www.w3.org/TR/2011/WD-html5-20110405/', 'Map Viewer'), + 'enlace': ('HTML', 'http://www.iana.org/assignments/media-types/text/html', 'http://www.w3.org/TR/2011/WD-html5-20110405/', 'Map Viewer'), + 'pdf': ('PDF', 'http://www.iana.org/assignments/media-types/application/pdf', 'http://www.iso.org/standard/75839.html', 'Portable Document Format'), + 'csv': ('CSV', 'http://www.iana.org/assignments/media-types/text/csv', 'http://www.rfc-editor.org/rfc/rfc4180', 'Comma-Separated Values'), + 'netcdf': ('NetCDF', 'http://www.iana.org/assignments/media-types/text/csv', 'http://www.opengeospatial.org/standards/netcdf', 'Network Common Data Form'), + 'csw': ('CSW', 'http://www.opengis.net/def/serviceType/ogc/csw', 'http://www.opengeospatial.org/standards/cat', 'Catalog Service for the Web'), + 'geodcatap': ('RDF', 'http://www.iana.org/assignments/media-types/application/rdf+xml', 'http://semiceu.github.io/GeoDCAT-AP/releases/2.0.0/', 'GeoDCAT-AP 2.0 Metadata') + , + 'inspire': ('XML', 'http://www.iana.org/assignments/media-types/application/xml', 
['http://inspire.ec.europa.eu/documents/inspire-metadata-regulation','http://inspire.ec.europa.eu/documents/commission-regulation-eu-no-13122014-10-december-2014-amending-regulation-eu-no-10892010-0', 'http://www.isotc211.org/2005/gmd/'], 'INSPIRE ISO 19139 Metadata') +} + +OGC2CKAN_ISO_MD_ELEMENTS = { + 'lineage_source': 'gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:lineage/gmd:LI_Lineage/gmd:source/gmd:LI_Source/gmd:description/gco:CharacterString', + 'lineage_process_steps': 'gmd:dataQualityInfo/gmd:DQ_DataQuality/gmd:lineage/gmd:LI_Lineage/gmd:processStep' +} + +# loose definition of BCP47-like strings +BCP_47_LANGUAGE = u'^[a-z]{2,8}(-[0-9a-zA-Z]{1,8})*$' + +DATASET_DEFAULT_SCHEMA = [ + 'id', + 'type', + 'isopen', + ] + +RESOURCE_DEFAULT_SCHEMA = [ + 'url', + 'name', + ] + + +DATE_FIELDS = [ + {'field_name': 'created', 'fallback': 'issued', 'default_value': None, 'override': True, 'dtype': str}, + {'field_name': 'issued', 'fallback': None, 'default_value': None, 'override': True, 'dtype': str}, + {'field_name': 'modified', 'fallback': 'issued', 'default_value': None, 'override': True, 'dtype': str}, + {'field_name': 'valid', 'fallback': None, 'default_value': None, 'override': True, 'dtype': str}, + {'field_name': 'temporal_start', 'fallback': None, 'default_value': None, 'override': True, 'dtype': str}, + {'field_name': 'temporal_end', 'fallback': None, 'default_value': None, 'override': True, 'dtype': str} +] + +DATASET_DEFAULT_FIELDS = [ + {'field_name': 'id', 'fallback': None, 'default_value': None, 'override': False, 'dtype': str}, + {'field_name': 'name', 'fallback': None, 'default_value': None, 'override': False, 'dtype': str}, + {'field_name': 'title', 'fallback': None, 'default_value': None, 'override': False, 'dtype': str}, + {'field_name': 'notes', 'fallback': None, 'default_value': None, 'override': False, 'dtype': str}, + {'field_name': 'description', 'fallback': None, 'default_value': None, 'override': False, 'dtype': str}, + {'field_name': 
'access_rights', 'fallback': None, 'default_value': OGC2CKAN_HARVESTER_MD_CONFIG['access_rights'], 'override': True, 'dtype': str}, + {'field_name': 'license', 'fallback': None, 'default_value': OGC2CKAN_HARVESTER_MD_CONFIG['license'], 'override': True, 'dtype': str}, + {'field_name': 'license_id', 'fallback': None, 'default_value': OGC2CKAN_HARVESTER_MD_CONFIG['license_id'], 'override': True, 'dtype': str}, + {'field_name': 'topic', 'fallback': None, 'default_value': OGC2CKAN_HARVESTER_MD_CONFIG['topic'], 'override': True, 'dtype': str}, + {'field_name': 'theme', 'fallback': None, 'default_value': OGC2CKAN_HARVESTER_MD_CONFIG['theme'], 'override': True, 'dtype': str}, + {'field_name': 'theme_eu', 'fallback': None, 'default_value': OGC2CKAN_HARVESTER_MD_CONFIG['theme_eu'], 'override': True, 'dtype': str}, + {'field_name': 'status', 'fallback': None, 'default_value': OGC2CKAN_HARVESTER_MD_CONFIG['status'], 'override': True, 'dtype': str}, + {'field_name': 'hvd_category', 'fallback': None, 'default_value': None, 'override': False, 'dtype': str}, +] + +RESOURCE_DEFAULT_FIELDS = [ + {'field_name': 'url', 'fallback': None, 'default_value': "", 'override': False, 'dtype': str}, + {'field_name': 'name', 'fallback': None, 'default_value': None, 'override': False, 'dtype': str}, + {'field_name': 'format', 'fallback': None, 'default_value': None, 'override': False, 'dtype': str}, + {'field_name': 'protocol', 'fallback': None, 'default_value': None, 'override': False, 'dtype': str}, + {'field_name': 'mimetype', 'fallback': None, 'default_value': None, 'override': False, 'dtype': str}, + {'field_name': 'description', 'fallback': None, 'default_value': None, 'override': False, 'dtype': str}, + {'field_name': 'license', 'fallback': None, 'default_value': OGC2CKAN_HARVESTER_MD_CONFIG['license'], 'override': True, 'dtype': str}, + {'field_name': 'license_id', 'fallback': None, 'default_value': OGC2CKAN_HARVESTER_MD_CONFIG['license_id'], 'override': True, 'dtype': str}, + 
{'field_name': 'rights', 'fallback': None, 'default_value': OGC2CKAN_HARVESTER_MD_CONFIG['rights'], 'override': True, 'dtype': str}, + {'field_name': 'language', 'fallback': None, 'default_value': OGC2CKAN_HARVESTER_MD_CONFIG['language'], 'override': False, 'dtype': str}, + {'field_name': 'conforms_to', 'fallback': None, 'default_value': None, 'override': False, 'dtype': str}, + {'field_name': 'size', 'fallback': None, 'default_value': 0, 'override': True, 'dtype': int}, +] + +# Custom rules for harvesters.base._update_custom_format() +CUSTOM_FORMAT_RULES = [ + { + 'format_strings': ['esri', 'arcgis'], + 'url_string': 'viewer.html?url=', + 'format': 'HTML', + 'mimetype': 'https://www.iana.org/assignments/media-types/text/html' + }, + { + 'format_strings': ['html', 'html5'], + 'url_string': None, + 'format': 'HTML', + 'mimetype': 'https://www.iana.org/assignments/media-types/text/html' + }, + { + 'format_strings': None, + 'url_string': 'getrecordbyid', + 'format': 'XML', + 'mimetype': 'https://www.iana.org/assignments/media-types/application/xml' + } + # Add more rules here as needed +] + +DATADICTIONARY_DEFAULT_SCHEMA = [ + 'id', + 'type', + 'label', + 'notes', + 'type_override' + ] + +# Common date formats for parsing. 
https://docs.python.org/es/3/library/datetime.html#strftime-and-strptime-format-codes +COMMON_DATE_FORMATS = [ + '%Y-%m-%d', + '%d-%m-%Y', + '%m-%d-%Y', + '%Y/%m/%d', + '%d/%m/%Y', + '%m/%d/%Y', + '%Y-%m-%d %H:%M:%S', # Date with time + '%d-%m-%Y %H:%M:%S', # Date with time + '%m-%d-%Y %H:%M:%S', # Date with time + '%Y/%m/%d %H:%M:%S', # Date with time + '%d/%m/%Y %H:%M:%S', # Date with time + '%m/%d/%Y %H:%M:%S', # Date with time + '%Y-%m-%dT%H:%M:%S', # ISO 8601 format + '%Y-%m-%dT%H:%M:%SZ', # ISO 8601 format with Zulu time indicator +] +# Vocabs +SCHEMINGDCAT_DEFAULT_DATASET_SCHEMA_NAME: typing.Final[str] = "dataset" +SCHEMINGDCAT_INSPIRE_THEMES_VOCAB: typing.Final[str] = "theme" +SCHEMINGDCAT_DCAT_THEMES_VOCAB: typing.Final[list] = ["theme_es", "theme_eu"] +SCHEMINGDCAT_ISO19115_TOPICS_VOCAB: typing.Final[list] = "topic" + + +# Clean ckan names +URL_REGEX = re.compile( + r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+' +) + +# Compile the regular expression +INVALID_CHARS = re.compile(r"[^a-zñ0-9_.-]") + +# Define a dictionary to map accented characters to their unaccented equivalents except ñ +ACCENT_MAP = str.maketrans({ + "á": "a", "à": "a", "ä": "a", "â": "a", "ã": "a", + "é": "e", "è": "e", "ë": "e", "ê": "e", + "í": "i", "ì": "i", "ï": "i", "î": "i", + "ó": "o", "ò": "o", "ö": "o", "ô": "o", "õ": "o", + "ú": "u", "ù": "u", "ü": "u", "û": "u", + "ñ": "ñ", +}) + +URL_FIELD_NAMES = { + 'dataset': + ['dcat_type', 'theme_es', 'language', 'topic', 'maintainer_url', 'tag_uri', 'contact_uri', 'contact_url', 'publisher_identifier', 'publisher_uri', 'publisher_url', 'publisher_type', 'maintainer_uri', 'maintainer_url', 'author_uri', 'author_url', 'conforms_to', 'theme', 'reference_system', 'spatial_uri', 'representation_type', 'license_id', 'access_rights', 'graphic_overview', 'frequency', 'hvd_category'], + 'resource': + ['url', 'availability', 'mimetype', 'status', 'resource_relation', 'license', 'rights', 'conforms_to', 
'reference_system'] + } EMAIL_FIELD_NAMES = ['publisher_email', 'maintainer_email', 'author_email', ] \ No newline at end of file diff --git a/ckanext/schemingdcat/harvesters/base.py b/ckanext/schemingdcat/harvesters/base.py index 0241645b..a4c87863 100644 --- a/ckanext/schemingdcat/harvesters/base.py +++ b/ckanext/schemingdcat/harvesters/base.py @@ -30,6 +30,8 @@ from ckanext.schemingdcat.lib.field_mapping import FieldMappingValidator from ckanext.schemingdcat.config import ( + DATASET_DEFAULT_SCHEMA, + RESOURCE_DEFAULT_SCHEMA, mimetype_base_uri, OGC2CKAN_HARVESTER_MD_CONFIG, OGC2CKAN_MD_FORMATS, @@ -40,7 +42,8 @@ DATADICTIONARY_DEFAULT_SCHEMA, URL_REGEX, INVALID_CHARS, - ACCENT_MAP + ACCENT_MAP, + slugify_pat ) log = logging.getLogger(__name__) @@ -241,19 +244,14 @@ def _get_local_schema(self, schema_type="dataset"): def _get_remote_schema(self, base_url, schema_type="dataset"): """ Fetches the remote schema for a given base URL and schema type. - + Args: base_url (str): The base URL of the remote server. schema_type (str, optional): The type of schema to fetch. Defaults to 'dataset'. - + Returns: - dict: The remote schema as a dictionary. - - Raises: - HarvesterBase.ContentFetchError: If there is an error fetching the remote schema content. - ValueError: If there is an error decoding the remote schema content. - KeyError: If the remote schema content does not contain the expected result. - + dict: The remote schema as a dictionary, or None if there is an error. 
+ """ url = ( base_url @@ -264,12 +262,16 @@ def _get_remote_schema(self, base_url, schema_type="dataset"): try: content = self._get_content(url) content_dict = json.loads(content) - return content_dict["result"] - except (HarvesterBase.ContentFetchError, ValueError, KeyError): - log.debug("Could not fetch/decode remote schema") - raise HarvesterBase.RemoteResourceError( - "Could not fetch/decode remote schema" - ) + log.debug('content_dict: %s', content_dict) + + # Check if content_dict is a dictionary and contains 'result'. + if isinstance(content_dict, dict) and "result" in content_dict: + return content_dict["result"] + else: + return None + except (ContentFetchError, ValueError, KeyError) as e: + log.debug("Could not fetch/decode remote schema: %s", e) + return None def _get_local_required_lang(self): """ @@ -480,6 +482,97 @@ def _standardize_field_mapping_v1(self, field_mapping): # If the value is not a dictionary, it is a single-language field standardized_mapping[key] = {'field_name': value} return standardized_mapping + + def _standardize_ckan_dict_from_field_mapping(self, dataset, field_mapping): + """ + Standardizes a CKAN dataset dictionary according to the provided field mapping. + + Args: + dataset (dict): The CKAN dataset dictionary. + field_mapping (dict): The mapping of local field names to remote field names or values. + + Returns: + dict: The standardized CKAN dataset dictionary. + """ + def normalize_key(key): + """ + Helper function to normalize the key by converting to lowercase and replacing non-alphanumeric characters with underscores. + """ + return slugify_pat.sub('_', key.lower()) + + def get_extra_value(extras, key): + """ + Helper function to get the value from the extras list where the key matches (case insensitive and normalized). 
+ """ + normalized_key = normalize_key(key) + for item in extras: + if normalize_key(item['key']) == normalized_key: + return item['value'] + return None + + def apply_field_mapping(d, mapping): + new_dict = {} + for local_field, remote_info in mapping.items(): + if 'field_name' in remote_info: + remote_field = remote_info['field_name'] + if remote_field and remote_field.startswith('extras.'): + extra_key = remote_field.split('.', 1)[1] + extra_value = get_extra_value(d.get('extras', []), extra_key) + if extra_value is not None: + new_dict[local_field] = extra_value + elif remote_field in d: + new_dict[local_field] = d[remote_field] + if 'field_value' in remote_info: + new_dict[local_field] = remote_info['field_value'] + if 'languages' in remote_info: + for lang, lang_info in remote_info['languages'].items(): + if 'field_name' in lang_info: + remote_field = lang_info['field_name'] + if remote_field and remote_field.startswith('extras.'): + extra_key = remote_field.split('.', 1)[1] + extra_value = get_extra_value(d.get('extras', []), extra_key) + if extra_value is not None: + if local_field not in new_dict: + new_dict[local_field] = {} + new_dict[local_field][lang] = extra_value + elif remote_field in d: + if local_field not in new_dict: + new_dict[local_field] = {} + new_dict[local_field][lang] = d[remote_field] + if 'field_value' in lang_info: + if local_field not in new_dict: + new_dict[local_field] = {} + new_dict[local_field][lang] = lang_info['field_value'] + return new_dict + + # Apply dataset field mapping + dataset_field_mapping = field_mapping.get('dataset_field_mapping', {}) + standardized_dataset = apply_field_mapping(dataset, dataset_field_mapping) + + # Ensure default schema fields are included in the dataset + for field in DATASET_DEFAULT_SCHEMA: + if field in dataset: + standardized_dataset[field] = dataset[field] + + # Maintain the tags list + standardized_dataset['tags'] = dataset.get('tags', []) + + # Apply distribution field mapping to each 
resource + distribution_field_mapping = field_mapping.get('distribution_field_mapping', {}) + standardized_resources = [] + for resource in dataset.get('resources', []): + standardized_resource = apply_field_mapping(resource, distribution_field_mapping) + + # Ensure default schema fields are included in each resource + for field in RESOURCE_DEFAULT_SCHEMA: + if field in resource: + standardized_resource[field] = resource[field] + + standardized_resources.append(standardized_resource) + + standardized_dataset['resources'] = standardized_resources + + return standardized_dataset def _standardize_df_fields_from_field_mapping(self, df, field_mapping): """ @@ -712,16 +805,21 @@ def get_mapped_fields(fields, field_mapping): log.debug("Validating remote schema from: %s", remote_ckan_base_url) if self._remote_schema is None: self._remote_schema = self._get_remote_schema(remote_ckan_base_url) - - remote_datasets_colnames = set( - field["field_name"] - for field in self._remote_schema["dataset_fields"] - ) - remote_distributions_colnames = set( - field["field_name"] - for field in self._remote_schema["resource_fields"] - ) - + + if self._remote_schema is not None: + remote_datasets_colnames = set( + field["field_name"] + for field in self._remote_schema["dataset_fields"] + ) + remote_distributions_colnames = set( + field["field_name"] + for field in self._remote_schema["resource_fields"] + ) + else: + log.warning("Failed to retrieve remote schema from: %s. Using local schema by default.", remote_ckan_base_url) + remote_datasets_colnames = set() + remote_distributions_colnames = set() + elif remote_dataset_field_names is not None: log.debug( "Validating remote schema using field names from package dict" @@ -969,21 +1067,6 @@ def _set_translated_fields(self, package_dict): ReadError: If there is an error translating the dataset. 
""" - basic_fields = [ - "id", - "name", - "title", - "title_translated", - "notes_translated", - "provenance", - "notes", - "provenance", - "private", - "groups", - "tags", - "tag_string", - "owner_org", - ] if ( not hasattr(self, "_mapped_schema") or "dataset_fields" not in self._mapped_schema @@ -1688,6 +1771,36 @@ def _clean_name(self, name): return name + def _fill_translated_properties(self, package_dict): + """ + Fills properties without the _translated suffix using the default language or the first available translation. + + Args: + package_dict (dict): The package dictionary to be modified. + default_language (str): The default language of the instance. + + Returns: + dict: The modified package dictionary. + """ + default_lang = self._get_local_required_lang() + + for key in list(package_dict.keys()): + if key.endswith('_translated'): + base_key = key[:-11] # Remove '_translated' suffix + translations = package_dict[key] + + # Use the default language if available + if default_lang and default_lang in translations: + package_dict[base_key] = translations[default_lang] + else: + # Use the first available translation with a value + for lang, value in translations.items(): + if value: + package_dict[base_key] = value + break + + return package_dict + def _create_or_update_package( self, package_dict, harvest_object, package_dict_form="rest" ): @@ -2019,19 +2132,15 @@ def _log_export_clean_datasets_and_ids(self, harvest_source_title, clean_dataset class ContentFetchError(Exception): pass - class ContentNotFoundError(ContentFetchError): pass - class RemoteResourceError(Exception): pass - class SearchError(Exception): pass - class ReadError(Exception): pass diff --git a/ckanext/schemingdcat/harvesters/ckan.py b/ckanext/schemingdcat/harvesters/ckan.py index 88b53363..c070c205 100644 --- a/ckanext/schemingdcat/harvesters/ckan.py +++ b/ckanext/schemingdcat/harvesters/ckan.py @@ -15,18 +15,16 @@ import ckan.logic as logic import uuid -from 
ckanext.harvest.harvesters.ckanharvester import ( - CKANHarvester, - ContentFetchError, - ContentNotFoundError, - RemoteResourceError, - SearchError, -) from ckanext.schemingdcat.harvesters.base import ( SchemingDCATHarvester, RemoteSchemaError, + ReadError, + ContentFetchError, + SearchError, + RemoteResourceError ) +from ckanext.schemingdcat.lib.field_mapping import FieldMappingValidator log = logging.getLogger(__name__) @@ -92,6 +90,9 @@ def validate_config(self, config): # Check basic validation config self._set_basic_validate_config(config) + # Instance field_mapping validator + field_mapping_validator = FieldMappingValidator() + # Check if the schema is specified if "schema" in config_obj: schema = config_obj["schema"] @@ -135,37 +136,33 @@ def validate_config(self, config): ): config = json.dumps({**config_obj, "remote_orgs": "only_local"}) - # Validate if exists a JSON contained the mapping field_names between the remote schema and the local schema - for mapping_name in ["dataset_field_mapping", "distribution_field_mapping"]: + # Check if 'field_mapping_schema_version' exists in the config + field_mapping_schema_version_error_message = f'Insert the schema version: "field_mapping_schema_version: ", one of: {", ".join(map(str, self._field_mapping_validator_versions))} . 
More info: https://github.com/mjanez/ckanext-schemingdcat?tab=readme-ov-file#remote-google-sheetonedrive-excel-metadata-upload-harvester' + if 'field_mapping_schema_version' not in config_obj and 'dataset_field_mapping' in config_obj: + raise ValueError(field_mapping_schema_version_error_message) + else: + # Check if is an integer and if it is in the versions + if not isinstance(config_obj['field_mapping_schema_version'], int) or config_obj['field_mapping_schema_version'] not in self._field_mapping_validator_versions: + raise ValueError(field_mapping_schema_version_error_message) + + # Validate if exists a JSON contained the mapping field_names between the remote schema and the local schema + for mapping_name in self._field_mapping_info.keys(): if mapping_name in config: field_mapping = config_obj[mapping_name] if not isinstance(field_mapping, dict): - raise ValueError(f"{mapping_name} must be a dictionary") - - # Check if the config is a valid mapping - for local_field, remote_field in field_mapping.items(): - if not isinstance(local_field, basestring): - raise ValueError('"local_field_name" must be a string') - if not isinstance(remote_field, (basestring, dict)): - raise ValueError( - '"remote_field_name" must be a string or a dictionary' - ) - if isinstance(remote_field, dict): - for lang, remote_field_name in remote_field.items(): - if not isinstance(lang, basestring) or not isinstance( - remote_field_name, basestring - ): - raise ValueError( - 'In translated fields, both language and remote_field_name must be strings. e.g. 
"notes_translated": {"es": "notes-es"}' - ) - if not re.match("^[a-z]{2}$", lang): - raise ValueError( - "Language code must be a 2-letter ISO 639-1 code" - ) + raise ValueError(f'{mapping_name} must be a dictionary') + + schema_version = config_obj['field_mapping_schema_version'] + + try: + # Validate field_mappings acordin schema versions + field_mapping = field_mapping_validator.validate(field_mapping, schema_version) + except ValueError as e: + raise ValueError(f"The field mapping is invalid: {e}") from e config = json.dumps({**config_obj, mapping_name: field_mapping}) - return config + return config def gather_stage(self, harvest_job): """ @@ -181,7 +178,7 @@ def gather_stage(self, harvest_job): harvest_source_title = harvest_job.source.title remote_ckan_base_url = harvest_job.source.url.rstrip("/") - log.debug('In SchemingDCATCKANHarvester gather_stage with harvest source: %s and database URL: %s', harvest_source_title, remote_ckan_base_url) + log.debug('In SchemingDCATCKANHarvester gather_stage with harvest source: %s and URL: %s', harvest_source_title, remote_ckan_base_url) # Get config options toolkit.requires_ckan_version(min_version="2.0") @@ -274,6 +271,23 @@ def gather_stage(self, harvest_job): ) return [] + + # Check if the content_dicts colnames correspond to the local schema + try: + # Standardizes the field_mapping + field_mappings = { + 'dataset_field_mapping': self._standardize_field_mapping(self.config.get("dataset_field_mapping")), + 'distribution_field_mapping': self._standardize_field_mapping(self.config.get("distribution_field_mapping")), + 'datadictionary_field_mapping': None + } + + except RemoteSchemaError as e: + self._save_gather_error('Error standardize field mapping: {0}'.format(e), harvest_job) + return [] + + except ReadError as e: + self._save_gather_error('Error generating default values for dataset/distribution config field mappings: {0}'.format(e), harvest_job) + # Create harvest objects for each dataset try: package_ids = set() 
@@ -284,18 +298,15 @@ def gather_stage(self, harvest_job): if self.config.get("dataset_field_mapping") is None and self.config.get("distribution_field_mapping") is None: log.warning('If no *_field_mapping is provided in the configuration for validation, fields are automatically mapped to the local schema.') else: - # Standardizes the field_mapping - remote_dataset_field_mapping = self._standardize_field_mapping(self.config.get("dataset_field_mapping")) - remote_distribution_field_mapping = self._standardize_field_mapping(self.config.get("distribution_field_mapping")) - - log.debug('remote_dataset_field_mapping: %s', remote_dataset_field_mapping) - log.debug('remote_distribution_field_mapping: %s', remote_distribution_field_mapping) + # Standardizes the field_mapping + log.debug('remote_dataset_field_mapping: %s', field_mappings.get('dataset_field_mapping')) + log.debug('remote_distribution_field_mapping: %s', field_mappings.get('distribution_field_mapping')) self._validate_remote_schema( remote_dataset_field_names=None, remote_ckan_base_url=remote_ckan_base_url, remote_resource_field_names=None, - remote_dataset_field_mapping=remote_dataset_field_mapping, - remote_distribution_field_mapping=remote_distribution_field_mapping, + remote_dataset_field_mapping=field_mappings.get('dataset_field_mapping'), + remote_distribution_field_mapping=field_mappings.get('distribution_field_mapping'), ) except RemoteSchemaError as e: self._save_gather_error( @@ -312,6 +323,17 @@ def gather_stage(self, harvest_job): pkg_dict["id"], ) continue + + # Check if the content_dicts colnames correspond to the local schema + try: + #log.debug('content_dicts: %s', content_dicts) + # Standardizes the field names + pkg_dict = self._standardize_ckan_dict_from_field_mapping(pkg_dict, field_mappings) + log.debug('Standardized package dict: %s', pkg_dict) + except RemoteSchemaError as e: + self._save_gather_error('Error standarize remote dataset: {0}'.format(e), harvest_job) + return [] + 
package_ids.add(pkg_dict["id"]) # Set translated fields @@ -319,6 +341,8 @@ def gather_stage(self, harvest_job): log.debug( "Creating HarvestObject for %s %s", pkg_dict["name"], pkg_dict["id"] ) + log.debug('Translated package dict: %s', pkg_dict) + obj = HarvestObject( guid=pkg_dict["id"], job=harvest_job, content=json.dumps(pkg_dict) ) @@ -427,6 +451,9 @@ def modify_package_dict(self, package_dict, harvest_object): """ # Clean up any existing extras already in package_dict package_dict = self._remove_duplicate_keys_in_extras(package_dict) + + # Check basic fields without translations + package_dict = self._fill_translated_properties(package_dict) return package_dict @@ -548,11 +575,13 @@ def import_stage(self, harvest_object): # key. resource.pop("revision_id", None) + log.debug('package_dict BEFORE MODIFY: %s', package_dict) package_dict = self.modify_package_dict(package_dict, harvest_object) - result = self._create_or_update_package( package_dict, harvest_object, package_dict_form="package_show" ) + log.debug('package_dict AFTER MODIFY: %s', package_dict) + # Log package_dict, package dict is a dict log.debug("Package create or update: %s", result) @@ -590,4 +619,5 @@ def get_package_dict(self, harvest_object, context, package_dict=None): resource['id'] = str(uuid.uuid4()) resource.pop('dataset_id', None) - return package_dict \ No newline at end of file + return package_dict + diff --git a/ckanext/schemingdcat/helpers.py b/ckanext/schemingdcat/helpers.py index 55ab0491..5e61bfc1 100644 --- a/ckanext/schemingdcat/helpers.py +++ b/ckanext/schemingdcat/helpers.py @@ -1,1353 +1,1353 @@ -from ckan.common import json, c, request, is_flask_request -from ckan.lib import helpers as ckan_helpers -import ckan.logic as logic -from ckan import model -from ckan.lib.i18n import get_available_locales, get_lang -import ckan.plugins as p -import six -import re -import yaml -from yaml.loader import SafeLoader -from pathlib import Path -from functools import lru_cache -import 
datetime -import typing -from urllib.parse import urlparse -from urllib.error import URLError - -from six.moves.urllib.parse import urlencode - -from ckanext.scheming.helpers import ( - scheming_choices_label, - scheming_language_text, - scheming_dataset_schemas, - scheming_get_schema -) - -from ckanext.harvest.helpers import ( - get_harvest_source -) -from ckanext.harvest.utils import ( - DATASET_TYPE_NAME -) - -import ckanext.schemingdcat.config as sdct_config -from ckanext.schemingdcat.utils import ( - get_facets_dict, - public_file_exists, - public_dir_exists, -) -from ckanext.dcat.utils import CONTENT_TYPES, get_endpoint -from ckanext.fluent.validators import LANG_SUFFIX -import logging - -log = logging.getLogger(__name__) - -all_helpers = {} -prettify_cache = {} -DEFAULT_LANG = None - -@lru_cache(maxsize=None) -def get_scheming_dataset_schemas(): - """ - Retrieves the dataset schemas using the scheming_dataset_schemas function. - Caches the result using the LRU cache decorator for efficient retrieval. - """ - return scheming_dataset_schemas() - - -def helper(fn): - """Collect helper functions into the ckanext.schemingdcat.all_helpers dictionary. - - Args: - fn (function): The helper function to add to the dictionary. - - Returns: - function: The helper function. - """ - all_helpers[fn.__name__] = fn - return fn - - -@helper -def schemingdcat_get_schema_names(): - """ - Get the names of all the schemas defined for the Scheming DCAT extension. - - Returns: - list: A list of schema names. - """ - schemas = get_scheming_dataset_schemas() - - return [schema["schema_name"] for schema in schemas.values()] - - -@helper -def schemingdcat_default_facet_search_operator(): - """Return the default facet search operator: AND/OR. - - Returns: - str: The default facet search operator. 
- """ - facet_operator = sdct_config.default_facet_operator - if facet_operator and ( - facet_operator.upper() == "AND" or facet_operator.upper() == "OR" - ): - facet_operator = facet_operator.upper() - else: - facet_operator = "AND" - return facet_operator - - -@helper -def schemingdcat_decode_json(json_text): - """Convert a JSON string to a Python object. - - Args: - json_text (str): The JSON string to convert. - - Returns: - object: A Python object representing the JSON data. - """ - return json.loads(json_text) - - -@helper -def schemingdcat_organization_name(org_id): - """Return the name of the organization from its ID. - - Args: - org_id (dict): A dictionary containing the ID of the organization. - - Returns: - str: The name of the organization, or None if the organization cannot be found. - """ - org_name = None - try: - org_dic = ckan_helpers.get_organization(org_id["display_name"]) - if org_dic is not None: - org_name = org_dic["name"] - else: - log.warning( - "Could not find the name of the organization with ID {0}".format( - org_id["display_name"] - ) - ) - except Exception as e: - log.error( - "Exception while trying to find the name of the organization: {0}".format(e) - ) - return org_name - - -@helper -def schemingdcat_get_facet_label(facet): - """Return the label for a given facet. - - Args: - facet (str): The name of the facet. - - Returns: - str: The label for the given facet. - """ - return get_facets_dict[facet] - - -@helper -def schemingdcat_get_facet_items_dict( - facet, search_facets=None, limit=None, exclude_active=False, scheming_choices=None -): - """Return the list of unselected facet items for the given facet, sorted - by count. - - Returns the list of unselected facet contraints or facet items (e.g. tag - names like "russian" or "tolstoy") for the given search facet (e.g. - "tags"), sorted by facet item count (i.e. the number of search results that - match each facet item). 
- - Reads the complete list of facet items for the given facet from - c.search_facets, and filters out the facet items that the user has already - selected. - - List of facet items are ordered acording the faccet_sort parameter - - Arguments: - facet -- the name of the facet to filter. - search_facets -- dict with search facets(c.search_facets in Pylons) - limit -- the max. number of facet items to return. - exclude_active -- only return unselected facets. - scheming_choices -- scheming choices to use to get label from value. - - """ - - # log.debug("Returning facets for: {0}".format(facet)) - - order = "default" - items = [] - - search_facets = search_facets or getattr(c, "search_facets", None) - - if ( - search_facets - and isinstance(search_facets, dict) - and search_facets.get(facet, {}).get("items") - ): - for facet_item in search_facets.get(facet)["items"]: - if scheming_choices: - facet_item["label"] = scheming_choices_label( - scheming_choices, facet_item["name"] - ) - else: - facet_item["label"] = facet_item["display_name"] - - if not len(facet_item["name"].strip()): - continue - - params_items = ( - request.params.items(multi=True) - if is_flask_request() - else request.params.items() - ) - - if not (facet, facet_item["name"]) in params_items: - items.append(dict(active=False, **facet_item)) - elif not exclude_active: - items.append(dict(active=True, **facet_item)) - - # log.debug("params: {0}:{1}".format( - # facet,request.params.getlist("_%s_sort" % facet))) - order_lst = request.params.getlist("_%s_sort" % facet) - if len(order_lst): - order = order_lst[0] - # Sort descendingly by count and ascendingly by case-sensitive display name - # items.sort(key=lambda it: (-it['count'], it['display_name'].lower())) - sorts = { - "name": ("label", False), - "name_r": ("label", True), - "count": ("count", False), - "count_r": ("count", True), - } - if sorts.get(order): - items.sort( - key=lambda it: (it[sorts.get(order)[0]]), reverse=sorts.get(order)[1] - ) - 
else: - items.sort(key=lambda it: (-it["count"], it["label"].lower())) - - if hasattr(c, "search_facets_limits"): - if c.search_facets_limits and limit is None: - limit = c.search_facets_limits.get(facet) - # zero treated as infinite for hysterical raisins - if limit is not None and limit > 0: - return items[:limit] - - return items - - -@helper -def schemingdcat_new_order_url(facet_name, order_concept, extras=None): - """Return a URL with the order parameter for the given facet and concept to use. - - Based on the actual order, it rotates cyclically from no order -> direct order -> inverse order over the given concept. - - Args: - facet_name (str): The name of the facet to order. - order_concept (str): The concept (name or count) that will be used to order. - extras (dict, optional): Extra parameters to include in the URL. - - Returns: - str: The URL with the order parameter for the given facet and concept. - """ - old_order = None - order_param = "_%s_sort" % facet_name - order_lst = request.params.getlist(order_param) - if not extras: - extras = {} - - controller = getattr(c, "controller", False) or request.blueprint - action = getattr(c, "action", False) or p.toolkit.get_endpoint()[1] - url = ckan_helpers.url_for(controller=controller, action=action, **extras) - - if len(order_lst): - old_order = order_lst[0] - - order_mapping = { - "name": {"name": "name_r", "name_r": None, None: "name"}, - "count": {"count": "count_r", "count_r": None, None: "count"}, - } - - new_order = order_mapping.get(order_concept, {}).get(old_order) - - params_items = ( - request.params.items(multi=True) - if is_flask_request() - else request.params.items() - ) - params_nopage = [(k, v) for k, v in params_items if k != order_param] - - if new_order: - params_nopage.append((order_param, new_order)) - - if params_nopage: - url = url + "?" 
+ urlencode(params_nopage) - - return url - -@helper -def schemingdcat_get_facet_list_limit(): - """ - Retrieves the limit for the facet list from the scheming DCAT configuration. - - Returns: - int: The limit for the facet list. - """ - return sdct_config.facet_list_limit - -@helper -def schemingdcat_get_icons_dir(field=None, field_name=None): - """ - Returns the defined icons directory for a given scheming field definition or field name. - - This function is used to retrieve the icons directory associated with a - specific field in a scheming dataset or directly by field name. If no icons directory is defined, - the function will return None. - - Args: - field (dict, optional): A dictionary representing the scheming field definition. - This should include all the properties of the field, - including the icons directory if one is defined. - field_name (str, optional): The name of the field. If provided, the function will - look for an icons directory with this name. - - Returns: - str: A string representing the icons directory for the field or field name. - If no icons directory is defined or found, the function will return None. - """ - if field: - if "icons_dir" in field: - return field["icons_dir"] - - if "field_name" in field: - dir = sdct_config.icons_dir + "/" + field["field_name"] - if public_dir_exists(dir): - return dir - - elif field_name: - dir = sdct_config.icons_dir + "/" + field_name - if public_dir_exists(dir): - return dir - - return None - -@helper -def schemingdcat_get_default_icon(field): - """Return the defined default icon for a scheming field definition. - - Args: - field (dict): The scheming field definition. - - Returns: - str: The defined default icon, or None if not found. - """ - if "default_icon" in field: - return field["default_icon"] - -@helper -def schemingdcat_get_default_package_item_icon(): - """ - Returns the default icon defined for a given scheming field definition. 
- - This function is used to retrieve the default icon associated with a - specific field in a scheming dataset. If no default icon is defined, - the function will return None. - - Args: - field (dict): A dictionary representing the scheming field definition. - This should include all the properties of the field, - including the default icon if one is defined. - - Returns: - str: A string representing the default icon for the field. This could - be a URL, a data URI, or any other string format used to represent - images. If no default icon is defined for the field, the function - will return None. - """ - return sdct_config.default_package_item_icon - -@helper -def schemingdcat_get_default_package_item_show_spatial(): - """ - Returns the configuration value for showing spatial information in the default package item. - - This function is used to retrieve the configuration value that determines - whether the spatial information should be shown in the default package item. - If no value is defined in the configuration, the function will return None. - - Returns: - bool: A boolean value representing whether the spatial information should - be shown in the default package item. If no value is defined in the - configuration, the function will return None. - """ - return sdct_config.default_package_item_show_spatial - -@helper -def schemingdcat_get_show_metadata_templates_toolbar(): - """ - Returns the configuration value for showing the metadata templates toolbar. - - This function is used to retrieve the configuration value that determines - whether the metadata templates toolbar should be shown or not. If the configuration - value is not set, the function will return False. - - Returns: - bool: A boolean value representing whether the metadata templates toolbar - should be shown. If the configuration value is not set, the function - will return False. 
- """ - return sdct_config.show_metadata_templates_toolbar - -@helper -def schemingdcat_get_metadata_templates_search_identifier(): - """ - Returns the default icon defined for a given scheming field definition. - - This function is used to retrieve the default value to retrieve metadata templates. If no default value is defined, - the function will return None. - - Args: - field (dict): A dictionary representing the scheming field definition. - This should include all the properties of the field, - including the default icon if one is defined. - - Returns: - str: A string representing the default icon for the field. This could - be a URL, a data URI, or any other string format used to represent - images. If no default icon is defined for the field, the function - will return None. - """ - return sdct_config.metadata_templates_search_identifier - -@helper -def schemingdcat_get_schemingdcat_xls_harvest_templates(search_identifier=sdct_config.metadata_templates_search_identifier, count=10): - """ - This helper function retrieves the schemingdcat_xls templates from the CKAN instance. - It uses the 'package_search' action of the CKAN logic layer to perform a search with specific parameters. - - Parameters: - search_identifier (str): The text to search in the identifier. Default is sdct_config.metadata_templates_search_identifier. - count (int): The number of featured datasets to retrieve. Default is 10. - - Returns: - list: A list of dictionaries, each representing a featured dataset. If no results are found, returns None. 
- """ - fq = f'+extras_schemingdcat_xls_metadata_template:{True}' - search_dict = { - 'fq': fq, - 'fl': 'name,extras_identifier,title,notes,metadata_modified,extras_title_translated,extras_notes_translated', - 'rows': count - } - context = {'model': model, 'session': model.Session} - result = logic.get_action('package_search')(context, search_dict) - - if not result['results']: - fq = f'+extras_schemingdcat_xls_metadata_template:*{search_identifier}*' - search_dict['fq'] = fq - result = logic.get_action('package_search')(context, search_dict) - - return result['results'] if result['results'] else None - -@helper -def schemingdcat_get_icon( - choice=None, icons_dir=None, default="/images/default/no_icon.svg", choice_value=None -): - """Return the relative URL to the icon for the item. - - Args: - choice (dict, optional): The choice selected for the field. - icons_dir (str, optional): The path to search for the icon. Usually the common path for icons for this field. - default (str, optional): The default value to return if no icon is found. - choice_value (str, optional): The value of the choice selected for the field. If provided, it will be used instead of choice['value']. - - Returns: - str: The relative URL to the icon, or the default value if not found. 
- """ - extensions = [".svg", ".png", ".jpg", ".jpeg", ".gif"] - icon_name = None - - if choice_value is None and choice: - choice_value = choice.get("icon") or choice.get("value") - - if choice_value: - if ckan_helpers.is_url(choice_value): - url_parts = choice_value.split("/") - - if len(url_parts) == 1: - icon_name = url_parts[-1].lower() - else: - icon_name = url_parts[-2].lower() + "/" + url_parts[-1].lower() - else: - icon_name = choice_value - - url_path = (icons_dir + "/" if icons_dir else "") + icon_name - - for extension in extensions: - if public_file_exists(url_path + extension): - return url_path + extension - - return default - -@helper -def schemingdcat_get_choice_item(field, value): - """Return the whole choice item for the given value in the scheming field. - - Args: - field (dict): The scheming field to look for the choice item in. - value (str): The option item value. - - Returns: - dict: The whole option item in scheming, or None if not found. - """ - if field and ("choices" in field): - # log.debug("Searching: {0} en {1}".format(value,field['choices'])) - for choice in field["choices"]: - if choice["value"] == value: - return choice - - return None - -@helper -def schemingdcat_get_choice_property(choices, value, property): - """ - Retrieve a specific property from a choice dictionary based on the given value. - - Args: - choices (list): List of dictionaries containing "label" and "value" keys. - value (str): The value to match against the choices. - property (str): The property to retrieve from the matching choice dictionary. - - Returns: - str or None: The property value from the matching choice dictionary, or None if not found. - """ - for c in choices: - if c['value'] == value: - return c.get(property, None) - return None - - -@helper -def scheming_display_json_list(value): - """Return the object passed serialized as a JSON list. - - Args: - value (any): The object to serialize. 
- - Returns: - str: The serialized object as a JSON list, or the original value if it cannot be serialized. - """ - if isinstance(value, six.string_types): - return value - try: - return json.loads(value) - except (TypeError, ValueError): - return value - -@helper -def scheming_clean_json_value(value): - """Clean a JSON list value to avoid errors with: '"' and spaces. - - Args: - value (str): The object to serialize. - - Returns: - str: The cleaned value, or the original value if it cannot be cleaned. - """ - try: - value = value.strip(" ").replace('\\"', "%%%@#") - value = value.replace('"', "") - value = value.replace("%%%@#", '"') - return value - except (TypeError, ValueError): - return value - -def format_eli_label(parsed_url): - """ - Formats the label for a parsed URL with 'eli' segment. - - Args: - parsed_url (ParseResult): The parsed URL. - - Returns: - str: The formatted label. - """ - segments = parsed_url.path.split('/') - eli_index = next(i for i, segment in enumerate(segments) if segment == 'eli') - return '/'.join(segments[eli_index + 1:]).upper() - -@helper -def schemingdcat_prettify_url(url): - """ - Prettifies a URL by removing the protocol and trailing slash. - - Args: - url (str): The URL to prettify. - - Returns: - str: The prettified URL, or the original URL if an error occurred. - """ - if url in prettify_cache: - return prettify_cache[url] - - try: - prettified_url = re.sub(r"^https?://(?:www\.)?", "", url).rstrip("/") - prettify_cache[url] = prettified_url - return prettified_url - except (TypeError, AttributeError): - return url - -@helper -def schemingdcat_prettify_url_name(url): - """ - Prettifies a URL name by extracting the last segment and cleaning it. - - Args: - url (str): The URL to extract the name from. - - Returns: - str: The prettified URL name, or the original URL if an error occurred. 
- """ - if url is None: - return url - - if url in prettify_cache: - return prettify_cache[url] - - try: - parsed_url = urlparse(url) - - if '/eli/' in url: - prettified_url_name = format_eli_label(parsed_url) - else: - url_name = parsed_url.path.split("/")[-1].split('.')[0].replace('_', '-') - prettified_url_name = ' '.join(url_name.split(' ')[:4]) - - prettify_cache[url] = prettified_url_name - return prettified_url_name - - except (URLError, ValueError) as e: - print(f"Error while prettifying URL: {e}") - return url - -@helper -def schemingdcat_listify_str(values): - """Converts a string or list/tuple of strings to a list of strings. - - If `values` is already a list or tuple, it is returned as is. If `values` is a string, - it is split into a list of strings using commas as the delimiter. Each string in the - resulting list is stripped of leading/trailing whitespace and quotes. - - Args: - values (str or list or tuple): The value(s) to convert to a list of strings. - - Returns: - list: A list of strings. - """ - if isinstance(values, str): - values = values.strip("][").split(",") - values = [item.strip().strip('"') for item in values] - elif not isinstance(values, (list, tuple)): - log.debug("Not a list or string: {0}".format(values)) - values = [""] - - return values - -@helper -def schemingdcat_load_yaml(file, folder="codelists"): - """Load a YAML file from the folder, by default 'codelists' directory. - - Args: - file (str): The name of the YAML file to load. - - Returns: - dict: A dictionary containing the data from the YAML file. 
- """ - source_path = Path(__file__).resolve(True) - yaml_data = {} - try: - p = source_path.parent.joinpath(folder, file) - with open(p, "r") as f: - yaml_data = yaml.load(f, Loader=SafeLoader) - except FileNotFoundError: - log.error("The file {0} does not exist".format(file)) - except Exception as e: - log.error("Could not read configuration from {0}: {1}".format(file, e)) - - return yaml_data - -@helper -def schemingdcat_get_linked_data(id): - """Get linked data for a given identifier. - - Args: - id (str): The identifier to get linked data for. - - Returns: - list: A list of dictionaries containing linked data for the identifier. - """ - return [ - { - "name": name, - "display_name": sdct_config.linkeddata_links.get(name, {"display_name": content_type})[ - "display_name" - ], - "format": sdct_config.linkeddata_links.get(name, {}).get("format"), - "image_display_url": sdct_config.linkeddata_links.get(name, {}).get( - "image_display_url" - ), - "endpoint_icon": sdct_config.linkeddata_links.get(name, {}).get( - "endpoint_icon" - ), - "description": sdct_config.linkeddata_links.get(name, {}).get("description") - or f"Formats {content_type}", - "description_url": sdct_config.linkeddata_links.get(name, {}).get("description_url"), - "endpoint": "dcat.read_dataset", - "endpoint_data": { - "_id": id, - "_format": name, - }, - } - for name, content_type in CONTENT_TYPES.items() - ] - -@helper -def schemingdcat_get_catalog_endpoints(): - """Get the catalog endpoints. - - Returns: - list: A list of dictionaries containing linked data for the identifier. 
- """ - csw_uri = schemingdcat_get_geospatial_endpoint("catalog") - - return [ - { - "name": item["name"], - "display_name": item["display_name"], - "format": item["format"], - "image_display_url": item["image_display_url"], - "endpoint_icon": item["endpoint_icon"], - "fa_icon": item["fa_icon"], - "description": item["description"], - "type": item["type"], - "profile": item["profile"], - "profile_label": item["profile_label"], - "endpoint": get_endpoint("catalog") - if item.get("type").lower() == "lod" - else csw_uri.format(version=item["version"]) - if item.get("type").lower() == "ogc" - else None, - "endpoint_data": { - "_format": item["format"], - "_external": True, - "profiles": item["profile"], - }, - } - for item in sdct_config.endpoints["catalog_endpoints"] - ] - -@helper -def schemingdcat_get_geospatial_endpoint(type="dataset"): - """Get geospatial base URI for CSW Endpoint. - - Args: - type (str): The type of endpoint to return. Can be 'catalog' or 'dataset'. - - Returns: - str: The base URI of the CSW Endpoint with the appropriate format. - """ - try: - if sdct_config.geometadata_base_uri: - csw_uri = sdct_config.geometadata_base_uri - - if ( - sdct_config.geometadata_base_uri - and "/csw" not in sdct_config.geometadata_base_uri - ): - csw_uri = sdct_config.geometadata_base_uri.rstrip("/") + "/csw" - elif sdct_config.geometadata_base_uri == "": - csw_uri = "/csw" - else: - csw_uri = sdct_config.geometadata_base_uri.rstrip("/") - except: - csw_uri = "/csw" - - if type == "catalog": - return csw_uri + "?service=CSW&version={version}&request=GetCapabilities" - else: - return ( - csw_uri - + "?service=CSW&version={version}&request=GetRecordById&id={id}&elementSetName={element_set_name}&outputSchema={output_schema}&OutputFormat={output_format}" - ) - -@helper -def schemingdcat_get_geospatial_metadata(): - """Get geospatial metadata for CSW formats. - - Returns: - list: A list of dictionaries containing geospatial metadata for CSW formats. 
- """ - csw_uri = schemingdcat_get_geospatial_endpoint("dataset") - - return [ - { - "name": item["name"], - "display_name": item["display_name"], - "format": item["format"], - "image_display_url": item["image_display_url"], - "endpoint_icon": item["endpoint_icon"], - "description": item["description"], - "description_url": item["description_url"], - "url": csw_uri.format( - output_format=item["output_format"], - version=item["version"], - element_set_name=item["element_set_name"], - output_schema=item["output_schema"], - id="{id}", - ), - } - for item in sdct_config.geometadata_links["csw_formats"] - ] - -@helper -def schemingdcat_get_all_metadata(id): - """Get linked data and geospatial metadata for a given identifier. - - Args: - id (str): The identifier to get linked data and geospatial metadata for. - - Returns: - list: A list of dictionaries containing linked data and geospatial metadata for the identifier. - """ - geospatial_metadata = schemingdcat_get_geospatial_metadata() - linked_data = schemingdcat_get_linked_data(id) - - for metadata in geospatial_metadata: - metadata["endpoint_type"] = "csw" - - for data in linked_data: - data["endpoint_type"] = "dcat" - - return geospatial_metadata + linked_data - -@helper -def fluent_form_languages(field=None, entity_type=None, object_type=None, schema=None): - """ - Return a list of language codes for this form (or form field) - - 1. return field['form_languages'] if it is defined - 2. return schema['form_languages'] if it is defined - 3. get schema from entity_type + object_type then - return schema['form_languages'] if they are defined - 4. 
return languages from site configuration - """ - if field and "form_languages" in field: - return field["form_languages"] - if schema and "form_languages" in schema: - return schema["form_languages"] - if entity_type and object_type: - # late import for compatibility with older ckanext-scheming - from ckanext.scheming.helpers import scheming_get_schema - - schema = scheming_get_schema(entity_type, object_type) - if schema and "form_languages" in schema: - return schema["form_languages"] - - langs = [] - for l in get_available_locales(): - if l.language not in langs: - langs.append(l.language) - return langs - -@helper -def schemingdcat_fluent_form_label(field, lang): - """Returns a label for the input field in the specified language. - - If the field has a `fluent_form_label` defined, the label will be taken from there. - If a matching label cannot be found, this helper will return the standard label - with the language code in uppercase. - - Args: - field (dict): A dictionary representing the input field. - lang (str): A string representing the language code. - - Returns: - str: A string representing the label for the input field in the specified language. - """ - form_label = field.get("fluent_form_label", {}) - label = scheming_language_text(form_label.get(lang, field["label"])) - return f"{label} ({lang.upper()})" - -@helper -def schemingdcat_multiple_field_required(field, lang): - """ - Returns whether a field is required or not based on the field definition and language. - - Args: - field (dict): The field definition. - lang (str): The language to check for required fields. - - Returns: - bool: True if the field is required, False otherwise. 
- """ - if "required" in field: - return field["required"] - if "required_language" in field and field["required_language"] == lang: - return True - return "not_empty" in field.get("validators", "").split() - -def parse_json(value, default_value=None): - try: - return json.loads(value) - except (ValueError, TypeError, AttributeError): - if default_value is not None: - return default_value - return value - -@helper -def schemingdcat_get_default_lang(): - global DEFAULT_LANG - if DEFAULT_LANG is None: - DEFAULT_LANG = p.toolkit.config.get("ckan.locale_default", "en") - return DEFAULT_LANG - -@helper -def schemingdcat_get_current_lang(): - """ - Returns the current language of the CKAN instance. - - Returns: - str: The current language of the CKAN instance. If the language cannot be determined, the default language 'en' is returned. - """ - try: - return get_lang() - except TypeError: - return p.toolkit.config.get("ckan.locale_default", "en") - -@helper -def schemingdcat_extract_lang_text(text, current_lang): - """ - Extracts the text content for a specified language from a string. - - Args: - text (str): The string to extract the language content from. - Example: "[#en#]Welcome to the CKAN Open Data Portal.[#es#]Bienvenido al portal de datos abiertos CKAN." - current_lang (str): The language code to extract the content for. - Example: "es" - - Returns: - str: The extracted language content, or the original string if no content is found. - Example: "Bienvenido al portal de datos abiertos CKAN." - - """ - - @lru_cache(maxsize=30) - def process_language_content(language_label, text): - """Helper function to process the content for a specific language label. - - Args: - language_label (str): The language label to process. - text (str): The text to process. - - Returns: - str: The text corresponding to the specified language label. 
- - """ - pattern = re.compile(r'\[#(.*?)#\](.*?)(?=\[#|$)', re.DOTALL) - matches = pattern.findall(text) - - for lang, content in matches: - if lang == language_label.replace('[#', '').replace('#]', ''): - return content.strip() - - return '' - - lang_label = f"[#{current_lang}#]" - default_lang = schemingdcat_get_default_lang() - default_lang_label = f"[#{default_lang}#]" - - lang_text = process_language_content(lang_label, text) - - if not lang_text and lang_label != default_lang_label: - lang_text = process_language_content(default_lang_label, text) - - if not lang_text: - return text - - return lang_text - -@helper -def dataset_display_name(package_or_package_dict): - """ - Returns the localized value of the dataset name by extracting the correct translation. - - Args: - - package_or_package_dict: A dictionary containing the package information. - - Returns: - - The localized value of the dataset name. - """ - field_name = "title" if "title" in package_or_package_dict else "name" - - return schemingdcat_get_localized_value_from_dict( - package_or_package_dict, field_name - ) - - -@helper -def dataset_display_field_value(package_or_package_dict, field_name): - """ - Extracts the correct translation of the dataset field. - - Args: - package_or_package_dict (dict): The package or package dictionary to extract the value from. - field_name (str): The name of the field to extract the value for. - - Returns: - str: The localized value for the given field name. - """ - return schemingdcat_get_localized_value_from_dict( - package_or_package_dict, field_name - ) - -@helper -def schemingdcat_get_localized_value_from_dict( - package_or_package_dict, field_name, default="" -): - """ - Get the localized value from a dictionary. - - This function tries to get the value of a field in a specific language. - If the value is not available in the specific language, it tries to get it in the default language. 
- If the value is not available in the default language, it tries to get the untranslated value. - If the untranslated value is not available, it returns a default value. - - Args: - package_or_package_dict (dict or str): The package or dictionary to get the value from. - If it's a string, it tries to convert it to a dictionary using json.loads. - field_name (str): The name of the field to get the value from. - default (str, optional): The default value to return if the value is not available. Defaults to "". - - Returns: - str: The localized value, or the default value if the localized value is not available. - """ - if isinstance(package_or_package_dict, str): - try: - package_or_package_dict = json.loads(package_or_package_dict) - except ValueError: - return default - - lang_code = schemingdcat_get_current_lang().split("_")[0] - schemingdcat_get_default_lang() - - translated_field = package_or_package_dict.get(field_name + "_translated", {}) - if isinstance(translated_field, str): - try: - translated_field = json.loads(translated_field) - except ValueError: - translated_field = {} - - # Check the lang_code, if not check the default_lang, if not check the field without translation - return translated_field.get(lang_code) or translated_field.get(DEFAULT_LANG) or package_or_package_dict.get(field_name, default) - -@helper -def schemingdcat_get_readable_file_size(num, suffix="B"): - if not num: - return False - try: - for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]: - num = float(num) - if abs(num) < 1024.0: - return "%3.1f%s%s" % (num, unit, suffix) - num /= 1024.0 - return "%.1f%s%s" % (num, "Y", suffix) - except ValueError: - return False - - -@helper -def schemingdcat_get_group_or_org(id, type="group"): - """ - Retrieve information about a group or organization in CKAN. - - Args: - id (str): The ID of the group or organization. - type (str, optional): The type of the entity to retrieve. Defaults to 'group'. 
- - Returns: - dict: A dictionary containing information about the group or organization. - """ - return logic.get_action(f"{type}_show")({}, {"id": id}) - -@helper -def schemingdcat_package_list_for_source(source_id): - ''' - Creates a dataset list with the ones belonging to a particular harvest - source. - - It calls the package_list snippet and the pager. - ''' - limit = 20 - page = int(request.args.get('page', 1)) - fq = '+harvest_source_id:"{0}"'.format(source_id) - search_dict = { - 'fq': fq, - 'rows': limit, - 'sort': 'metadata_modified desc', - 'start': (page - 1) * limit, - 'include_private': True - } - - context = {'model': model, 'session': model.Session} - harvest_source = get_harvest_source(source_id) - owner_org = harvest_source.get('owner_org', '') - if owner_org: - user_member_of_orgs = [org['id'] for org - in ckan_helpers.organizations_available('read')] - if (harvest_source and owner_org in user_member_of_orgs): - context['ignore_capacity_check'] = True - - query = logic.get_action('package_search')(context, search_dict) - - base_url = ckan_helpers.url_for( - '{0}.read'.format(DATASET_TYPE_NAME), - id=harvest_source['name'] - ) - - def pager_url(q=None, page=None): - url = base_url - if page: - url += '?page={0}'.format(page) - return url - - pager = ckan_helpers.Page( - collection=query['results'], - page=page, - url=pager_url, - item_count=query['count'], - items_per_page=limit - ) - pager.items = query['results'] - - if query['results']: - out = ckan_helpers.snippet('snippets/package_list.html', packages=query['results']) - out += pager.pager() - else: - out = ckan_helpers.snippet('snippets/package_list_empty.html') - - return out -@helper -def schemingdcat_package_count_for_source(source_id): - ''' - Returns the current package count for datasets associated with the given - source id - ''' - fq = '+harvest_source_id:"{0}"'.format(source_id) - search_dict = {'fq': fq, 'include_private': True} - context = {'model': model, 'session': 
model.Session} - result = logic.get_action('package_search')(context, search_dict) - return result.get('count', 0) - -@helper -def schemingdcat_parse_localised_date(date_=None): - '''Parse a datetime object or timestamp string as a localised date. - If timestamp is badly formatted, then None is returned. - - :param date_: the date - :type date_: datetime or date or ISO string format - :rtype: date - ''' - if not date_: - return None - if isinstance(date_, str): - try: - date_ = ckan_helpers.date_str_to_datetime(date_) - except (TypeError, ValueError): - return None - # check we are now a datetime or date - if isinstance(date_, datetime.datetime): - date_ = date_.date() - elif not isinstance(date_, datetime.date): - return None - - # Format date based on locale - locale = schemingdcat_get_current_lang() - if locale == 'es': - return date_.strftime('%d-%m-%Y') - else: - return date_.strftime('%Y-%m-%d') - -@lru_cache(maxsize=None) -@helper -def schemingdcat_get_dataset_schema(schema_type="dataset"): - """ - Retrieves the schema for the dataset instance and caches it using the LRU cache decorator for efficient retrieval. - - Args: - schema_type (str, optional): The type of schema to retrieve. Defaults to 'dataset'. - - Returns: - dict: The schema of the dataset instance. - """ - return logic.get_action("scheming_dataset_schema_show")( - {}, {"type": schema_type} - ) - -@helper -def schemingdcat_get_schema_form_groups(entity_type=None, object_type=None, schema=None): - """ - Return a list of schema metadata groups for this form. - - 1. return schema['schema_form_groups'] if it is defined - 2. 
get schema from entity_type + object_type then - return schema['schema_form_groups'] if they are defined - """ - if schema and "schema_form_groups" in schema: - return schema["schema_form_groups"] - elif entity_type and object_type: - schema = scheming_get_schema(entity_type, object_type) - return schema["schema_form_groups"] if schema and "schema_form_groups" in schema else None - else: - return None - -# Vocabs -@helper -def get_inspire_themes(*args, **kwargs) -> typing.List[typing.Dict[str, str]]: - log.debug(f"inside get_inspire_themes {args=} {kwargs=}") - try: - inspire_themes = p.toolkit.get_action("tag_list")( - data_dict={"vocabulary_id": sdct_config.SCHEMINGDCAT_INSPIRE_THEMES_VOCAB} - ) - except p.toolkit.ObjectNotFound: - inspire_themes = [] - return [{"value": t, "label": t} for t in inspire_themes] - -@helper -def get_ckan_cleaned_name(name): - """ - Cleans a name by removing accents, special characters, and spaces. - - Args: - name (str): The name to clean. - - Returns: - str: The cleaned name. 
- """ - MAX_TAG_LENGTH = 100 - MIN_TAG_LENGTH = 2 - # Define a dictionary to map accented characters to their unaccented equivalents except ñ - accent_map = { - "á": "a", "à": "a", "ä": "a", "â": "a", "ã": "a", - "é": "e", "è": "e", "ë": "e", "ê": "e", - "í": "i", "ì": "i", "ï": "i", "î": "i", - "ó": "o", "ò": "o", "ö": "o", "ô": "o", "õ": "o", - "ú": "u", "ù": "u", "ü": "u", "û": "u", - "ñ": "ñ", - } - - # Convert the name to lowercase - name = name.lower() - - # Replace accented and special characters with their unaccented equivalents or - - name = "".join(accent_map.get(c, c) for c in name) - name = re.sub(r"[^a-zñ0-9_.-]", "-", name.strip()) - - # Truncate the name to MAX_TAG_LENGTH characters - name = name[:MAX_TAG_LENGTH] - - # If the name is shorter than MIN_TAG_LENGTH, pad it with underscores - if len(name) < MIN_TAG_LENGTH: - name = name.ljust(MIN_TAG_LENGTH, '_') - - return name - -@helper -def get_featured_datasets(count=1): - """ - This helper function retrieves a specified number of featured datasets from the CKAN instance. - It uses the 'package_search' action of the CKAN logic layer to perform a search with specific parameters. - - Parameters: - count (int): The number of featured datasets to retrieve. Default is 1. - - Returns: - list: A list of dictionaries, each representing a featured dataset. - """ - fq = '+featured:true' - search_dict = { - 'fq': fq, - 'sort': 'metadata_modified desc', - 'fl': 'id,name,title,notes,state,metadata_modified,type,extras_featured,extras_graphic_overview', - 'rows': count - } - context = {'model': model, 'session': model.Session} - result = logic.get_action('package_search')(context, search_dict) - - return result['results'] - -@helper -def get_spatial_datasets(count=10): - """ - This helper function retrieves a specified number of featured datasets from the CKAN instance. - It uses the 'package_search' action of the CKAN logic layer to perform a search with specific parameters. 
- - Parameters: - count (int): The number of featured datasets to retrieve. Default is 1. - - Returns: - list: A list of dictionaries, each representing a featured dataset. - """ - fq = '+dcat_type:*inspire*' - search_dict = { - 'fq': fq, - 'fl': 'extras_dcat_type', - 'rows': count - } - context = {'model': model, 'session': model.Session} - result = logic.get_action('package_search')(context, search_dict) - - return result['results'] - -@lru_cache(maxsize=None) -@helper -def get_header_endpoint_url(endpoint, site_protocol_and_host): - url_for = ckan_helpers.url_for - endpoint_type = endpoint['type'] - endpoint_value = endpoint['endpoint'] - - if endpoint_type == 'ogc': - if ckan_helpers.is_url(endpoint_value): - return ckan_helpers.url_for_static_or_external(endpoint_value) - else: - protocol, host = site_protocol_and_host - return f"{protocol}://{host}/{endpoint_value}" - elif endpoint_type == 'ckan': - return url_for('api.action', ver=3, logic_function='package_list', qualified=True) - elif endpoint_type == 'lod': - return url_for(endpoint_value, **endpoint['endpoint_data']) - elif endpoint_type == 'sparql': - return url_for('/sparql') - -@helper -def schemingdcat_check_valid_url(url): - """ - Check if a string is a valid URL. - - Args: - url (str): The string to check. - - Returns: - bool: True if the string is a valid URL, False otherwise. 
- """ - try: - result = urlparse(url) - return all([result.scheme, result.netloc]) - except ValueError: - return False +from ckan.common import json, c, request, is_flask_request +from ckan.lib import helpers as ckan_helpers +import ckan.logic as logic +from ckan import model +from ckan.lib.i18n import get_available_locales, get_lang +import ckan.plugins as p +import six +import re +import yaml +from yaml.loader import SafeLoader +from pathlib import Path +from functools import lru_cache +import datetime +import typing +from urllib.parse import urlparse +from urllib.error import URLError + +from six.moves.urllib.parse import urlencode + +from ckanext.scheming.helpers import ( + scheming_choices_label, + scheming_language_text, + scheming_dataset_schemas, + scheming_get_schema +) + +from ckanext.harvest.helpers import ( + get_harvest_source +) +from ckanext.harvest.utils import ( + DATASET_TYPE_NAME +) + +import ckanext.schemingdcat.config as sdct_config +from ckanext.schemingdcat.utils import ( + get_facets_dict, + public_file_exists, + public_dir_exists, +) +from ckanext.dcat.utils import CONTENT_TYPES, get_endpoint +from ckanext.fluent.validators import LANG_SUFFIX +import logging + +log = logging.getLogger(__name__) + +all_helpers = {} +prettify_cache = {} +DEFAULT_LANG = None + +@lru_cache(maxsize=None) +def get_scheming_dataset_schemas(): + """ + Retrieves the dataset schemas using the scheming_dataset_schemas function. + Caches the result using the LRU cache decorator for efficient retrieval. + """ + return scheming_dataset_schemas() + + +def helper(fn): + """Collect helper functions into the ckanext.schemingdcat.all_helpers dictionary. + + Args: + fn (function): The helper function to add to the dictionary. + + Returns: + function: The helper function. + """ + all_helpers[fn.__name__] = fn + return fn + + +@helper +def schemingdcat_get_schema_names(): + """ + Get the names of all the schemas defined for the Scheming DCAT extension. 
+ + Returns: + list: A list of schema names. + """ + schemas = get_scheming_dataset_schemas() + + return [schema["schema_name"] for schema in schemas.values()] + + +@helper +def schemingdcat_default_facet_search_operator(): + """Return the default facet search operator: AND/OR. + + Returns: + str: The default facet search operator. + """ + facet_operator = sdct_config.default_facet_operator + if facet_operator and ( + facet_operator.upper() == "AND" or facet_operator.upper() == "OR" + ): + facet_operator = facet_operator.upper() + else: + facet_operator = "AND" + return facet_operator + + +@helper +def schemingdcat_decode_json(json_text): + """Convert a JSON string to a Python object. + + Args: + json_text (str): The JSON string to convert. + + Returns: + object: A Python object representing the JSON data. + """ + return json.loads(json_text) + + +@helper +def schemingdcat_organization_name(org_id): + """Return the name of the organization from its ID. + + Args: + org_id (dict): A dictionary containing the ID of the organization. + + Returns: + str: The name of the organization, or None if the organization cannot be found. + """ + org_name = None + try: + org_dic = ckan_helpers.get_organization(org_id["display_name"]) + if org_dic is not None: + org_name = org_dic["name"] + else: + log.warning( + "Could not find the name of the organization with ID {0}".format( + org_id["display_name"] + ) + ) + except Exception as e: + log.error( + "Exception while trying to find the name of the organization: {0}".format(e) + ) + return org_name + + +@helper +def schemingdcat_get_facet_label(facet): + """Return the label for a given facet. + + Args: + facet (str): The name of the facet. + + Returns: + str: The label for the given facet. 
+ """ + return get_facets_dict[facet] + + +@helper +def schemingdcat_get_facet_items_dict( + facet, search_facets=None, limit=None, exclude_active=False, scheming_choices=None +): + """Return the list of unselected facet items for the given facet, sorted + by count. + + Returns the list of unselected facet contraints or facet items (e.g. tag + names like "russian" or "tolstoy") for the given search facet (e.g. + "tags"), sorted by facet item count (i.e. the number of search results that + match each facet item). + + Reads the complete list of facet items for the given facet from + c.search_facets, and filters out the facet items that the user has already + selected. + + List of facet items are ordered acording the faccet_sort parameter + + Arguments: + facet -- the name of the facet to filter. + search_facets -- dict with search facets(c.search_facets in Pylons) + limit -- the max. number of facet items to return. + exclude_active -- only return unselected facets. + scheming_choices -- scheming choices to use to get label from value. 
+ + """ + + # log.debug("Returning facets for: {0}".format(facet)) + + order = "default" + items = [] + + search_facets = search_facets or getattr(c, "search_facets", None) + + if ( + search_facets + and isinstance(search_facets, dict) + and search_facets.get(facet, {}).get("items") + ): + for facet_item in search_facets.get(facet)["items"]: + if scheming_choices: + facet_item["label"] = scheming_choices_label( + scheming_choices, facet_item["name"] + ) + else: + facet_item["label"] = facet_item["display_name"] + + if not len(facet_item["name"].strip()): + continue + + params_items = ( + request.params.items(multi=True) + if is_flask_request() + else request.params.items() + ) + + if not (facet, facet_item["name"]) in params_items: + items.append(dict(active=False, **facet_item)) + elif not exclude_active: + items.append(dict(active=True, **facet_item)) + + # log.debug("params: {0}:{1}".format( + # facet,request.params.getlist("_%s_sort" % facet))) + order_lst = request.params.getlist("_%s_sort" % facet) + if len(order_lst): + order = order_lst[0] + # Sort descendingly by count and ascendingly by case-sensitive display name + # items.sort(key=lambda it: (-it['count'], it['display_name'].lower())) + sorts = { + "name": ("label", False), + "name_r": ("label", True), + "count": ("count", False), + "count_r": ("count", True), + } + if sorts.get(order): + items.sort( + key=lambda it: (it[sorts.get(order)[0]]), reverse=sorts.get(order)[1] + ) + else: + items.sort(key=lambda it: (-it["count"], it["label"].lower())) + + if hasattr(c, "search_facets_limits"): + if c.search_facets_limits and limit is None: + limit = c.search_facets_limits.get(facet) + # zero treated as infinite for hysterical raisins + if limit is not None and limit > 0: + return items[:limit] + + return items + + +@helper +def schemingdcat_new_order_url(facet_name, order_concept, extras=None): + """Return a URL with the order parameter for the given facet and concept to use. 
+ + Based on the actual order, it rotates cyclically from no order -> direct order -> inverse order over the given concept. + + Args: + facet_name (str): The name of the facet to order. + order_concept (str): The concept (name or count) that will be used to order. + extras (dict, optional): Extra parameters to include in the URL. + + Returns: + str: The URL with the order parameter for the given facet and concept. + """ + old_order = None + order_param = "_%s_sort" % facet_name + order_lst = request.params.getlist(order_param) + if not extras: + extras = {} + + controller = getattr(c, "controller", False) or request.blueprint + action = getattr(c, "action", False) or p.toolkit.get_endpoint()[1] + url = ckan_helpers.url_for(controller=controller, action=action, **extras) + + if len(order_lst): + old_order = order_lst[0] + + order_mapping = { + "name": {"name": "name_r", "name_r": None, None: "name"}, + "count": {"count": "count_r", "count_r": None, None: "count"}, + } + + new_order = order_mapping.get(order_concept, {}).get(old_order) + + params_items = ( + request.params.items(multi=True) + if is_flask_request() + else request.params.items() + ) + params_nopage = [(k, v) for k, v in params_items if k != order_param] + + if new_order: + params_nopage.append((order_param, new_order)) + + if params_nopage: + url = url + "?" + urlencode(params_nopage) + + return url + +@helper +def schemingdcat_get_facet_list_limit(): + """ + Retrieves the limit for the facet list from the scheming DCAT configuration. + + Returns: + int: The limit for the facet list. + """ + return sdct_config.facet_list_limit + +@helper +def schemingdcat_get_icons_dir(field=None, field_name=None): + """ + Returns the defined icons directory for a given scheming field definition or field name. + + This function is used to retrieve the icons directory associated with a + specific field in a scheming dataset or directly by field name. If no icons directory is defined, + the function will return None. 
+
+    Args:
+        field (dict, optional): A dictionary representing the scheming field definition.
+                                This should include all the properties of the field,
+                                including the icons directory if one is defined.
+        field_name (str, optional): The name of the field. If provided, the function will
+                                    look for an icons directory with this name.
+
+    Returns:
+        str: A string representing the icons directory for the field or field name.
+             If no icons directory is defined or found, the function will return None.
+    """
+    if field:
+        if "icons_dir" in field:
+            return field["icons_dir"]
+
+        if "field_name" in field:
+            dir = sdct_config.icons_dir + "/" + field["field_name"]
+            if public_dir_exists(dir):
+                return dir
+
+    elif field_name:
+        dir = sdct_config.icons_dir + "/" + field_name
+        if public_dir_exists(dir):
+            return dir
+
+    return None
+
+@helper
+def schemingdcat_get_default_icon(field):
+    """Return the defined default icon for a scheming field definition.
+
+    Args:
+        field (dict): The scheming field definition.
+
+    Returns:
+        str: The defined default icon, or None if not found.
+    """
+    if "default_icon" in field:
+        return field["default_icon"]
+
+@helper
+def schemingdcat_get_default_package_item_icon():
+    """
+    Returns the default package item icon defined in the configuration.
+
+    This function is used to retrieve the metadata field that the
+    schemingdcat configuration designates as the default icon source for
+    dataset items (sdct_config.default_package_item_icon). It takes no
+    arguments.
+
+    Returns:
+        str: The name of the metadata field used as the default package
+             item icon (e.g. 'theme'). This could be a field name, a URL,
+             or any other string format used to select or represent
+             images. If no default icon is defined in the configuration,
+             the function will return None.
+ """ + return sdct_config.default_package_item_icon + +@helper +def schemingdcat_get_default_package_item_show_spatial(): + """ + Returns the configuration value for showing spatial information in the default package item. + + This function is used to retrieve the configuration value that determines + whether the spatial information should be shown in the default package item. + If no value is defined in the configuration, the function will return None. + + Returns: + bool: A boolean value representing whether the spatial information should + be shown in the default package item. If no value is defined in the + configuration, the function will return None. + """ + return sdct_config.default_package_item_show_spatial + +@helper +def schemingdcat_get_show_metadata_templates_toolbar(): + """ + Returns the configuration value for showing the metadata templates toolbar. + + This function is used to retrieve the configuration value that determines + whether the metadata templates toolbar should be shown or not. If the configuration + value is not set, the function will return False. + + Returns: + bool: A boolean value representing whether the metadata templates toolbar + should be shown. If the configuration value is not set, the function + will return False. + """ + return sdct_config.show_metadata_templates_toolbar + +@helper +def schemingdcat_get_metadata_templates_search_identifier(): + """ + Returns the default icon defined for a given scheming field definition. + + This function is used to retrieve the default value to retrieve metadata templates. If no default value is defined, + the function will return None. + + Args: + field (dict): A dictionary representing the scheming field definition. + This should include all the properties of the field, + including the default icon if one is defined. + + Returns: + str: A string representing the default icon for the field. This could + be a URL, a data URI, or any other string format used to represent + images. 
If no default icon is defined for the field, the function + will return None. + """ + return sdct_config.metadata_templates_search_identifier + +@helper +def schemingdcat_get_schemingdcat_xls_harvest_templates(search_identifier=sdct_config.metadata_templates_search_identifier, count=10): + """ + This helper function retrieves the schemingdcat_xls templates from the CKAN instance. + It uses the 'package_search' action of the CKAN logic layer to perform a search with specific parameters. + + Parameters: + search_identifier (str): The text to search in the identifier. Default is sdct_config.metadata_templates_search_identifier. + count (int): The number of featured datasets to retrieve. Default is 10. + + Returns: + list: A list of dictionaries, each representing a featured dataset. If no results are found, returns None. + """ + fq = f'+extras_schemingdcat_xls_metadata_template:{True}' + search_dict = { + 'fq': fq, + 'fl': 'name,extras_identifier,title,notes,metadata_modified,extras_title_translated,extras_notes_translated', + 'rows': count + } + context = {'model': model, 'session': model.Session} + result = logic.get_action('package_search')(context, search_dict) + + if not result['results']: + fq = f'+extras_schemingdcat_xls_metadata_template:*{search_identifier}*' + search_dict['fq'] = fq + result = logic.get_action('package_search')(context, search_dict) + + return result['results'] if result['results'] else None + +@helper +def schemingdcat_get_icon( + choice=None, icons_dir=None, default="/images/default/no_icon.svg", choice_value=None +): + """Return the relative URL to the icon for the item. + + Args: + choice (dict, optional): The choice selected for the field. + icons_dir (str, optional): The path to search for the icon. Usually the common path for icons for this field. + default (str, optional): The default value to return if no icon is found. + choice_value (str, optional): The value of the choice selected for the field. 
If provided, it will be used instead of choice['value']. + + Returns: + str: The relative URL to the icon, or the default value if not found. + """ + extensions = [".svg", ".png", ".jpg", ".jpeg", ".gif"] + icon_name = None + + if choice_value is None and choice: + choice_value = choice.get("icon") or choice.get("value") + + if choice_value: + if ckan_helpers.is_url(choice_value): + url_parts = choice_value.split("/") + + if len(url_parts) == 1: + icon_name = url_parts[-1].lower() + else: + icon_name = url_parts[-2].lower() + "/" + url_parts[-1].lower() + else: + icon_name = choice_value + + url_path = (icons_dir + "/" if icons_dir else "") + icon_name + + for extension in extensions: + if public_file_exists(url_path + extension): + return url_path + extension + + return default + +@helper +def schemingdcat_get_choice_item(field, value): + """Return the whole choice item for the given value in the scheming field. + + Args: + field (dict): The scheming field to look for the choice item in. + value (str): The option item value. + + Returns: + dict: The whole option item in scheming, or None if not found. + """ + if field and ("choices" in field): + # log.debug("Searching: {0} en {1}".format(value,field['choices'])) + for choice in field["choices"]: + if choice["value"] == value: + return choice + + return None + +@helper +def schemingdcat_get_choice_property(choices, value, property): + """ + Retrieve a specific property from a choice dictionary based on the given value. + + Args: + choices (list): List of dictionaries containing "label" and "value" keys. + value (str): The value to match against the choices. + property (str): The property to retrieve from the matching choice dictionary. + + Returns: + str or None: The property value from the matching choice dictionary, or None if not found. 
+ """ + for c in choices: + if c['value'] == value: + return c.get(property, None) + return None + + +@helper +def scheming_display_json_list(value): + """Return the object passed serialized as a JSON list. + + Args: + value (any): The object to serialize. + + Returns: + str: The serialized object as a JSON list, or the original value if it cannot be serialized. + """ + if isinstance(value, six.string_types): + return value + try: + return json.loads(value) + except (TypeError, ValueError): + return value + +@helper +def scheming_clean_json_value(value): + """Clean a JSON list value to avoid errors with: '"' and spaces. + + Args: + value (str): The object to serialize. + + Returns: + str: The cleaned value, or the original value if it cannot be cleaned. + """ + try: + value = value.strip(" ").replace('\\"', "%%%@#") + value = value.replace('"', "") + value = value.replace("%%%@#", '"') + return value + except (TypeError, ValueError): + return value + +def format_eli_label(parsed_url): + """ + Formats the label for a parsed URL with 'eli' segment. + + Args: + parsed_url (ParseResult): The parsed URL. + + Returns: + str: The formatted label. + """ + segments = parsed_url.path.split('/') + eli_index = next(i for i, segment in enumerate(segments) if segment == 'eli') + return '/'.join(segments[eli_index + 1:]).upper() + +@helper +def schemingdcat_prettify_url(url): + """ + Prettifies a URL by removing the protocol and trailing slash. + + Args: + url (str): The URL to prettify. + + Returns: + str: The prettified URL, or the original URL if an error occurred. + """ + if url in prettify_cache: + return prettify_cache[url] + + try: + prettified_url = re.sub(r"^https?://(?:www\.)?", "", url).rstrip("/") + prettify_cache[url] = prettified_url + return prettified_url + except (TypeError, AttributeError): + return url + +@helper +def schemingdcat_prettify_url_name(url): + """ + Prettifies a URL name by extracting the last segment and cleaning it. 
+ + Args: + url (str): The URL to extract the name from. + + Returns: + str: The prettified URL name, or the original URL if an error occurred. + """ + if url is None: + return url + + if url in prettify_cache: + return prettify_cache[url] + + try: + parsed_url = urlparse(url) + + if '/eli/' in url: + prettified_url_name = format_eli_label(parsed_url) + else: + url_name = parsed_url.path.split("/")[-1].split('.')[0].replace('_', '-') + prettified_url_name = ' '.join(url_name.split(' ')[:4]) + + prettify_cache[url] = prettified_url_name + return prettified_url_name + + except (URLError, ValueError) as e: + print(f"Error while prettifying URL: {e}") + return url + +@helper +def schemingdcat_listify_str(values): + """Converts a string or list/tuple of strings to a list of strings. + + If `values` is already a list or tuple, it is returned as is. If `values` is a string, + it is split into a list of strings using commas as the delimiter. Each string in the + resulting list is stripped of leading/trailing whitespace and quotes. + + Args: + values (str or list or tuple): The value(s) to convert to a list of strings. + + Returns: + list: A list of strings. + """ + if isinstance(values, str): + values = values.strip("][").split(",") + values = [item.strip().strip('"') for item in values] + elif not isinstance(values, (list, tuple)): + log.debug("Not a list or string: {0}".format(values)) + values = [""] + + return values + +@helper +def schemingdcat_load_yaml(file, folder="codelists"): + """Load a YAML file from the folder, by default 'codelists' directory. + + Args: + file (str): The name of the YAML file to load. + + Returns: + dict: A dictionary containing the data from the YAML file. 
+ """ + source_path = Path(__file__).resolve(True) + yaml_data = {} + try: + p = source_path.parent.joinpath(folder, file) + with open(p, "r") as f: + yaml_data = yaml.load(f, Loader=SafeLoader) + except FileNotFoundError: + log.error("The file {0} does not exist".format(file)) + except Exception as e: + log.error("Could not read configuration from {0}: {1}".format(file, e)) + + return yaml_data + +@helper +def schemingdcat_get_linked_data(id): + """Get linked data for a given identifier. + + Args: + id (str): The identifier to get linked data for. + + Returns: + list: A list of dictionaries containing linked data for the identifier. + """ + return [ + { + "name": name, + "display_name": sdct_config.linkeddata_links.get(name, {"display_name": content_type})[ + "display_name" + ], + "format": sdct_config.linkeddata_links.get(name, {}).get("format"), + "image_display_url": sdct_config.linkeddata_links.get(name, {}).get( + "image_display_url" + ), + "endpoint_icon": sdct_config.linkeddata_links.get(name, {}).get( + "endpoint_icon" + ), + "description": sdct_config.linkeddata_links.get(name, {}).get("description") + or f"Formats {content_type}", + "description_url": sdct_config.linkeddata_links.get(name, {}).get("description_url"), + "endpoint": "dcat.read_dataset", + "endpoint_data": { + "_id": id, + "_format": name, + }, + } + for name, content_type in CONTENT_TYPES.items() + ] + +@helper +def schemingdcat_get_catalog_endpoints(): + """Get the catalog endpoints. + + Returns: + list: A list of dictionaries containing linked data for the identifier. 
+ """ + csw_uri = schemingdcat_get_geospatial_endpoint("catalog") + + return [ + { + "name": item["name"], + "display_name": item["display_name"], + "format": item["format"], + "image_display_url": item["image_display_url"], + "endpoint_icon": item["endpoint_icon"], + "fa_icon": item["fa_icon"], + "description": item["description"], + "type": item["type"], + "profile": item["profile"], + "profile_label": item["profile_label"], + "endpoint": get_endpoint("catalog") + if item.get("type").lower() == "lod" + else csw_uri.format(version=item["version"]) + if item.get("type").lower() == "ogc" + else None, + "endpoint_data": { + "_format": item["format"], + "_external": True, + "profiles": item["profile"], + }, + } + for item in sdct_config.endpoints["catalog_endpoints"] + ] + +@helper +def schemingdcat_get_geospatial_endpoint(type="dataset"): + """Get geospatial base URI for CSW Endpoint. + + Args: + type (str): The type of endpoint to return. Can be 'catalog' or 'dataset'. + + Returns: + str: The base URI of the CSW Endpoint with the appropriate format. + """ + try: + if sdct_config.geometadata_base_uri: + csw_uri = sdct_config.geometadata_base_uri + + if ( + sdct_config.geometadata_base_uri + and "/csw" not in sdct_config.geometadata_base_uri + ): + csw_uri = sdct_config.geometadata_base_uri.rstrip("/") + "/csw" + elif sdct_config.geometadata_base_uri == "": + csw_uri = "/csw" + else: + csw_uri = sdct_config.geometadata_base_uri.rstrip("/") + except: + csw_uri = "/csw" + + if type == "catalog": + return csw_uri + "?service=CSW&version={version}&request=GetCapabilities" + else: + return ( + csw_uri + + "?service=CSW&version={version}&request=GetRecordById&id={id}&elementSetName={element_set_name}&outputSchema={output_schema}&OutputFormat={output_format}" + ) + +@helper +def schemingdcat_get_geospatial_metadata(): + """Get geospatial metadata for CSW formats. + + Returns: + list: A list of dictionaries containing geospatial metadata for CSW formats. 
+ """ + csw_uri = schemingdcat_get_geospatial_endpoint("dataset") + + return [ + { + "name": item["name"], + "display_name": item["display_name"], + "format": item["format"], + "image_display_url": item["image_display_url"], + "endpoint_icon": item["endpoint_icon"], + "description": item["description"], + "description_url": item["description_url"], + "url": csw_uri.format( + output_format=item["output_format"], + version=item["version"], + element_set_name=item["element_set_name"], + output_schema=item["output_schema"], + id="{id}", + ), + } + for item in sdct_config.geometadata_links["csw_formats"] + ] + +@helper +def schemingdcat_get_all_metadata(id): + """Get linked data and geospatial metadata for a given identifier. + + Args: + id (str): The identifier to get linked data and geospatial metadata for. + + Returns: + list: A list of dictionaries containing linked data and geospatial metadata for the identifier. + """ + geospatial_metadata = schemingdcat_get_geospatial_metadata() + linked_data = schemingdcat_get_linked_data(id) + + for metadata in geospatial_metadata: + metadata["endpoint_type"] = "csw" + + for data in linked_data: + data["endpoint_type"] = "dcat" + + return geospatial_metadata + linked_data + +@helper +def fluent_form_languages(field=None, entity_type=None, object_type=None, schema=None): + """ + Return a list of language codes for this form (or form field) + + 1. return field['form_languages'] if it is defined + 2. return schema['form_languages'] if it is defined + 3. get schema from entity_type + object_type then + return schema['form_languages'] if they are defined + 4. 
return languages from site configuration + """ + if field and "form_languages" in field: + return field["form_languages"] + if schema and "form_languages" in schema: + return schema["form_languages"] + if entity_type and object_type: + # late import for compatibility with older ckanext-scheming + from ckanext.scheming.helpers import scheming_get_schema + + schema = scheming_get_schema(entity_type, object_type) + if schema and "form_languages" in schema: + return schema["form_languages"] + + langs = [] + for l in get_available_locales(): + if l.language not in langs: + langs.append(l.language) + return langs + +@helper +def schemingdcat_fluent_form_label(field, lang): + """Returns a label for the input field in the specified language. + + If the field has a `fluent_form_label` defined, the label will be taken from there. + If a matching label cannot be found, this helper will return the standard label + with the language code in uppercase. + + Args: + field (dict): A dictionary representing the input field. + lang (str): A string representing the language code. + + Returns: + str: A string representing the label for the input field in the specified language. + """ + form_label = field.get("fluent_form_label", {}) + label = scheming_language_text(form_label.get(lang, field["label"])) + return f"{label} ({lang.upper()})" + +@helper +def schemingdcat_multiple_field_required(field, lang): + """ + Returns whether a field is required or not based on the field definition and language. + + Args: + field (dict): The field definition. + lang (str): The language to check for required fields. + + Returns: + bool: True if the field is required, False otherwise. 
+ """ + if "required" in field: + return field["required"] + if "required_language" in field and field["required_language"] == lang: + return True + return "not_empty" in field.get("validators", "").split() + +def parse_json(value, default_value=None): + try: + return json.loads(value) + except (ValueError, TypeError, AttributeError): + if default_value is not None: + return default_value + return value + +@helper +def schemingdcat_get_default_lang(): + global DEFAULT_LANG + if DEFAULT_LANG is None: + DEFAULT_LANG = p.toolkit.config.get("ckan.locale_default", "en") + return DEFAULT_LANG + +@helper +def schemingdcat_get_current_lang(): + """ + Returns the current language of the CKAN instance. + + Returns: + str: The current language of the CKAN instance. If the language cannot be determined, the default language 'en' is returned. + """ + try: + return get_lang() + except TypeError: + return p.toolkit.config.get("ckan.locale_default", "en") + +@helper +def schemingdcat_extract_lang_text(text, current_lang): + """ + Extracts the text content for a specified language from a string. + + Args: + text (str): The string to extract the language content from. + Example: "[#en#]Welcome to the CKAN Open Data Portal.[#es#]Bienvenido al portal de datos abiertos CKAN." + current_lang (str): The language code to extract the content for. + Example: "es" + + Returns: + str: The extracted language content, or the original string if no content is found. + Example: "Bienvenido al portal de datos abiertos CKAN." + + """ + + @lru_cache(maxsize=30) + def process_language_content(language_label, text): + """Helper function to process the content for a specific language label. + + Args: + language_label (str): The language label to process. + text (str): The text to process. + + Returns: + str: The text corresponding to the specified language label. 
+ + """ + pattern = re.compile(r'\[#(.*?)#\](.*?)(?=\[#|$)', re.DOTALL) + matches = pattern.findall(text) + + for lang, content in matches: + if lang == language_label.replace('[#', '').replace('#]', ''): + return content.strip() + + return '' + + lang_label = f"[#{current_lang}#]" + default_lang = schemingdcat_get_default_lang() + default_lang_label = f"[#{default_lang}#]" + + lang_text = process_language_content(lang_label, text) + + if not lang_text and lang_label != default_lang_label: + lang_text = process_language_content(default_lang_label, text) + + if not lang_text: + return text + + return lang_text + +@helper +def dataset_display_name(package_or_package_dict): + """ + Returns the localized value of the dataset name by extracting the correct translation. + + Args: + - package_or_package_dict: A dictionary containing the package information. + + Returns: + - The localized value of the dataset name. + """ + field_name = "title" if "title" in package_or_package_dict else "name" + + return schemingdcat_get_localized_value_from_dict( + package_or_package_dict, field_name + ) + + +@helper +def dataset_display_field_value(package_or_package_dict, field_name): + """ + Extracts the correct translation of the dataset field. + + Args: + package_or_package_dict (dict): The package or package dictionary to extract the value from. + field_name (str): The name of the field to extract the value for. + + Returns: + str: The localized value for the given field name. + """ + return schemingdcat_get_localized_value_from_dict( + package_or_package_dict, field_name + ) + +@helper +def schemingdcat_get_localized_value_from_dict( + package_or_package_dict, field_name, default="" +): + """ + Get the localized value from a dictionary. + + This function tries to get the value of a field in a specific language. + If the value is not available in the specific language, it tries to get it in the default language. 
+ If the value is not available in the default language, it tries to get the untranslated value. + If the untranslated value is not available, it returns a default value. + + Args: + package_or_package_dict (dict or str): The package or dictionary to get the value from. + If it's a string, it tries to convert it to a dictionary using json.loads. + field_name (str): The name of the field to get the value from. + default (str, optional): The default value to return if the value is not available. Defaults to "". + + Returns: + str: The localized value, or the default value if the localized value is not available. + """ + if isinstance(package_or_package_dict, str): + try: + package_or_package_dict = json.loads(package_or_package_dict) + except ValueError: + return default + + lang_code = schemingdcat_get_current_lang().split("_")[0] + schemingdcat_get_default_lang() + + translated_field = package_or_package_dict.get(field_name + "_translated", {}) + if isinstance(translated_field, str): + try: + translated_field = json.loads(translated_field) + except ValueError: + translated_field = {} + + # Check the lang_code, if not check the default_lang, if not check the field without translation + return translated_field.get(lang_code) or translated_field.get(DEFAULT_LANG) or package_or_package_dict.get(field_name, default) + +@helper +def schemingdcat_get_readable_file_size(num, suffix="B"): + if not num: + return False + try: + for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]: + num = float(num) + if abs(num) < 1024.0: + return "%3.1f%s%s" % (num, unit, suffix) + num /= 1024.0 + return "%.1f%s%s" % (num, "Y", suffix) + except ValueError: + return False + + +@helper +def schemingdcat_get_group_or_org(id, type="group"): + """ + Retrieve information about a group or organization in CKAN. + + Args: + id (str): The ID of the group or organization. + type (str, optional): The type of the entity to retrieve. Defaults to 'group'. 
+ + Returns: + dict: A dictionary containing information about the group or organization. + """ + return logic.get_action(f"{type}_show")({}, {"id": id}) + +@helper +def schemingdcat_package_list_for_source(source_id): + ''' + Creates a dataset list with the ones belonging to a particular harvest + source. + + It calls the package_list snippet and the pager. + ''' + limit = 20 + page = int(request.args.get('page', 1)) + fq = '+harvest_source_id:"{0}"'.format(source_id) + search_dict = { + 'fq': fq, + 'rows': limit, + 'sort': 'metadata_modified desc', + 'start': (page - 1) * limit, + 'include_private': True + } + + context = {'model': model, 'session': model.Session} + harvest_source = get_harvest_source(source_id) + owner_org = harvest_source.get('owner_org', '') + if owner_org: + user_member_of_orgs = [org['id'] for org + in ckan_helpers.organizations_available('read')] + if (harvest_source and owner_org in user_member_of_orgs): + context['ignore_capacity_check'] = True + + query = logic.get_action('package_search')(context, search_dict) + + base_url = ckan_helpers.url_for( + '{0}.read'.format(DATASET_TYPE_NAME), + id=harvest_source['name'] + ) + + def pager_url(q=None, page=None): + url = base_url + if page: + url += '?page={0}'.format(page) + return url + + pager = ckan_helpers.Page( + collection=query['results'], + page=page, + url=pager_url, + item_count=query['count'], + items_per_page=limit + ) + pager.items = query['results'] + + if query['results']: + out = ckan_helpers.snippet('snippets/package_list.html', packages=query['results']) + out += pager.pager() + else: + out = ckan_helpers.snippet('snippets/package_list_empty.html') + + return out +@helper +def schemingdcat_package_count_for_source(source_id): + ''' + Returns the current package count for datasets associated with the given + source id + ''' + fq = '+harvest_source_id:"{0}"'.format(source_id) + search_dict = {'fq': fq, 'include_private': True} + context = {'model': model, 'session': 
model.Session} + result = logic.get_action('package_search')(context, search_dict) + return result.get('count', 0) + +@helper +def schemingdcat_parse_localised_date(date_=None): + '''Parse a datetime object or timestamp string as a localised date. + If timestamp is badly formatted, then None is returned. + + :param date_: the date + :type date_: datetime or date or ISO string format + :rtype: date + ''' + if not date_: + return None + if isinstance(date_, str): + try: + date_ = ckan_helpers.date_str_to_datetime(date_) + except (TypeError, ValueError): + return None + # check we are now a datetime or date + if isinstance(date_, datetime.datetime): + date_ = date_.date() + elif not isinstance(date_, datetime.date): + return None + + # Format date based on locale + locale = schemingdcat_get_current_lang() + if locale == 'es': + return date_.strftime('%d-%m-%Y') + else: + return date_.strftime('%Y-%m-%d') + +@lru_cache(maxsize=None) +@helper +def schemingdcat_get_dataset_schema(schema_type="dataset"): + """ + Retrieves the schema for the dataset instance and caches it using the LRU cache decorator for efficient retrieval. + + Args: + schema_type (str, optional): The type of schema to retrieve. Defaults to 'dataset'. + + Returns: + dict: The schema of the dataset instance. + """ + return logic.get_action("scheming_dataset_schema_show")( + {}, {"type": schema_type} + ) + +@helper +def schemingdcat_get_schema_form_groups(entity_type=None, object_type=None, schema=None): + """ + Return a list of schema metadata groups for this form. + + 1. return schema['schema_form_groups'] if it is defined + 2. 
get schema from entity_type + object_type then + return schema['schema_form_groups'] if they are defined + """ + if schema and "schema_form_groups" in schema: + return schema["schema_form_groups"] + elif entity_type and object_type: + schema = scheming_get_schema(entity_type, object_type) + return schema["schema_form_groups"] if schema and "schema_form_groups" in schema else None + else: + return None + +# Vocabs +@helper +def get_inspire_themes(*args, **kwargs) -> typing.List[typing.Dict[str, str]]: + log.debug(f"inside get_inspire_themes {args=} {kwargs=}") + try: + inspire_themes = p.toolkit.get_action("tag_list")( + data_dict={"vocabulary_id": sdct_config.SCHEMINGDCAT_INSPIRE_THEMES_VOCAB} + ) + except p.toolkit.ObjectNotFound: + inspire_themes = [] + return [{"value": t, "label": t} for t in inspire_themes] + +@helper +def get_ckan_cleaned_name(name): + """ + Cleans a name by removing accents, special characters, and spaces. + + Args: + name (str): The name to clean. + + Returns: + str: The cleaned name. 
+ """ + MAX_TAG_LENGTH = 100 + MIN_TAG_LENGTH = 2 + # Define a dictionary to map accented characters to their unaccented equivalents except ñ + accent_map = { + "á": "a", "à": "a", "ä": "a", "â": "a", "ã": "a", + "é": "e", "è": "e", "ë": "e", "ê": "e", + "í": "i", "ì": "i", "ï": "i", "î": "i", + "ó": "o", "ò": "o", "ö": "o", "ô": "o", "õ": "o", + "ú": "u", "ù": "u", "ü": "u", "û": "u", + "ñ": "ñ", + } + + # Convert the name to lowercase + name = name.lower() + + # Replace accented and special characters with their unaccented equivalents or - + name = "".join(accent_map.get(c, c) for c in name) + name = re.sub(r"[^a-zñ0-9_.-]", "-", name.strip()) + + # Truncate the name to MAX_TAG_LENGTH characters + name = name[:MAX_TAG_LENGTH] + + # If the name is shorter than MIN_TAG_LENGTH, pad it with underscores + if len(name) < MIN_TAG_LENGTH: + name = name.ljust(MIN_TAG_LENGTH, '_') + + return name + +@helper +def get_featured_datasets(count=1): + """ + This helper function retrieves a specified number of featured datasets from the CKAN instance. + It uses the 'package_search' action of the CKAN logic layer to perform a search with specific parameters. + + Parameters: + count (int): The number of featured datasets to retrieve. Default is 1. + + Returns: + list: A list of dictionaries, each representing a featured dataset. + """ + fq = '+featured:true' + search_dict = { + 'fq': fq, + 'sort': 'metadata_modified desc', + 'fl': 'id,name,title,notes,state,metadata_modified,type,extras_featured,extras_graphic_overview', + 'rows': count + } + context = {'model': model, 'session': model.Session} + result = logic.get_action('package_search')(context, search_dict) + + return result['results'] + +@helper +def get_spatial_datasets(count=10): + """ + This helper function retrieves a specified number of featured datasets from the CKAN instance. + It uses the 'package_search' action of the CKAN logic layer to perform a search with specific parameters. 
+
+    Parameters:
+    count (int): The number of spatial datasets to retrieve. Default is 10.
+
+    Returns:
+    list: A list of dictionaries, each representing a spatial dataset.
+    """
+    fq = '+dcat_type:*inspire*'
+    search_dict = {
+        'fq': fq,
+        'fl': 'extras_dcat_type',
+        'rows': count
+    }
+    context = {'model': model, 'session': model.Session}
+    result = logic.get_action('package_search')(context, search_dict)
+
+    return result['results']
+
+@lru_cache(maxsize=None)
+@helper
+def get_header_endpoint_url(endpoint, site_protocol_and_host):
+    url_for = ckan_helpers.url_for
+    endpoint_type = endpoint['type']
+    endpoint_value = endpoint['endpoint']
+
+    if endpoint_type == 'ogc':
+        if ckan_helpers.is_url(endpoint_value):
+            return ckan_helpers.url_for_static_or_external(endpoint_value)
+        else:
+            protocol, host = site_protocol_and_host
+            return f"{protocol}://{host}/{endpoint_value}"
+    elif endpoint_type == 'ckan':
+        return url_for('api.action', ver=3, logic_function='package_list', qualified=True)
+    elif endpoint_type == 'lod':
+        return url_for(endpoint_value, **endpoint['endpoint_data'])
+    elif endpoint_type == 'sparql':
+        return url_for('/sparql')
+
+@helper
+def schemingdcat_check_valid_url(url):
+    """
+    Check if a string is a valid URL.
+
+    Args:
+        url (str): The string to check.
+
+    Returns:
+        bool: True if the string is a valid URL, False otherwise.
+    """
+    try:
+        result = urlparse(url)
+        return all([result.scheme, result.netloc])
+    except ValueError:
+        return False
diff --git a/ckanext/schemingdcat/lib/field_mapping.py b/ckanext/schemingdcat/lib/field_mapping.py
index 3babce2c..34fe3f8b 100644
--- a/ckanext/schemingdcat/lib/field_mapping.py
+++ b/ckanext/schemingdcat/lib/field_mapping.py
@@ -119,7 +119,7 @@ def validate_v1(self, field_mapping):
                     if not isinstance(lang, str) or not isinstance(remote_field_name, str):
                         raise ValueError('In translated fields, both language and remote_field_name must be strings. e.g. 
"notes_translated": {"es": "notes-es"}') if not re.match("^[a-z]{2}$", lang): - raise ValueError('Language code must be a 2-letter ISO 639-1 code') + raise ValueError(f'Invalid field "{lang}". Language code must be a 2-letter ISO 639-1 code') def validate_v2(self, field_mapping): """ @@ -162,7 +162,7 @@ def validate_v2(self, field_mapping): raise ValueError('%s must be a dictionary', self.language_field) for lang, lang_config in value.items(): if not isinstance(lang, str) or not re.match("^[a-z]{2}$", lang): - raise ValueError('Language code must be a 2-letter ISO 639-1 code') + raise ValueError(f'Invalid field "{lang}". Language code must be a 2-letter ISO 639-1 code') if not isinstance(lang_config, dict): raise ValueError('Language config must be a dictionary') for lang_prop, lang_value in lang_config.items(): diff --git a/ckanext/schemingdcat/package_controller.py b/ckanext/schemingdcat/package_controller.py index 1c1b8fd4..99db3d5b 100644 --- a/ckanext/schemingdcat/package_controller.py +++ b/ckanext/schemingdcat/package_controller.py @@ -1,159 +1,159 @@ -from ckan.common import request -import json -import ckan.plugins as plugins -import ckanext.schemingdcat.config as sdct_config -import ckanext.schemingdcat.utils as utils - -import logging -import sys - -FACET_OPERATOR_PARAM_NAME = '_facet_operator' -FACET_SORT_PARAM_NAME = '_%s_sort' - -log = logging.getLogger(__name__) - - -class PackageController(): - - plugins.implements(plugins.IPackageController) - - default_facet_operator = sdct_config.default_facet_operator - - def read(self, entity): - pass - - def create(self, entity): - pass - - def edit(self, entity): - pass - - def authz_add_role(self, object_role): - pass - - def authz_remove_role(self, object_role): - pass - - def delete(self, entity): - pass - - def before_search(self, search_params): - """Modifies search parameters before executing a search. 
- - This method adjusts the 'fq' (filter query) parameter based on the 'facet.field' value in the search parameters. If 'facet.field' is a list, it iterates through each field, applying the '_facet_search_operator' to modify 'fq'. If 'facet.field' is a string, it directly applies the '_facet_search_operator'. If 'facet.field' is not present or is invalid, no modification is made. - - Args: - search_params (dict): The search parameters to be modified. Expected to contain 'facet.field' and 'fq'. - - Returns: - dict: The modified search parameters. - - Raises: - Exception: Captures and logs any exception that occurs during the modification of search parameters. - """ - try: - facet_field = search_params.get('facet.field', '') - if not facet_field: - return search_params - elif isinstance(facet_field, list): - for field in facet_field: - new_fq = self._facet_search_operator(search_params.get('fq', ''), field) - if new_fq and isinstance(new_fq, str): - search_params.update({'fq': new_fq}) - elif isinstance(facet_field, str): - new_fq = self._facet_search_operator(search_params.get('fq', ''), facet_field) - if new_fq and isinstance(new_fq, str): - search_params.update({'fq': new_fq}) - except Exception as e: - log.error("[before_search] Error: %s", e) - return search_params - - def after_search(self, search_results, search_params): - return search_results - - def before_index(self, data_dict): - """Processes the data dictionary before indexing. - - Iterates through each facet defined in the system's facets dictionary. For each facet present in the data dictionary, it attempts to parse its value as JSON. If the value is a valid JSON string, it replaces the original string value with the parsed JSON object. If the value cannot be parsed as JSON (e.g., because it's not a valid JSON string), it leaves the value unchanged. Facets present in the data dictionary but not containing any data are removed. - - Args: - data_dict (dict): The data dictionary to be processed. 
It's expected to contain keys corresponding to facet names with their associated data as values. - - Returns: - dict: The processed data dictionary with JSON strings parsed into objects where applicable and empty facets removed. - """ - for facet, label in utils.get_facets_dict().items(): - data = data_dict.get(facet) - #log.debug("[before_index] Data ({1}) in facet: {0}".format(data, facet)) - if data: - if isinstance(data, str): - try: - data_dict[facet] = json.loads(data) - except json.decoder.JSONDecodeError: - data_dict[facet] = data - else: - if facet in data_dict: - del data_dict[facet] - - return data_dict - - def before_view(self, pkg_dict): - return pkg_dict - - def after_create(self, context, data_dict): - return data_dict - - def after_update(self, context, data_dict): - return data_dict - - def after_delete(self, context, data_dict): - return data_dict - - def after_show(self, context, data_dict): - return data_dict - - def update_facet_titles(self, facet_titles): - return facet_titles - - def package_controller_config(self, default_facet_operator): - self.default_facet_operator = default_facet_operator - - def _facet_search_operator(self, fq, facet_field): - """Modifies the query filter (fq) to use the OR operator among the specified facet filters. - - Args: - fq (str): The current query filter. - facet_field (list): List of facet fields to consider for the OR operation. - - Returns: - str: The modified query filter. 
- """ - new_fq = fq - try: - facet_operator = self.default_facet_operator - # Determine the facet operator based on request parameters - if request.params.get(FACET_OPERATOR_PARAM_NAME) == 'OR': - facet_operator = 'OR' - elif request.params.get(FACET_OPERATOR_PARAM_NAME) == 'AND': - facet_operator = 'AND' - - if facet_operator == 'OR' and facet_field: - # Split the original fq into conditions, assuming they are separated by " AND " - conditions = fq.split(' AND ') - # Filter and group conditions that correspond to facet fields - facet_conditions = [cond for cond in conditions if any(fld in cond for fld in facet_field)] - non_facet_conditions = [cond for cond in conditions if not any(fld in cond for fld in facet_field)] - # Reconstruct fq using " OR " to join facet conditions and " AND " for the rest - if facet_conditions: - new_fq = ' OR '.join(facet_conditions) - if non_facet_conditions: - new_fq = f"({new_fq}) AND {' AND '.join(non_facet_conditions)}" - else: - new_fq = ' AND '.join(non_facet_conditions) - - except Exception as e: - log.error("[_facet_search_operator] Error modifying the query filter: %s", e) - # In case of error, return the original fq - new_fq = fq - +from ckan.common import request +import json +import ckan.plugins as plugins +import ckanext.schemingdcat.config as sdct_config +import ckanext.schemingdcat.utils as utils + +import logging +import sys + +FACET_OPERATOR_PARAM_NAME = '_facet_operator' +FACET_SORT_PARAM_NAME = '_%s_sort' + +log = logging.getLogger(__name__) + + +class PackageController(): + + plugins.implements(plugins.IPackageController) + + default_facet_operator = sdct_config.default_facet_operator + + def read(self, entity): + pass + + def create(self, entity): + pass + + def edit(self, entity): + pass + + def authz_add_role(self, object_role): + pass + + def authz_remove_role(self, object_role): + pass + + def delete(self, entity): + pass + + def before_search(self, search_params): + """Modifies search parameters before 
executing a search. + + This method adjusts the 'fq' (filter query) parameter based on the 'facet.field' value in the search parameters. If 'facet.field' is a list, it iterates through each field, applying the '_facet_search_operator' to modify 'fq'. If 'facet.field' is a string, it directly applies the '_facet_search_operator'. If 'facet.field' is not present or is invalid, no modification is made. + + Args: + search_params (dict): The search parameters to be modified. Expected to contain 'facet.field' and 'fq'. + + Returns: + dict: The modified search parameters. + + Raises: + Exception: Captures and logs any exception that occurs during the modification of search parameters. + """ + try: + facet_field = search_params.get('facet.field', '') + if not facet_field: + return search_params + elif isinstance(facet_field, list): + for field in facet_field: + new_fq = self._facet_search_operator(search_params.get('fq', ''), field) + if new_fq and isinstance(new_fq, str): + search_params.update({'fq': new_fq}) + elif isinstance(facet_field, str): + new_fq = self._facet_search_operator(search_params.get('fq', ''), facet_field) + if new_fq and isinstance(new_fq, str): + search_params.update({'fq': new_fq}) + except Exception as e: + log.error("[before_search] Error: %s", e) + return search_params + + def after_search(self, search_results, search_params): + return search_results + + def before_index(self, data_dict): + """Processes the data dictionary before indexing. + + Iterates through each facet defined in the system's facets dictionary. For each facet present in the data dictionary, it attempts to parse its value as JSON. If the value is a valid JSON string, it replaces the original string value with the parsed JSON object. If the value cannot be parsed as JSON (e.g., because it's not a valid JSON string), it leaves the value unchanged. Facets present in the data dictionary but not containing any data are removed. 
+ + Args: + data_dict (dict): The data dictionary to be processed. It's expected to contain keys corresponding to facet names with their associated data as values. + + Returns: + dict: The processed data dictionary with JSON strings parsed into objects where applicable and empty facets removed. + """ + for facet, label in utils.get_facets_dict().items(): + data = data_dict.get(facet) + #log.debug("[before_index] Data ({1}) in facet: {0}".format(data, facet)) + if data: + if isinstance(data, str): + try: + data_dict[facet] = json.loads(data) + except json.decoder.JSONDecodeError: + data_dict[facet] = data + else: + if facet in data_dict: + del data_dict[facet] + + return data_dict + + def before_view(self, pkg_dict): + return pkg_dict + + def after_create(self, context, data_dict): + return data_dict + + def after_update(self, context, data_dict): + return data_dict + + def after_delete(self, context, data_dict): + return data_dict + + def after_show(self, context, data_dict): + return data_dict + + def update_facet_titles(self, facet_titles): + return facet_titles + + def package_controller_config(self, default_facet_operator): + self.default_facet_operator = default_facet_operator + + def _facet_search_operator(self, fq, facet_field): + """Modifies the query filter (fq) to use the OR operator among the specified facet filters. + + Args: + fq (str): The current query filter. + facet_field (list): List of facet fields to consider for the OR operation. + + Returns: + str: The modified query filter. 
+ """ + new_fq = fq + try: + facet_operator = self.default_facet_operator + # Determine the facet operator based on request parameters + if request.params.get(FACET_OPERATOR_PARAM_NAME) == 'OR': + facet_operator = 'OR' + elif request.params.get(FACET_OPERATOR_PARAM_NAME) == 'AND': + facet_operator = 'AND' + + if facet_operator == 'OR' and facet_field: + # Split the original fq into conditions, assuming they are separated by " AND " + conditions = fq.split(' AND ') + # Filter and group conditions that correspond to facet fields + facet_conditions = [cond for cond in conditions if any(fld in cond for fld in facet_field)] + non_facet_conditions = [cond for cond in conditions if not any(fld in cond for fld in facet_field)] + # Reconstruct fq using " OR " to join facet conditions and " AND " for the rest + if facet_conditions: + new_fq = ' OR '.join(facet_conditions) + if non_facet_conditions: + new_fq = f"({new_fq}) AND {' AND '.join(non_facet_conditions)}" + else: + new_fq = ' AND '.join(non_facet_conditions) + + except Exception as e: + log.error("[_facet_search_operator] Error modifying the query filter: %s", e) + # In case of error, return the original fq + new_fq = fq + return new_fq \ No newline at end of file diff --git a/ckanext/schemingdcat/utils.py b/ckanext/schemingdcat/utils.py index 90fb731e..071a4f4a 100644 --- a/ckanext/schemingdcat/utils.py +++ b/ckanext/schemingdcat/utils.py @@ -1,293 +1,290 @@ -from ckan.common import config -import ckan.logic as logic -from ckanext.schemingdcat import config as sdct_config -import logging -import os -import inspect -import json -import hashlib -from threading import Lock -from ckanext.dcat.utils import CONTENT_TYPES -import yaml -from yaml.loader import SafeLoader -from pathlib import Path - -try: - from paste.reloader import watch_file -except ImportError: - watch_file = None - -log = logging.getLogger(__name__) - -_facets_dict = None -_public_dirs = None -_files_hash = [] -_dirs_hash = [] - -_facets_dict_lock = 
Lock() -_public_dirs_lock = Lock() - - -def get_facets_dict(): - """Get the labels for all fields defined in the scheming file. - - Returns: - dict: A dictionary containing the labels for all fields defined in the scheming file. - """ - global _facets_dict - if not _facets_dict: - with _facets_dict_lock: - if not _facets_dict: - _facets_dict = {} - - schema = logic.get_action('scheming_dataset_schema_show')( - {}, - {'type': 'dataset'} - ) - - for item in schema['dataset_fields']: - _facets_dict[item['field_name']] = item['label'] - - for item in schema['resource_fields']: - _facets_dict[item['field_name']] = item['label'] - - return _facets_dict - -def get_public_dirs(): - """Get the list of public directories specified in the configuration file. - - Returns: - list: A list of public directories specified in the configuration file. - """ - global _public_dirs - - if not _public_dirs: - with _public_dirs_lock: - if not _public_dirs: - _public_dirs = config.get('extra_public_paths', '').split(',') - - return _public_dirs - -def public_file_exists(path): - """Check if a file exists in the public directories specified in the configuration file. - - Args: - path (str): The path of the file to check. - - Returns: - bool: True if the file exists in one of the public directories, False otherwise. - """ - #log.debug("Check if exists: {0}".format(path)) - file_hash = hashlib.sha512(path.encode('utf-8')).hexdigest() - - if file_hash in _files_hash: - return True - - public_dirs = get_public_dirs() - for i in range(len(public_dirs)): - public_path = os.path.join(public_dirs[i], path) - if os.path.isfile(public_path): - _files_hash.append(file_hash) - return True - - return False - -def public_dir_exists(path): - """Check if a directory exists in the public directories specified in the configuration file. - - Args: - path (str): The path of the directory to check. - - Returns: - bool: True if the directory exists in one of the public directories, False otherwise. 
- """ - dir_hash = hashlib.sha512(path.encode('utf-8')).hexdigest() - - if dir_hash in _dirs_hash: - return True - - public_dirs = get_public_dirs() - for i in range(len(public_dirs)): - public_path = os.path.join(public_dirs[i], path) - if os.path.isdir(public_path): - _dirs_hash.append(dir_hash) - return True - - return False - -def init_config(): - sdct_config.linkeddata_links = _load_yaml('linkeddata_links.yaml') - sdct_config.geometadata_links = _load_yaml('geometadata_links.yaml') - sdct_config.endpoints = _load_yaml(sdct_config.endpoints_yaml) - -def is_yaml(file): - """Check if a file has a YAML extension. - - Args: - file (str): The file name or path. - - Returns: - bool: True if the file has a .yaml or .yml extension, False otherwise. - """ - return file.lower().endswith(('.yaml', '.yml')) - -def _load_yaml(file): - """Load a YAML file, either from a module path or a default directory. - - Args: - file (str): The name of the YAML file to load. Can be a module path like "module:file.yaml". - - Returns: - dict: A dictionary containing the data from the YAML file, or an empty dictionary if the file is invalid or cannot be loaded. - """ - if not is_yaml(file): - log.error("The file {0} is not a valid YAML file".format(file)) - return {} - - yaml_data = _load_yaml_module_path(file) - if not yaml_data: - yaml_data = _load_default_yaml(file) - return yaml_data - -def _load_yaml_module_path(file): - """Load a YAML file from a module path. - - Given a path like "module:file.yaml", find the file relative to the import path of the module. - - Args: - file (str): The module path of the YAML file. - - Returns: - dict or None: A dictionary containing the data from the YAML file, or None if the module cannot be imported or the file cannot be loaded. 
- """ - log.debug('file: %s', file) - - if ':' not in file: - return None - - module, file_name = file.split(':', 1) - try: - m = __import__(module, fromlist=['']) - log.debug('m: %s', m) - log.debug('file_name: %s', os.path.join(os.path.dirname(inspect.getfile(m)), file_name)) - except ImportError: - log.error("Module {0} could not be imported".format(module)) - return None - - return _load_yaml_file(os.path.join(os.path.dirname(inspect.getfile(m)), file_name)) - -def _load_default_yaml(file): - """Load a YAML file from the 'codelists' directory of the schemingdcat extension. - - Args: - file (str): The name of the YAML file to load. - - Returns: - dict: A dictionary containing the data from the YAML file, or an empty dictionary if the file cannot be loaded. - """ - source_path = Path(__file__).resolve(True) - log.debug('source_path: %s', source_path) - return _load_yaml_file(source_path.parent.joinpath('codelists', file)) - -def _load_yaml_file(path): - """Load a YAML file from a given path. - - Args: - path (str): The file path of the YAML file. - - Returns: - dict: A dictionary containing the data from the YAML file, or an empty dictionary if the file cannot be loaded. - """ - yaml_data = {} - try: - if os.path.exists(path): - if watch_file: - watch_file(path) - with open(path, 'r') as f: - yaml_data = yaml.load(f, Loader=SafeLoader) - else: - log.error("The file {0} does not exist".format(path)) - except Exception as e: - log.error("Could not read configuration from {0}: {1}".format(path, e)) - return yaml_data - -def get_linked_data(id): - """Get linked data for a given identifier. - - Args: - id (str): The identifier to get linked data for. - - Returns: - list: A list of dictionaries containing linked data for the identifier. 
- """ - if sdct_config.debug: - linkeddata_links = _load_yaml('linkeddata_links.yaml') - else: - linkeddata_links = sdct_config.linkeddata_links - - data=[] - for name in CONTENT_TYPES: - data.append({ - 'name': name, - 'display_name': linkeddata_links.get(name,{}).get('display_name',CONTENT_TYPES[name]), - 'image_display_url': linkeddata_links.get(name,{}).get('image_display_url', None), - 'description': linkeddata_links.get(name,{}).get('description','Formats '+ CONTENT_TYPES[name]), - 'description_url': linkeddata_links.get(name,{}).get('description_url', None), - 'endpoint_data':{ - '_id': id, - '_format': name, - } - }) - - return data - -def get_geospatial_metadata(): - """Get geospatial metadata for CSW formats. - - Returns: - list: A list of dictionaries containing geospatial metadata for CSW formats. - """ - if sdct_config.debug: - geometadata_links = _load_yaml('geometadata_links.yaml') - else: - geometadata_links = sdct_config.geometadata_links - data=[] - for item in geometadata_links.get('csw_formats',{}): - data.append({ - 'name': item['name'], - 'display_name': item['display_name'], - 'image_display_url': item['image_display_url'], - 'description': item['description'], - 'description_url': item['description_url'], - 'url': (sdct_config.geometadata_link_domain or '') + geometadata_links['csw_url'].format(output_format=item['output_format'], schema=item['output_schema'], id='{id}') - }) - - return data - -def parse_json(value, default_value=None): - """ - Parses a JSON string and returns the resulting object. - If the input value is not a valid JSON string, returns the default value. - If the default value is not provided, returns the input value. - - Args: - value (str): The JSON string to parse. - default_value (any, optional): The default value to return if the input value is not a valid JSON string. - Defaults to None. - - Returns: - any: The parsed JSON object, or the default value if the input value is not a valid JSON string. 
- """ - try: - return json.loads(value) - except (ValueError, TypeError, AttributeError): - if default_value is not None: - return default_value - - # The json may already have been parsed and we have the value for the - # language already. - if isinstance(value, int): - # If the value is a number, it has been converted into an int - but - # we want a string here. - return str(value) +from ckan.common import config +import ckan.logic as logic +from ckanext.schemingdcat import config as sdct_config +import logging +import os +import inspect +import json +import hashlib +from threading import Lock +from ckanext.dcat.utils import CONTENT_TYPES +import yaml +from yaml.loader import SafeLoader +from pathlib import Path + +try: + from paste.reloader import watch_file +except ImportError: + watch_file = None + +log = logging.getLogger(__name__) + +_facets_dict = None +_public_dirs = None +_files_hash = [] +_dirs_hash = [] + +_facets_dict_lock = Lock() +_public_dirs_lock = Lock() + + +def get_facets_dict(): + """Get the labels for all fields defined in the scheming file. + + Returns: + dict: A dictionary containing the labels for all fields defined in the scheming file. + """ + global _facets_dict + if not _facets_dict: + with _facets_dict_lock: + if not _facets_dict: + _facets_dict = {} + + schema = logic.get_action('scheming_dataset_schema_show')( + {}, + {'type': 'dataset'} + ) + + for item in schema['dataset_fields']: + _facets_dict[item['field_name']] = item['label'] + + for item in schema['resource_fields']: + _facets_dict[item['field_name']] = item['label'] + + return _facets_dict + +def get_public_dirs(): + """Get the list of public directories specified in the configuration file. + + Returns: + list: A list of public directories specified in the configuration file. 
+ """ + global _public_dirs + + if not _public_dirs: + with _public_dirs_lock: + if not _public_dirs: + _public_dirs = config.get('extra_public_paths', '').split(',') + + return _public_dirs + +def public_file_exists(path): + """Check if a file exists in the public directories specified in the configuration file. + + Args: + path (str): The path of the file to check. + + Returns: + bool: True if the file exists in one of the public directories, False otherwise. + """ + #log.debug("Check if exists: {0}".format(path)) + file_hash = hashlib.sha512(path.encode('utf-8')).hexdigest() + + if file_hash in _files_hash: + return True + + public_dirs = get_public_dirs() + for i in range(len(public_dirs)): + public_path = os.path.join(public_dirs[i], path) + if os.path.isfile(public_path): + _files_hash.append(file_hash) + return True + + return False + +def public_dir_exists(path): + """Check if a directory exists in the public directories specified in the configuration file. + + Args: + path (str): The path of the directory to check. + + Returns: + bool: True if the directory exists in one of the public directories, False otherwise. + """ + dir_hash = hashlib.sha512(path.encode('utf-8')).hexdigest() + + if dir_hash in _dirs_hash: + return True + + public_dirs = get_public_dirs() + for i in range(len(public_dirs)): + public_path = os.path.join(public_dirs[i], path) + if os.path.isdir(public_path): + _dirs_hash.append(dir_hash) + return True + + return False + +def init_config(): + sdct_config.linkeddata_links = _load_yaml('linkeddata_links.yaml') + sdct_config.geometadata_links = _load_yaml('geometadata_links.yaml') + sdct_config.endpoints = _load_yaml(sdct_config.endpoints_yaml) + +def is_yaml(file): + """Check if a file has a YAML extension. + + Args: + file (str): The file name or path. + + Returns: + bool: True if the file has a .yaml or .yml extension, False otherwise. 
+ """ + return file.lower().endswith(('.yaml', '.yml')) + +def _load_yaml(file): + """Load a YAML file, either from a module path or a default directory. + + Args: + file (str): The name of the YAML file to load. Can be a module path like "module:file.yaml". + + Returns: + dict: A dictionary containing the data from the YAML file, or an empty dictionary if the file is invalid or cannot be loaded. + """ + if not is_yaml(file): + log.error("The file {0} is not a valid YAML file".format(file)) + return {} + + yaml_data = _load_yaml_module_path(file) + if not yaml_data: + yaml_data = _load_default_yaml(file) + return yaml_data + +def _load_yaml_module_path(file): + """Load a YAML file from a module path. + + Given a path like "module:file.yaml", find the file relative to the import path of the module. + + Args: + file (str): The module path of the YAML file. + + Returns: + dict or None: A dictionary containing the data from the YAML file, or None if the module cannot be imported or the file cannot be loaded. + """ + + if ':' not in file: + return None + + module, file_name = file.split(':', 1) + try: + m = __import__(module, fromlist=['']) + except ImportError: + log.error("Module {0} could not be imported".format(module)) + return None + + return _load_yaml_file(os.path.join(os.path.dirname(inspect.getfile(m)), file_name)) + +def _load_default_yaml(file): + """Load a YAML file from the 'codelists' directory of the schemingdcat extension. + + Args: + file (str): The name of the YAML file to load. + + Returns: + dict: A dictionary containing the data from the YAML file, or an empty dictionary if the file cannot be loaded. + """ + source_path = Path(__file__).resolve(True) + log.debug('source_path: %s', source_path) + return _load_yaml_file(source_path.parent.joinpath('codelists', file)) + +def _load_yaml_file(path): + """Load a YAML file from a given path. + + Args: + path (str): The file path of the YAML file. 
+ + Returns: + dict: A dictionary containing the data from the YAML file, or an empty dictionary if the file cannot be loaded. + """ + yaml_data = {} + try: + if os.path.exists(path): + if watch_file: + watch_file(path) + with open(path, 'r') as f: + yaml_data = yaml.load(f, Loader=SafeLoader) + else: + log.error("The file {0} does not exist".format(path)) + except Exception as e: + log.error("Could not read configuration from {0}: {1}".format(path, e)) + return yaml_data + +def get_linked_data(id): + """Get linked data for a given identifier. + + Args: + id (str): The identifier to get linked data for. + + Returns: + list: A list of dictionaries containing linked data for the identifier. + """ + if sdct_config.debug: + linkeddata_links = _load_yaml('linkeddata_links.yaml') + else: + linkeddata_links = sdct_config.linkeddata_links + + data=[] + for name in CONTENT_TYPES: + data.append({ + 'name': name, + 'display_name': linkeddata_links.get(name,{}).get('display_name',CONTENT_TYPES[name]), + 'image_display_url': linkeddata_links.get(name,{}).get('image_display_url', None), + 'description': linkeddata_links.get(name,{}).get('description','Formats '+ CONTENT_TYPES[name]), + 'description_url': linkeddata_links.get(name,{}).get('description_url', None), + 'endpoint_data':{ + '_id': id, + '_format': name, + } + }) + + return data + +def get_geospatial_metadata(): + """Get geospatial metadata for CSW formats. + + Returns: + list: A list of dictionaries containing geospatial metadata for CSW formats. 
+ """ + if sdct_config.debug: + geometadata_links = _load_yaml('geometadata_links.yaml') + else: + geometadata_links = sdct_config.geometadata_links + data=[] + for item in geometadata_links.get('csw_formats',{}): + data.append({ + 'name': item['name'], + 'display_name': item['display_name'], + 'image_display_url': item['image_display_url'], + 'description': item['description'], + 'description_url': item['description_url'], + 'url': (sdct_config.geometadata_link_domain or '') + geometadata_links['csw_url'].format(output_format=item['output_format'], schema=item['output_schema'], id='{id}') + }) + + return data + +def parse_json(value, default_value=None): + """ + Parses a JSON string and returns the resulting object. + If the input value is not a valid JSON string, returns the default value. + If the default value is not provided, returns the input value. + + Args: + value (str): The JSON string to parse. + default_value (any, optional): The default value to return if the input value is not a valid JSON string. + Defaults to None. + + Returns: + any: The parsed JSON object, or the default value if the input value is not a valid JSON string. + """ + try: + return json.loads(value) + except (ValueError, TypeError, AttributeError): + if default_value is not None: + return default_value + + # The json may already have been parsed and we have the value for the + # language already. + if isinstance(value, int): + # If the value is a number, it has been converted into an int - but + # we want a string here. + return str(value) return value \ No newline at end of file From 8ad1c7447f17c89317d5e26c3c08cf4af380608b Mon Sep 17 00:00:00 2001 From: mjanez <96422458+mjanez@users.noreply.github.com> Date: Mon, 29 Jul 2024 15:03:28 +0200 Subject: [PATCH 2/8] Improve ckan harvester - Add field_mapping for ckan harvester. - Add field_mapping for extras fields. 
- Add interfaces --- README.md | 128 +++++++++++++++++++++++- ckanext/schemingdcat/config.py | 6 ++ ckanext/schemingdcat/harvesters/base.py | 115 +++++++++++++-------- ckanext/schemingdcat/harvesters/ckan.py | 52 +++++++--- ckanext/schemingdcat/interfaces.py | 32 +++++- 5 files changed, 268 insertions(+), 65 deletions(-) diff --git a/README.md b/README.md index 526db978..b9845dd6 100644 --- a/README.md +++ b/README.md @@ -346,9 +346,9 @@ To use it, you need to add the `schemingdcat_ckan_harvester` plugin to your opti The Scheming DCAT CKAN Harvester supports the same configuration options as the [CKAN Harvester](https://github.com/ckan/ckanext-harvest#the-ckan-harvester), plus the following additional options: -* `dataset_field_mapping/distribution_field_mapping` (Optional): Mapping field names from local to remote instance, all info at: [Field mapping structure](#field-mapping-structure) +* `dataset_field_mapping/distribution_field_mapping` (Optional): Mapping field names from local to remote instance, all info at: [CKAN Harvester Field mapping structure](#field-mapping-structure) * `field_mapping_schema_version` (**Mandatory if exists** `dataset_field_mapping/distribution_field_mapping`): Schema version of the field_mapping to ensure compatibility with older schemas. The default is `2`. -* `schema` (Optional): The name of the schema to use for the harvested datasets. This is the `schema_name` as defined in the scheming file. The remote and local instances must have the same dataset schema. If not provided, the local instance schema will be used. +* `schema` (Optional): The name of the schema to use for the harvested datasets. This is the `schema_name` as defined in the scheming file. The remote and local instances must have the same dataset schema. If not provided, the `dataset_field_mapping/distribution_field_mapping` is needed to mapping fields. 
* `allow_harvest_datasets` (Optional): If `true`, the harvester will create new records even if the package type is from the harvest source. If `false`, the harvester will only create records that originate from the instance. Default is `false`. * `remote_orgs` (Optional): [WIP]. Only `only_local`. * `remote_groups` (Optional): [WIP]. Only `only_local`. @@ -400,10 +400,130 @@ And example configuration might look like this: // "field_value" extends the original list of values retrieved from the remote file for all records. "field_value": ["https://www.example.org/codelist/a","https://www.example.org/codelist/b", "https://www.example.org/codelist/c"] }, + "my_custom_field": { + // If you need to map a field in a remote dict to the "extras" dict, use the "extras_" prefix to indicate that the field is there. + "field_name": "extras_remote_custom_field" + }, }, } ``` +#### Field mapping structure +The `dataset_field_mapping`/`distribution_field_mapping` is structured as follows (multilingual version): + +```json +{ + ... + "field_mapping_schema_version": 2, + "/": { + "": { + "languages": { + "": { + <"field_value": "/">,/<"field_name": "/"> + }, + ... + }, + ... + }, + ... + } +} +``` + +* ``: The name of the field in the CKAN schema. + * ``: (Optional) The language code for multilingual fields. This should be a valid [ISO 639-1 language code](https://localizely.com/iso-639-1-list/). This is now nested under the `languages` key. +* `/`: (Optional) A fixed value or a list of fixed values that will be assigned to the field for all records. +* **Field labels**: Field name: + * `/`: (Optional) The name of the field in the remote file or a list of field names. + +For fields that are not multilingual, you can directly use `field_name` without the `languages` key. For example: + +```json +{ + ... + "field_mapping_schema_version": 2, + "/": { + "": { + <"field_value": "/">,/<"field_name": "/"> + }, + ... 
+ } +} +``` + +>[!IMPORTANT] +>The field mapping can be done either at the dataset level using `dataset_field_mapping` or at the resource level using `distribution_field_mapping`. The structure and options are the same for both. The `field_mapping_schema_version` is `2` by default, but needs to be set to avoid errors. + +#### Field Types +There are two types of fields that can be defined in the configuration: + +1. **Regular fields**: These fields have a field label to define the mapping or a fixed value for all its records. + - **Properties**: A field can have one of these three properties: + - **Fixed value fields (`field_value`)**: These fields have a fixed value that is assigned to all records. This is defined using the `field_value` property. If `field_value` is a list, `field_name` could be set at the same time, and the `field_value` extends the list obtained from the remote field. + - **Field labels**: Field name: + - **Name based fields (`field_name`)**: These fields are defined by their name in the Excel file. This is defined using the `field_name` property, or if you need to map a field in a remote dict to the `extras` dict, use the `extras_` prefix to indicate that the field is there. +2. **Multilingual Fields (`languages`)**: These fields have different values for different languages. Each language is represented as a separate object within the field object (`es`, `en`, ...). The language object can have `field_value` and `field_name` properties, just like a normal field. + + +**Example** +Here are some examples of configuration files: + + * *Field names*: With `field_name` to define the mapping based on names of attributes in the remote sheet (`my_title`, `org_identifier`, `keywords`). + ```json + { + "storage_type": "gspread", + "dataset_sheet": "Dataset", + "distribution_sheet": "Distribution", + + ... + # other properties + ... 
+ + "field_mapping_schema_version": 2, + "dataset_field_mapping": { + "title": { + "field_name": "my_title" + }, + "title_translated": { + "languages": { + "en": { + "field_name": "my_title-en" + }, + "de": { + "field_value": "" + }, + "es": { + "field_name": "my_title" + } + } + }, + "private": { + "field_name": "private" + }, + "theme": { + "field_name": ["theme", "theme_eu"] + }, + "tag_custom": { + "field_name": "keywords" + }, + "tag_string": { + "field_name": ["theme_a", "theme_b", "theme_c"] + }, + "theme_es": { + "field_value": "http://datos.gob.es/kos/sector-publico/sector/medio-ambiente" + }, + "tag_uri": { + "field_name": "keyword_uri", + // "field_value" extends the original list of values retrieved from the remote file for all records. + "field_value": ["https://www.example.org/codelist/a","https://www.example.org/codelist/b", "https://www.example.org/codelist/c"] + }, + "my_custom_field": { + // If you need to map a field in a remote dict to the "extras" dict, use the "extras_" prefix to indicate that the field is there. + "field_name": "extras_remote_custom_field" + } + } + } + ``` ###TODO: Scheming DCAT CSW INSPIRE Harvester A harvester for remote CSW catalogues using the INSPIRE ISO 19139 metadata profile. This harvester is a subclass of the CSW Harvester provided by `ckanext-spatial` and is designed to work with the `schemingdcat` plugin to provide a more versatile and customizable harvester for CSW endpoints and GeoDCAT-AP CKAN instances. @@ -429,7 +549,7 @@ Remote Google Sheet/Onedrive Excel metadata upload Harvester supports the follow * `storage_type` - **Mandatory**: The type of storage to use for the harvested datasets as `onedrive` or `gspread`. Default is `onedrive`. * `dataset_sheet` - **Mandatory**: The name of the sheet in the Excel file that contains the dataset records. * `field_mapping_schema_version`: Schema version of the field_mapping to ensure compatibility with older schemas. The default is `2`. 
-* `dataset_field_mapping/distribution_field_mapping`: Mapping field names from local to remote instance, all info at: [Field mapping structure](#field-mapping-structure) +* `dataset_field_mapping/distribution_field_mapping`: Mapping field names from local to remote instance, all info at: [Field mapping structure](#field-mapping-structure-sheets-harvester) * `credentials`: The `credentials` parameter should be used to provide the authentication credentials. The credentials depends on the `storage_type` used. * For `onedrive`: The credentials parameter should be a dictionary with the following keys: `username`: A string representing the username. `password`: A string representing the password. * For `gspread` or `gdrive`: The credentials parameter should be a string containing the credentials in `JSON` format. You can obtain the credentials by following the instructions provided in the [Google Workspace documentation.](https://developers.google.com/workspace/guides/create-credentials?hl=es-419) @@ -452,7 +572,7 @@ Remote Google Sheet/Onedrive Excel metadata upload Harvester supports the follow * `clean_tags`: By default, tags are stripped of accent characters, spaces and capital letters for display. Setting this option to `False` will keep the original tag names. Default is `True`. * `source_date_format`: By default the harvester uses [`dateutil`](https://dateutil.readthedocs.io/en/stable/parser.html) to parse the date, but if the date format of the strings is particularly different you can use this parameter to specify the format, e.g. `%d/%m/%Y`. 
Accepted formats are: [COMMON_DATE_FORMATS](https://github.com/mjanez/ckanext-schemingdcat/blob/main/ckanext/schemingdcat/config.py#L185-L200) -#### Field mapping structure +#### Field mapping structure (Sheets harvester) The `dataset_field_mapping`/`distribution_field_mapping` is structured as follows (multilingual version): ```json diff --git a/ckanext/schemingdcat/config.py b/ckanext/schemingdcat/config.py index 90f555ba..c00b1b43 100644 --- a/ckanext/schemingdcat/config.py +++ b/ckanext/schemingdcat/config.py @@ -20,6 +20,12 @@ mimetype_base_uri = 'http://www.iana.org/assignments/media-types' slugify_pat = re.compile('[^a-zA-Z0-9]') +# schemingdcat field_mapping extras field_names +field_mapping_extras_prefix_symbol = '_' +field_mapping_extras_prefix_list = 'extras' +field_mapping_extras_prefix = field_mapping_extras_prefix_list + field_mapping_extras_prefix_symbol + + # Default DCAT metadata configuration OGC2CKAN_HARVESTER_MD_CONFIG = { 'access_rights': 'http://inspire.ec.europa.eu/metadata-codelist/LimitationsOnPublicAccess/noLimitations', diff --git a/ckanext/schemingdcat/harvesters/base.py b/ckanext/schemingdcat/harvesters/base.py index a4c87863..eaa5d3d5 100644 --- a/ckanext/schemingdcat/harvesters/base.py +++ b/ckanext/schemingdcat/harvesters/base.py @@ -43,7 +43,10 @@ URL_REGEX, INVALID_CHARS, ACCENT_MAP, - slugify_pat + slugify_pat, + field_mapping_extras_prefix, + field_mapping_extras_prefix_symbol, + field_mapping_extras_prefix_list ) log = logging.getLogger(__name__) @@ -496,18 +499,39 @@ def _standardize_ckan_dict_from_field_mapping(self, dataset, field_mapping): """ def normalize_key(key): """ - Helper function to normalize the key by converting to lowercase and replacing non-alphanumeric characters with underscores. + Helper function to normalize the key by removing accents, converting to lowercase, replacing non-alphanumeric characters with '-', and trimming spaces. 
""" - return slugify_pat.sub('_', key.lower()) + try: + key = key.strip() + + # Remove accents + norm_key = key.translate(ACCENT_MAP) + + # Replace non-alphanumeric characters with underscores + normalized_key = slugify_pat.sub('-', norm_key.lower()) + + #log.debug('key: %s normalize key: %s', key, normalized_key) + + return normalized_key + + except AttributeError: + # Manejar el caso donde 'key' no es una cadena + raise ValueError("The provided key must be a string") + + except Exception as e: + # Manejar cualquier otra excepción + raise RuntimeError(f"An unexpected error occurred: {e}") def get_extra_value(extras, key): """ Helper function to get the value from the extras list where the key matches (case insensitive and normalized). - """ + """ normalized_key = normalize_key(key) for item in extras: if normalize_key(item['key']) == normalized_key: + #log.debug('"extras" dict key: %s - normalized: %s', key, normalized_key) return item['value'] + return None def apply_field_mapping(d, mapping): @@ -515,9 +539,9 @@ def apply_field_mapping(d, mapping): for local_field, remote_info in mapping.items(): if 'field_name' in remote_info: remote_field = remote_info['field_name'] - if remote_field and remote_field.startswith('extras.'): - extra_key = remote_field.split('.', 1)[1] - extra_value = get_extra_value(d.get('extras', []), extra_key) + if remote_field and remote_field.startswith(field_mapping_extras_prefix): + extra_key = remote_field.split(field_mapping_extras_prefix_symbol, 1)[1] + extra_value = get_extra_value(d.get(field_mapping_extras_prefix_list, []), extra_key) if extra_value is not None: new_dict[local_field] = extra_value elif remote_field in d: @@ -528,9 +552,9 @@ def apply_field_mapping(d, mapping): for lang, lang_info in remote_info['languages'].items(): if 'field_name' in lang_info: remote_field = lang_info['field_name'] - if remote_field and remote_field.startswith('extras.'): - extra_key = remote_field.split('.', 1)[1] - extra_value = 
get_extra_value(d.get('extras', []), extra_key) + if remote_field and remote_field.startswith(field_mapping_extras_prefix): + extra_key = remote_field.split(field_mapping_extras_prefix_symbol, 1)[1] + extra_value = get_extra_value(d.get(field_mapping_extras_prefix_list, []), extra_key) if extra_value is not None: if local_field not in new_dict: new_dict[local_field] = {} @@ -544,7 +568,7 @@ def apply_field_mapping(d, mapping): new_dict[local_field] = {} new_dict[local_field][lang] = lang_info['field_value'] return new_dict - + # Apply dataset field mapping dataset_field_mapping = field_mapping.get('dataset_field_mapping', {}) standardized_dataset = apply_field_mapping(dataset, dataset_field_mapping) @@ -816,9 +840,9 @@ def get_mapped_fields(fields, field_mapping): for field in self._remote_schema["resource_fields"] ) else: - log.warning("Failed to retrieve remote schema from: %s. Using local schema by default.", remote_ckan_base_url) - remote_datasets_colnames = set() - remote_distributions_colnames = set() + log.warning("Failed to retrieve remote schema from: %s. Using local schema and config field_mapping by default.", remote_ckan_base_url) + remote_datasets_colnames = set(remote_dataset_field_mapping.keys()) + remote_distributions_colnames = set(remote_distribution_field_mapping.keys()) elif remote_dataset_field_names is not None: log.debug( @@ -1056,16 +1080,16 @@ def _check_existing_package_by_ids(self, package_dict): def _set_translated_fields(self, package_dict): """ Sets translated fields in the package dictionary based on the mapped schema. - + Args: package_dict (dict): The package dictionary to update with translated fields. - + Returns: dict: The updated package dictionary. - + Raises: ReadError: If there is an error translating the dataset. 
- + """ if ( not hasattr(self, "_mapped_schema") @@ -1079,26 +1103,24 @@ def _set_translated_fields(self, package_dict): if field.get("modified", True): local_field_name = field["local_field_name"] remote_field_name = field["remote_field_name"] - - translated_fields["dataset_fields"].append( - local_field_name - ) - + + translated_fields["dataset_fields"].append(local_field_name) + if isinstance(remote_field_name, dict): package_dict[local_field_name] = { - lang: package_dict.get(name, None) + lang: package_dict.get(name, package_dict.get(local_field_name, {}).get(lang)) for lang, name in remote_field_name.items() } if local_field_name.endswith('_translated'): if self._local_required_lang in remote_field_name: - package_dict[local_field_name.replace('_translated', '')] = package_dict.get(remote_field_name[self._local_required_lang], None) + package_dict[local_field_name.replace('_translated', '')] = package_dict.get(remote_field_name[self._local_required_lang], package_dict.get(local_field_name.replace('_translated', ''))) else: raise ValueError("Missing translated field: %s for required language: %s" % (remote_field_name, self._local_required_lang)) else: if remote_field_name not in package_dict: raise KeyError(f"Field {remote_field_name} does not exist in the local schema") - package_dict[local_field_name] = package_dict.get(remote_field_name, None) - + package_dict[local_field_name] = package_dict.get(remote_field_name, package_dict.get(local_field_name)) + if package_dict["resources"]: for i, resource in enumerate(package_dict["resources"]): if self._mapped_schema and "resource_fields" in self._mapped_schema and self._mapped_schema["resource_fields"] is not None: @@ -1106,36 +1128,38 @@ def _set_translated_fields(self, package_dict): if field.get("modified", True): local_field_name = field["local_field_name"] remote_field_name = field["remote_field_name"] - - translated_fields["resource_fields"].append( - local_field_name - ) - + + 
translated_fields["resource_fields"].append(local_field_name) + if isinstance(remote_field_name, dict): - package_dict[local_field_name] = { - lang: package_dict.get(name, None) + resource[local_field_name] = { + lang: resource.get(name, resource.get(local_field_name, {}).get(lang)) for lang, name in remote_field_name.items() } if local_field_name.endswith('_translated'): if self._local_required_lang in remote_field_name: - package_dict[local_field_name.replace('_translated', '')] = package_dict.get(remote_field_name[self._local_required_lang], None) + resource[local_field_name.replace('_translated', '')] = resource.get(remote_field_name[self._local_required_lang], resource.get(local_field_name.replace('_translated', ''))) else: raise ValueError("Missing translated field: %s for required language: %s" % (remote_field_name, self._local_required_lang)) - + else: + if remote_field_name not in resource: + raise KeyError(f"Field {remote_field_name} does not exist in the local schema") + resource[local_field_name] = resource.get(remote_field_name, resource.get(local_field_name)) + else: log.warning("self._mapped_schema['resource_fields'] is None, skipping resource fields translation.") - + # Update the resource in package_dict package_dict["resources"][i] = resource - + #log.debug('Translated fields: %s', translated_fields) - + except Exception as e: raise ReadError( "Error translating dataset: %s. Error: %s" % (package_dict["title"], str(e)) ) - + return package_dict # TODO: Fix this method @@ -1361,9 +1385,12 @@ def _update_package_dict_with_config_mapping_default_values(self, package_dict): # Create default values dict from config mappings. try: self.create_default_values(field_mappings) - - except ReadError as e: - self._save_gather_error('Error generating default values for dataset/distribution config field mappings: {0}'.format(e), harvest_job) + + except Exception as e: + raise ReadError( + "Error generating default values from config field mappings. 
Error: %s" + % (str(e)) + ) def update_dict_with_defaults(target_dict, default_values): for key, default_value in default_values.items(): @@ -1430,7 +1457,7 @@ def _set_package_dict_default_values(self, package_dict, harvest_object, context if default_extras: override_extras = self.config.get('override_extras',False) for key,value in default_extras.items(): - log.debug('Processing extra %s', key) + #log.debug('Processing extra %s', key) if not key in extras or override_extras: # Look for replacement strings if isinstance(value,six.string_types): diff --git a/ckanext/schemingdcat/harvesters/ckan.py b/ckanext/schemingdcat/harvesters/ckan.py index c070c205..9f6cbb2a 100644 --- a/ckanext/schemingdcat/harvesters/ckan.py +++ b/ckanext/schemingdcat/harvesters/ckan.py @@ -7,7 +7,7 @@ from urllib.parse import urlencode from ckanext.harvest.model import HarvestObject import datetime -from ckan.plugins import toolkit +import ckan.plugins as p import requests from requests.exceptions import HTTPError, RequestException @@ -15,7 +15,6 @@ import ckan.logic as logic import uuid - from ckanext.schemingdcat.harvesters.base import ( SchemingDCATHarvester, RemoteSchemaError, @@ -25,6 +24,7 @@ RemoteResourceError ) from ckanext.schemingdcat.lib.field_mapping import FieldMappingValidator +from ckanext.schemingdcat.interfaces import ISchemingDCATHarvester log = logging.getLogger(__name__) @@ -107,7 +107,9 @@ def validate_config(self, config): ) else: raise ValueError( - f"schema should match the local schema: {self._local_schema_name}" + f"Config schema should match the local schema: '{self._local_schema_name}'. " + f"Check the remote schema with CKAN API: {{ckan_site_url}}/api/3/action/scheming_dataset_schema_show?type=dataset, " + f"or specify the local schema, and the harvester will try to map the fields." 
) config = json.dumps({**config_obj, "schema": schema.lower().strip()}) @@ -181,7 +183,7 @@ def gather_stage(self, harvest_job): log.debug('In SchemingDCATCKANHarvester gather_stage with harvest source: %s and URL: %s', harvest_source_title, remote_ckan_base_url) # Get config options - toolkit.requires_ckan_version(min_version="2.0") + p.toolkit.requires_ckan_version(min_version="2.0") get_all_packages = True self._set_config(harvest_job.source.config) @@ -326,23 +328,20 @@ def gather_stage(self, harvest_job): # Check if the content_dicts colnames correspond to the local schema try: + + #log.debug('RAW package_dict: %s', pkg_dict) + #log.debug('content_dicts: %s', content_dicts) # Standardizes the field names pkg_dict = self._standardize_ckan_dict_from_field_mapping(pkg_dict, field_mappings) - log.debug('Standardized package dict: %s', pkg_dict) + + #log.debug('Standardized package dict: %s', pkg_dict) except RemoteSchemaError as e: self._save_gather_error('Error standarize remote dataset: {0}'.format(e), harvest_job) return [] package_ids.add(pkg_dict["id"]) - # Set translated fields - pkg_dict = self._set_translated_fields(pkg_dict) - log.debug( - "Creating HarvestObject for %s %s", pkg_dict["name"], pkg_dict["id"] - ) - log.debug('Translated package dict: %s', pkg_dict) - obj = HarvestObject( guid=pkg_dict["id"], job=harvest_job, content=json.dumps(pkg_dict) ) @@ -451,10 +450,16 @@ def modify_package_dict(self, package_dict, harvest_object): """ # Clean up any existing extras already in package_dict package_dict = self._remove_duplicate_keys_in_extras(package_dict) - + + # Set translated fields + package_dict = self._set_translated_fields(package_dict) + # Check basic fields without translations package_dict = self._fill_translated_properties(package_dict) + # Using self._dataset_default_values and self._distribution_default_values based on config mappings + package_dict = self._update_package_dict_with_config_mapping_default_values(package_dict) + return 
package_dict def import_stage(self, harvest_object): @@ -490,7 +495,7 @@ def import_stage(self, harvest_object): try: package_dict = json.loads(harvest_object.content) - + # Add default values: tags, groups, etc. package_dict = self._set_package_dict_default_values( package_dict, harvest_object, base_context @@ -575,13 +580,28 @@ def import_stage(self, harvest_object): # key. resource.pop("revision_id", None) - log.debug('package_dict BEFORE MODIFY: %s', package_dict) + # before_cleaning interface + for harvester in p.PluginImplementations(ISchemingDCATHarvester): + if hasattr(harvester, 'before_modify_package_dict'): + package_dict, before_modify_package_dict_errors = harvester.before_modify_package_dict(package_dict) + + for err in before_modify_package_dict_errors: + self._save_object_error(f'before_modify_package_dict error: {err}', harvest_object, 'Import') + return False + package_dict = self.modify_package_dict(package_dict, harvest_object) result = self._create_or_update_package( package_dict, harvest_object, package_dict_form="package_show" ) - log.debug('package_dict AFTER MODIFY: %s', package_dict) + # after_modify_package_dict interface + for harvester in p.PluginImplementations(ISchemingDCATHarvester): + if hasattr(harvester, 'after_modify_package_dict'): + package_dict, after_modify_package_dict_errors = harvester.after_modify_package_dict(package_dict) + + for err in after_modify_package_dict_errors: + self._save_object_error(f'after_modify_package_dict error: {err}', harvest_object, 'Import') + return False # Log package_dict, package dict is a dict log.debug("Package create or update: %s", result) diff --git a/ckanext/schemingdcat/interfaces.py b/ckanext/schemingdcat/interfaces.py index af54ad39..c318efd7 100644 --- a/ckanext/schemingdcat/interfaces.py +++ b/ckanext/schemingdcat/interfaces.py @@ -236,4 +236,34 @@ def update_package_schema_for_update(self, package_schema): Returns: object: The updated package_schema object """ - return 
package_schema \ No newline at end of file + return package_schema + + def before_modify_package_dict(self, package_dict): + """ + Interface called just before modifying the package_dict in the CKAN harvester. + + Args: + package_dict (dict): The package dictionary that is about to be updated. + + Returns: + tuple: A tuple with two items: + * The updated package dictionary. + * A list of error messages. These will get stored as import + errors by the harvester + """ + return package_dict, [] + + def after_modify_package_dict(self, package_dict): + """ + Interface called just after modifying the package_dict in the CKAN harvester. + + Args: + package_dict (dict): The package dictionary that has been updated. + + Returns: + tuple: A tuple with two items: + * The updated package dictionary. + * A list of error messages. These will get stored as import + errors by the harvester + """ + return package_dict, [] From 3beccce0cecf2d680e17f14b06adaaf87b9388ce Mon Sep 17 00:00:00 2001 From: mjanez <96422458+mjanez@users.noreply.github.com> Date: Mon, 29 Jul 2024 18:07:31 +0200 Subject: [PATCH 3/8] Improve ckan harvester - Fixduplicate list default_values from config. --- ckanext/schemingdcat/config.py | 7 ++----- ckanext/schemingdcat/harvesters/base.py | 22 +++++++++++++++------- ckanext/schemingdcat/harvesters/ckan.py | 3 --- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/ckanext/schemingdcat/config.py b/ckanext/schemingdcat/config.py index c00b1b43..01a5a0e9 100644 --- a/ckanext/schemingdcat/config.py +++ b/ckanext/schemingdcat/config.py @@ -19,12 +19,9 @@ metadata_templates_search_identifier = 'schemingdcat_xls-template' mimetype_base_uri = 'http://www.iana.org/assignments/media-types' slugify_pat = re.compile('[^a-zA-Z0-9]') - -# schemingdcat field_mapping extras field_names +# schemingdcat field_mapping extras prefix, e.g. 
custom_field = extras_custom_field +field_mapping_extras_prefix = 'extras' field_mapping_extras_prefix_symbol = '_' -field_mapping_extras_prefix_list = 'extras' -field_mapping_extras_prefix = field_mapping_extras_prefix_list + field_mapping_extras_prefix_symbol - # Default DCAT metadata configuration OGC2CKAN_HARVESTER_MD_CONFIG = { diff --git a/ckanext/schemingdcat/harvesters/base.py b/ckanext/schemingdcat/harvesters/base.py index eaa5d3d5..475a4745 100644 --- a/ckanext/schemingdcat/harvesters/base.py +++ b/ckanext/schemingdcat/harvesters/base.py @@ -46,7 +46,6 @@ slugify_pat, field_mapping_extras_prefix, field_mapping_extras_prefix_symbol, - field_mapping_extras_prefix_list ) log = logging.getLogger(__name__) @@ -540,8 +539,8 @@ def apply_field_mapping(d, mapping): if 'field_name' in remote_info: remote_field = remote_info['field_name'] if remote_field and remote_field.startswith(field_mapping_extras_prefix): - extra_key = remote_field.split(field_mapping_extras_prefix_symbol, 1)[1] - extra_value = get_extra_value(d.get(field_mapping_extras_prefix_list, []), extra_key) + extra_key = remote_field.split(field_mapping_extras_prefix + field_mapping_extras_prefix_symbol, 1)[1] + extra_value = get_extra_value(d.get(field_mapping_extras_prefix, []), extra_key) if extra_value is not None: new_dict[local_field] = extra_value elif remote_field in d: @@ -553,8 +552,8 @@ def apply_field_mapping(d, mapping): if 'field_name' in lang_info: remote_field = lang_info['field_name'] if remote_field and remote_field.startswith(field_mapping_extras_prefix): - extra_key = remote_field.split(field_mapping_extras_prefix_symbol, 1)[1] - extra_value = get_extra_value(d.get(field_mapping_extras_prefix_list, []), extra_key) + extra_key = remote_field.split(field_mapping_extras_prefix + field_mapping_extras_prefix_symbol, 1)[1] + extra_value = get_extra_value(d.get(field_mapping_extras_prefix, []), extra_key) if extra_value is not None: if local_field not in new_dict: new_dict[local_field] = 
{} @@ -1398,6 +1397,7 @@ def update_dict_with_defaults(target_dict, default_values): target_dict[key] = default_value elif isinstance(target_dict[key], list) and isinstance(default_value, list): target_dict[key].extend(default_value) + target_dict[key] = list(set(target_dict[key])) elif isinstance(default_value, dict): target_dict[key] = target_dict.get(key, {}) for subkey, subvalue in default_value.items(): @@ -1492,8 +1492,8 @@ def _set_package_dict_default_values(self, package_dict, harvest_object, context # Prepare tags package_dict, existing_tags_ids = self._set_ckan_tags(package_dict) - #TODO: Fix existing_tags_ids - log.debug('TODO:existing_tags_ids: %s', existing_tags_ids) + # Existing_tags_ids + log.debug('existing_tags_ids: %s', existing_tags_ids) # Set default tags if needed default_tags = self.config.get("default_tags", []) @@ -1516,6 +1516,14 @@ def _set_package_dict_default_values(self, package_dict, harvest_object, context package_dict["groups"] = cleaned_groups + # Remove duplicates in list or dictionary fields + for key, value in package_dict.items(): + if key not in ['groups', 'resources', 'tags']: + if isinstance(value, list): + package_dict[key] = list({json.dumps(item): item for item in value}.values()) + elif isinstance(value, dict): + package_dict[key] = {k: v for k, v in value.items()} + # log.debug('package_dict default values: %s', package_dict) return package_dict diff --git a/ckanext/schemingdcat/harvesters/ckan.py b/ckanext/schemingdcat/harvesters/ckan.py index 9f6cbb2a..39410827 100644 --- a/ckanext/schemingdcat/harvesters/ckan.py +++ b/ckanext/schemingdcat/harvesters/ckan.py @@ -630,9 +630,6 @@ def get_package_dict(self, harvest_object, context, package_dict=None): Returns: dict: The package dictionary with translated fields and default values set. """ - # Add default values: tags, groups, etc. 
- package_dict = self._set_package_dict_default_values(package_dict, harvest_object, context) - # Update unique ids for resource in package_dict['resources']: resource['alternate_identifier'] = resource['id'] From 36a298e9b3f2671c477dbf02da6280d61284aa07 Mon Sep 17 00:00:00 2001 From: mjanez <96422458+mjanez@users.noreply.github.com> Date: Tue, 30 Jul 2024 10:15:40 +0200 Subject: [PATCH 4/8] Fix bug when schemingdcat.endpoints_yaml is None --- ckanext/schemingdcat/plugin.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ckanext/schemingdcat/plugin.py b/ckanext/schemingdcat/plugin.py index dab1f085..1b6605b3 100644 --- a/ckanext/schemingdcat/plugin.py +++ b/ckanext/schemingdcat/plugin.py @@ -70,7 +70,7 @@ def update_config(self, config_): sdct_config.default_package_item_icon = config_.get( "schemingdcat.default_package_item_icon", sdct_config.default_package_item_icon - ) + ) or sdct_config.default_package_item_icon sdct_config.default_package_item_show_spatial = toolkit.asbool( config_.get( @@ -86,11 +86,11 @@ def update_config(self, config_): sdct_config.metadata_templates_search_identifier = config_.get( "schemingdcat.metadata_templates_search_identifier", sdct_config.metadata_templates_search_identifier - ) + ) or sdct_config.metadata_templates_search_identifier sdct_config.endpoints_yaml = config_.get( - "schemingdcat.endpoints_yaml", sdct_config.endpoints_yaml - ) + "schemingdcat.endpoints_yaml", sdct_config.endpoints_yaml + ) or sdct_config.endpoints_yaml sdct_config.debug = toolkit.asbool(config_.get("debug", sdct_config.debug)) From 32d790181001f92036183cf1607f865c4d9c5ce5 Mon Sep 17 00:00:00 2001 From: mjanez <96422458+mjanez@users.noreply.github.com> Date: Tue, 30 Jul 2024 13:13:31 +0200 Subject: [PATCH 5/8] Fix file_size in resource metadata info --- ckanext/schemingdcat/helpers.py | 1 - .../templates/schemingdcat/display_snippets/file_size.html | 2 +- .../schemingdcat/package/snippets/resource_extended_info.html | 2 +- 3 
files changed, 2 insertions(+), 3 deletions(-) diff --git a/ckanext/schemingdcat/helpers.py b/ckanext/schemingdcat/helpers.py index 5e61bfc1..f6b3162d 100644 --- a/ckanext/schemingdcat/helpers.py +++ b/ckanext/schemingdcat/helpers.py @@ -1071,7 +1071,6 @@ def schemingdcat_get_readable_file_size(num, suffix="B"): except ValueError: return False - @helper def schemingdcat_get_group_or_org(id, type="group"): """ diff --git a/ckanext/schemingdcat/templates/schemingdcat/display_snippets/file_size.html b/ckanext/schemingdcat/templates/schemingdcat/display_snippets/file_size.html index 33214654..595bf400 100644 --- a/ckanext/schemingdcat/templates/schemingdcat/display_snippets/file_size.html +++ b/ckanext/schemingdcat/templates/schemingdcat/display_snippets/file_size.html @@ -1 +1 @@ -{{ h.schemingdcat_get_readable_file_size(data[field.field_name]) or '-' }} \ No newline at end of file +{{ h.localised_filesize(data[field.field_name]) or '-' }} \ No newline at end of file diff --git a/ckanext/schemingdcat/templates/schemingdcat/package/snippets/resource_extended_info.html b/ckanext/schemingdcat/templates/schemingdcat/package/snippets/resource_extended_info.html index 1d06d733..fa20f205 100644 --- a/ckanext/schemingdcat/templates/schemingdcat/package/snippets/resource_extended_info.html +++ b/ckanext/schemingdcat/templates/schemingdcat/package/snippets/resource_extended_info.html @@ -102,7 +102,7 @@ {{ h.scheming_language_text(field.label) }} - {{ res[field_name] }} + {{ h.localised_filesize(res[field_name]) }} {% endblock %} From 3952322c036b7eeb4d3b2407448c943a842cf521 Mon Sep 17 00:00:00 2001 From: mjanez <96422458+mjanez@users.noreply.github.com> Date: Wed, 31 Jul 2024 10:20:15 +0200 Subject: [PATCH 6/8] Fix CKAN harvester search functionality - Now return results of all pages, not only the first x rows. 
--- ckanext/schemingdcat/harvesters/ckan.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ckanext/schemingdcat/harvesters/ckan.py b/ckanext/schemingdcat/harvesters/ckan.py index 39410827..de1b1922 100644 --- a/ckanext/schemingdcat/harvesters/ckan.py +++ b/ckanext/schemingdcat/harvesters/ckan.py @@ -383,10 +383,11 @@ def _search_for_datasets(self, remote_ckan_base_url, fq_terms=None): pkg_dicts = [] pkg_ids = set() previous_content = None - url = base_search_url + "?" + urlencode(params) - log.debug("Searching for CKAN datasets: %s", url) while True: + url = base_search_url + "?" + urlencode(params) + log.debug("Searching for CKAN datasets: %s", url) + try: content = self._get_content(url) except ContentFetchError as e: @@ -429,6 +430,8 @@ def _search_for_datasets(self, remote_ckan_base_url, fq_terms=None): params["start"] = str(int(params["start"]) + int(params["rows"])) + log.debug('Number of elements in remote CKAN: %s', len(pkg_dicts)) + return pkg_dicts def fetch_stage(self, harvest_object): From a226240480408ed7464d5b1b4e455a23d13ef8a0 Mon Sep 17 00:00:00 2001 From: mjanez <96422458+mjanez@users.noreply.github.com> Date: Wed, 31 Jul 2024 13:08:46 +0200 Subject: [PATCH 7/8] Improve clean_tags The clean_tags option is added to the configuration file, allowing users to control whether tags should be stripped of accent characters, spaces, and capital letters for display. --- README.md | 7 ++- ckanext/schemingdcat/config.py | 6 ++ ckanext/schemingdcat/harvesters/base.py | 80 +++++++++++++++++++------ 3 files changed, 72 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index b9845dd6..cb403ccd 100644 --- a/README.md +++ b/README.md @@ -352,12 +352,14 @@ The Scheming DCAT CKAN Harvester supports the same configuration options as the * `allow_harvest_datasets` (Optional): If `true`, the harvester will create new records even if the package type is from the harvest source. 
If `false`, the harvester will only create records that originate from the instance. Default is `false`. * `remote_orgs` (Optional): [WIP]. Only `only_local`. * `remote_groups` (Optional): [WIP]. Only `only_local`. +* `clean_tags`: By default, tags are stripped of accent characters, spaces and capital letters for display. Setting this option to `False` will keep the original tag names. Default is `True`. And example configuration might look like this: ```json { "api_version": 2, + "clean_tags": false, "default_tags": [{"name": "inspire"}, {"name": "geodcatap"}], "default_groups": ["transportation", "hb"], "default_extras": {"encoding":"utf8", "harvest_description":"Harvesting from Sample Catalog", "harvest_url": "{harvest_source_url}/dataset/{dataset_id}"}, @@ -471,9 +473,8 @@ Here are some examples of configuration files: ```json { - "storage_type": "gspread", - "dataset_sheet": "Dataset", - "distribution_sheet": "Distribution", + "api_version": 2, + "clean_tags": false, ... # other properties diff --git a/ckanext/schemingdcat/config.py b/ckanext/schemingdcat/config.py index 01a5a0e9..fa6987d4 100644 --- a/ckanext/schemingdcat/config.py +++ b/ckanext/schemingdcat/config.py @@ -274,6 +274,12 @@ "ñ": "ñ", }) +# CKAN tags fields to be searched in the harvester +AUX_TAG_FIELDS = [ + 'tag_string', + 'keywords' +] + URL_FIELD_NAMES = { 'dataset': ['dcat_type', 'theme_es', 'language', 'topic', 'maintainer_url', 'tag_uri', 'contact_uri', 'contact_url', 'publisher_identifier', 'publisher_uri', 'publisher_url', 'publisher_type', 'maintainer_uri', 'maintainer_url', 'author_uri', 'author_url', 'conforms_to', 'theme', 'reference_system', 'spatial_uri', 'representation_type', 'license_id', 'access_rights', 'graphic_overview', 'frequency', 'hvd_category'], diff --git a/ckanext/schemingdcat/harvesters/base.py b/ckanext/schemingdcat/harvesters/base.py index 475a4745..2861beac 100644 --- a/ckanext/schemingdcat/harvesters/base.py +++ b/ckanext/schemingdcat/harvesters/base.py @@ 
-43,6 +43,7 @@ URL_REGEX, INVALID_CHARS, ACCENT_MAP, + AUX_TAG_FIELDS, slugify_pat, field_mapping_extras_prefix, field_mapping_extras_prefix_symbol, @@ -1489,11 +1490,11 @@ def _set_package_dict_default_values(self, package_dict, harvest_object, context # Using self._dataset_default_values and self._distribution_default_values based on config mappings package_dict = self._update_package_dict_with_config_mapping_default_values(package_dict) - # Prepare tags - package_dict, existing_tags_ids = self._set_ckan_tags(package_dict) + # Prepare tags + package_dict, existing_tags_ids = self._set_ckan_tags(package_dict, clean_tags=self.config.get("clean_tags", True)) # Existing_tags_ids - log.debug('existing_tags_ids: %s', existing_tags_ids) + #log.debug('existing_tags_ids: %s', existing_tags_ids) # Set default tags if needed default_tags = self.config.get("default_tags", []) @@ -1559,13 +1560,14 @@ def _update_resource_dict(self, resource): return self._get_ckan_format(resource) - def _set_ckan_tags(self, package_dict, tag_fields=["tag_string", "keywords"]): + def _set_ckan_tags(self, package_dict, tag_fields=AUX_TAG_FIELDS, clean_tags=True): """ Process the tags from the provided sources. Args: package_dict (dict): The package dictionary containing the information. tag_fields (list): The list of sources to check for tags. Default: ['tag_string', 'keywords'] + clean_tags (bool): By default, tags are stripped of accent characters, spaces and capital letters for display. Setting this option to `False` will keep the original tag names. Default is `True`. Returns: list: A list of processed tags. 
@@ -1586,7 +1588,9 @@ def _set_ckan_tags(self, package_dict, tag_fields=["tag_string", "keywords"]): tags = [{"name": tags}] else: raise ValueError("Unsupported type for tags") - cleaned_tags = self._clean_tags(tags) + + # Clean tags + cleaned_tags = self._clean_tags(tags=tags, clean_tag_names=clean_tags, existing_dataset=True) for tag in cleaned_tags: if tag["name"] not in existing_tags_ids: @@ -1728,31 +1732,53 @@ def _get_ckan_format(self, resource): #log.debug('resource: %s', resource) return resource - def _clean_tags(self, tags): + def _clean_tags(self, tags, clean_tag_names=True, existing_dataset=False): """ Cleans the names of tags. - + Each keyword is cleaned by removing non-alphanumeric characters, allowing only: a-z, ñ, 0-9, _, -, ., and spaces, and truncating to a maximum length of 100 characters. If the name of the keyword is a URL, it is converted into a standard CKAN name using the _url_to_ckan_name function. - + Args: - tags (list): The tags to be cleaned. Each keyword is a - dictionary with a 'name' key. - + tags (list): The tags to be cleaned. Each keyword is a dictionary with a `name` key. + + clean_tag_names (bool): By default, tags are stripped of accent characters, spaces and capital letters for display. Setting this harvester config option `clean_tags` to `False` will keep the original tag names. Default is `True`. + + existing_dataset (bool): If the tags are from a dataset from the local CKAN instance. + Returns: list: A list of dictionaries with cleaned keyword names. 
""" cleaned_tags = [] + seen_names = set() + for k in tags: if k and "name" in k: name = k["name"] + vocabulary_id = k.get("vocabulary_id") or None if self._is_url(name): name = self._url_to_ckan_name(name) - cleaned_tags.append({"name": self._clean_name(name), "display_name": k["name"]}) - return cleaned_tags + + normalized_name = self._clean_name(name) + + if normalized_name in seen_names: + continue + + seen_names.add(normalized_name) + + tag = { + "name": normalized_name if clean_tag_names else name, + "display_name": k["name"] + } + + if vocabulary_id and existing_dataset: + tag["vocabulary_id"] = vocabulary_id + + cleaned_tags.append(tag) + return cleaned_tags def _is_url(self, name): """ @@ -1976,6 +2002,18 @@ def _create_or_update_package( package_dict["resources"] = new_resources + # Clean tags before update existing dataset + tags = package_dict.get("tags", []) + + if hasattr(self, 'config') and self.config: + package_dict["tags"] = self._clean_tags(tags=tags, clean_tag_names=self.config.get("clean_tags", True), existing_dataset=False) + else: + package_dict["tags"] = self._clean_tags(tags=tags, clean_tag_names=True, existing_dataset=True) + + # Remove tag_fields from package_dict + for field in AUX_TAG_FIELDS: + package_dict.pop(field, None) + for field in p.toolkit.aslist( config.get("ckan.harvest.not_overwrite_fields") ): @@ -2035,11 +2073,17 @@ def _create_or_update_package( "Import", ) - log.info( - "Created new package ID: %s with GUID: %s", - package_dict["id"], - harvest_object.guid, - ) + # Clean tags before create. 
Not existing_dataset + tags = package_dict.get("tags", []) + + if hasattr(self, 'config') and self.config: + package_dict["tags"] = self._clean_tags(tags=tags, clean_tag_names=self.config.get("clean_tags", True), existing_dataset=False) + else: + package_dict["tags"] = self._clean_tags(tags=tags, clean_tag_names=True, existing_dataset=False) + + # Remove tag_fields from package_dict + for field in AUX_TAG_FIELDS: + package_dict.pop(field, None) #log.debug('Package: %s', package_dict) harvest_object.package_id = package_dict["id"] From ea133a3ee40a574744c070db871f7d27b257f8c5 Mon Sep 17 00:00:00 2001 From: mjanez <96422458+mjanez@users.noreply.github.com> Date: Thu, 1 Aug 2024 02:27:18 +0200 Subject: [PATCH 8/8] Add licenses.json Add CC-BY 4.0 and more from https://licenses.opendefinition.org/licenses/groups/ckan.json --- .../schemingdcat/public/static/licenses.json | 220 ++++++++++++++++++ 1 file changed, 220 insertions(+) create mode 100644 ckanext/schemingdcat/public/static/licenses.json diff --git a/ckanext/schemingdcat/public/static/licenses.json b/ckanext/schemingdcat/public/static/licenses.json new file mode 100644 index 00000000..74fbe1e3 --- /dev/null +++ b/ckanext/schemingdcat/public/static/licenses.json @@ -0,0 +1,220 @@ +[ + { + "domain_content": false, + "domain_data": false, + "domain_software": false, + "family": "", + "id": "notspecified", + "is_generic": true, + "maintainer": "", + "od_conformance": "not reviewed", + "osd_conformance": "not reviewed", + "status": "active", + "title": "License Not Specified", + "url": "" + }, + { + "domain_content": false, + "domain_data": true, + "domain_software": false, + "family": "", + "id": "PDDL-1.0", + "legacy_ids": [ + "ODC-PDDL-1.0" + ], + "maintainer": "", + "od_conformance": "approved", + "osd_conformance": "not reviewed", + "status": "active", + "title": "Open Data Commons Public Domain Dedication and Licence 1.0", + "url": "https://opendefinition.org/licenses/odc-pddl" + }, + { + "domain_content": 
false, + "domain_data": true, + "domain_software": false, + "family": "", + "id": "ODbL-1.0", + "maintainer": "", + "od_conformance": "approved", + "osd_conformance": "not reviewed", + "status": "active", + "title": "Open Data Commons Open Database License 1.0", + "url": "https://opendefinition.org/licenses/odc-odbl" + }, + { + "domain_content": false, + "domain_data": true, + "domain_software": false, + "family": "", + "id": "ODC-BY-1.0", + "maintainer": "Open Data Commons", + "od_conformance": "approved", + "osd_conformance": "not reviewed", + "status": "active", + "title": "Open Data Commons Attribution License 1.0", + "url": "https://opendefinition.org/licenses/odc-by" + }, + { + "domain_content": true, + "domain_data": true, + "domain_software": true, + "family": "", + "id": "CC0-1.0", + "maintainer": "Creative Commons", + "od_conformance": "approved", + "osd_conformance": "not reviewed", + "status": "active", + "title": "CC0 1.0", + "url": "https://creativecommons.org/publicdomain/zero/1.0/" + }, + { + "domain_content": false, + "domain_data": false, + "domain_software": false, + "family": "", + "id": "cc-by", + "maintainer": "", + "od_conformance": "approved", + "osd_conformance": "not reviewed", + "status": "active", + "title": "Creative Commons Attribution", + "url": "http://www.opendefinition.org/licenses/cc-by" + }, + { + "domain_content": true, + "domain_data": true, + "domain_software": false, + "family": "", + "id": "CC-BY-4.0", + "maintainer": "Creative Commons", + "od_conformance": "approved", + "osd_conformance": "not reviewed", + "status": "active", + "title": "Creative Commons Attribution 4.0", + "url": "https://creativecommons.org/licenses/by/4.0/" + }, + { + "domain_content": true, + "domain_data": true, + "domain_software": false, + "family": "", + "id": "CC-BY-SA-4.0", + "maintainer": "Creative Commons", + "od_conformance": "approved", + "osd_conformance": "not reviewed", + "status": "active", + "title": "Creative Commons Attribution
Share-Alike 4.0", + "url": "https://creativecommons.org/licenses/by-sa/4.0/" + }, + { + "domain_content": true, + "domain_data": false, + "domain_software": false, + "family": "", + "id": "GFDL-1.3-no-cover-texts-no-invariant-sections", + "maintainer": "Free Software Foundation", + "od_conformance": "approved", + "osd_conformance": "not reviewed", + "status": "active", + "title": "GNU Free Documentation License 1.3 with no cover texts and no invariant sections", + "url": "https://opendefinition.org/licenses/gfdl" + }, + { + "domain_content": true, + "domain_data": false, + "domain_software": false, + "family": "", + "id": "other-open", + "is_generic": true, + "maintainer": "", + "od_conformance": "approved", + "osd_conformance": "not reviewed", + "status": "active", + "title": "Other (Open)", + "url": "" + }, + { + "domain_content": true, + "domain_data": false, + "domain_software": false, + "family": "", + "id": "other-pd", + "is_generic": true, + "maintainer": "", + "od_conformance": "approved", + "osd_conformance": "not reviewed", + "status": "active", + "title": "Other (Public Domain)", + "url": "" + }, + { + "domain_content": true, + "domain_data": false, + "domain_software": false, + "family": "", + "id": "other-at", + "is_generic": true, + "maintainer": "", + "od_conformance": "approved", + "osd_conformance": "not reviewed", + "status": "active", + "title": "Other (Attribution)", + "url": "" + }, + { + "domain_content": true, + "domain_data": true, + "domain_software": true, + "family": "", + "id": "OGL-UK-2.0", + "is_generic": false, + "maintainer": "UK Government", + "od_conformance": "approved", + "osd_conformance": "not reviewed", + "status": "active", + "title": "Open Government Licence 2.0 (United Kingdom)", + "url": "https://www.nationalarchives.gov.uk/doc/open-government-licence/version/2/" + }, + { + "domain_content": true, + "domain_data": true, + "domain_software": false, + "family": "Creative Commons", + "id": "CC-BY-NC-4.0", + "maintainer": 
"Creative Commons", + "od_conformance": "rejected", + "osd_conformance": "not reviewed", + "status": "active", + "title": "Creative Commons Attribution-NonCommercial 4.0", + "url": "https://creativecommons.org/licenses/by-nc/4.0/" + }, + { + "domain_content": false, + "domain_data": false, + "domain_software": false, + "family": "", + "id": "other-nc", + "is_generic": true, + "maintainer": "", + "od_conformance": "not reviewed", + "osd_conformance": "not reviewed", + "status": "active", + "title": "Other (Non-Commercial)", + "url": "" + }, + { + "domain_content": false, + "domain_data": false, + "domain_software": false, + "family": "", + "id": "other-closed", + "is_generic": true, + "maintainer": "", + "od_conformance": "not reviewed", + "osd_conformance": "not reviewed", + "status": "active", + "title": "Other (Not Open)", + "url": "" + } +] \ No newline at end of file