Skip to content

Commit

Permalink
feat: ckan harvester supports named schema
Browse files (browse the repository at this point in the history)
  • Loading branch information
smotornyuk committed Oct 10, 2024
1 parent b186952 commit 5183a76
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 11 deletions.
11 changes: 8 additions & 3 deletions ckanext/harvest_basket/harvesters/ckan_harvester.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import json
import logging
from urllib.parse import urljoin

import ckan.plugins.toolkit as tk
from ckan import model
Expand All @@ -9,6 +8,7 @@
from ckanext.harvest.harvesters.ckanharvester import SearchError

from ckanext.harvest_basket.harvesters.base_harvester import BasketBasicHarvester
from ckanext.transmute.utils import get_schema


log = logging.getLogger(__name__)
Expand All @@ -20,9 +20,14 @@ class CustomCKANHarvester(CKANHarvester, BasketBasicHarvester):
def import_stage(self, harvest_object):
package_dict = json.loads(harvest_object.content)
self._set_config(harvest_object.source.config)
self.transmute_data(package_dict, self.config.get("tsm_schema"))
harvest_object.content = json.dumps(package_dict)

schema = self.config.get("tsm_schema")
if not schema and (schema_name := self.config.get("tsm_named_schema")):
schema = get_schema(schema_name)

self.transmute_data(package_dict, schema)

harvest_object.content = json.dumps(package_dict)
super().import_stage(harvest_object)

def _search_for_datasets(self, remote_ckan_base_url, fq_terms=None):
Expand Down
3 changes: 0 additions & 3 deletions ckanext/harvest_basket/harvesters/csw.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@

import logging
from lxml import etree

from ckan import model
import ckan.plugins as p
from ckanext.spatial.harvesters import CSWHarvester
from ckanext.spatial.lib.csw_client import CswService, CswError, PropertyIsEqualTo
from ckanext.transmute.utils import get_schema
Expand Down
6 changes: 1 addition & 5 deletions ckanext/harvest_basket/harvesters/dcat.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

class BasketDcatJsonHarvester(DCATJSONHarvester, BasketBasicHarvester):
SRC_ID = "DCAT"
n = 0

def info(self):
return {
"name": "basket_dcat_json",
Expand All @@ -31,7 +31,3 @@ def modify_package_dict(self, package_dict, dcat_dict, harvest_object):
self.transmute_data(package_dict, schema)

return package_dict

def _get_guids_and_datasets(self, content):
for idx, item in enumerate(super()._get_guids_and_datasets(content)):
yield item

0 comments on commit 5183a76

Please sign in to comment.