Skip to content

Commit

Permalink
Add new harvest dcat daera_core profile (#15)
Browse files Browse the repository at this point in the history
* [setup.py][dcat.py] Adds code for profile DaeraCoreProfile

* [dcat.py] Fixes typo
  • Loading branch information
MuhammadIsmailShahzad authored Jul 1, 2020
1 parent a875da9 commit 6ef09e0
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 0 deletions.
63 changes: 63 additions & 0 deletions ckanext/opendatani/dcat.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,69 @@ def parse_dataset(self, dataset_dict, dataset_ref):

return dataset_dict

class DaeraCoreProfile(RDFProfile):
    """DCAT harvest profile applying DAERA defaults to harvested datasets.

    Besides filling in fixed metadata defaults, it can optionally send a
    HEAD request to downloadable ArcGIS resources so that the server starts
    processing the files.
    """

    def parse_dataset(self, dataset_dict, dataset_ref):
        """Fill DAERA defaults into ``dataset_dict`` and flag its resources.

        The fixed fields (frequency, topic category, lineage, contact
        details and licence) are always set, and the ``contact_name`` /
        ``contact_email`` entries are passed to ``_remove_extra``.

        When the ``ckanext.opendatani.harvest.ping_arcgis_urls`` config
        option is truthy, every resource additionally:

        * has its format ``'OGC WMS'`` renamed to ``'WMS'``;
        * receives a boolean ``requested`` key, which is ``True`` when the
          URL was already flagged on the currently stored dataset, or when
          a HEAD request to a geojson/kml/zip/csv resource succeeded.

        :param dataset_dict: dataset dict being built by the harvester;
            it is modified in place.
        :param dataset_ref: reference to the dataset node (unused here).
        :returns: the same ``dataset_dict`` instance.
        """
        # TODO: if there is more than one source with different defaults,
        # modify accordingly
        dataset_dict['frequency'] = 'notPlanned'
        dataset_dict['topic_category'] = 'location'
        dataset_dict['lineage'] = '-'
        dataset_dict['contact_name'] = 'DAERA Open Data Enquiries'
        # NOTE(review): this value looks like an e-mail placeholder rather
        # than a real address -- confirm the intended contact address.
        dataset_dict['contact_email'] = '[email protected]'
        dataset_dict['license_id'] = 'uk-ogl'

        # The contact details now live in top-level fields; presumably
        # _remove_extra drops the duplicate entries from the extras list.
        _remove_extra('contact_name', dataset_dict)
        _remove_extra('contact_email', dataset_dict)

        # Ping the ArcGIS server so the processing of the files
        # starts
        identifier = None
        avoid = []

        if toolkit.asbool(
                config.get('ckanext.opendatani.harvest.ping_arcgis_urls')):

            # No early exit: if several 'identifier' extras exist, the
            # last non-empty one is used.
            for extra in dataset_dict.get('extras', []):
                if extra['key'] == 'identifier' and extra['value']:
                    identifier = extra['value']

            if identifier:
                # Look up the stored copy of this dataset so that URLs
                # which were already requested are not pinged again.
                query = toolkit.get_action('package_search')(
                    {}, {'q': 'guid:"{0}"'.format(identifier)})
                if query['count']:
                    current_dataset = query['results'][0]
                    for current_resource in current_dataset.get('resources',
                                                                []):
                        if ('requested' in current_resource and
                                toolkit.asbool(
                                    current_resource['requested'])):
                            avoid.append(current_resource['url'])

            # Formats for which a HEAD request is sent.
            file_formats = ('geojson', 'kml', 'zip', 'csv')

            for resource in dataset_dict.get('resources', []):
                if resource['format'] == 'OGC WMS':
                    resource['format'] = 'WMS'

                resource['requested'] = False

                if resource['url'] in avoid:
                    resource['requested'] = True
                elif resource['format'].lower() in file_formats:
                    try:
                        requests.head(resource['url'])
                    except Exception as e:
                        # Best effort: a failed ping must not abort the
                        # harvest, so the error is only logged and the
                        # resource stays flagged as not requested.
                        log.debug(
                            'Error requesting resource: {0}\n{1}'
                            .format(resource['url'], e))
                    else:
                        resource['requested'] = True
                        log.debug(
                            'Requested resource to start the processing: {0}'
                            .format(resource['url']))

        return dataset_dict


def _remove_extra(key, dataset_dict):
dataset_dict['extras'][:] = [e
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@
causeway_profile=ckanext.opendatani.dcat:CausewayProfile
midulster_profile=ckanext.opendatani.dcat:MidulsterProfile
esri_arcgis_profile=ckanext.opendatani.dcat:EsriArcGISProfile
daera_core_profile=ckanext.opendatani.dcat:DaeraCoreProfile
''',
)

0 comments on commit 6ef09e0

Please sign in to comment.