From 9a4edcc16b48f84ef120e35a2281ebb39168d9d9 Mon Sep 17 00:00:00 2001 From: amercader Date: Mon, 6 May 2024 11:42:59 +0200 Subject: [PATCH 1/6] Consume command --- ckanext/dcat/cli.py | 75 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 65 insertions(+), 10 deletions(-) diff --git a/ckanext/dcat/cli.py b/ckanext/dcat/cli.py index ade76959..7dcf8105 100644 --- a/ckanext/dcat/cli.py +++ b/ckanext/dcat/cli.py @@ -1,25 +1,80 @@ # -*- coding: utf-8 -*- +import json import click + import ckan.plugins.toolkit as tk + import ckanext.dcat.utils as utils +from ckanext.dcat.processors import RDFParser, RDFSerializer, DEFAULT_RDF_PROFILES -@click.group() -def generate_static(): - """Generates static files containing all datasets. - """ +@click.group() +def dcat(): + """DCAT utilities for CKAN""" pass -@generate_static.command() -@click.argument('output', type=click.File(mode="w")) -def json(output): - """The generate command will generate a static file containing all of - the datasets in the catalog in JSON format. +@dcat.command() +@click.argument("output", type=click.File(mode="w")) +def generate_static(output): + """[Deprecated] Generate a static datasets file in JSON format + (requires the dcat_json_interface plugin) . 
""" utils.generate_static_json(output) +@dcat.command(context_settings={"show_default": True}) +@click.argument("input", type=click.File(mode="r")) +@click.option( + "-o", + "--output", + type=click.File(mode="w"), + default="-", + help="By default the command will output the result to stdin, " + "alternatively you can provide a file path with this option", +) +@click.option( + "-f", "--format", default="xml", help="Serialization format (eg ttl, jsonld)" +) +@click.option( + "-p", + "--profiles", + default=" ".join(DEFAULT_RDF_PROFILES), + help="RDF profiles to use", +) +@click.option( + "-P", "--pretty", default=False, help="Make the output more human readable" +) +@click.option( + "-m", "--compat_mode", default=False, help="Compatibility mode (deprecated)" +) +def consume(input, output, format, profiles, pretty, compat_mode): + """ + Parses DCAT RDF graphs into CKAN dataset JSON objects. + + The input serializations can be provided as a path to a file, e.g.: + + ckan dcat consume examples/dataset.ttl + + Or be read from stdin: + + ckan dcat consume - + """ + contents = input.read() + + if profiles: + profiles = profiles.split() + parser = RDFParser(profiles=profiles, compatibility_mode=compat_mode) + parser.parse(contents, _format=format) + + ckan_datasets = [d for d in parser.datasets()] + + indent = 4 if pretty else None + out = json.dumps(ckan_datasets, indent=indent) + + output.write(out) + + def get_commands(): - return [generate_static] + return [dcat] From a8e4e348aeb5655c6e3b23af32a5111282dc73fe Mon Sep 17 00:00:00 2001 From: amercader Date: Mon, 6 May 2024 12:35:17 +0200 Subject: [PATCH 2/6] Produce command --- ckanext/dcat/cli.py | 56 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/ckanext/dcat/cli.py b/ckanext/dcat/cli.py index 7dcf8105..b74279a3 100644 --- a/ckanext/dcat/cli.py +++ b/ckanext/dcat/cli.py @@ -44,10 +44,10 @@ def generate_static(output): help="RDF profiles to use", ) 
@click.option( - "-P", "--pretty", default=False, help="Make the output more human readable" + "-P", "--pretty", is_flag=True, help="Make the output more human readable" ) @click.option( - "-m", "--compat_mode", default=False, help="Compatibility mode (deprecated)" + "-m", "--compat_mode", is_flag=True, help="Compatibility mode (deprecated)" ) def consume(input, output, format, profiles, pretty, compat_mode): """ @@ -76,5 +76,57 @@ def consume(input, output, format, profiles, pretty, compat_mode): output.write(out) +@dcat.command(context_settings={"show_default": True}) +@click.argument("input", type=click.File(mode="r")) +@click.option( + "-o", + "--output", + type=click.File(mode="w"), + default="-", + help="By default the command will output the result to stdin, " + "alternatively you can provide a file path with this option", +) +@click.option( + "-f", "--format", default="xml", help="Serialization format (eg ttl, jsonld)" +) +@click.option( + "-p", + "--profiles", + default=" ".join(DEFAULT_RDF_PROFILES), + help="RDF profiles to use", +) +@click.option( + "-P", "--pretty", is_flag=True, help="Make the output more human readable" +) +@click.option( + "-m", "--compat_mode", is_flag=True, help="Compatibility mode (deprecated)" +) +def produce(input, output, format, profiles, pretty, compat_mode): + """ + Transforms CKAN dataset JSON objects into DCAT RDF serializations. 
+ + The input datasets can be provided as a path to a file, e.g.: + + ckan dcat consume examples/ckan_dataset.json + + Or be read from stdin: + + ckan dcat produce - + """ + contents = input.read() + + if profiles: + profiles = profiles.split() + serializer = RDFSerializer( + profiles=profiles, + compatibility_mode=compat_mode + ) + + dataset = json.loads(contents) + out = serializer.serialize_dataset(dataset, _format=format) + + output.write(out) + + def get_commands(): return [dcat] From ad7a2bdf1149149622bb5170f2b6635deaa187fa Mon Sep 17 00:00:00 2001 From: amercader Date: Mon, 6 May 2024 12:45:30 +0200 Subject: [PATCH 3/6] Remove old CLI from processors.py --- ckanext/dcat/processors.py | 56 -------------------------------------- 1 file changed, 56 deletions(-) diff --git a/ckanext/dcat/processors.py b/ckanext/dcat/processors.py index 864f734c..c8b8f5f5 100644 --- a/ckanext/dcat/processors.py +++ b/ckanext/dcat/processors.py @@ -394,59 +394,3 @@ def _get_from_extra(key): g.add((agent, predicate, _type(val))) return catalog_ref - - -if __name__ == '__main__': - - parser = argparse.ArgumentParser( - description='DCAT RDF - CKAN operations') - parser.add_argument('mode', - default='consume', - help=''' -Operation mode. -`consume` parses DCAT RDF graphs to CKAN dataset JSON objects. -`produce` serializes CKAN dataset JSON objects into DCAT RDF. - ''') - parser.add_argument('file', nargs='?', type=argparse.FileType('r'), - default=sys.stdin, - help='Input file. If omitted will read from stdin') - parser.add_argument('-f', '--format', - default='xml', - help='''Serialization format (as understood by rdflib) - eg: xml, n3 ... 
Defaults to \'xml\'.''') - parser.add_argument('-P', '--pretty', - action='store_true', - help='Make the output more human readable') - parser.add_argument('-p', '--profile', nargs='*', - action='store', - help='RDF Profiles to use, defaults to euro_dcat_ap_2') - parser.add_argument('-m', '--compat-mode', - action='store_true', - help='Enable compatibility mode') - - parser.add_argument('-s', '--subcatalogs', action='store_true', dest='subcatalogs', - default=False, - help="Enable subcatalogs handling (dct:hasPart support)") - args = parser.parse_args() - - contents = args.file.read() - - config.update({DCAT_EXPOSE_SUBCATALOGS: args.subcatalogs}) - - if args.mode == 'produce': - serializer = RDFSerializer(profiles=args.profile, - compatibility_mode=args.compat_mode) - - dataset = json.loads(contents) - out = serializer.serialize_dataset(dataset, _format=args.format) - print(out) - else: - parser = RDFParser(profiles=args.profile, - compatibility_mode=args.compat_mode) - - parser.parse(contents, _format=args.format) - - ckan_datasets = [d for d in parser.datasets()] - - indent = 4 if args.pretty else None - print(json.dumps(ckan_datasets, indent=indent)) From fe268e0df8b3509ff695e489bcc13ddf501e5558 Mon Sep 17 00:00:00 2001 From: amercader Date: Mon, 6 May 2024 14:59:03 +0200 Subject: [PATCH 4/6] Add new serialize_datasets method to Serializers It accepts a list of datasets instead of a single one --- ckanext/dcat/cli.py | 8 ++++---- ckanext/dcat/processors.py | 16 ++++++++++++++++ 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/ckanext/dcat/cli.py b/ckanext/dcat/cli.py index b74279a3..ed0801cf 100644 --- a/ckanext/dcat/cli.py +++ b/ckanext/dcat/cli.py @@ -95,9 +95,6 @@ def consume(input, output, format, profiles, pretty, compat_mode): default=" ".join(DEFAULT_RDF_PROFILES), help="RDF profiles to use", ) -@click.option( - "-P", "--pretty", is_flag=True, help="Make the output more human readable" -) @click.option( "-m", "--compat_mode", is_flag=True, 
help="Compatibility mode (deprecated)" ) @@ -123,7 +120,10 @@ def produce(input, output, format, profiles, pretty, compat_mode): ) dataset = json.loads(contents) - out = serializer.serialize_dataset(dataset, _format=format) + if isinstance(dataset, list): + out = serializer.serialize_datasets(dataset, _format=format) + else: + out = serializer.serialize_dataset(dataset, _format=format) output.write(out) diff --git a/ckanext/dcat/processors.py b/ckanext/dcat/processors.py index c8b8f5f5..e6093443 100644 --- a/ckanext/dcat/processors.py +++ b/ckanext/dcat/processors.py @@ -291,6 +291,22 @@ def serialize_dataset(self, dataset_dict, _format='xml'): return output + def serialize_datasets(self, dataset_dicts, _format='xml'): + ''' + Given a list of CKAN dataset dicts, returns an RDF serialization + + The serialization format can be defined using the `_format` parameter. + It must be one of the ones supported by RDFLib, defaults to `xml`. + + Returns a string with the serialized datasets + ''' + out = [] + for dataset_dict in dataset_dicts: + out.append(self.serialize_dataset(dataset_dict, _format)) + return '\n'.join(out) + + + def serialize_catalog(self, catalog_dict=None, dataset_dicts=None, _format='xml', pagination_info=None): ''' From e3148fcafc9d27f2ad8402d17a0f240b313c7acc Mon Sep 17 00:00:00 2001 From: amercader Date: Mon, 6 May 2024 15:21:11 +0200 Subject: [PATCH 5/6] Document CLI commands --- README.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/README.md b/README.md index aaec4f33..c79fc710 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,7 @@ It also offers other features related to Semantic Data like exposing the necessa - [XML DCAT harvester (deprecated)](#xml-dcat-harvester-deprecated) - [Translation of fields](#translation-of-fields) - [Structured Data and Google Dataset Search indexing](#structured-data-and-google-dataset-search-indexing) +- [CLI](#cli) - [Running the Tests](#running-the-tests) - [Releases](#releases) - 
[Acknowledgements](#acknowledgements) @@ -944,6 +945,25 @@ Example output of structured data in JSON-LD: +## CLI + +The `ckan dcat` command offers utilities to transform DCAT RDF Serializations into CKAN datasets (`ckan dcat consume`) and +vice versa (`ckan dcat produce`). In both cases the input can be provided as a path to a file: + + ckan dcat consume -f ttl examples/dataset.ttl + + ckan dcat produce -f jsonld examples/ckan_datasets.json + +or be read from stdin: + + ckan dcat consume - + +The latter form allows chaining commands for more complex metadata processing, e.g.: + + curl https://demo.ckan.org/api/action/package_search | jq .result.results | ckan dcat produce -f jsonld - + +For the full list of options check `ckan dcat consume --help` and `ckan dcat produce --help`. + ## Running the Tests To run the tests do: From 576e26c5252d515e6d850c109251dcfea62d0d72 Mon Sep 17 00:00:00 2001 From: amercader Date: Mon, 6 May 2024 15:44:16 +0200 Subject: [PATCH 6/6] Add CLI tests --- ckanext/dcat/cli.py | 2 +- ckanext/dcat/tests/test_cli.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 ckanext/dcat/tests/test_cli.py diff --git a/ckanext/dcat/cli.py b/ckanext/dcat/cli.py index ed0801cf..83d8b539 100644 --- a/ckanext/dcat/cli.py +++ b/ckanext/dcat/cli.py @@ -98,7 +98,7 @@ def consume(input, output, format, profiles, pretty, compat_mode): @click.option( "-m", "--compat_mode", is_flag=True, help="Compatibility mode (deprecated)" ) -def produce(input, output, format, profiles, pretty, compat_mode): +def produce(input, output, format, profiles, compat_mode): """ Transforms CKAN dataset JSON objects into DCAT RDF serializations. 
diff --git a/ckanext/dcat/tests/test_cli.py b/ckanext/dcat/tests/test_cli.py new file mode 100644 index 00000000..ecacdc37 --- /dev/null +++ b/ckanext/dcat/tests/test_cli.py @@ -0,0 +1,33 @@ +import json +import os + +from ckanext.dcat.cli import dcat as dcat_cli + + +def test_consume(cli): + + path = os.path.join( + os.path.dirname(__file__), "..", "..", "..", "examples", "dataset_afs.ttl" + ) + + result = cli.invoke(dcat_cli, ["consume", "-f", "ttl", path]) + assert result.exit_code == 0 + + assert json.loads(result.stdout)[0]["title"] == "A test dataset on your catalogue" + + +def test_produce(cli): + + path = os.path.join( + os.path.dirname(__file__), + "..", + "..", + "..", + "examples", + "full_ckan_dataset.json", + ) + + result = cli.invoke(dcat_cli, ["produce", "-f", "jsonld", path]) + assert result.exit_code == 0 + + assert json.loads(result.stdout)["@context"]["dcat"] == "http://www.w3.org/ns/dcat#"