diff --git a/CHANGES.rst b/CHANGES.rst index 89241241c..5b7ce4aa7 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -30,6 +30,9 @@ Changes: and a sample `crim-ca/ncml2stac <https://github.com/crim-ca/ncml2stac>`_ repository making use of it with the `Weaver` `CLI` to generate a deployed `OGC API - Processes` definition (fixes `#63 <https://github.com/crim-ca/weaver/issues/63>`_). +- Add parsing of additional metadata from schema.org in CWL document to convert into process fields + (fixes `#463 <https://github.com/crim-ca/weaver/issues/463>`_). +- Add more metadata mapping details in documentation (fixes `#613 <https://github.com/crim-ca/weaver/issues/613>`_). Fixes: ------ diff --git a/docs/source/package.rst b/docs/source/package.rst index 6bc46b9c6..d712c3210 100644 --- a/docs/source/package.rst +++ b/docs/source/package.rst @@ -1307,11 +1307,20 @@ Below is a list of compatible elements. +-----------------------------------------+----------------------------------------------------------+ | ``metadata`` | ``$schemas``/``$namespace`` | | (using ``title`` and ``href`` fields) | (using namespace name and HTTP references) | +| | ``s:author`` | +| | ``s:citation`` | +| | ``s:codeRepository`` | +| | ``s:contributor`` | +| | ``s:dateCreated`` | +| | ``s:license`` | +| | ``s:releaseNotes`` [#cwl_schemaorg]_ | +-----------------------------------------+----------------------------------------------------------+ | ``title`` | ``label`` | +-----------------------------------------+----------------------------------------------------------+ | ``abstract``/``description`` | ``doc`` | +-----------------------------------------+----------------------------------------------------------+ +| ``version`` | ``s:version``/``s:softwareVersion`` [#cwl_schemaorg]_ | ++-----------------------------------------+----------------------------------------------------------+ .. 
rubric:: Footnotes
diff --git a/tests/processes/test_wps_package.py b/tests/processes/test_wps_package.py
index 975990485..cba546e41 100644
--- a/tests/processes/test_wps_package.py
+++ b/tests/processes/test_wps_package.py
@@ -48,6 +48,7 @@
     WpsPackage,
     _load_package_content,
     _update_package_compatibility,
+    _update_package_metadata,
     format_extension_validator,
     get_application_requirement,
     mask_process_inputs
@@ -841,3 +842,228 @@ def test_mask_process_inputs(inputs, expect):
 def test_format_extension_validator_basic(data_input, mode, expect):
     # type: (Any, int, bool) -> None
     assert format_extension_validator(data_input, mode) == expect
+
+
+@pytest.mark.parametrize("original, expected", [
+    (
+        # Test author metadata with empty wps_package
+        {
+            "cwl_package_package": {
+                "s:author": [
+                    {"class": "s:Person", "s:name": "John Doe", "s:affiliation": "Example Inc."}
+                ],
+            },
+            "wps_package_metadata": {}
+        },
+        {
+            "abstract": "",
+            "title": "",
+            "metadata": [
+                {
+                    "role": "author",
+                    "value": {
+                        "$schema": "https://schema.org/Person",
+                        "name": "John Doe",
+                        "affiliation": "Example Inc."
+                    }
+                }
+            ]
+        }
+    ),
+    (
+        # Test codeRepository
+        {
+            "cwl_package_package": {
+                "s:codeRepository": "https://gitlab.com/",
+            },
+            "wps_package_metadata": {}
+        },
+        {
+            "abstract": "",
+            "title": "",
+            "metadata": [
+                {
+                    "type": "text/html",
+                    "rel": "codeRepository",
+                    "href": "https://gitlab.com/"
+                }
+            ]
+        }
+    ),
+    (
+        # Test Version with existing metadata
+        {
+            "cwl_package_package": {
+                "s:version": "1.0"
+            },
+            "wps_package_metadata": {
+                "metadata": [
+                    {
+                        "type": "text/html",
+                        "rel": "codeRepository",
+                        "href": "https://gitlab.com/"
+                    }
+                ]
+            }
+        },
+        {
+            "abstract": "",
+            "title": "",
+            "version": "1.0",
+            "metadata": [
+                {
+                    "type": "text/html",
+                    "rel": "codeRepository",
+                    "href": "https://gitlab.com/"
+                },
+            ],
+        }
+    ),
+    (
+        # Test softwareVersion
+        {
+            "cwl_package_package": {
+                "s:softwareVersion": "1.0.0"
+            },
+            "wps_package_metadata": {}
+        },
+        {
+            "abstract": "",
+            "title": "",
+            "version": "1.0.0"
+        }
+    ),
+    (
+        # Test contributor
+        {
+            "cwl_package_package": {
+                "s:contributor": [
+                    {"class": "s:Person", "s:name": "John Doe", "s:affiliation": "Example Inc."}
+                ],
+            },
+            "wps_package_metadata": {}
+        },
+        {
+            "abstract": "",
+            "title": "",
+            "metadata": [
+                {
+                    "role": "contributor",
+                    "value": {
+                        "$schema": "https://schema.org/Person",
+                        "name": "John Doe",
+                        "affiliation": "Example Inc."
+                    }
+                }
+            ]
+        }
+    ),
+    (
+        # Test citation
+        {
+            "cwl_package_package": {
+                "s:citation": "https://dx.doi.org/10.6084/m9.figshare.3115156.v2"
+            },
+            "wps_package_metadata": {}
+        },
+        {
+            "abstract": "",
+            "title": "",
+            "metadata": [
+                {
+                    "type": "text/plain",
+                    "rel": "citation",
+                    "href": "https://dx.doi.org/10.6084/m9.figshare.3115156.v2"
+                },
+            ],
+        }
+    ),
+    (
+        # Test dateCreated with existing metadata
+        {
+            "cwl_package_package": {
+                "s:dateCreated": [
+                    {"class": "s:DateTime", "s:dateCreated": "2016-12-13"}
+                ],
+            },
+            "wps_package_metadata": {
+                "abstract": "",
+                "title": "",
+                "metadata": [
+                    {
+                        "type": "text/plain",
+                        "rel": "citation",
+                        "href": "https://dx.doi.org/10.6084/m9.figshare.3115156.v2"
+                    },
+                ],
+            }
+        },
+        {
+            "abstract": "",
+            "title": "",
+            "metadata": [
+                {
+                    "type": "text/plain",
+                    "rel": "citation",
+                    "href": "https://dx.doi.org/10.6084/m9.figshare.3115156.v2"
+                },
+                {
+                    "role": "dateCreated",
+                    "value": {
+                        "$schema": "https://schema.org/DateTime",
+                        "dateCreated": "2016-12-13",
+                    }
+                }
+            ]
+        }
+    ),
+    (
+        # Test releaseNotes, for which the trailing 's' of the name must be preserved
+        {
+            "cwl_package_package": {
+                "s:releaseNotes": "https://example.com/release-notes.txt"
+            },
+            "wps_package_metadata": {}
+        },
+        {
+            "abstract": "",
+            "title": "",
+            "metadata": [
+                {
+                    "type": "text/plain",
+                    "rel": "releaseNotes",
+                    "href": "https://example.com/release-notes.txt"
+                },
+            ],
+        }
+    ),
+    (
+        # Test license provided as plain text rather than a reference or an object
+        {
+            "cwl_package_package": {
+                "s:license": "MIT"
+            },
+            "wps_package_metadata": {}
+        },
+        {
+            "abstract": "",
+            "title": "",
+            "metadata": [
+                {
+                    "role": "license",
+                    "value": "MIT"
+                }
+            ]
+        }
+    ),
+])
+def test_process_metadata(original, expected):
+    # type: (CWL, CWL) -> None
+    """
+    Validate that schema.org metadata of the CWL package is converted into the corresponding process fields.
+    """
+    cwl_package_package = original["cwl_package_package"]
+    wps_package_metadata = original["wps_package_metadata"]
+    # the process metadata is updated in-place by the operation
+    _update_package_metadata(wps_package_metadata, cwl_package_package)
+    assert wps_package_metadata == expected
diff --git a/tests/smoke/docker-compose.smoke-test.yml b/tests/smoke/docker-compose.smoke-test.yml
index 36e6e541f..cb770cc0a 100644
--- a/tests/smoke/docker-compose.smoke-test.yml
+++ b/tests/smoke/docker-compose.smoke-test.yml
@@ -1,4 +1,4 @@
-version: "3.4"
+version: "3.8"
 services:
   mongodb:
     image: mongo:5.0
diff --git a/weaver/processes/wps_package.py b/weaver/processes/wps_package.py
index 868d88491..4d52acc38 100644
--- a/weaver/processes/wps_package.py
+++ b/weaver/processes/wps_package.py
@@ -227,6 +227,16 @@
 
 PACKAGE_SCHEMA_CACHE = {}  # type: Dict[str, Tuple[str, str]]
 
+SUPPORTED_METADATA_MAPPING = [
+    "s:author",
+    "s:citation",
+    "s:codeRepository",
+    "s:contributor",
+    "s:dateCreated",
+    "s:license",
+    "s:releaseNotes",
+]
+
 
 def get_status_location_log_path(status_location, out_dir=None):
     # type: (str, Optional[str]) -> str
@@ -789,6 +799,52 @@ def _update_package_metadata(wps_package_metadata, cwl_package_package):
             set(wps_package_metadata.get("keywords", [])) | set(cwl_package_package.get("s:keywords", []))
         )
 
+    # specific use case with a different mapping
+    # https://docs.ogc.org/bp/20-089r1.html#toc31
+    # a version already defined by the process takes precedence over the ones provided by the CWL metadata
+    version_value = (
+        wps_package_metadata.get("version")
+        or cwl_package_package.get("s:version")
+        or cwl_package_package.get("s:softwareVersion")
+    )
+    # Only set the key if version_value is not empty or null
+    if version_value:
+        wps_package_metadata["version"] = str(version_value)
+
+    for metadata_mapping in SUPPORTED_METADATA_MAPPING:
+        if metadata_mapping in cwl_package_package:
+            metadata = wps_package_metadata.get("metadata", [])
+            # remove only the leading namespace, since 'str.strip' removes a set of characters from both
+            # ends of the string and would corrupt names (e.g.: 's:releaseNotes' becoming 'releaseNote')
+            metadata_name = metadata_mapping.split(":", 1)[-1]
+            metadata_items = cwl_package_package[metadata_mapping]
+            # a single value (URL, plain text or object) is handled the same way as a listing of them
+            if not isinstance(metadata_items, (list, tuple)):
+                metadata_items = [metadata_items]
+            for item in metadata_items:
+                if item is None or item == "":
+                    continue
+                if isinstance(item, str) and urlparse(item).scheme != "":
+                    if metadata_mapping == "s:codeRepository":
+                        ctype = "text/html"
+                    else:
+                        ctype = get_content_type(os.path.splitext(item)[-1], default=ContentType.TEXT_PLAIN)
+                    metadata.append({"type": ctype, "rel": metadata_name, "href": item})
+                elif isinstance(item, dict):
+                    value = {}
+                    if item.get("class"):
+                        class_name = str(item["class"]).split(":", 1)[-1]
+                        value["$schema"] = f"https://schema.org/{class_name}"
+                    for key, val in item.items():
+                        if key.startswith("s:"):
+                            value[key[2:]] = val
+                    metadata.append({"role": metadata_name, "value": value})
+                else:
+                    # literal value that is not a reference (e.g.: release notes text, license name, date)
+                    metadata.append({"role": metadata_name, "value": str(item)})
+
wps_package_metadata["metadata"] = metadata
+
 
 def _patch_wps_process_description_url(reference, process_hint):
     # type: (str, Optional[JSON]) -> str
diff --git a/weaver/typedefs.py b/weaver/typedefs.py
index 448ab3e25..ae77096d5 100644
--- a/weaver/typedefs.py
+++ b/weaver/typedefs.py
@@ -242,6 +242,15 @@ class CWL_SchemaName(Protocol):
         "$namespaces": NotRequired[CWL_Namespace],
         "$schemas": NotRequired[Dict[str, str]],
         "$graph": NotRequired[CWL_Graph],
+        "s:author": NotRequired[List[Dict[str, JSON]]],
+        "s:citation": NotRequired[List[Union[str, JSON]]],
+        "s:codeRepository": NotRequired[Link],
+        "s:contributor": NotRequired[List[Dict[str, JSON]]],
+        "s:dateCreated": NotRequired[datetime],  # NOTE(review): tests use a list of objects, confirm type
+        "s:license": NotRequired[Optional[Union[Dict[str, JSON], Link]]],
+        "s:releaseNotes": NotRequired[Optional[Union[str, Link]]],
+        "s:version": NotRequired[Optional[Union[str, Number]]],
+        "s:softwareVersion": NotRequired[Optional[Union[str, Number]]],
     }, total=False)
     CWL_WorkflowStepPackage = TypedDict("CWL_WorkflowStepPackage", {
         "id": str,  # reference ID of the package
diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py
index b094a41ff..df4777b96 100644
--- a/weaver/wps_restapi/swagger_definitions.py
+++ b/weaver/wps_restapi/swagger_definitions.py
@@ -858,6 +858,15 @@ class Link(LinkRelationship, LinkBase):
     _schema_include_deserialize = False  # only in OpenAPI otherwise too verbose
 
 
+class MetadataValueField(OneOfKeywordSchema):
+    _one_of = [
+        # plain text value, provided directly as a literal string
+        ExtendedSchemaNode(String(), description="Plain text value of the information."),
+        # structured value (e.g.: object identified by a '$schema' field), permissive since it can be anything
+        PermissiveMappingSchema(description="Flexible schema definition for the metadata value.")
+    ]
+
+
 class MetadataValue(NotKeywordSchema, ValueLanguage, MetadataBase):
     _not = [
         # make sure value metadata does not allow 'rel' and 'hreflang' reserved for link reference
@@ -865,7 +874,7 @@ class MetadataValue(NotKeywordSchema, ValueLanguage, MetadataBase):
         LinkRelationship(description="Field 'rel' must refer to a link reference with 'href'."),
         LinkLanguage(description="Field 'hreflang' must refer to a link reference with 'href'."),
     ]
-    value = ExtendedSchemaNode(String(), description="Plain text value of the information.")
+    value = MetadataValueField(description="Explicit schema definition of the metadata value.")
 
 
 class MetadataLink(Link):