Skip to content

Commit

Permalink
Merge pull request #702 from crim-ca/parsing-metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
fmigneault authored Sep 12, 2024
2 parents 9788880 + b76cfd1 commit b41c211
Show file tree
Hide file tree
Showing 7 changed files with 275 additions and 2 deletions.
3 changes: 3 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ Changes:
and a sample `crim-ca/ncml2stac <https://github.com/crim-ca/ncml2stac/tree/main#ncml-to-stac>`_ repository
making use of it with the `Weaver` `CLI` to generate a deployed `OGC API - Processes` definition
(fixes `#63 <https://github.com/crim-ca/weaver/issues/63>`_).
- Add parsing of additional `schema.org` metadata defined in a `CWL` document to convert it into process fields
(fixes `#463 <https://github.com/crim-ca/weaver/issues/463>`_).
- Add more metadata mapping details in documentation (fixes `#613 <https://github.com/crim-ca/weaver/issues/613>`_).

Fixes:
------
Expand Down
9 changes: 9 additions & 0 deletions docs/source/package.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1307,11 +1307,20 @@ Below is a list of compatible elements.
+-----------------------------------------+----------------------------------------------------------+
| ``metadata`` | ``$schemas``/``$namespace`` |
| (using ``title`` and ``href`` fields) | (using namespace name and HTTP references) |
| | ``s:author`` |
| | ``s:citation`` |
| | ``s:codeRepository`` |
| | ``s:contributor`` |
| | ``s:dateCreated`` |
| | ``s:license`` |
| | ``s:releaseNotes`` [#cwl_schemaorg]_ |
+-----------------------------------------+----------------------------------------------------------+
| ``title`` | ``label`` |
+-----------------------------------------+----------------------------------------------------------+
| ``abstract``/``description`` | ``doc`` |
+-----------------------------------------+----------------------------------------------------------+
| ``version`` | ``s:version``/``s:softwareVersion`` [#cwl_schemaorg]_ |
+-----------------------------------------+----------------------------------------------------------+

.. rubric:: Footnotes

Expand Down
184 changes: 184 additions & 0 deletions tests/processes/test_wps_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
WpsPackage,
_load_package_content,
_update_package_compatibility,
_update_package_metadata,
format_extension_validator,
get_application_requirement,
mask_process_inputs
Expand Down Expand Up @@ -841,3 +842,186 @@ def test_mask_process_inputs(inputs, expect):
def test_format_extension_validator_basic(data_input, mode, expect):
# type: (Any, int, bool) -> None
assert format_extension_validator(data_input, mode) == expect


@pytest.mark.parametrize("original, expected", [
    (
        # Test author metadata with empty wps_package
        # (the 's:Person' class becomes a schema.org '$schema' reference and 's:' prefixes are dropped from keys)
        {
            "cwl_package_package": {
                "s:author": [
                    {"class": "s:Person", "s:name": "John Doe", "s:affiliation": "Example Inc."}
                ],
            },
            "wps_package_metadata": {}
        },
        {
            "abstract": "",
            "title": "",
            "metadata": [
                {
                    "role": "author",
                    "value": {
                        "$schema": "https://schema.org/Person",
                        "name": "John Doe",
                        "affiliation": "Example Inc."
                    }
                }
            ]
        }
    ),
    (
        # Test codeRepository
        # (a URL string is expected to become a link entry with 'rel'/'href', reported as 'text/html')
        {
            "cwl_package_package": {
                "s:codeRepository": "https://gitlab.com/",
            },
            "wps_package_metadata": {}
        },
        {
            "abstract": "",
            "title": "",
            "metadata": [
                {
                    "type": "text/html",
                    "rel": "codeRepository",
                    "href": "https://gitlab.com/"
                }
            ]
        }
    ),
    (
        # Test Version with existing metadata
        # ('s:version' is mapped to 'version' and the pre-existing 'metadata' entries are left as is)
        {
            "cwl_package_package": {
                "s:version": "1.0"
            },
            "wps_package_metadata": {
                "metadata": [
                    {
                        "type": "text/html",
                        "rel": "codeRepository",
                        "href": "https://gitlab.com/"
                    }
                ]
            }
        },
        {
            "abstract": "",
            "title": "",
            "version": "1.0",
            "metadata": [
                {
                    "type": "text/html",
                    "rel": "codeRepository",
                    "href": "https://gitlab.com/"
                },
            ],
        }
    ),
    (
        # Test softwareVersion
        # ('s:softwareVersion' is also mapped to 'version', and no 'metadata' key is expected in the result)
        {
            "cwl_package_package": {
                "s:softwareVersion": "1.0.0"
            },
            "wps_package_metadata": {}
        },
        {
            "abstract": "",
            "title": "",
            "version": "1.0.0"
        }
    ),
    (
        # Test contributor
        # (same conversion as for the author, but reported with the 'contributor' role)
        {
            "cwl_package_package": {
                "s:contributor": [
                    {"class": "s:Person", "s:name": "John Doe", "s:affiliation": "Example Inc."}
                ],
            },
            "wps_package_metadata": {}
        },
        {
            "abstract": "",
            "title": "",
            "metadata": [
                {
                    "role": "contributor",
                    "value": {
                        "$schema": "https://schema.org/Person",
                        "name": "John Doe",
                        "affiliation": "Example Inc."
                    }
                }
            ]
        }
    ),
    (
        # Test citation
        # (a URL string is expected to become a link entry, reported here as 'text/plain')
        {
            "cwl_package_package": {
                "s:citation": "https://dx.doi.org/10.6084/m9.figshare.3115156.v2"
            },
            "wps_package_metadata": {}
        },
        {
            "abstract": "",
            "title": "",
            "metadata": [
                {
                    "type": "text/plain",
                    "rel": "citation",
                    "href": "https://dx.doi.org/10.6084/m9.figshare.3115156.v2"
                },
            ],
        }
    ),
    (
        # Test dateCreated with existing metadata
        # (the new entry is expected to be appended after the pre-existing citation link)
        {
            "cwl_package_package": {
                "s:dateCreated": [
                    {"class": "s:DateTime", "s:dateCreated": "2016-12-13"}
                ],
            },
            "wps_package_metadata": {
                "abstract": "",
                "title": "",
                "metadata": [
                    {
                        "type": "text/plain",
                        "rel": "citation",
                        "href": "https://dx.doi.org/10.6084/m9.figshare.3115156.v2"
                    },
                ],
            }
        },
        {
            "abstract": "",
            "title": "",
            "metadata": [
                {
                    "type": "text/plain",
                    "rel": "citation",
                    "href": "https://dx.doi.org/10.6084/m9.figshare.3115156.v2"
                },
                {
                    "role": "dateCreated",
                    "value": {
                        "$schema": "https://schema.org/DateTime",
                        "dateCreated": "2016-12-13",
                    }
                }
            ]
        }
    ),
])
def test_process_metadata(original, expected):
    # type: (CWL, CWL) -> None
    """
    Validates the conversion of ``schema.org`` metadata of a :term:`CWL` package into process metadata fields.

    Each ``original`` case provides the :term:`CWL` definition under ``cwl_package_package`` and the initial
    process metadata under ``wps_package_metadata``. The process metadata is updated in place by the tested
    function and is then compared against the ``expected`` result.
    """
    cwl_package_package = original["cwl_package_package"]
    wps_package_metadata = original["wps_package_metadata"]
    # no return value is used, the process metadata dictionary itself is modified by the call
    _update_package_metadata(wps_package_metadata, cwl_package_package)
    # the updated process metadata must match the expected definition exactly
    assert wps_package_metadata == expected
2 changes: 1 addition & 1 deletion tests/smoke/docker-compose.smoke-test.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# single 'version' key (mapping keys must be unique), using the updated Compose file format
version: "3.8"
services:
mongodb:
image: mongo:5.0
Expand Down
59 changes: 59 additions & 0 deletions weaver/processes/wps_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,16 @@

PACKAGE_SCHEMA_CACHE = {} # type: Dict[str, Tuple[str, str]]

# CWL fields (schema.org properties using the 's:' namespace prefix) that are looked up
# in the package definition to generate entries of the process 'metadata' list.
SUPPORTED_METADATA_MAPPING = [
    "s:author", "s:citation", "s:codeRepository", "s:contributor",
    "s:dateCreated", "s:license", "s:releaseNotes",
]


def get_status_location_log_path(status_location, out_dir=None):
# type: (str, Optional[str]) -> str
Expand Down Expand Up @@ -789,6 +799,55 @@ def _update_package_metadata(wps_package_metadata, cwl_package_package):
set(wps_package_metadata.get("keywords", [])) | set(cwl_package_package.get("s:keywords", []))
)

# specific use case with a different mapping
# https://docs.ogc.org/bp/20-089r1.html#toc31
# Precedence: existing process 'version', then CWL 's:version', then CWL 's:softwareVersion'.
# The key is only set (always as a string) when a non-empty value is found.
version_value = (
    wps_package_metadata.get("version")
    or cwl_package_package.get("s:version")
    or cwl_package_package.get("s:softwareVersion")
)
if version_value:
    wps_package_metadata["version"] = str(version_value)


def _strip_schema_prefix(name):
    # type: (str) -> str
    """
    Removes only the leading ``s:`` namespace prefix from a name.

    Contrary to ``str.strip("s:")``, which removes any leading *and trailing* ``s`` or ``:`` characters,
    the end of the name is preserved (e.g.: ``s:releaseNotes`` becomes ``releaseNotes``, not ``releaseNote``).
    """
    return name[2:] if name.startswith("s:") else name


for metadata_mapping in SUPPORTED_METADATA_MAPPING:
    if metadata_mapping not in cwl_package_package:
        continue
    metadata = wps_package_metadata.get("metadata", [])
    metadata_name = _strip_schema_prefix(metadata_mapping)
    metadata_items = cwl_package_package[metadata_mapping]
    # a single string/object is processed the same way as a list containing only that item
    if not isinstance(metadata_items, (list, tuple)):
        metadata_items = [metadata_items]
    for metadata_item in metadata_items:
        if metadata_item is None:
            continue  # explicit null value, nothing to report
        if isinstance(metadata_item, dict):
            # object definition: reported as a JSON value with its schema.org class as '$schema' reference
            value = {}
            class_name = metadata_item.get("class")
            if class_name:
                value["$schema"] = f"https://schema.org/{_strip_schema_prefix(class_name)}"
            for key, val in metadata_item.items():
                if key.startswith("s:"):
                    value[_strip_schema_prefix(key)] = val
            metadata.append({
                "role": metadata_name,
                "value": value
            })
        elif isinstance(metadata_item, str) and urlparse(metadata_item).scheme != "":
            # URL reference: reported as a link with a media-type guessed from the file extension
            if metadata_mapping == "s:codeRepository":
                link_type = "text/html"
            else:
                link_type = get_content_type(os.path.splitext(metadata_item)[-1], default=ContentType.TEXT_PLAIN)
            metadata.append({
                "type": link_type,
                "rel": metadata_name,
                "href": metadata_item
            })
        else:
            # any other literal (e.g.: release notes text or a date): reported as plain text value
            metadata.append({
                "role": metadata_name,
                "value": str(metadata_item)
            })

    wps_package_metadata["metadata"] = metadata


def _patch_wps_process_description_url(reference, process_hint):
# type: (str, Optional[JSON]) -> str
Expand Down
9 changes: 9 additions & 0 deletions weaver/typedefs.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,15 @@ class CWL_SchemaName(Protocol):
"$namespaces": NotRequired[CWL_Namespace],
"$schemas": NotRequired[Dict[str, str]],
"$graph": NotRequired[CWL_Graph],
"s:author": NotRequired[List[Dict[str, JSON]]],
"s:citation": NotRequired[List[Union[str, JSON]]],
"s:codeRepository": NotRequired[Link],
"s:contributor": NotRequired[List[Dict[str, JSON]]],
"s:dateCreated": NotRequired[datetime],
"s:license": NotRequired[Optional[Union[Dict[str, JSON], Link]]],
"s:releaseNotes": NotRequired[Optional[Union[str, Link]]],
"s:version": NotRequired[Optional[Union[str, Number]]],
"s:softwareVersion": NotRequired[Optional[Union[str, Number]]],
}, total=False)
CWL_WorkflowStepPackage = TypedDict("CWL_WorkflowStepPackage", {
"id": str, # reference ID of the package
Expand Down
11 changes: 10 additions & 1 deletion weaver/wps_restapi/swagger_definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -858,14 +858,23 @@ class Link(LinkRelationship, LinkBase):
_schema_include_deserialize = False # only in OpenAPI otherwise too verbose


class MetadataValueField(OneOfKeywordSchema):
    # Alternatives accepted for a metadata 'value': either a plain text or a free-form JSON object.
    _one_of = [
        # plain text literal value
        ExtendedSchemaNode(String(), description="Plain text value of the information."),
        # literal JSON schema, permissive since it can be anything
        PermissiveMappingSchema(description="Flexible schema definition for the metadata value.")
    ]


class MetadataValue(NotKeywordSchema, ValueLanguage, MetadataBase):
    _not = [
        # make sure value metadata does not allow 'rel' and 'hreflang' reserved for link reference
        # explicitly refuse them such that when a href/rel link is provided, only link details are possible
        LinkRelationship(description="Field 'rel' must refer to a link reference with 'href'."),
        LinkLanguage(description="Field 'hreflang' must refer to a link reference with 'href'."),
    ]
    # single definition of 'value' (a previous plain-string definition would be overridden by this one anyway),
    # which accepts either a plain text or a JSON object as defined by 'MetadataValueField'
    value = MetadataValueField(description="Explicit schema definition of the metadata value.")


class MetadataLink(Link):
Expand Down

0 comments on commit b41c211

Please sign in to comment.