Skip to content

Commit

Permalink
workdir config + enforce existing cwl-io-formats (relates to #50) + l…
Browse files Browse the repository at this point in the history
…og pywps config loading
  • Loading branch information
fmigneault committed May 26, 2020
1 parent b8050a3 commit ec60adf
Show file tree
Hide file tree
Showing 9 changed files with 243 additions and 81 deletions.
16 changes: 16 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,22 @@ Changes
`Unreleased <https://github.com/crim-ca/weaver/tree/master>`_ (latest)
========================================================================

Changes:
--------

- Add ``weaver.wps_workdir`` configuration setting to define the location under which the underlying ``cwltool``
application should be executed.
- Use ``weaver.request_options`` for `WPS GetCapabilities` and `WPS Check Status` requests under the running job.
- Change default ``DOCKER_REPO`` value defined in ``Makefile`` to point to the reference mentioned in ``README.md``,
which is considered the official deployment location.

Fixes:
------

- Set ``get_cwl_file_format`` default argument ``must_exist=True`` instead of ``False`` to restore the original default
behaviour of the function. Since `CWL` usually doesn't need a ``File.format`` field when no corresponding
reference actually exists, this default also makes more sense.

`1.8.1 <https://github.com/crim-ca/weaver/tree/1.8.1>`_ (2020-05-22)
========================================================================

Expand Down
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ APP_ROOT := $(abspath $(lastword $(MAKEFILE_NAME))/..)
APP_NAME := $(shell basename $(APP_ROOT))
APP_VERSION ?= 1.8.1
APP_INI ?= $(APP_ROOT)/config/$(APP_NAME).ini
DOCKER_REPO ?= docker-registry.crim.ca/ogc/weaver
DOCKER_REPO ?= pavics/weaver
#DOCKER_REPO ?= docker-registry.crim.ca/ogc/weaver

# guess OS (Linux, Darwin,...)
OS_NAME := $(shell uname -s 2>/dev/null || echo "unknown")
Expand Down
105 changes: 105 additions & 0 deletions tests/functional/test_wps_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,111 @@ def test_literal_io_from_package_and_offering(self):
# assert pkg["outputs"][1]["label"] == "Additional detail only within WPS output", \
# "WPS I/O title should be converted to CWL label of corresponding I/O from additional details"

def test_complex_io_format_references(self):
    """
    Verify how `WPS` I/O formats are reflected as `CWL` ``format`` references once a process is deployed.

    A `MIME-type` that is accepted as a valid `WPS` format, but that has no *real* `IANA/EDAM` reference usable
    by `CWL`, must be preserved in the `WPS` description and left out of the `CWL` package to avoid a validation
    error. A `MIME-type` that can be resolved for both `WPS` and `CWL` is checked as well, to make sure `CWL`
    formats are dropped only when necessary.
    """
    known_ns, known_fmt = get_cwl_file_format(CONTENT_TYPE_APP_JSON, must_exist=True)
    assert "iana" in known_ns  # just to make sure
    unknown_ct = "x-ogc-dods"  # OpenDAP, still doesn't exist at moment of test creation
    unknown_ns, _ = get_cwl_file_format(unknown_ct, must_exist=False)
    assert "iana" in unknown_ns

    # WPS side of the deployment payload.
    # NOTE:
    #   Outputs are not described here since CWL cannot hold an array of formats for an output,
    #   so there isn't much to compare against from the WPS list.
    wps_inputs = [
        {
            "id": "wps_only_format_exists",
            "formats": [{"mimeType": CONTENT_TYPE_APP_JSON, "default": True}],
        },
        {
            "id": "wps_only_format_not_exists",
            "formats": [{"mimeType": unknown_ct, "default": True}],
        },
        {
            "id": "wps_only_format_both",
            "formats": [
                {"mimeType": CONTENT_TYPE_APP_JSON},
                {"mimeType": unknown_ct, "default": True},
            ],
        },
    ]

    # CWL side of the deployment payload.
    # Only the minimal information needed to match IDs is given, in order to check that formats are added
    # only when CWL can resolve the references.
    # FIXME: no format is back-propagated from WPS format to CWL at the moment
    #   (https://github.com/crim-ca/weaver/issues/50)
    cwl_inputs = {
        "wps_only_format_exists": "File",
        "wps_only_format_not_exists": "File",
        "wps_only_format_both": "File",
        "cwl_only_format_exists": {"type": "File", "format": known_fmt},
        # non-existing schema references should not be provided directly in CWL
        # since these would enforce raising the validation error directly...
        # "cwl_only_format_not_exists": {"type": "File", "format": ct_not_exists}
    }
    cwl_package = {
        "cwlVersion": "v1.0",
        "class": "CommandLineTool",
        "inputs": cwl_inputs,
        "outputs": {"dont_care": "File"},
        "$namespaces": dict(known_ns.items()),
    }

    body = {
        "processDescription": {
            "process": {
                "id": self._testMethodName,
                "inputs": wps_inputs,
            },
        },
        "deploymentProfileName": "http://www.opengis.net/profiles/eoc/wpsApplication",
        "executionUnit": [{"unit": cwl_package}],
    }
    desc, pkg = self.deploy_process(body)

    # Input 0: format given on the WPS side only, with a resolvable reference.
    wps_in = desc["process"]["inputs"][0]
    assert wps_in["id"] == "wps_only_format_exists"
    assert len(wps_in["formats"]) == 1
    assert wps_in["formats"][0]["mimeType"] == CONTENT_TYPE_APP_JSON
    cwl_in = pkg["inputs"][0]
    assert cwl_in["id"] == "wps_only_format_exists"
    assert cwl_in["type"] == "File"
    # FIXME: back-propagate WPS format to CWL without format specified
    #   (https://github.com/crim-ca/weaver/issues/50)
    # assert pkg["inputs"][0]["format"] == type_json

    # Input 1: format given on the WPS side only, without any resolvable reference.
    wps_in = desc["process"]["inputs"][1]
    assert wps_in["id"] == "wps_only_format_not_exists"
    assert len(wps_in["formats"]) == 1
    assert wps_in["formats"][0]["mimeType"] == unknown_ct
    cwl_in = pkg["inputs"][1]
    assert cwl_in["id"] == "wps_only_format_not_exists"
    assert cwl_in["type"] == "File"
    assert "format" not in cwl_in, "Non-existing CWL format reference should have been dropped."

    # Input 2: both a resolvable and a non-resolvable format given on the WPS side only.
    wps_in = desc["process"]["inputs"][2]
    assert wps_in["id"] == "wps_only_format_both"
    assert len(wps_in["formats"]) == 2
    assert wps_in["formats"][0]["mimeType"] == CONTENT_TYPE_APP_JSON
    assert wps_in["formats"][1]["mimeType"] == unknown_ct
    cwl_in = pkg["inputs"][2]
    assert cwl_in["id"] == "wps_only_format_both"
    assert cwl_in["type"] == "File"
    # FIXME: for now we don't even back-propagate, but if we did, must be none because one is unknown reference
    #   (https://github.com/crim-ca/weaver/issues/50)
    assert "format" not in cwl_in, "Any non-existing CWL format reference should drop all entries."

    # Input 3: resolvable format given on the CWL side only.
    wps_in = desc["process"]["inputs"][3]
    assert wps_in["id"] == "cwl_only_format_exists"
    assert len(wps_in["formats"]) == 1
    assert wps_in["formats"][0]["mimeType"] == CONTENT_TYPE_APP_JSON
    cwl_in = pkg["inputs"][3]
    assert cwl_in["id"] == "cwl_only_format_exists"
    assert cwl_in["type"] == "File"
    assert cwl_in["format"] == known_fmt

def test_complex_io_with_multiple_formats_and_defaults(self):
"""
Test validates that different format types are set on different input variations simultaneously:
Expand Down
13 changes: 8 additions & 5 deletions weaver/formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,28 +85,31 @@ def get_extension(mime_type):
FORMAT_NAMESPACES = frozenset([IANA_NAMESPACE, EDAM_NAMESPACE])


def get_cwl_file_format(mime_type, make_reference=False, must_exist=False):
def get_cwl_file_format(mime_type, make_reference=False, must_exist=True):
# type: (AnyStr, bool, bool) -> Union[Tuple[Union[JSON, None], Union[AnyStr, None]], Union[AnyStr, None]]
"""
Obtains the corresponding `IANA`/`EDAM` ``format`` value to be applied under a `CWL` I/O ``File`` from
the ``mime_type`` (`Content-Type` header) using the first matched one.
If ``make_reference=False``:
- If ``make_reference=False``:
- If there is a match, returns ``tuple({<namespace-name: namespace-url>}, <format>)``:
1) corresponding namespace mapping to be applied under ``$namespaces`` in the `CWL`.
2) value of ``format`` adjusted according to the namespace to be applied to ``File`` in the `CWL`.
- If there is no match but ``must_exist=False``:
returns a literal and non-existing definition as ``tuple({"iana": <iana-url>}, <format>)``
- Otherwise, returns ``(None, None)``
If ``make_reference=True``:
- If ``make_reference=True``:
- If there is a match, returns the explicit format reference as ``<namespace-url>/<format>``.
- If there is no match but ``must_exist=False``, returns the literal reference as ``<iana-url>/<format>``.
- Otherwise, returns a single ``None``.
Note:
In situations where ``must_exist=False`` and the default non-existing namespace is returned, the `CWL`
behaviour is to evaluate corresponding ``format`` for literal matching strings.
In situations where ``must_exist=False`` is used and the namespace and/or full format URL cannot be
resolved to an existing reference, `CWL` will raise a validation error as it cannot confirm the ``format``.
You must therefore make sure that the returned reference really exists when using ``must_exist=False`` before
providing it to the `CWL` I/O definition. This parameter should be used only for literal string comparison or
pre-processing steps to evaluate formats.
"""
def _make_if_ref(_map, _key, _fmt):
return os.path.join(_map[_key], _fmt) if make_reference else (_map, "{}:{}".format(_key, _fmt))
Expand Down
Loading

0 comments on commit ec60adf

Please sign in to comment.