Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/multi adaptor filter keys #260

Merged
merged 19 commits into from
Feb 5, 2025
Merged
105 changes: 89 additions & 16 deletions cads_adaptors/adaptors/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,77 @@

from cads_adaptors import AbstractCdsAdaptor, mapping
from cads_adaptors.adaptors import Request
from cads_adaptors.exceptions import MultiAdaptorNoDataError
from cads_adaptors.exceptions import CdsConfigurationError, MultiAdaptorNoDataError
from cads_adaptors.tools import adaptor_tools
from cads_adaptors.tools.general import ensure_list


class MultiAdaptor(AbstractCdsAdaptor):
@property
def extract_subrequest_kws(self) -> list[str]:
    """Names of the keyword parameters accepted by ``extract_subrequest``.

    The names are read from the signature of ``self.extract_subrequest``
    instead of being hard-coded, so this list follows the signature if it
    changes. Only parameters that declare a default value are returned;
    parameters without a default (including ``*args``/``**kwargs``
    collectors) are left out.
    """
    import inspect

    no_default = inspect.Parameter.empty
    signature = inspect.signature(self.extract_subrequest)
    # ``empty`` is a sentinel: compare by identity so that the default
    # value's own ``__eq__``/``__ne__`` is never invoked.
    return [
        name
        for name, param in signature.parameters.items()
        if param.default is not no_default
    ]

def get_extract_subrequest_kwargs(
    self, this_adaptor_config: dict[str, Any]
) -> dict[str, Any]:
    """Build the keyword arguments to pass to ``extract_subrequest``.

    Values come from two places: the ``extract_subrequest_kwargs`` entry
    of this adaptor's own config, and the keys set directly in
    ``this_adaptor_config``. Only names listed in
    ``self.extract_subrequest_kws`` are considered.

    When a name is present in both places the values are combined:

    * dicts are merged, with the sub-adaptor value winning on key clashes;
    * lists are concatenated, appending only the sub-adaptor items that
      are not already in the top-level list;
    * any other type is overridden by the sub-adaptor value.

    Args:
        this_adaptor_config: Configuration of a single sub-adaptor.

    Returns:
        Mapping of keyword name to the combined value. The config objects
        themselves are not mutated.

    Raises:
        CdsConfigurationError: If a name is set in both places and the
            sub-adaptor value is not an instance of the top-level
            value's type.
    """
    accepted_names = self.extract_subrequest_kws
    # ``or {}`` also covers an explicit null entry in the configuration.
    top_level = self.config.get("extract_subrequest_kwargs") or {}

    merged: dict[str, Any] = {
        name: top_level[name] for name in accepted_names if name in top_level
    }

    for name in accepted_names:
        if name not in this_adaptor_config:
            continue
        sub_value = this_adaptor_config[name]
        if name not in merged:
            merged[name] = sub_value
            continue

        top_value = merged[name]
        # Explicit check rather than ``assert``: assertions are removed
        # when Python runs with -O, which would let mismatched types
        # through silently.
        if not isinstance(sub_value, type(top_value)):
            raise CdsConfigurationError(
                f"Adaptor configuration error: extract_subrequest_kwargs: {name} "
                f"has been set in both the top-level adaptor.json and the sub-adaptor.json, "
                f"but they are not the same type. "
            )

        if isinstance(sub_value, dict):
            merged[name] = {**top_value, **sub_value}
        elif isinstance(sub_value, list):
            merged[name] = top_value + [
                item for item in sub_value if item not in top_value
            ]
        else:
            merged[name] = sub_value

    return merged

@staticmethod
def split_request(
def extract_subrequest(
full_request: Request, # User request
this_values: dict[str, Any], # key: [values] for the adaptor component
dont_split_keys: list[str] = ["area", "grid"],
filter_keys: None | list[str] = None,
required_keys: list[str] = [],
**config: Any,
) -> Request:
"""
Basic request splitter, splits based on whether the values are relevant to
Expand All @@ -23,10 +81,15 @@ def split_request(
"""
required_keys = ensure_list(required_keys)
this_request = {}
# loop over keys in the full_request
# Default filter_keys to all keys
if filter_keys is None:
filter_keys = list(full_request.keys())
for key, req_vals in full_request.items():
# If dont_split_key, then copy the key and values to the new request
if key in ensure_list(dont_split_keys):
# If not in filter_keys or is in dont_split_key, then copy the key and values to the new request
# filter_keys may make dont_split_keys redundant, but keep both for now
if key not in ensure_list(filter_keys) or key in ensure_list(
dont_split_keys
):
this_request[key] = req_vals
else:
# filter for values relevant to this_adaptor:
Expand Down Expand Up @@ -62,8 +125,11 @@ def split_adaptors(
)
this_values = adaptor_desc.get("values", {})

this_request = self.split_request(
request, this_values, **this_adaptor.config
extract_subrequest_kwargs = self.get_extract_subrequest_kwargs(
this_adaptor.config
)
this_request = self.extract_subrequest(
request, this_values, **extract_subrequest_kwargs
)
self.context.debug(
f"MultiAdaptor, {adaptor_tag}, this_request: {this_request}"
Expand Down Expand Up @@ -171,27 +237,34 @@ def retrieve_list_of_results(self, request: Request) -> list[str]:
# be useful to reduce the repetitive config in each sub-adaptor of adaptor.json

# self.mapped_requests contains the schema-checked, intersected and (top-level mapping) mapped request
self.context.info(f"MultiMarsCdsAdaptor, full_request: {self.mapped_requests}")
self.context.debug(
f"MultiMarsCdsAdaptor, mapped full request: {self.mapped_requests}"
)

# We now split the mapped_request into sub-adaptors
mapped_requests = []
for adaptor_tag, adaptor_desc in self.config["adaptors"].items():
this_adaptor = adaptor_tools.get_adaptor(adaptor_desc, self.form)
this_values = adaptor_desc.get("values", {})
extract_subrequest_kwargs = self.get_extract_subrequest_kwargs(
this_adaptor.config
)
for mapped_request_piece in self.mapped_requests:
this_request = self.split_request(
mapped_request_piece, this_values, **this_adaptor.config
this_request = self.extract_subrequest(
mapped_request_piece, this_values, **extract_subrequest_kwargs
)
self.context.info(
f"MultiMarsCdsAdaptor, {adaptor_tag}, this_request: {this_request}"
)

if len(this_request) > 0:
mapped_requests.append(
mapping.apply_mapping(this_request, this_adaptor.mapping)
)

self.context.info(f"MultiMarsCdsAdaptor, mapped_requests: {mapped_requests}")
self.context.debug(
f"MultiMarsCdsAdaptor, {adaptor_tag}, this_request: {this_request}"
)

self.context.debug(
f"MultiMarsCdsAdaptor, mapped and split requests: {mapped_requests}"
)
result = execute_mars(
mapped_requests,
context=self.context,
Expand Down
132 changes: 119 additions & 13 deletions tests/test_15_adaptor_multi.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os

import pytest
import requests

from cads_adaptors import AbstractAdaptor
Expand Down Expand Up @@ -44,47 +45,47 @@
}


def test_multi_adaptor_split_requests():
def test_multi_adaptor_extract_subrequests():
multi_adaptor = multi.MultiAdaptor(FORM, **ADAPTOR_CONFIG)

split_mean = multi_adaptor.split_request(
split_mean = multi_adaptor.extract_subrequest(
REQUEST, multi_adaptor.config["adaptors"]["mean"]["values"]
)
assert split_mean == ADAPTOR_CONFIG["adaptors"]["mean"]["values"]

split_max = multi_adaptor.split_request(
split_max = multi_adaptor.extract_subrequest(
REQUEST, multi_adaptor.config["adaptors"]["max"]["values"]
)
assert split_max == ADAPTOR_CONFIG["adaptors"]["max"]["values"]


def test_multi_adaptor_split_requests_required_keys():
def test_multi_adaptor_extract_subrequests_required_keys():
multi_adaptor = multi.MultiAdaptor(FORM, **ADAPTOR_CONFIG)

request = REQUEST.copy()
del request["level"]
split_mean_required_missing = multi_adaptor.split_request(
split_mean_required_missing = multi_adaptor.extract_subrequest(
request,
multi_adaptor.config["adaptors"]["mean"]["values"],
required_keys=["level"],
)
assert split_mean_required_missing == dict()

split_max_required_present = multi_adaptor.split_request(
split_max_required_present = multi_adaptor.extract_subrequest(
REQUEST,
multi_adaptor.config["adaptors"]["max"]["values"],
required_keys=["level"],
)
assert split_max_required_present == ADAPTOR_CONFIG["adaptors"]["max"]["values"]


def test_multi_adaptor_split_requests_dont_split_keys():
def test_multi_adaptor_extract_subrequests_dont_split_keys():
multi_adaptor = multi.MultiAdaptor(FORM, **ADAPTOR_CONFIG)

request = REQUEST.copy()
# dont_split_keys as list dtype
request["dont_split"] = [1, 2, 3, 4]
split_mean_dont_split_area = multi_adaptor.split_request(
split_mean_dont_split_area = multi_adaptor.extract_subrequest(
request,
multi_adaptor.config["adaptors"]["mean"]["values"],
dont_split_keys=["dont_split"],
Expand All @@ -93,7 +94,7 @@ def test_multi_adaptor_split_requests_dont_split_keys():

# dont_split_keys as integer dtype
request["dont_split"] = "1"
split_mean_dont_split = multi_adaptor.split_request(
split_mean_dont_split = multi_adaptor.extract_subrequest(
request,
multi_adaptor.config["adaptors"]["mean"]["values"],
dont_split_keys=["dont_split"],
Expand All @@ -102,7 +103,7 @@ def test_multi_adaptor_split_requests_dont_split_keys():

# dont_split_keys as integer dtype
request["dont_split"] = 1
split_mean_dont_split = multi_adaptor.split_request(
split_mean_dont_split = multi_adaptor.extract_subrequest(
request,
multi_adaptor.config["adaptors"]["mean"]["values"],
dont_split_keys=["dont_split"],
Expand All @@ -111,7 +112,7 @@ def test_multi_adaptor_split_requests_dont_split_keys():

# dont_split_keys as float dtype
request["dont_split"] = 1.0
split_mean_dont_split = multi_adaptor.split_request(
split_mean_dont_split = multi_adaptor.extract_subrequest(
request,
multi_adaptor.config["adaptors"]["mean"]["values"],
dont_split_keys=["dont_split"],
Expand All @@ -120,7 +121,7 @@ def test_multi_adaptor_split_requests_dont_split_keys():

# dont_split_keys as dict dtype
request["dont_split"] = {"a": 1}
split_mean_dont_split = multi_adaptor.split_request(
split_mean_dont_split = multi_adaptor.extract_subrequest(
request,
multi_adaptor.config["adaptors"]["mean"]["values"],
dont_split_keys=["dont_split"],
Expand All @@ -129,14 +130,119 @@ def test_multi_adaptor_split_requests_dont_split_keys():

# Area is dont_split as default
request["area"] = [1, 2, 3, 4]
split_max_split_area = multi_adaptor.split_request(
split_max_split_area = multi_adaptor.extract_subrequest(
request,
multi_adaptor.config["adaptors"]["max"]["values"],
)
assert "dont_split" not in split_max_split_area
assert "area" in split_max_split_area


def test_multi_adaptor_extract_subrequests_filter_keys():
    """Only keys named in ``filter_keys`` are filtered; by default all keys are."""
    adaptor = multi.MultiAdaptor(FORM, **ADAPTOR_CONFIG)

    user_request = {"a": ["a1", "a2"], "b": ["b1", "b2"]}
    sub_adaptor_values = {"a": ["a1"], "b": ["b2"]}

    # With filter_keys=["a"], "b" must be copied through unfiltered.
    only_a_filtered = adaptor.extract_subrequest(
        user_request, sub_adaptor_values, filter_keys=["a"]
    )
    assert only_a_filtered == {"a": ["a1"], "b": ["b1", "b2"]}

    # With no filter_keys given, every key is filtered against the values.
    everything_filtered = adaptor.extract_subrequest(user_request, sub_adaptor_values)
    assert everything_filtered == {"a": ["a1"], "b": ["b2"]}


# Form passed to MultiAdaptor in the get_extract_subrequest_kwargs test below.
EXTRACT_SR_KWARGS_FORM = {
"a": ["a1", "a2", "a3", "a4"],
"b": ["b1", "b2", "b3", "b4"],
"c": ["c1", "c2", "c3", "c4"],
"d": ["d1", "d2", "d3", "d4"],
}
# Multi-adaptor configuration that sets extract_subrequest kwargs both at the
# top level (shared) and directly on "adaptor1" (per sub-adaptor).
EXTRACT_SR_KWARGS_ADAPTOR_CONFIG = {
"entry_point": "MultiAdaptor",
"adaptors": {
"adaptor1": {
"entry_point": "cads_adaptors:DummyCdsAdaptor",
"values": {
"a": ["a1", "a2", "a3", "a4"],
"b": [
"b1",
"b2",
],
"c": ["c1", "c2", "c3"],
"d": ["d1", "d2", "d3", "d4"],
},
# We can also set extract_subrequest kwargs for each adaptor, but hopefully this is
# no longer required. For backwards compatibility they are not nested inside extract_subrequest_kwargs
"filter_keys": ["a"],
"dont_split_keys": ["e"],
"required_keys": [
"a",
"b",
"c",
], # Includes check that double definition does not break
},
"adaptor2": {
"entry_point": "cads_adaptors:DummyCdsAdaptor",
"values": {
"a": ["a1", "a2", "a3", "a4"],
"b": ["b3", "b4"],
"c": ["c2", "c3", "c4"],
},
},
},
# These filter keys are used by all sub-adaptors. In practice gecko will
# detect requirements and populate this list automatically
"extract_subrequest_kwargs": {
"filter_keys": ["b", "c", "d"],
"dont_split_keys": ["f", "g", "h"],
"required_keys": ["a"],
},
}


@pytest.mark.parametrize("entry_point", ["MultiAdaptor", "MultiMarsCdsAdaptor"])
def test_multi_adaptor_get_extract_subrequests_kwargs(entry_point):
    """Top-level and per-sub-adaptor kwargs are combined as expected."""
    config = {**EXTRACT_SR_KWARGS_ADAPTOR_CONFIG, "entry_point": entry_point}
    multi_adaptor = multi.MultiAdaptor(EXTRACT_SR_KWARGS_FORM, **config)
    sub_adaptor_configs = EXTRACT_SR_KWARGS_ADAPTOR_CONFIG["adaptors"]

    # adaptor1 sets its own kwargs, so they are merged with the top-level
    # ones; adaptor2 sets none, so only the top-level values are expected.
    expected_by_adaptor = {
        "adaptor1": {
            "filter_keys": ["a", "b", "c", "d"],
            "dont_split_keys": ["e", "f", "g", "h"],
            "required_keys": ["a", "b", "c"],
        },
        "adaptor2": {
            "filter_keys": ["b", "c", "d"],
            "dont_split_keys": ["f", "g", "h"],
            "required_keys": ["a"],
        },
    }

    for adaptor_tag, expected_kwargs in expected_by_adaptor.items():
        merged_kwargs = multi_adaptor.get_extract_subrequest_kwargs(
            sub_adaptor_configs[adaptor_tag]
        )
        for kwarg_name, expected_values in expected_kwargs.items():
            assert kwarg_name in merged_kwargs
            assert sorted(merged_kwargs[kwarg_name]) == expected_values


def test_multi_adaptor_split_adaptors():
multi_adaptor = multi.MultiAdaptor(FORM, **ADAPTOR_CONFIG)

Expand Down