From 4a66dabc8f5174c3ff1d3412570ad371f3c62253 Mon Sep 17 00:00:00 2001
From: Owen Littlejohns
Date: Fri, 29 Mar 2024 18:37:12 -0400
Subject: [PATCH 1/4] IP-241 - Implement pre-commit.

---
 .github/pull_request_template.md              |  2 +-
 .pre-commit-config.yaml                       | 20 +++++++++++
 README.md                                     | 33 +++++++++++++++++++
 docker/tests.Dockerfile                       |  2 +-
 docs/requirements.txt                         |  2 +-
 tests/data/ATL16_prefetch.dmr                 |  2 +-
 tests/data/ATL16_prefetch_bnds.dmr            |  2 +-
 tests/data/ATL16_prefetch_group.dmr           |  2 +-
 tests/data/GPM_3IMERGHH_example.dmr           |  4 +--
 tests/data/README.md                          |  2 +-
 .../geojson_examples/multilinestring.geo.json |  2 +-
 tests/pip_test_requirements.txt               |  1 +
 tests/unit/test_dimension_utilities.py        |  1 -
 13 files changed, 64 insertions(+), 11 deletions(-)
 create mode 100644 .pre-commit-config.yaml

diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index 2cee128..8e6f556 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -11,6 +11,6 @@ A short description of the changes in this PR.
 ## PR Acceptance Checklist
 * [ ] Jira ticket acceptance criteria met.
 * [ ] `CHANGELOG.md` updated to include high level summary of PR changes.
-* [ ] `VERSION` updated if publishing a release.
+* [ ] `docker/service_version.txt` updated if publishing a release.
 * [ ] Tests added/updated and passing.
 * [ ] Documentation updated (if needed).
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..c59f584
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,20 @@
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v3.2.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-json
+      - id: check-yaml
+      - id: check-added-large-files
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.3.4
+    hooks:
+      - id: ruff
+        args: ["--fix", "--show-fixes"]
+  - repo: https://github.com/psf/black-pre-commit-mirror
+    rev: 24.3.0
+    hooks:
+      - id: black-jupyter
+        args: ["--skip-string-normalization"]
+        language_version: python3.11
diff --git a/README.md b/README.md
index 7ff0c2a..7199fb3 100644
--- a/README.md
+++ b/README.md
@@ -240,6 +240,39 @@ newest release of the code (starting at the top of the file).
 ## vX.Y.Z
 ```
+### pre-commit hooks:
+
+This repository uses [pre-commit](https://pre-commit.com/) to run automated
+checks for some coding standard best practices before each commit. These
+include:
+
+* Removing trailing whitespace.
+* Ensuring each file ends with a single newline.
+* Checking that JSON and YAML files are valid.
+* [ruff](https://github.com/astral-sh/ruff) Python linting checks.
+* [black](https://black.readthedocs.io/en/stable/index.html) Python code
+  formatting checks.
+
+To enable these checks:
+
+```bash
+# Install the pre-commit Python package as part of the test requirements:
+pip install -r tests/pip_test_requirements.txt
+
+# Install the git hook scripts:
+pre-commit install
+
+# (Optional) Run against all files:
+pre-commit run --all-files
+```
+
+When you make a new commit locally, `pre-commit` will run automatically. If
+any of the hooks detects non-compliance (e.g., trailing whitespace), that
+hook will report a failure and, where possible, fix the offending file. You
+will need to review and `git add` those changes before the commit can
+succeed.
+
+Additional hooks, possibly including tools such as `mypy`, are planned.
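+
+A single hook can also be run on its own, using the hook `id` values defined
+in `.pre-commit-config.yaml`, for example:
+
+```bash
+# Run only the ruff linting hook against all files:
+pre-commit run ruff --all-files
+```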
+ ## Get in touch: You can reach out to the maintainers of this repository via email: diff --git a/docker/tests.Dockerfile b/docker/tests.Dockerfile index 517e019..e46e74d 100644 --- a/docker/tests.Dockerfile +++ b/docker/tests.Dockerfile @@ -16,7 +16,7 @@ ENV PYTHONDONTWRITEBYTECODE=1 COPY tests/pip_test_requirements.txt . RUN conda run --name hoss pip install --no-input -r pip_test_requirements.txt -# Copy test directory containing Python unittest suite, test data and utilities +# Copy test directory containing Python unittest suite, test data and utilities COPY ./tests tests # Set conda environment to hoss, as conda run will not stream logging. diff --git a/docs/requirements.txt b/docs/requirements.txt index fb307c6..dd7a29c 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,4 @@ -# +# # These requirements are used by the documentation Jupyter notebooks in the # harmony-opendap-subsetter/docs directory. # diff --git a/tests/data/ATL16_prefetch.dmr b/tests/data/ATL16_prefetch.dmr index 7ddecd8..22ac4e4 100644 --- a/tests/data/ATL16_prefetch.dmr +++ b/tests/data/ATL16_prefetch.dmr @@ -222,4 +222,4 @@ ATL16 - \ No newline at end of file + diff --git a/tests/data/ATL16_prefetch_bnds.dmr b/tests/data/ATL16_prefetch_bnds.dmr index d48b6a5..e35e0e6 100644 --- a/tests/data/ATL16_prefetch_bnds.dmr +++ b/tests/data/ATL16_prefetch_bnds.dmr @@ -217,4 +217,4 @@ ATL16 - \ No newline at end of file + diff --git a/tests/data/ATL16_prefetch_group.dmr b/tests/data/ATL16_prefetch_group.dmr index c200956..e401a6c 100644 --- a/tests/data/ATL16_prefetch_group.dmr +++ b/tests/data/ATL16_prefetch_group.dmr @@ -216,4 +216,4 @@ ATL16 - \ No newline at end of file + diff --git a/tests/data/GPM_3IMERGHH_example.dmr b/tests/data/GPM_3IMERGHH_example.dmr index 6193436..e025045 100644 --- a/tests/data/GPM_3IMERGHH_example.dmr +++ b/tests/data/GPM_3IMERGHH_example.dmr @@ -109,7 +109,7 @@ EndianType=LITTLE_ENDIAN; Longitude at the center of - 0.10 degree grid intervals of longitude + 0.10 degree grid intervals of longitude from -180 to 180. @@ -157,7 +157,7 @@ EndianType=LITTLE_ENDIAN; time - Representative time of data in + Representative time of data in seconds since 1970-01-01 00:00:00 UTC. diff --git a/tests/data/README.md b/tests/data/README.md index d06a301..ef7f46d 100644 --- a/tests/data/README.md +++ b/tests/data/README.md @@ -91,4 +91,4 @@ * ATL16_prefetch_bnds.dmr - An example `.dmr` file that is nearly identical to the `ATL16_prefetch.dmr` file except for four additional fabricated variables that represented the four - possible cases of combining bounds variable existence and cell alignment. \ No newline at end of file + possible cases of combining bounds variable existence and cell alignment. 
diff --git a/tests/geojson_examples/multilinestring.geo.json b/tests/geojson_examples/multilinestring.geo.json index 40d20b2..3a4e25e 100644 --- a/tests/geojson_examples/multilinestring.geo.json +++ b/tests/geojson_examples/multilinestring.geo.json @@ -35,4 +35,4 @@ } } ] -} \ No newline at end of file +} diff --git a/tests/pip_test_requirements.txt b/tests/pip_test_requirements.txt index 0cf95be..4b2bb55 100644 --- a/tests/pip_test_requirements.txt +++ b/tests/pip_test_requirements.txt @@ -1,4 +1,5 @@ coverage~=7.2.2 +pre-commit~=3.7.0 pycodestyle~=2.10.0 pylint~=2.17.2 unittest-xml-reporting~=3.2.0 diff --git a/tests/unit/test_dimension_utilities.py b/tests/unit/test_dimension_utilities.py index e4fda16..2dcfa0b 100644 --- a/tests/unit/test_dimension_utilities.py +++ b/tests/unit/test_dimension_utilities.py @@ -7,7 +7,6 @@ from harmony.util import config from harmony.message import Message -from pathlib import PurePosixPath from netCDF4 import Dataset from numpy.ma import masked_array from numpy.testing import assert_array_equal From 56dd43f69d901abbba6cfb765a98dee26ff71cfc Mon Sep 17 00:00:00 2001 From: Owen Littlejohns Date: Thu, 4 Apr 2024 14:06:22 -0400 Subject: [PATCH 2/4] IP-241 - Implement black formatting on entire repository. --- docs/HOSS_DAAC_Operator_Documentation.ipynb | 60 +- docs/HOSS_User_Documentation.ipynb | 90 +- hoss/__main__.py | 10 +- hoss/adapter.py | 100 +- hoss/bbox_utilities.py | 359 +-- hoss/dimension_utilities.py | 474 ++-- hoss/exceptions.py | 127 +- hoss/projection_utilities.py | 480 ++-- hoss/spatial.py | 258 +- hoss/subset.py | 324 ++- hoss/temporal.py | 40 +- hoss/utilities.py | 104 +- tests/__init__.py | 1 + tests/test_adapter.py | 2718 ++++++++++++------- tests/test_code_format.py | 25 +- tests/unit/__init__.py | 1 + tests/unit/test_adapter.py | 597 ++-- tests/unit/test_bbox_utilities.py | 558 ++-- tests/unit/test_dimension_utilities.py | 953 ++++--- tests/unit/test_projection_utilities.py | 636 +++-- tests/unit/test_spatial.py | 391 +-- tests/unit/test_subset.py | 1649 ++++++----- tests/unit/test_temporal.py | 105 +- tests/unit/test_utilities.py | 180 +- tests/utilities.py | 38 +- 25 files changed, 6004 insertions(+), 4274 deletions(-) diff --git a/docs/HOSS_DAAC_Operator_Documentation.ipynb b/docs/HOSS_DAAC_Operator_Documentation.ipynb index f09b4de..c13ae67 100644 --- a/docs/HOSS_DAAC_Operator_Documentation.ipynb +++ b/docs/HOSS_DAAC_Operator_Documentation.ipynb @@ -170,8 +170,10 @@ "metadata": {}, "outputs": [], "source": [ - "temporal_range = {'start': datetime(2020, 1, 1, 0, 0, 0),\n", - " 'stop': datetime(2020, 1, 31, 23, 59, 59)}" + "temporal_range = {\n", + " 'start': datetime(2020, 1, 1, 0, 0, 0),\n", + " 'stop': datetime(2020, 1, 31, 23, 59, 59),\n", + "}" ] }, { @@ -273,14 +275,19 @@ "outputs": [], "source": [ "# Define the request:\n", - "variable_subset_request = Request(collection=collection, variables=[variable_to_subset], max_results=1)\n", + "variable_subset_request = Request(\n", + " collection=collection, variables=[variable_to_subset], max_results=1\n", + ")\n", "\n", "# Submit the request and download the results\n", "variable_subset_job_id = harmony_client.submit(variable_subset_request)\n", "harmony_client.wait_for_processing(variable_subset_job_id, show_progress=True)\n", - "variable_subset_outputs = [file_future.result()\n", - " for file_future\n", - " in harmony_client.download_all(variable_subset_job_id, overwrite=True)]\n", + "variable_subset_outputs = [\n", + " file_future.result()\n", + " for file_future in 
harmony_client.download_all(\n", + " variable_subset_job_id, overwrite=True\n", + " )\n", + "]\n", "\n", "replace(variable_subset_outputs[0], 'hoss_variable_subset.nc4')\n", "\n", @@ -308,15 +315,22 @@ "outputs": [], "source": [ "# Define the request:\n", - "temporal_subset_request = Request(collection=collection, temporal=temporal_range,\n", - " variables=[variable_to_subset], max_results=1)\n", + "temporal_subset_request = Request(\n", + " collection=collection,\n", + " temporal=temporal_range,\n", + " variables=[variable_to_subset],\n", + " max_results=1,\n", + ")\n", "\n", "# Submit the request and download the results\n", "temporal_subset_job_id = harmony_client.submit(temporal_subset_request)\n", "harmony_client.wait_for_processing(temporal_subset_job_id, show_progress=True)\n", - "temporal_subset_outputs = [file_future.result()\n", - " for file_future\n", - " in harmony_client.download_all(temporal_subset_job_id, overwrite=True)]\n", + "temporal_subset_outputs = [\n", + " file_future.result()\n", + " for file_future in harmony_client.download_all(\n", + " temporal_subset_job_id, overwrite=True\n", + " )\n", + "]\n", "\n", "replace(temporal_subset_outputs[0], 'hoss_temporal_subset.nc4')\n", "\n", @@ -351,14 +365,17 @@ "outputs": [], "source": [ "# Define the request:\n", - "bbox_subset_request = Request(collection=collection, spatial=bounding_box, max_results=1)\n", + "bbox_subset_request = Request(\n", + " collection=collection, spatial=bounding_box, max_results=1\n", + ")\n", "\n", "# Submit the request and download the results\n", "bbox_subset_job_id = harmony_client.submit(bbox_subset_request)\n", "harmony_client.wait_for_processing(bbox_subset_job_id, show_progress=True)\n", - "bbox_subset_outputs = [file_future.result()\n", - " for file_future\n", - " in harmony_client.download_all(bbox_subset_job_id, overwrite=True)]\n", + "bbox_subset_outputs = [\n", + " file_future.result()\n", + " for file_future in harmony_client.download_all(bbox_subset_job_id, overwrite=True)\n", + "]\n", "\n", "replace(bbox_subset_outputs[0], 'hoss_bbox_subset.nc4')\n", "\n", @@ -389,14 +406,19 @@ "outputs": [], "source": [ "# Define the request:\n", - "shape_file_subset_request = Request(collection=collection, shape='shape_files/bermuda_triangle.geo.json', max_results=1)\n", + "shape_file_subset_request = Request(\n", + " collection=collection, shape='shape_files/bermuda_triangle.geo.json', max_results=1\n", + ")\n", "\n", "# Submit the request and download the results\n", "shape_file_subset_job_id = harmony_client.submit(shape_file_subset_request)\n", "harmony_client.wait_for_processing(shape_file_subset_job_id, show_progress=True)\n", - "shape_file_subset_outputs = [file_future.result()\n", - " for file_future\n", - " in harmony_client.download_all(shape_file_subset_job_id, overwrite=True)]\n", + "shape_file_subset_outputs = [\n", + " file_future.result()\n", + " for file_future in harmony_client.download_all(\n", + " shape_file_subset_job_id, overwrite=True\n", + " )\n", + "]\n", "\n", "replace(shape_file_subset_outputs[0], 'hoss_shape_file_subset.nc4')\n", "# Inspect the results:\n", diff --git a/docs/HOSS_User_Documentation.ipynb b/docs/HOSS_User_Documentation.ipynb index 236b0f0..589fdc9 100644 --- a/docs/HOSS_User_Documentation.ipynb +++ b/docs/HOSS_User_Documentation.ipynb @@ -127,14 +127,19 @@ "source": [ "variables = ['atmosphere_cloud_liquid_water_content']\n", "\n", - "variable_subset_request = Request(collection=ghrc_collection, variables=variables, granule_id=[ghrc_granule_id])\n", + 
"variable_subset_request = Request(\n", + " collection=ghrc_collection, variables=variables, granule_id=[ghrc_granule_id]\n", + ")\n", "variable_subset_job_id = harmony_client.submit(variable_subset_request)\n", "\n", "print(f'Processing job: {variable_subset_job_id}')\n", "\n", - "for filename in [file_future.result()\n", - " for file_future\n", - " in harmony_client.download_all(variable_subset_job_id, overwrite=True, directory=demo_directory)]:\n", + "for filename in [\n", + " file_future.result()\n", + " for file_future in harmony_client.download_all(\n", + " variable_subset_job_id, overwrite=True, directory=demo_directory\n", + " )\n", + "]:\n", " print(f'Downloaded: {filename}')" ] }, @@ -157,14 +162,19 @@ "source": [ "gpm_bounding_box = BBox(w=45, s=-45, e=75, n=-15)\n", "\n", - "bbox_request = Request(collection=gpm_collection, spatial=gpm_bounding_box, granule_id=[gpm_granule_id])\n", + "bbox_request = Request(\n", + " collection=gpm_collection, spatial=gpm_bounding_box, granule_id=[gpm_granule_id]\n", + ")\n", "bbox_job_id = harmony_client.submit(bbox_request)\n", "\n", "print(f'Processing job: {bbox_job_id}')\n", "\n", - "for filename in [file_future.result()\n", - " for file_future\n", - " in harmony_client.download_all(bbox_job_id, overwrite=True, directory=demo_directory)]:\n", + "for filename in [\n", + " file_future.result()\n", + " for file_future in harmony_client.download_all(\n", + " bbox_job_id, overwrite=True, directory=demo_directory\n", + " )\n", + "]:\n", " print(f'Downloaded: {filename}')" ] }, @@ -196,15 +206,22 @@ "gpm_bounding_box = BBox(w=45, s=-45, e=75, n=-15)\n", "gpm_variables = ['/Grid/precipitationCal']\n", "\n", - "combined_request = Request(collection=gpm_collection, spatial=gpm_bounding_box,\n", - " granule_id=[gpm_granule_id], variables=gpm_variables)\n", + "combined_request = Request(\n", + " collection=gpm_collection,\n", + " spatial=gpm_bounding_box,\n", + " granule_id=[gpm_granule_id],\n", + " variables=gpm_variables,\n", + ")\n", "combined_job_id = harmony_client.submit(combined_request)\n", "\n", "print(f'Processing job: {combined_job_id}')\n", "\n", - "for filename in [file_future.result()\n", - " for file_future\n", - " in harmony_client.download_all(combined_job_id, overwrite=True, directory=demo_directory)]:\n", + "for filename in [\n", + " file_future.result()\n", + " for file_future in harmony_client.download_all(\n", + " combined_job_id, overwrite=True, directory=demo_directory\n", + " )\n", + "]:\n", " print(f'Downloaded: {filename}')" ] }, @@ -229,14 +246,19 @@ "source": [ "ghrc_bounding_box = BBox(w=-30, s=-50, e=30, n=0)\n", "\n", - "edge_request = Request(collection=ghrc_collection, spatial=ghrc_bounding_box, granule_id=[ghrc_granule_id])\n", + "edge_request = Request(\n", + " collection=ghrc_collection, spatial=ghrc_bounding_box, granule_id=[ghrc_granule_id]\n", + ")\n", "edge_job_id = harmony_client.submit(edge_request)\n", "\n", "print(f'Processing job: {edge_job_id}')\n", "\n", - "for filename in [file_future.result()\n", - " for file_future\n", - " in harmony_client.download_all(edge_job_id, overwrite=True, directory=demo_directory)]:\n", + "for filename in [\n", + " file_future.result()\n", + " for file_future in harmony_client.download_all(\n", + " edge_job_id, overwrite=True, directory=demo_directory\n", + " )\n", + "]:\n", " print(f'Downloaded: {filename}')" ] }, @@ -268,15 +290,22 @@ "point_in_pixel_box = BBox(w=43.2222, s=-25.1111, e=43.2222, n=-25.1111)\n", "gpm_variables = ['/Grid/precipitationCal']\n", "\n", - 
"point_in_pixel_request = Request(collection=gpm_collection, spatial=point_in_pixel_box,\n", - " granule_id=[gpm_granule_id], variables=gpm_variables)\n", + "point_in_pixel_request = Request(\n", + " collection=gpm_collection,\n", + " spatial=point_in_pixel_box,\n", + " granule_id=[gpm_granule_id],\n", + " variables=gpm_variables,\n", + ")\n", "point_in_pixel_job_id = harmony_client.submit(point_in_pixel_request)\n", "\n", "print(f'Processing job: {point_in_pixel_job_id}')\n", "\n", - "for filename in [file_future.result()\n", - " for file_future\n", - " in harmony_client.download_all(point_in_pixel_job_id, overwrite=True, directory=demo_directory)]:\n", + "for filename in [\n", + " file_future.result()\n", + " for file_future in harmony_client.download_all(\n", + " point_in_pixel_job_id, overwrite=True, directory=demo_directory\n", + " )\n", + "]:\n", " print(f'Downloaded: {filename}')" ] }, @@ -298,15 +327,22 @@ "corner_point_box = BBox(w=160, s=20, e=160, n=20)\n", "gpm_variables = ['/Grid/precipitationCal']\n", "\n", - "corner_point_request = Request(collection=gpm_collection, spatial=corner_point_box,\n", - " granule_id=[gpm_granule_id], variables=gpm_variables)\n", + "corner_point_request = Request(\n", + " collection=gpm_collection,\n", + " spatial=corner_point_box,\n", + " granule_id=[gpm_granule_id],\n", + " variables=gpm_variables,\n", + ")\n", "corner_point_job_id = harmony_client.submit(corner_point_request)\n", "\n", "print(f'Processing job: {corner_point_job_id}')\n", "\n", - "for filename in [file_future.result()\n", - " for file_future\n", - " in harmony_client.download_all(corner_point_job_id, overwrite=True, directory=demo_directory)]:\n", + "for filename in [\n", + " file_future.result()\n", + " for file_future in harmony_client.download_all(\n", + " corner_point_job_id, overwrite=True, directory=demo_directory\n", + " )\n", + "]:\n", " print(f'Downloaded: {filename}')" ] } diff --git a/hoss/__main__.py b/hoss/__main__.py index 34d5ba5..19caac5 100644 --- a/hoss/__main__.py +++ b/hoss/__main__.py @@ -1,4 +1,5 @@ """ Run the Harmony OPeNDAP SubSetter Adapter via the Harmony CLI. """ + from argparse import ArgumentParser from sys import argv @@ -8,12 +9,13 @@ def main(arguments: list[str]): - """ Parse command line arguments and invoke the appropriate method to - respond to them + """Parse command line arguments and invoke the appropriate method to + respond to them """ - parser = ArgumentParser(prog='harmony-opendap-subsetter', - description='Run Harmony OPeNDAP SubSetter.') + parser = ArgumentParser( + prog='harmony-opendap-subsetter', description='Run Harmony OPeNDAP SubSetter.' + ) setup_cli(parser) harmony_arguments, _ = parser.parse_known_args(arguments[1:]) diff --git a/hoss/adapter.py b/hoss/adapter.py index 93436d2..4215bed 100644 --- a/hoss/adapter.py +++ b/hoss/adapter.py @@ -24,6 +24,7 @@ calls to `process_item` for each granule. """ + import shutil from tempfile import mkdtemp from pystac import Asset, Item @@ -38,11 +39,12 @@ class HossAdapter(BaseHarmonyAdapter): - """ This class extends the BaseHarmonyAdapter class, to implement the - `invoke` method, which performs variable, spatial and temporal - subsetting via requests to OPeNDAP. + """This class extends the BaseHarmonyAdapter class, to implement the + `invoke` method, which performs variable, spatial and temporal + subsetting via requests to OPeNDAP. 
""" + def invoke(self): """ Adds validation to default process_item-based invocation @@ -56,26 +58,26 @@ def invoke(self): return super().invoke() def process_item(self, item: Item, source: Source): - """ Processes a single input item. Services that are not aggregating - multiple input files should prefer to implement this method rather - than `invoke` - - This example copies its input to the output, marking `variables` - and `subset.bbox` message attributes as having been processed - - Parameters - ---------- - item : pystac.Item - the item that should be processed - source : harmony.message.Source - the input source defining the variables, if any, to subset from - the item - - Returns - ------- - pystac.Item - a STAC catalog whose metadata and assets describe the service - output + """Processes a single input item. Services that are not aggregating + multiple input files should prefer to implement this method rather + than `invoke` + + This example copies its input to the output, marking `variables` + and `subset.bbox` message attributes as having been processed + + Parameters + ---------- + item : pystac.Item + the item that should be processed + source : harmony.message.Source + the input source defining the variables, if any, to subset from + the item + + Returns + ------- + pystac.Item + a STAC catalog whose metadata and assets describe the service + output """ result = item.clone() @@ -85,34 +87,44 @@ def process_item(self, item: Item, source: Source): workdir = mkdtemp() try: # Get the data file - asset = next((item_asset for item_asset in item.assets.values() - if 'opendap' in (item_asset.roles or [])), None) + asset = next( + ( + item_asset + for item_asset in item.assets.values() + if 'opendap' in (item_asset.roles or []) + ), + None, + ) self.logger.info(f'Collection short name: {source.shortName}') # Invoke service logic to retrieve subset of file from OPeNDAP - output_file_path = subset_granule(asset.href, source, workdir, - self.message, self.logger, - self.config) + output_file_path = subset_granule( + asset.href, source, workdir, self.message, self.logger, self.config + ) # Stage the output file with a conventional filename mime, _ = get_file_mimetype(output_file_path) staged_filename = generate_output_filename( - asset.href, variable_subset=source.variables, ext='.nc4', - is_subsetted=(is_index_subset(self.message) - or len(source.variables) > 0) + asset.href, + variable_subset=source.variables, + ext='.nc4', + is_subsetted=( + is_index_subset(self.message) or len(source.variables) > 0 + ), + ) + url = stage( + output_file_path, + staged_filename, + mime, + location=self.message.stagingLocation, + logger=self.logger, ) - url = stage(output_file_path, - staged_filename, - mime, - location=self.message.stagingLocation, - logger=self.logger) # Update the STAC record - result.assets['data'] = Asset(url, - title=staged_filename, - media_type=mime, - roles=['data']) + result.assets['data'] = Asset( + url, title=staged_filename, media_type=mime, roles=['data'] + ) # Return the STAC record return result @@ -126,8 +138,8 @@ def process_item(self, item: Item, source: Source): shutil.rmtree(workdir) def validate_message(self): - """ Check the service was triggered by a valid message containing - the expected number of granules. + """Check the service was triggered by a valid message containing + the expected number of granules. 
""" if not hasattr(self, 'message'): @@ -150,9 +162,7 @@ def validate_message(self): has_items = False if not has_granules and not has_items: - raise HarmonyException( - 'No granules specified for variable subsetting' - ) + raise HarmonyException('No granules specified for variable subsetting') for source in self.message.sources: if not hasattr(source, 'variables') or not source.variables: diff --git a/hoss/bbox_utilities.py b/hoss/bbox_utilities.py index e762082..0e5152a 100644 --- a/hoss/bbox_utilities.py +++ b/hoss/bbox_utilities.py @@ -13,6 +13,7 @@ the antimeridian. """ + from collections import namedtuple from logging import Logger from typing import Dict, List, Optional, Tuple, Union @@ -27,14 +28,18 @@ AggCoordinates = List[Tuple[float]] BBox = namedtuple('BBox', ['west', 'south', 'east', 'north']) -Coordinates = Union[List[float], List[List[float]], List[List[List[float]]], - List[List[List[List[float]]]]] +Coordinates = Union[ + List[float], + List[List[float]], + List[List[List[float]]], + List[List[List[List[float]]]], +] GeoJSON = Union[Dict, List] def get_harmony_message_bbox(message: Message) -> Optional[BBox]: - """ Try to retrieve a bounding box from an input Harmony message. If there - is no bounding box, return None. + """Try to retrieve a bounding box from an input Harmony message. If there + is no bounding box, return None. """ if message.subset is not None and message.subset.bbox is not None: @@ -45,12 +50,12 @@ def get_harmony_message_bbox(message: Message) -> Optional[BBox]: return bounding_box -def get_request_shape_file(message: Message, working_dir: str, - adapter_logger: Logger, - adapter_config: Config) -> str: - """ This helper function downloads the file specified in the input Harmony - message via: `Message.subset.shape.href` and returns the local file - path. +def get_request_shape_file( + message: Message, working_dir: str, adapter_logger: Logger, adapter_config: Config +) -> str: + """This helper function downloads the file specified in the input Harmony + message via: `Message.subset.shape.href` and returns the local file + path. """ if message.subset is not None and message.subset.shape is not None: @@ -59,10 +64,13 @@ def get_request_shape_file(message: Message, working_dir: str, shape_file_url = message.subset.shape.process('href') adapter_logger.info('Downloading request shape file') - local_shape_file_path = download(shape_file_url, working_dir, - logger=adapter_logger, - access_token=message.accessToken, - cfg=adapter_config) + local_shape_file_path = download( + shape_file_url, + working_dir, + logger=adapter_logger, + access_token=message.accessToken, + cfg=adapter_config, + ) else: local_shape_file_path = None @@ -70,8 +78,8 @@ def get_request_shape_file(message: Message, working_dir: str, def get_shape_file_geojson(local_shape_file_path: str) -> GeoJSON: - """ Retrieve the shape file GeoJSON from the downloaded shape file provided - by the Harmony request. + """Retrieve the shape file GeoJSON from the downloaded shape file provided + by the Harmony request. """ with open(local_shape_file_path, 'r', encoding='utf-8') as file_handler: @@ -81,19 +89,19 @@ def get_shape_file_geojson(local_shape_file_path: str) -> GeoJSON: def get_geographic_bbox(geojson_input: GeoJSON) -> Optional[BBox]: - """ This function takes a GeoJSON input and extracts the longitudinal and - latitudinal extents from it. These extents describe a bounding box that - minimally encompasses the specified shape. 
+ """This function takes a GeoJSON input and extracts the longitudinal and + latitudinal extents from it. These extents describe a bounding box that + minimally encompasses the specified shape. - This function should be used in cases where the data within the granule - are geographic. Some projections, particularly polar projections, will - require further refinement of the GeoJSON shape. + This function should be used in cases where the data within the granule + are geographic. Some projections, particularly polar projections, will + require further refinement of the GeoJSON shape. - In the function below `contiguous_bboxes` and `contiguous_bbox` refer - to bounding boxes that do not cross the antimeridian. Although, the - GeoJSON specification recommends that GeoJSON shapes should be split to - avoid crossing the antimeridian, user-supplied shape files may not - conform to this recommendation. + In the function below `contiguous_bboxes` and `contiguous_bbox` refer + to bounding boxes that do not cross the antimeridian. Although, the + GeoJSON specification recommends that GeoJSON shapes should be split to + avoid crossing the antimeridian, user-supplied shape files may not + conform to this recommendation. """ if 'bbox' in geojson_input: @@ -107,8 +115,7 @@ def get_geographic_bbox(geojson_input: GeoJSON) -> Optional[BBox]: contiguous_bbox = get_contiguous_bbox(grouped_coordinates) antimeridian_bbox = get_antimeridian_bbox(grouped_coordinates) - bbox_south, bbox_north = get_latitude_range(contiguous_bbox, - antimeridian_bbox) + bbox_south, bbox_north = get_latitude_range(contiguous_bbox, antimeridian_bbox) if antimeridian_bbox is None: bbox_west = contiguous_bbox.west @@ -116,16 +123,16 @@ def get_geographic_bbox(geojson_input: GeoJSON) -> Optional[BBox]: elif contiguous_bbox is None: bbox_west = antimeridian_bbox.west bbox_east = antimeridian_bbox.east - elif ( - bbox_in_longitude_range(contiguous_bbox, -180, antimeridian_bbox.east) - or bbox_in_longitude_range(contiguous_bbox, antimeridian_bbox.west, 180) - ): + elif bbox_in_longitude_range( + contiguous_bbox, -180, antimeridian_bbox.east + ) or bbox_in_longitude_range(contiguous_bbox, antimeridian_bbox.west, 180): # Antimeridian bounding box encompasses non-antimeridian crossing # bounding box bbox_west = antimeridian_bbox.west bbox_east = antimeridian_bbox.east - elif ((antimeridian_bbox.east - contiguous_bbox.west) - < (contiguous_bbox.east - antimeridian_bbox.west)): + elif (antimeridian_bbox.east - contiguous_bbox.west) < ( + contiguous_bbox.east - antimeridian_bbox.west + ): # Distance from contiguous bounding box west to antimeridian bounding # box east is shorter than antimeridian bounding box west to contiguous # bounding box east @@ -141,72 +148,72 @@ def get_geographic_bbox(geojson_input: GeoJSON) -> Optional[BBox]: return BBox(bbox_west, bbox_south, bbox_east, bbox_north) -def get_contiguous_bbox( - grouped_coordinates: List[AggCoordinates] -) -> Optional[BBox]: - """ Retrieve a bounding box that encapsulates all shape file geometries - that do not cross the antimeridian. +def get_contiguous_bbox(grouped_coordinates: List[AggCoordinates]) -> Optional[BBox]: + """Retrieve a bounding box that encapsulates all shape file geometries + that do not cross the antimeridian. 
""" - contiguous_bboxes = [[min(grouped_lons), min(grouped_lats), - max(grouped_lons), max(grouped_lats)] - for grouped_lons, grouped_lats in grouped_coordinates - if len(grouped_lons) == 1 - or not crosses_antimeridian(grouped_lons)] + contiguous_bboxes = [ + [min(grouped_lons), min(grouped_lats), max(grouped_lons), max(grouped_lats)] + for grouped_lons, grouped_lats in grouped_coordinates + if len(grouped_lons) == 1 or not crosses_antimeridian(grouped_lons) + ] if len(contiguous_bboxes) > 0: aggregated_extents = list(zip(*contiguous_bboxes)) - contiguous_bbox = BBox(min(aggregated_extents[0]), - min(aggregated_extents[1]), - max(aggregated_extents[2]), - max(aggregated_extents[3])) + contiguous_bbox = BBox( + min(aggregated_extents[0]), + min(aggregated_extents[1]), + max(aggregated_extents[2]), + max(aggregated_extents[3]), + ) else: contiguous_bbox = None return contiguous_bbox -def get_antimeridian_bbox( - grouped_coordinates: List[AggCoordinates] -) -> Optional[BBox]: - """ Retrieve a bounding box that encapsulates all shape file geometries - that cross the antimeridian. The output bounding box will also cross - the antimeridian. +def get_antimeridian_bbox(grouped_coordinates: List[AggCoordinates]) -> Optional[BBox]: + """Retrieve a bounding box that encapsulates all shape file geometries + that cross the antimeridian. The output bounding box will also cross + the antimeridian. """ antimeridian_bboxes = [ get_antimeridian_geometry_bbox(grouped_lons, grouped_lats) for grouped_lons, grouped_lats in grouped_coordinates - if len(grouped_lons) > 1 - and crosses_antimeridian(grouped_lons) + if len(grouped_lons) > 1 and crosses_antimeridian(grouped_lons) ] if len(antimeridian_bboxes) > 0: aggregated_extents = list(zip(*antimeridian_bboxes)) - antimeridian_bbox = BBox(min(aggregated_extents[0]), - min(aggregated_extents[1]), - max(aggregated_extents[2]), - max(aggregated_extents[3])) + antimeridian_bbox = BBox( + min(aggregated_extents[0]), + min(aggregated_extents[1]), + max(aggregated_extents[2]), + max(aggregated_extents[3]), + ) else: antimeridian_bbox = None return antimeridian_bbox -def get_antimeridian_geometry_bbox(grouped_lons: Tuple[float], - grouped_lats: Tuple[float]) -> BBox: - """ Combine the longitudes and latitudes for a single GeoJSON geometry into - a bounding box that encapsulates that geometry. The input to this - function will already have been identified as crossing the - antimeridian. The longitudes will be split into two groups either side - of the antimeridian, so the westernmost point west of the antimeridian - and the easternmost point east of the antimeridian can be found. +def get_antimeridian_geometry_bbox( + grouped_lons: Tuple[float], grouped_lats: Tuple[float] +) -> BBox: + """Combine the longitudes and latitudes for a single GeoJSON geometry into + a bounding box that encapsulates that geometry. The input to this + function will already have been identified as crossing the + antimeridian. The longitudes will be split into two groups either side + of the antimeridian, so the westernmost point west of the antimeridian + and the easternmost point east of the antimeridian can be found. - This function assumes that, on average, those points east of the - antimeridian will have a lower average longitude than those west of it. + This function assumes that, on average, those points east of the + antimeridian will have a lower average longitude than those west of it. - The output from this function will be a bounding box that also crosses - the antimeridian. 
+ The output from this function will be a bounding box that also crosses + the antimeridian. """ longitudes_group_one = [grouped_lons[0]] @@ -229,80 +236,86 @@ def get_antimeridian_geometry_bbox(grouped_lons: Tuple[float], east_lons = longitudes_group_two west_lons = longitudes_group_one - return BBox(min(west_lons), min(grouped_lats), max(east_lons), - max(grouped_lats)) + return BBox(min(west_lons), min(grouped_lats), max(east_lons), max(grouped_lats)) -def get_latitude_range(contiguous_bbox: Optional[BBox], - antimeridian_bbox: Optional[BBox]) -> Tuple[float]: - """ Retrieve the southern and northern extent for all bounding boxes. One - of `contiguous_bbox` or `antimeridian_bbox` must not be `None`. +def get_latitude_range( + contiguous_bbox: Optional[BBox], antimeridian_bbox: Optional[BBox] +) -> Tuple[float]: + """Retrieve the southern and northern extent for all bounding boxes. One + of `contiguous_bbox` or `antimeridian_bbox` must not be `None`. - * `contiguous_bbox`: A bounding box that minimally encompasses all - GeoJSON geometries that do not cross the antimeridian. - * `antimeridian_bbox`: A bounding box that minimally encompasses all - GeoJSON geometries that _do_ cross the antimeridian. + * `contiguous_bbox`: A bounding box that minimally encompasses all + GeoJSON geometries that do not cross the antimeridian. + * `antimeridian_bbox`: A bounding box that minimally encompasses all + GeoJSON geometries that _do_ cross the antimeridian. """ - south_values = [bbox.south for bbox in [contiguous_bbox, antimeridian_bbox] - if bbox is not None] - north_values = [bbox.north for bbox in [contiguous_bbox, antimeridian_bbox] - if bbox is not None] + south_values = [ + bbox.south for bbox in [contiguous_bbox, antimeridian_bbox] if bbox is not None + ] + north_values = [ + bbox.north for bbox in [contiguous_bbox, antimeridian_bbox] if bbox is not None + ] return min(south_values), max(north_values) -def bbox_in_longitude_range(bounding_box: BBox, west_limit: float, - east_limit: float) -> bool: - """ Check if the specified bounding box is entirely contained by the - specified longitude range. +def bbox_in_longitude_range( + bounding_box: BBox, west_limit: float, east_limit: float +) -> bool: + """Check if the specified bounding box is entirely contained by the + specified longitude range. - This function is used to identify when geometries that do not cross the - antimeridian are contained by the longitudinal range of those that do. + This function is used to identify when geometries that do not cross the + antimeridian are contained by the longitudinal range of those that do. """ - return (west_limit <= bounding_box[0] <= east_limit - and west_limit <= bounding_box[2] <= east_limit) + return ( + west_limit <= bounding_box[0] <= east_limit + and west_limit <= bounding_box[2] <= east_limit + ) def aggregate_all_geometries(geojson_input: GeoJSON) -> List[AggCoordinates]: - """ Parse the input GeoJSON object, and identify all items within it - containing geometries. When items containing geometries are identified, - functions are called to aggregate the coordinates within each geometry - and return a list of aggregated longitudes and latitudes for each - geometry (or sub-geometry member, e.g., multiple points, linestrings or - polygons). + """Parse the input GeoJSON object, and identify all items within it + containing geometries. 
When items containing geometries are identified, + functions are called to aggregate the coordinates within each geometry + and return a list of aggregated longitudes and latitudes for each + geometry (or sub-geometry member, e.g., multiple points, linestrings or + polygons). """ if 'coordinates' in geojson_input: # A Geometry object with a `coordinates` attribute, e.g., Point, # LineString, Polygon, etc. - grouped_coords = aggregate_geometry_coordinates( - geojson_input['coordinates'] - ) + grouped_coords = aggregate_geometry_coordinates(geojson_input['coordinates']) elif 'geometries' in geojson_input: # A GeometryCollection geometry. - grouped_coords = flatten_list([ - aggregate_geometry_coordinates(geometry['coordinates']) - for geometry in geojson_input['geometries'] - ]) - elif ('geometry' in geojson_input - and 'coordinates' in geojson_input['geometry']): + grouped_coords = flatten_list( + [ + aggregate_geometry_coordinates(geometry['coordinates']) + for geometry in geojson_input['geometries'] + ] + ) + elif 'geometry' in geojson_input and 'coordinates' in geojson_input['geometry']: # A GeoJSON Feature (e.g., Point, LineString, Polygon, etc) grouped_coords = aggregate_geometry_coordinates( geojson_input['geometry']['coordinates'] ) - elif ('geometry' in geojson_input - and 'geometries' in geojson_input['geometry']): + elif 'geometry' in geojson_input and 'geometries' in geojson_input['geometry']: # A GeoJSON Feature containing a GeometryCollection - grouped_coords = flatten_list([ - aggregate_all_geometries(geometry) - for geometry in geojson_input['geometry']['geometries'] - ]) + grouped_coords = flatten_list( + [ + aggregate_all_geometries(geometry) + for geometry in geojson_input['geometry']['geometries'] + ] + ) elif 'features' in geojson_input: # A GeoJSON FeatureCollection - grouped_coords = flatten_list(aggregate_all_geometries(feature) - for feature in geojson_input['features']) + grouped_coords = flatten_list( + aggregate_all_geometries(feature) for feature in geojson_input['features'] + ) else: raise InvalidInputGeoJSON() @@ -310,106 +323,108 @@ def aggregate_all_geometries(geojson_input: GeoJSON) -> List[AggCoordinates]: def aggregate_geometry_coordinates( - coordinates: Coordinates, - aggregated_coordinates: List[AggCoordinates] = None + coordinates: Coordinates, aggregated_coordinates: List[AggCoordinates] = None ) -> List[AggCoordinates]: - """ Extract the aggregated latitude and longitude coordinates associated - with all child items in the `coordinates` attribute of a GeoJSON - geometry. The order of longitudes and latitudes are preserved to allow - later checking for antimeridian crossing. - - Some geometries have multiple parts, such as MultiLineStrings or - MultiPolygons. These each have their own entries in the output list, - so that the bounding box of each can be derived independently. Keeping - sub-geometries separate is important to avoid spurious identification - of antimeridian crossing. - - Return value: - - [ - [(x_0, ..., x_M), (y_0, ..., y_M)], # For GeoJSON sub-geometry one - [(x_0, ..., x_N), (y_0, ..., y_N)] # For GeoJSON sub-geometry two - ] + """Extract the aggregated latitude and longitude coordinates associated + with all child items in the `coordinates` attribute of a GeoJSON + geometry. The order of longitudes and latitudes are preserved to allow + later checking for antimeridian crossing. + + Some geometries have multiple parts, such as MultiLineStrings or + MultiPolygons. 
These each have their own entries in the output list, + so that the bounding box of each can be derived independently. Keeping + sub-geometries separate is important to avoid spurious identification + of antimeridian crossing. + + Return value: + + [ + [(x_0, ..., x_M), (y_0, ..., y_M)], # For GeoJSON sub-geometry one + [(x_0, ..., x_N), (y_0, ..., y_N)] # For GeoJSON sub-geometry two + ] - For geometry types: Point, LineString and Polygon, there will be only - a single sub-geometry item in the returned list. + For geometry types: Point, LineString and Polygon, there will be only + a single sub-geometry item in the returned list. """ if aggregated_coordinates is None: aggregated_coordinates = [] if is_single_point(coordinates): - aggregated_coordinates.append([(coordinates[0], ), (coordinates[1], )]) + aggregated_coordinates.append([(coordinates[0],), (coordinates[1],)]) elif is_list_of_coordinates(coordinates): aggregated_coordinates.append(list(zip(*coordinates))) else: for nested_coordinates in coordinates: - aggregate_geometry_coordinates(nested_coordinates, - aggregated_coordinates) + aggregate_geometry_coordinates(nested_coordinates, aggregated_coordinates) return aggregated_coordinates def is_list_of_coordinates(input_object) -> bool: - """ Checks if the input contains a list of coordinates, which Python will - represent as a list of lists of numerical values, e.g.: + """Checks if the input contains a list of coordinates, which Python will + represent as a list of lists of numerical values, e.g.: - ```Python - list_of_coordinates = [[0.1, 0.2], [0.3, 0.4]] - ``` + ```Python + list_of_coordinates = [[0.1, 0.2], [0.3, 0.4]] + ``` """ - return (isinstance(input_object, list) - and all(is_single_point(element) for element in input_object)) + return isinstance(input_object, list) and all( + is_single_point(element) for element in input_object + ) def is_single_point(input_object) -> bool: - """ Checks if the input is a single list of numbers. Note, coordinates may - or may not include a vertical coordinate as a third element. + """Checks if the input is a single list of numbers. Note, coordinates may + or may not include a vertical coordinate as a third element. """ - return (isinstance(input_object, list) - and len(input_object) in (2, 3) - and all(isinstance(element, (float, int)) - for element in input_object)) + return ( + isinstance(input_object, list) + and len(input_object) in (2, 3) + and all(isinstance(element, (float, int)) for element in input_object) + ) def flatten_list(list_of_lists: List[List]) -> List: - """ Flatten the top level of a list of lists, to combine all elements in - the child lists to be child elements at the top level of the object. - For example: + """Flatten the top level of a list of lists, to combine all elements in + the child lists to be child elements at the top level of the object. + For example: - Input: [[1, 2, 3], [4, 5, 6]] - Output: [1, 2, 3, 4, 5, 6] + Input: [[1, 2, 3], [4, 5, 6]] + Output: [1, 2, 3, 4, 5, 6] """ return [item for sub_list in list_of_lists for item in sub_list] -def crosses_antimeridian(longitudes: List[Union[float, int]], - longitude_threshold: float = 180.0) -> bool: - """ Check if a specified list of ordered longitudes crosses the - antimeridian (+/- 180 degrees east). This check assumes that any points - that are separated by more than 180 degrees east in longitude will - cross the antimeridian. 
There are edge-cases where this may not be - true, but it is a common condition used in similar checks: +def crosses_antimeridian( + longitudes: List[Union[float, int]], longitude_threshold: float = 180.0 +) -> bool: + """Check if a specified list of ordered longitudes crosses the + antimeridian (+/- 180 degrees east). This check assumes that any points + that are separated by more than 180 degrees east in longitude will + cross the antimeridian. There are edge-cases where this may not be + true, but it is a common condition used in similar checks: - https://towardsdatascience.com/around-the-world-in-80-lines-crossing-the-antimeridian-with-python-and-shapely-c87c9b6e1513 + https://towardsdatascience.com/around-the-world-in-80-lines-crossing-the-antimeridian-with-python-and-shapely-c87c9b6e1513 """ return np.abs(np.diff(longitudes)).max() > longitude_threshold def get_bounding_box_lon_lat(bounding_box: List[float]) -> BBox: - """ Parse a GeoJSON bounding box attribute, and retrieve only the - horizontal coordinates (West, South, East, North). + """Parse a GeoJSON bounding box attribute, and retrieve only the + horizontal coordinates (West, South, East, North). """ if len(bounding_box) == 4: horizontal_bounding_box = BBox(*bounding_box) elif len(bounding_box) == 6: - horizontal_bounding_box = BBox(bounding_box[0], bounding_box[1], - bounding_box[3], bounding_box[4]) + horizontal_bounding_box = BBox( + bounding_box[0], bounding_box[1], bounding_box[3], bounding_box[4] + ) else: raise InvalidInputGeoJSON() diff --git a/hoss/dimension_utilities.py b/hoss/dimension_utilities.py index ab6b10a..0e00742 100644 --- a/hoss/dimension_utilities.py +++ b/hoss/dimension_utilities.py @@ -9,6 +9,7 @@ unwrapped in accordance with the longitude dimension values. """ + from logging import Logger from typing import Dict, Set, Tuple @@ -24,8 +25,11 @@ from hoss.bbox_utilities import flatten_list from hoss.exceptions import InvalidNamedDimension, InvalidRequestedRange -from hoss.utilities import (format_variable_set_string, get_opendap_nc4, - get_value_or_default) +from hoss.utilities import ( + format_variable_set_string, + get_opendap_nc4, + get_value_or_default, +) IndexRange = Tuple[int] @@ -33,31 +37,41 @@ def is_index_subset(message: Message) -> bool: - """ Determine if the inbound Harmony request specified any parameters that - will require an index range subset. These will be: + """Determine if the inbound Harmony request specified any parameters that + will require an index range subset. These will be: - * Bounding box spatial requests (Message.subset.bbox) - * Shape file spatial requests (Message.subset.shape) - * Temporal requests (Message.temporal) - * Named dimension range subsetting requests (Message.subset.dimensions) + * Bounding box spatial requests (Message.subset.bbox) + * Shape file spatial requests (Message.subset.shape) + * Temporal requests (Message.temporal) + * Named dimension range subsetting requests (Message.subset.dimensions) """ - return any(rgetattr(message, subset_parameter, None) is not None - for subset_parameter - in ['subset.bbox', 'subset.shape', 'subset.dimensions', - 'temporal']) - - -def prefetch_dimension_variables(opendap_url: str, varinfo: VarInfoFromDmr, - required_variables: Set[str], output_dir: str, - logger: Logger, access_token: str, - config: Config) -> str: - """ Determine the dimensions that need to be "pre-fetched" from OPeNDAP in - order to derive index ranges upon them. 
Initially, this was just - spatial and temporal dimensions, but to support generic dimension - subsets, all required dimensions must be prefetched, along with any - associated bounds variables referred to via the "bounds" metadata - attribute. + return any( + rgetattr(message, subset_parameter, None) is not None + for subset_parameter in [ + 'subset.bbox', + 'subset.shape', + 'subset.dimensions', + 'temporal', + ] + ) + + +def prefetch_dimension_variables( + opendap_url: str, + varinfo: VarInfoFromDmr, + required_variables: Set[str], + output_dir: str, + logger: Logger, + access_token: str, + config: Config, +) -> str: + """Determine the dimensions that need to be "pre-fetched" from OPeNDAP in + order to derive index ranges upon them. Initially, this was just + spatial and temporal dimensions, but to support generic dimension + subsets, all required dimensions must be prefetched, along with any + associated bounds variables referred to via the "bounds" metadata + attribute. """ required_dimensions = varinfo.get_required_dimensions(required_variables) @@ -66,41 +80,48 @@ def prefetch_dimension_variables(opendap_url: str, varinfo: VarInfoFromDmr, # references for each that has any. This will produce a list of lists, # which should be flattened into a single list and then combined into a set # to remove duplicates. - bounds = set(flatten_list([ - list(varinfo.get_variable(dimension).references.get('bounds')) - for dimension in required_dimensions - if varinfo.get_variable(dimension).references.get('bounds') is not None - ])) + bounds = set( + flatten_list( + [ + list(varinfo.get_variable(dimension).references.get('bounds')) + for dimension in required_dimensions + if varinfo.get_variable(dimension).references.get('bounds') is not None + ] + ) + ) required_dimensions.update(bounds) - logger.info('Variables being retrieved in prefetch request: ' - f'{format_variable_set_string(required_dimensions)}') + logger.info( + 'Variables being retrieved in prefetch request: ' + f'{format_variable_set_string(required_dimensions)}' + ) - required_dimensions_nc4 = get_opendap_nc4(opendap_url, - required_dimensions, output_dir, - logger, access_token, config) + required_dimensions_nc4 = get_opendap_nc4( + opendap_url, required_dimensions, output_dir, logger, access_token, config + ) # Create bounds variables if necessary. - add_bounds_variables(required_dimensions_nc4, required_dimensions, - varinfo, logger) + add_bounds_variables(required_dimensions_nc4, required_dimensions, varinfo, logger) return required_dimensions_nc4 -def add_bounds_variables(dimensions_nc4: str, - required_dimensions: Set[str], - varinfo: VarInfoFromDmr, - logger: Logger) -> None: - """ Augment a NetCDF4 file with artificial bounds variables for each - dimension variable that has been identified by the earthdata-varinfo - configuration file to have an edge-aligned attribute" +def add_bounds_variables( + dimensions_nc4: str, + required_dimensions: Set[str], + varinfo: VarInfoFromDmr, + logger: Logger, +) -> None: + """Augment a NetCDF4 file with artificial bounds variables for each + dimension variable that has been identified by the earthdata-varinfo + configuration file to have an edge-aligned attribute" - For each dimension variable: - (1) Check if the variable needs a bounds variable. - (2) If so, create a bounds array from within the `write_bounds` - function. - (3) Then write the bounds variable to the NetCDF4 URL. + For each dimension variable: + (1) Check if the variable needs a bounds variable. 
+ (2) If so, create a bounds array from within the `write_bounds` + function. + (3) Then write the bounds variable to the NetCDF4 URL. """ with Dataset(dimensions_nc4, 'r+') as prefetch_dataset: @@ -109,14 +130,16 @@ def add_bounds_variables(dimensions_nc4: str, if needs_bounds(dimension_variable): write_bounds(prefetch_dataset, dimension_variable) - logger.info('Artificial bounds added for dimension variable: ' - f'{dimension_name}') + logger.info( + 'Artificial bounds added for dimension variable: ' + f'{dimension_name}' + ) def needs_bounds(dimension: VariableFromDmr) -> bool: - """ Check if a dimension variable needs a bounds variable. - This will be the case when dimension cells are edge-aligned - and bounds for that dimension do not already exist. + """Check if a dimension variable needs a bounds variable. + This will be the case when dimension cells are edge-aligned + and bounds for that dimension do not already exist. """ return ( @@ -125,29 +148,28 @@ def needs_bounds(dimension: VariableFromDmr) -> bool: ) -def get_bounds_array(prefetch_dataset: Dataset, - dimension_path: str) -> np.ndarray: - """ Create an array containing the minimum and maximum bounds - for each pixel in a given dimension. +def get_bounds_array(prefetch_dataset: Dataset, dimension_path: str) -> np.ndarray: + """Create an array containing the minimum and maximum bounds + for each pixel in a given dimension. - The minimum and maximum values are determined under the assumption - that the dimension data is monotonically increasing and contiguous. - So for every bounds but the last, the bounds are simply extracted - from the dimension dataset. + The minimum and maximum values are determined under the assumption + that the dimension data is monotonically increasing and contiguous. + So for every bounds but the last, the bounds are simply extracted + from the dimension dataset. - The final bounds must be calculated with the assumption that - the last data cell is edge-aligned and thus has a value the does - not account for the cell length. So, the final bound is determined - by taking the median of all the resolutions in the dataset to obtain - a resolution that can be added to the final data value. + The final bounds must be calculated with the assumption that + the last data cell is edge-aligned and thus has a value the does + not account for the cell length. So, the final bound is determined + by taking the median of all the resolutions in the dataset to obtain + a resolution that can be added to the final data value. - Ex: Input dataset with resolution of 3 degrees: [ ... , 81, 84, 87] + Ex: Input dataset with resolution of 3 degrees: [ ... , 81, 84, 87] - Minimum | Maximum - <...> <...> - 81 84 - 84 87 - 87 ? -> 87 + median resolution -> 87 + 3 -> 90 + Minimum | Maximum + <...> <...> + 81 84 + 84 87 + 87 ? -> 87 + median resolution -> 87 + 3 -> 90 """ # Access the dimension variable's data using the variable's full path. @@ -174,20 +196,20 @@ def get_bounds_array(prefetch_dataset: Dataset, return cell_bounds.T -def write_bounds(prefetch_dataset: Dataset, - dimension_variable: VariableFromDmr) -> None: - """ Write the input bounds array to a given dimension dataset. +def write_bounds( + prefetch_dataset: Dataset, dimension_variable: VariableFromDmr +) -> None: + """Write the input bounds array to a given dimension dataset. - First a new dimension is created for the new bounds variable - to allow the variable to be two-dimensional. 
+ First a new dimension is created for the new bounds variable + to allow the variable to be two-dimensional. - Then the new bounds variable is created using two dimensions: - (1) the existing dimension of the dimension dataset, and - (2) the new bounds variable dimension. + Then the new bounds variable is created using two dimensions: + (1) the existing dimension of the dimension dataset, and + (2) the new bounds variable dimension. """ - bounds_array = get_bounds_array(prefetch_dataset, - dimension_variable.full_name_path) + bounds_array = get_bounds_array(prefetch_dataset, dimension_variable.full_name_path) # Create the second bounds dimension. dimension_name = str(PurePosixPath(dimension_variable.full_name_path).name) @@ -200,50 +222,65 @@ def write_bounds(prefetch_dataset: Dataset, # The root group must be explicitly referenced here. bounds_dim = prefetch_dataset.createDimension(bounds_dimension_name, 2) else: - bounds_dim = prefetch_dataset[dimension_group].createDimension(bounds_dimension_name, 2) + bounds_dim = prefetch_dataset[dimension_group].createDimension( + bounds_dimension_name, 2 + ) # Dimension variables only have one dimension - themselves. - variable_dimension = prefetch_dataset[dimension_variable.full_name_path].dimensions[0] + variable_dimension = prefetch_dataset[dimension_variable.full_name_path].dimensions[ + 0 + ] bounds_data_type = str(dimension_variable.data_type) - bounds = prefetch_dataset.createVariable(bounds_full_path_name, - bounds_data_type, - (variable_dimension, - bounds_dim,)) + bounds = prefetch_dataset.createVariable( + bounds_full_path_name, + bounds_data_type, + ( + variable_dimension, + bounds_dim, + ), + ) # Write data to the new variable in the prefetch dataset. bounds[:] = bounds_array[:] # Update varinfo attributes and references. - prefetch_dataset[dimension_variable.full_name_path].setncatts({'bounds': bounds_name}) - dimension_variable.references['bounds'] = {bounds_name, } + prefetch_dataset[dimension_variable.full_name_path].setncatts( + {'bounds': bounds_name} + ) + dimension_variable.references['bounds'] = { + bounds_name, + } dimension_variable.attributes['bounds'] = bounds_name def is_dimension_ascending(dimension: MaskedArray) -> bool: - """ Read the array associated with a dimension variable and check if the - variables ascend starting from the zeroth element or not. + """Read the array associated with a dimension variable and check if the + variables ascend starting from the zeroth element or not. """ first_index, last_index = np.ma.flatnotmasked_edges(dimension) return dimension.size == 1 or dimension[first_index] < dimension[last_index] -def get_dimension_index_range(dimension_values: MaskedArray, - request_min: float, request_max: float, - bounds_values: MaskedArray = None) -> IndexRange: - """ Ensure that both a minimum and maximum value are defined from the - message, if not, use the first or last value in the dimension array, - accordingly. For granules that only contain dimension variables (not - additional bounds variables) the minimum and maximum values must be - ordered to be ascending or descending in a way that matches the - dimension index values. - - Once the minimum and maximum values are determined, and sorted in the - same order as the dimension array values, retrieve the index values - that correspond to the requested dimension values. Alternatively, if a - dimension has an associated bounds variable, use this to determine the - dimension index range. 
+def get_dimension_index_range( + dimension_values: MaskedArray, + request_min: float, + request_max: float, + bounds_values: MaskedArray = None, +) -> IndexRange: + """Ensure that both a minimum and maximum value are defined from the + message, if not, use the first or last value in the dimension array, + accordingly. For granules that only contain dimension variables (not + additional bounds variables) the minimum and maximum values must be + ordered to be ascending or descending in a way that matches the + dimension index values. + + Once the minimum and maximum values are determined, and sorted in the + same order as the dimension array values, retrieve the index values + that correspond to the requested dimension values. Alternatively, if a + dimension has an associated bounds variable, use this to determine the + dimension index range. """ if is_dimension_ascending(dimension_values): @@ -254,43 +291,44 @@ def get_dimension_index_range(dimension_values: MaskedArray, dimension_max = get_value_or_default(request_min, dimension_values[-1]) if bounds_values is None: - index_range = get_dimension_indices_from_values(dimension_values, - dimension_min, - dimension_max) + index_range = get_dimension_indices_from_values( + dimension_values, dimension_min, dimension_max + ) else: index_range = get_dimension_indices_from_bounds( - bounds_values, min(dimension_min, dimension_max), - max(dimension_min, dimension_max) + bounds_values, + min(dimension_min, dimension_max), + max(dimension_min, dimension_max), ) return index_range -def get_dimension_indices_from_values(dimension: MaskedArray, - minimum_extent: float, - maximum_extent: float) -> IndexRange: - """ Find the indices closest to the interpolated values of the minimum and - maximum extents in that dimension. +def get_dimension_indices_from_values( + dimension: MaskedArray, minimum_extent: float, maximum_extent: float +) -> IndexRange: + """Find the indices closest to the interpolated values of the minimum and + maximum extents in that dimension. - Use of `numpy.interp` maps the dimension scale values to their index - values and then computes an interpolated index value that best matches - the bounding value (minimum_extent, maximum_extent) to a "fractional" - index value. Rounding that value gives the starting index value for the - cell that contains that bound. + Use of `numpy.interp` maps the dimension scale values to their index + values and then computes an interpolated index value that best matches + the bounding value (minimum_extent, maximum_extent) to a "fractional" + index value. Rounding that value gives the starting index value for the + cell that contains that bound. - If an extent is requested that is a single point in this dimension, the - range should be the two surrounding pixels that border the point. + If an extent is requested that is a single point in this dimension, the + range should be the two surrounding pixels that border the point. - For an ascending dimension: + For an ascending dimension: - * `minimum_extent` ≤ `maximum_extent`. + * `minimum_extent` ≤ `maximum_extent`. - For a descending dimension: + For a descending dimension: - * `minimum_extent` ≥ `maximum_extent` + * `minimum_extent` ≥ `maximum_extent` - Input longitude extent values must conform to the valid range of the - native dimension data. + Input longitude extent values must conform to the valid range of the + native dimension data. 
""" dimension_range = [minimum_extent, maximum_extent] @@ -304,8 +342,7 @@ def get_dimension_indices_from_values(dimension: MaskedArray, dimension_values = np.flip(dimension) dimension_indices = np.flip(dimension_indices) - raw_indices = np.interp(dimension_range, dimension_values, - dimension_indices) + raw_indices = np.interp(dimension_range, dimension_values, dimension_indices) if (raw_indices[0] == raw_indices[1]) and (raw_indices[0] % 1 == 0.5): # Minimum extent is exactly halfway between two pixels, and the @@ -335,20 +372,21 @@ def get_dimension_indices_from_values(dimension: MaskedArray, return (minimum_index, maximum_index) -def get_dimension_indices_from_bounds(bounds: np.ndarray, min_value: float, - max_value: float) -> Tuple[int]: - """ Derive the dimension array indices that correspond to the requested - dimension range in the input Harmony message. +def get_dimension_indices_from_bounds( + bounds: np.ndarray, min_value: float, max_value: float +) -> Tuple[int]: + """Derive the dimension array indices that correspond to the requested + dimension range in the input Harmony message. - This function assumes: + This function assumes: - - The pixels bounds represent a contiguous range, e.g., the upper - bound of one pixel is always equal to the lower bound of the next - pixel. - - The bounds arrays are monotonic in the 0th dimension (e.g., lower and - upper bounds values either all ascend or all descend along with the - array indices). - - min_value ≤ max_value. + - The pixels bounds represent a contiguous range, e.g., the upper + bound of one pixel is always equal to the lower bound of the next + pixel. + - The bounds arrays are monotonic in the 0th dimension (e.g., lower and + upper bounds values either all ascend or all descend along with the + array indices). + - min_value ≤ max_value. """ if min_value > np.nanmax(bounds) or max_value < np.nanmin(bounds): @@ -372,16 +410,17 @@ def get_dimension_indices_from_bounds(bounds: np.ndarray, min_value: float, return (minimum_index, maximum_index) -def add_index_range(variable_name: str, varinfo: VarInfoFromDmr, - index_ranges: IndexRanges) -> str: - """ Append the index ranges of each dimension for the specified variable. - If there are no dimensions with listed index ranges, then the full - variable should be requested, and no index notation is required. - A variable with a bounding box crossing the edge of the grid (e.g., at - the antimeridian or Prime Meridian) will have a minimum index greater - than the maximum index. In this case the full dimension range should be - requested, as the related values will be masked before returning the - output to the user. +def add_index_range( + variable_name: str, varinfo: VarInfoFromDmr, index_ranges: IndexRanges +) -> str: + """Append the index ranges of each dimension for the specified variable. + If there are no dimensions with listed index ranges, then the full + variable should be requested, and no index notation is required. + A variable with a bounding box crossing the edge of the grid (e.g., at + the antimeridian or Prime Meridian) will have a minimum index greater + than the maximum index. In this case the full dimension range should be + requested, as the related values will be masked before returning the + output to the user. 
""" variable = varinfo.get_variable(variable_name) @@ -405,31 +444,30 @@ def add_index_range(variable_name: str, varinfo: VarInfoFromDmr, def get_fill_slice(dimension: str, fill_ranges: IndexRanges) -> slice: - """ Check the dictionary of dimensions that need to be filled for the - given dimension. If present, the minimum index will be greater than the - maximum index (the eastern edge of the bounding box will seem to be to - the west of the western edge due to crossing the grid edge). The region - to be filled is between these indices: + """Check the dictionary of dimensions that need to be filled for the + given dimension. If present, the minimum index will be greater than the + maximum index (the eastern edge of the bounding box will seem to be to + the west of the western edge due to crossing the grid edge). The region + to be filled is between these indices: - * Start index = maximum index + 1. - * Stop index = minimum index. (As Python index slices go up to, but not - including, the stop index). + * Start index = maximum index + 1. + * Stop index = minimum index. (As Python index slices go up to, but not + including, the stop index). - If the dimension is not to be filled, return a `slice` with unspecified - start and stop. This is the equivalent of the full range in this - dimension. Slices for all variable dimensions will be combined to - identify the region of the variable to be filled, e.g.: + If the dimension is not to be filled, return a `slice` with unspecified + start and stop. This is the equivalent of the full range in this + dimension. Slices for all variable dimensions will be combined to + identify the region of the variable to be filled, e.g.: - * variable[(slice(None), slice(None), slice(start_lon, stop_lon))] = fill + * variable[(slice(None), slice(None), slice(start_lon, stop_lon))] = fill - This is equivalent to: + This is equivalent to: - * science_variable[:][:][start_lon:stop_lon] = fill + * science_variable[:][:][start_lon:stop_lon] = fill """ if dimension in fill_ranges: - fill_slice = slice(fill_ranges[dimension][1] + 1, - fill_ranges[dimension][0]) + fill_slice = slice(fill_ranges[dimension][1] + 1, fill_ranges[dimension][0]) else: fill_slice = slice(None) @@ -437,9 +475,9 @@ def get_fill_slice(dimension: str, fill_ranges: IndexRanges) -> slice: def get_dimension_extents(dimension_array: np.ndarray) -> Tuple[float]: - """ Fit the dimension with a straight line, and find the outer edge of the - first and last pixel, assuming the supplied values lie at the centre of - each pixel. + """Fit the dimension with a straight line, and find the outer edge of the + first and last pixel, assuming the supplied values lie at the centre of + each pixel. """ dimension_indices = np.arange(dimension_array.size) @@ -451,20 +489,23 @@ def get_dimension_extents(dimension_array: np.ndarray) -> Tuple[float]: return (min_extent, max_extent) -def get_requested_index_ranges(required_variables: Set[str], - varinfo: VarInfoFromDmr, dimensions_path: str, - harmony_message: Message) -> IndexRanges: - """ Examines the requested dimension names and ranges and extracts the - indices that correspond to the specified range of values for each - dimension that is requested specifically by name. 
+def get_requested_index_ranges( + required_variables: Set[str], + varinfo: VarInfoFromDmr, + dimensions_path: str, + harmony_message: Message, +) -> IndexRanges: + """Examines the requested dimension names and ranges and extracts the + indices that correspond to the specified range of values for each + dimension that is requested specifically by name. - When dimensions, such as atmospheric pressure or ocean depth, have - values that are descending (getting smaller from start to finish), then - the min/max values of the requested range are flipped. If the dimension - is descending, the specified range must also be descending. + When dimensions, such as atmospheric pressure or ocean depth, have + values that are descending (getting smaller from start to finish), then + the min/max values of the requested range are flipped. If the dimension + is descending, the specified range must also be descending. - The return value from this function is a dictionary that contains the - index ranges for the named dimension, such as: {'/lev': [1, 5]} + The return value from this function is a dictionary that contains the + index ranges for the named dimension, such as: {'/lev': [1, 5]} """ required_dimensions = varinfo.get_required_dimensions(required_variables) @@ -483,12 +524,13 @@ def get_requested_index_ranges(required_variables: Set[str], if dim_is_valid: # Try to extract bounds metadata: - bounds_array = get_dimension_bounds(dim.name, varinfo, - dimensions_file) + bounds_array = get_dimension_bounds(dim.name, varinfo, dimensions_file) # Retrieve index ranges for the specifically named dimension: dim_index_ranges[dim.name] = get_dimension_index_range( - dimensions_file[dim.name][:], dim.min, dim.max, - bounds_values=bounds_array + dimensions_file[dim.name][:], + dim.min, + dim.max, + bounds_values=bounds_array, ) else: # This requested dimension is not in the required dimension set @@ -497,15 +539,16 @@ def get_requested_index_ranges(required_variables: Set[str], return dim_index_ranges -def get_dimension_bounds(dimension_name: str, varinfo: VarInfoFromDmr, - prefetch_dataset: Dataset) -> MaskedArray: - """ Check if a named dimension has a `bounds` metadata attribute, if so - retrieve the array of values for the named variable from the NetCDF-4 - variables retrieved from OPeNDAP in the prefetch request. +def get_dimension_bounds( + dimension_name: str, varinfo: VarInfoFromDmr, prefetch_dataset: Dataset +) -> MaskedArray: + """Check if a named dimension has a `bounds` metadata attribute, if so + retrieve the array of values for the named variable from the NetCDF-4 + variables retrieved from OPeNDAP in the prefetch request. - If there is no `bounds` reference, or if the variable contained in the - `bounds` reference is not present in the prefetch output, `None` will - be returned. + If there is no `bounds` reference, or if the variable contained in the + `bounds` reference is not present in the prefetch output, `None` will + be returned. """ bounds = varinfo.get_variable(dimension_name).references.get('bounds') @@ -523,23 +566,24 @@ def get_dimension_bounds(dimension_name: str, varinfo: VarInfoFromDmr, def is_almost_in(value: float, array: np.ndarray) -> bool: - """ Check if a specific value is within the supplied array. The comparison - will first derive a precision from the smallest difference in elements - in the supplied array. The comparison will use the minimum value of - either 10**-5 or (10**-3 * minimum_difference). 
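The tolerance derivation documented for `is_almost_in` here can be worked through with hypothetical depth values:

```python
import numpy as np

# Worked example of the documented tolerance: the absolute tolerance is the
# smaller of 1e-5 and a thousandth of the smallest spacing in the array.
depths = np.array([5.0, 15.0, 25.0, 35.1])
array_precision = min(np.nanmin(np.abs(np.diff(depths) / 1000.0)), 0.00001)
print(array_precision)  # 1e-05, as the smallest spacing / 1000 is 0.01

# rtol=0 ensures large values do not inflate the tolerance.
print(np.any(np.isclose(depths, np.full_like(depths, 25.0),
                        rtol=0, atol=array_precision)))  # True
```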
-
-        `np.isclose` calculates tolerance = (atol + (rtol * abs(b)),
-        b is the element in the second array being compared. To ensure large
-        values don't lose precision, rtol is set to zero below.
-
-        This function was specifically written to help support the ECCO Ocean
-        Velocity collection, which has a depth dimension, Z. Most of these
-        dimension values are likely set at depths that correspond to specific
-        pressure values. The relationship between these (P = rho.g.h) means
-        that well rounded pressure values lead to depths without nicely rounded
-        values.
+    """Check if a specific value is within the supplied array. The comparison
+    will first derive a precision from the smallest difference in elements
+    in the supplied array. The comparison will use the minimum value of
+    either 10**-5 or (10**-3 * minimum_difference).
+
+    `np.isclose` calculates tolerance = atol + (rtol * abs(b)),
+    where b is the element in the second array being compared. To ensure large
+    values don't lose precision, rtol is set to zero below.
+
+    This function was specifically written to help support the ECCO Ocean
+    Velocity collection, which has a depth dimension, Z. Most of these
+    dimension values are likely set at depths that correspond to specific
+    pressure values. The relationship between these (P = rho.g.h) means
+    that well-rounded pressure values lead to depths without nicely rounded
+    values.
 
     """
     array_precision = min(np.nanmin(np.abs(np.diff(array) / 1000.0)), 0.00001)
 
-    return np.any(np.isclose(array, np.full_like(array, value),
-                             rtol=0, atol=array_precision))
+    return np.any(
+        np.isclose(array, np.full_like(array, value), rtol=0, atol=array_precision)
+    )
diff --git a/hoss/exceptions.py b/hoss/exceptions.py
index a3a05eb..1cb1439 100644
--- a/hoss/exceptions.py
+++ b/hoss/exceptions.py
@@ -7,10 +7,11 @@
 
 class CustomError(Exception):
-    """ Base class for exceptions in HOSS. This base class allows for future
-        work, such as assigning exit codes for specific failure modes.
+    """Base class for exceptions in HOSS. This base class allows for future
+    work, such as assigning exit codes for specific failure modes.
 
     """
+
     def __init__(self, exception_type, message):
        self.exception_type = exception_type
        self.message = message
@@ -18,104 +19,126 @@ def __init__(self, exception_type, message):
 
 class InvalidInputGeoJSON(CustomError):
-    """ This exception is raised when a supplied GeoJSON object does not
-        adhere the GeoJSON schema. For example, if a GeoJSON geometry does not
-        contain either a `bbox` or a `coordinates` attribute.
+    """This exception is raised when a supplied GeoJSON object does not
+    adhere to the GeoJSON schema. For example, if a GeoJSON geometry does not
+    contain either a `bbox` or a `coordinates` attribute.
 
     """
+
     def __init__(self):
-        super().__init__('InvalidInputGeoJSON',
-                         'The supplied shape file cannot be parsed according '
-                         'to the GeoJSON format defined in RFC 7946.')
+        super().__init__(
+            'InvalidInputGeoJSON',
+            'The supplied shape file cannot be parsed according '
+            'to the GeoJSON format defined in RFC 7946.',
+        )
 
 
 class InvalidNamedDimension(CustomError):
-    """ This exception is raised when a user-supplied dimension name
-        is not in the list of required dimensions for the subset.
+    """This exception is raised when a user-supplied dimension name
+    is not in the list of required dimensions for the subset.
""" + def __init__(self, dimension_name): - super().__init__('InvalidNamedDimension', - f'"{dimension_name}" is not a dimension for ' - 'any of the requested variables.') + super().__init__( + 'InvalidNamedDimension', + f'"{dimension_name}" is not a dimension for ' + 'any of the requested variables.', + ) class InvalidRequestedRange(CustomError): - """ This exception is raised when a user-supplied dimension range lies - entirely outside the range of a dimension with an associated bounds - variable. + """This exception is raised when a user-supplied dimension range lies + entirely outside the range of a dimension with an associated bounds + variable. """ + def __init__(self): - super().__init__('InvalidRequestedRange', - 'Input request specified range outside supported ' - 'dimension range') + super().__init__( + 'InvalidRequestedRange', + 'Input request specified range outside supported ' 'dimension range', + ) class MissingGridMappingMetadata(CustomError): - """ This exception is raised when HOSS tries to obtain the `grid_mapping` - metadata attribute for a projected variable and it is not present in - either the input granule or the CF-Convention overrides defined in the - earthdata-varinfo configuration file. + """This exception is raised when HOSS tries to obtain the `grid_mapping` + metadata attribute for a projected variable and it is not present in + either the input granule or the CF-Convention overrides defined in the + earthdata-varinfo configuration file. """ + def __init__(self, variable_name): - super().__init__('MissingGridMappingMetadata', - f'Projected variable "{variable_name}" does not have ' - 'an associated "grid_mapping" metadata attribute.') + super().__init__( + 'MissingGridMappingMetadata', + f'Projected variable "{variable_name}" does not have ' + 'an associated "grid_mapping" metadata attribute.', + ) class MissingGridMappingVariable(CustomError): - """ This exception is raised when HOSS tries to extract attributes from a - `grid_mapping` variable referred to by another variable, but that - `grid_mapping` variable is not present in the `.dmr` for that granule. + """This exception is raised when HOSS tries to extract attributes from a + `grid_mapping` variable referred to by another variable, but that + `grid_mapping` variable is not present in the `.dmr` for that granule. """ + def __init__(self, grid_mapping_variable, referring_variable): - super().__init__('MissingGridMappingVariable', - f'Grid mapping variable "{grid_mapping_variable}" ' - f'referred to by variable "{referring_variable}" is ' - 'not present in granule .dmr file.') + super().__init__( + 'MissingGridMappingVariable', + f'Grid mapping variable "{grid_mapping_variable}" ' + f'referred to by variable "{referring_variable}" is ' + 'not present in granule .dmr file.', + ) class MissingSpatialSubsetInformation(CustomError): - """ This exception is raised when HOSS reaches a branch of the code that - requires spatial subset information, but neither a bounding box, nor a - shape file is specified. + """This exception is raised when HOSS reaches a branch of the code that + requires spatial subset information, but neither a bounding box, nor a + shape file is specified. 
""" + def __init__(self): - super().__init__('MissingSpatialSubsetInformation', - 'Either a bounding box or shape file must be ' - 'specified when performing spatial subsetting.') + super().__init__( + 'MissingSpatialSubsetInformation', + 'Either a bounding box or shape file must be ' + 'specified when performing spatial subsetting.', + ) class UnsupportedShapeFileFormat(CustomError): - """ This exception is raised when the shape file included in the input - Harmony message is not GeoJSON. + """This exception is raised when the shape file included in the input + Harmony message is not GeoJSON. """ + def __init__(self, shape_file_mime_type: str): - super().__init__('UnsupportedShapeFileFormat', - f'Shape file format "{shape_file_mime_type}" not ' - 'supported.') + super().__init__( + 'UnsupportedShapeFileFormat', + f'Shape file format "{shape_file_mime_type}" not ' 'supported.', + ) class UnsupportedTemporalUnits(CustomError): - """ This exception is raised when the 'units' metadata attribute contains - a temporal unit that is not supported by HOSS. + """This exception is raised when the 'units' metadata attribute contains + a temporal unit that is not supported by HOSS. """ + def __init__(self, units_string): - super().__init__('UnsupportedTemporalUnits', - f'Temporal units "{units_string}" not supported.') + super().__init__( + 'UnsupportedTemporalUnits', + f'Temporal units "{units_string}" not supported.', + ) class UrlAccessFailed(CustomError): - """ This exception is raised when an HTTP request for a given URL has a non - 500 error, and is therefore not retried. + """This exception is raised when an HTTP request for a given URL has a non + 500 error, and is therefore not retried. """ + def __init__(self, url, status_code): - super().__init__('UrlAccessFailed', - f'{status_code} error retrieving: {url}') + super().__init__('UrlAccessFailed', f'{status_code} error retrieving: {url}') diff --git a/hoss/projection_utilities.py b/hoss/projection_utilities.py index f4ae600..78d600e 100644 --- a/hoss/projection_utilities.py +++ b/hoss/projection_utilities.py @@ -9,197 +9,219 @@ projected grids. """ + from typing import Dict, get_args, List, Optional, Tuple, Union import json from pyproj import CRS, Transformer -from shapely.geometry import (GeometryCollection, LineString, MultiLineString, - MultiPoint, MultiPolygon, Point, Polygon, shape) +from shapely.geometry import ( + GeometryCollection, + LineString, + MultiLineString, + MultiPoint, + MultiPolygon, + Point, + Polygon, + shape, +) from varinfo import VarInfoFromDmr import numpy as np from hoss.bbox_utilities import BBox, flatten_list -from hoss.exceptions import (InvalidInputGeoJSON, MissingGridMappingMetadata, - MissingGridMappingVariable, - MissingSpatialSubsetInformation) +from hoss.exceptions import ( + InvalidInputGeoJSON, + MissingGridMappingMetadata, + MissingGridMappingVariable, + MissingSpatialSubsetInformation, +) Coordinates = Tuple[float] -MultiShape = Union[GeometryCollection, MultiLineString, MultiPoint, - MultiPolygon] +MultiShape = Union[GeometryCollection, MultiLineString, MultiPoint, MultiPolygon] Shape = Union[LineString, Point, Polygon, MultiShape] def get_variable_crs(variable: str, varinfo: VarInfoFromDmr) -> CRS: - """ Check the metadata attributes for the variable to find the associated - grid mapping variable. Create a `pyproj.CRS` object from the grid - mapping variable metadata attributes. + """Check the metadata attributes for the variable to find the associated + grid mapping variable. 
Create a `pyproj.CRS` object from the grid + mapping variable metadata attributes. - All metadata attributes that contain references from one variable to - another are stored in the `Variable.references` dictionary attribute - as sets. There should only be one reference in the `grid_mapping` - attribute value, so the first element of the set is retrieved. + All metadata attributes that contain references from one variable to + another are stored in the `Variable.references` dictionary attribute + as sets. There should only be one reference in the `grid_mapping` + attribute value, so the first element of the set is retrieved. """ - grid_mapping = next(iter(varinfo.get_variable(variable).references - .get('grid_mapping', [])), - None) + grid_mapping = next( + iter(varinfo.get_variable(variable).references.get('grid_mapping', [])), None + ) if grid_mapping is not None: try: crs = CRS.from_cf(varinfo.get_variable(grid_mapping).attributes) except AttributeError as exception: - raise MissingGridMappingVariable( - grid_mapping, variable - ) from exception + raise MissingGridMappingVariable(grid_mapping, variable) from exception else: raise MissingGridMappingMetadata(variable) return crs -def get_projected_x_y_variables(varinfo: VarInfoFromDmr, - variable: str) -> Tuple[Optional[str]]: - """ Retrieve the names of the projected x and y dimension variables - associated with a variable. If either are not found, a `None` value - will be returned for the absent dimension variable. +def get_projected_x_y_variables( + varinfo: VarInfoFromDmr, variable: str +) -> Tuple[Optional[str]]: + """Retrieve the names of the projected x and y dimension variables + associated with a variable. If either are not found, a `None` value + will be returned for the absent dimension variable. - Note - the input variables to this function are only filtered to remove - variables that are spatial dimensions. The input to this function may - have no dimensions, or may not be spatially gridded. + Note - the input variables to this function are only filtered to remove + variables that are spatial dimensions. The input to this function may + have no dimensions, or may not be spatially gridded. """ variable_dimensions = varinfo.get_variable(variable).dimensions - projected_x = next((dimension for dimension in variable_dimensions - if is_projection_x_dimension(varinfo, dimension)), - None) + projected_x = next( + ( + dimension + for dimension in variable_dimensions + if is_projection_x_dimension(varinfo, dimension) + ), + None, + ) - projected_y = next((dimension for dimension in variable_dimensions - if is_projection_y_dimension(varinfo, dimension)), - None) + projected_y = next( + ( + dimension + for dimension in variable_dimensions + if is_projection_y_dimension(varinfo, dimension) + ), + None, + ) return projected_x, projected_y -def is_projection_x_dimension(varinfo: VarInfoFromDmr, - dimension_variable: str) -> bool: - """ Check if the named variable exists in the `VarInfoFromDmr` - representation of the granule. If so, check the `standard_name` - attribute conforms to the CF-Convention defined options for a - projection x coordinate. +def is_projection_x_dimension(varinfo: VarInfoFromDmr, dimension_variable: str) -> bool: + """Check if the named variable exists in the `VarInfoFromDmr` + representation of the granule. If so, check the `standard_name` + attribute conforms to the CF-Convention defined options for a + projection x coordinate. 
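As a sketch of the `CRS.from_cf` call in `get_variable_crs` above: the grid-mapping attributes below are hypothetical values for a polar stereographic grid, not taken from any particular granule.

```python
from pyproj import CRS

# Build a CRS from CF-Convention grid-mapping attributes, as the reformatted
# function above does with the attributes of the referenced grid-mapping
# variable. These attribute values are assumed for illustration.
crs = CRS.from_cf({
    'grid_mapping_name': 'polar_stereographic',
    'straight_vertical_longitude_from_pole': -45.0,
    'latitude_of_projection_origin': 90.0,
    'standard_parallel': 70.0,
    'false_easting': 0.0,
    'false_northing': 0.0,
})
print(crs.to_epsg())  # May resolve to 3413 (NSIDC Sea Ice Polar Stereo North)
```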
- The variable must be first checked to see if it exists as some - dimensions, such as the `nv`, `latv` and `lonv` that define the - 2-element dimension of bounds variables, exist only as a size, not as a - full variable within the input granule. + The variable must be first checked to see if it exists as some + dimensions, such as the `nv`, `latv` and `lonv` that define the + 2-element dimension of bounds variables, exist only as a size, not as a + full variable within the input granule. """ - projected_x_names = ('projection_x_coordinate', - 'projection_x_angular_coordinate') + projected_x_names = ('projection_x_coordinate', 'projection_x_angular_coordinate') - return (varinfo.get_variable(dimension_variable) is not None - and (varinfo.get_variable(dimension_variable) - .attributes - .get('standard_name') in projected_x_names)) + return varinfo.get_variable(dimension_variable) is not None and ( + varinfo.get_variable(dimension_variable).attributes.get('standard_name') + in projected_x_names + ) -def is_projection_y_dimension(varinfo: VarInfoFromDmr, - dimension_variable: str) -> bool: - """ Check if the named variable exists in the representation of the - granule. If so, check the `standard_name` attribute conforms to the - CF-Convention defined options for a projection y coordinate. +def is_projection_y_dimension(varinfo: VarInfoFromDmr, dimension_variable: str) -> bool: + """Check if the named variable exists in the representation of the + granule. If so, check the `standard_name` attribute conforms to the + CF-Convention defined options for a projection y coordinate. - The variable must be first checked to see if it exists as some - dimensions, such as the `nv`, `latv` and `lonv` that define the - 2-element dimension of bounds variables, exist only as a size, not as a - full variable within the input granule. + The variable must be first checked to see if it exists as some + dimensions, such as the `nv`, `latv` and `lonv` that define the + 2-element dimension of bounds variables, exist only as a size, not as a + full variable within the input granule. """ - projected_y_names = ('projection_y_coordinate', - 'projection_y_angular_coordinate') + projected_y_names = ('projection_y_coordinate', 'projection_y_angular_coordinate') - return (varinfo.get_variable(dimension_variable) is not None - and (varinfo.get_variable(dimension_variable) - .attributes - .get('standard_name') in projected_y_names)) + return varinfo.get_variable(dimension_variable) is not None and ( + varinfo.get_variable(dimension_variable).attributes.get('standard_name') + in projected_y_names + ) -def get_projected_x_y_extents(x_values: np.ndarray, y_values: np.ndarray, - crs: CRS, shape_file: str = None, - bounding_box: BBox = None) -> Dict[str, float]: - """ Retrieve the minimum and maximum values for a projected grid as derived - from either a bounding box or GeoJSON shape file, both of which are - defined in geographic coordinates. +def get_projected_x_y_extents( + x_values: np.ndarray, + y_values: np.ndarray, + crs: CRS, + shape_file: str = None, + bounding_box: BBox = None, +) -> Dict[str, float]: + """Retrieve the minimum and maximum values for a projected grid as derived + from either a bounding box or GeoJSON shape file, both of which are + defined in geographic coordinates. - A minimum grid resolution will be determined in the geographic - Coordinate Reference System (CRS). The input spatial constraint will - then have points populated around its exterior at this resolution. 
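The projection step described in the `get_projected_x_y_extents` docstring (and implemented later in `get_x_y_extents_from_geographic_points`) reduces to a `pyproj` transform plus a min/max. The exterior points and the EASE-Grid 2.0 target CRS (EPSG:6933) below are assumed purely for illustration.

```python
import numpy as np
from pyproj import CRS, Transformer

# Hypothetical (longitude, latitude) points on a shape's exterior.
points = [(-10.0, 40.0), (10.0, 40.0), (10.0, 50.0), (-10.0, 50.0)]
point_longitudes, point_latitudes = zip(*points)

# EPSG:4326 uses (latitude, longitude) axis order, matching the calls in
# this module; EPSG:6933 is an assumed example target grid.
transformer = Transformer.from_crs(4326, CRS.from_epsg(6933))
points_x, points_y = transformer.transform(point_latitudes, point_longitudes)

print({'x_min': np.nanmin(points_x), 'x_max': np.nanmax(points_x),
       'y_min': np.nanmin(points_y), 'y_max': np.nanmax(points_y)})
```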
- These geographic points will then all be projected to the target grid - CRS, allowing the retrieval of a minimum and maximum value in both the - projected x and projected y dimension. + A minimum grid resolution will be determined in the geographic + Coordinate Reference System (CRS). The input spatial constraint will + then have points populated around its exterior at this resolution. + These geographic points will then all be projected to the target grid + CRS, allowing the retrieval of a minimum and maximum value in both the + projected x and projected y dimension. - Example output: + Example output: - x_y_extents = {'x_min': 1000, - 'x_max': 4000, - 'y_min': 2500, - 'y_max': 5500} + x_y_extents = {'x_min': 1000, + 'x_max': 4000, + 'y_min': 2500, + 'y_max': 5500} """ - grid_lats, grid_lons = get_grid_lat_lons(x_values, y_values, crs) # pylint: disable=unpacking-non-sequence + grid_lats, grid_lons = get_grid_lat_lons( # pylint: disable=unpacking-non-sequence + x_values, y_values, crs + ) geographic_resolution = get_geographic_resolution(grid_lons, grid_lats) - resolved_geojson = get_resolved_geojson(geographic_resolution, - shape_file=shape_file, - bounding_box=bounding_box) + resolved_geojson = get_resolved_geojson( + geographic_resolution, shape_file=shape_file, bounding_box=bounding_box + ) return get_x_y_extents_from_geographic_points(resolved_geojson, crs) -def get_grid_lat_lons(x_values: np.ndarray, y_values: np.ndarray, - crs: CRS) -> Tuple[np.ndarray]: - """ Construct a 2-D grid of projected x and y values from values in the - corresponding dimension variable 1-D arrays. Then transform those - points to longitudes and latitudes. +def get_grid_lat_lons( + x_values: np.ndarray, y_values: np.ndarray, crs: CRS +) -> Tuple[np.ndarray]: + """Construct a 2-D grid of projected x and y values from values in the + corresponding dimension variable 1-D arrays. Then transform those + points to longitudes and latitudes. """ - projected_x = np.repeat(x_values.reshape(1, len(x_values)), len(y_values), - axis=0) - projected_y = np.repeat(y_values.reshape(len(y_values), 1), len(x_values), - axis=1) + projected_x = np.repeat(x_values.reshape(1, len(x_values)), len(y_values), axis=0) + projected_y = np.repeat(y_values.reshape(len(y_values), 1), len(x_values), axis=1) to_geo_transformer = Transformer.from_crs(crs, 4326) - return to_geo_transformer.transform(projected_x, projected_y) # pylint: disable=unpacking-non-sequence + return to_geo_transformer.transform( # pylint: disable=unpacking-non-sequence + projected_x, projected_y + ) -def get_geographic_resolution(longitudes: np.ndarray, - latitudes: np.ndarray) -> float: - """ Calculate the distance between diagonally adjacent cells in both - longitude and latitude. Combined those differences in quadrature to - obtain Euclidean distances. Return the minimum of these Euclidean - distances. Over the typical distances being considered, differences - between the Euclidean and geodesic distance between points should be - minimal, with Euclidean distances being slightly shorter. +def get_geographic_resolution(longitudes: np.ndarray, latitudes: np.ndarray) -> float: + """Calculate the distance between diagonally adjacent cells in both + longitude and latitude. Combined those differences in quadrature to + obtain Euclidean distances. Return the minimum of these Euclidean + distances. 
Over the typical distances being considered, differences + between the Euclidean and geodesic distance between points should be + minimal, with Euclidean distances being slightly shorter. """ - lon_square_diffs = np.square(np.subtract(longitudes[1:, 1:], - longitudes[:-1, :-1])) - lat_square_diffs = np.square(np.subtract(latitudes[1:, 1:], - latitudes[:-1, :-1])) + lon_square_diffs = np.square(np.subtract(longitudes[1:, 1:], longitudes[:-1, :-1])) + lat_square_diffs = np.square(np.subtract(latitudes[1:, 1:], latitudes[:-1, :-1])) return np.nanmin(np.sqrt(np.add(lon_square_diffs, lat_square_diffs))) -def get_resolved_geojson(resolution: float, shape_file: str = None, - bounding_box: BBox = None) -> List[Coordinates]: - """ Take a shape file or bounding box, as defined by the input Harmony - request, and return a full set of points that correspond to the - exterior of any GeoJSON shape fixed to the resolution of the projected - grid of the data. +def get_resolved_geojson( + resolution: float, shape_file: str = None, bounding_box: BBox = None +) -> List[Coordinates]: + """Take a shape file or bounding box, as defined by the input Harmony + request, and return a full set of points that correspond to the + exterior of any GeoJSON shape fixed to the resolution of the projected + grid of the data. """ if bounding_box is not None: - resolved_geojson = get_resolved_feature(get_bbox_polygon(bounding_box), - resolution) + resolved_geojson = get_resolved_feature( + get_bbox_polygon(bounding_box), resolution + ) elif shape_file is not None: with open(shape_file, 'r', encoding='utf-8') as file_handler: geojson_content = json.load(file_handler) @@ -212,127 +234,141 @@ def get_resolved_geojson(resolution: float, shape_file: str = None, def get_bbox_polygon(bounding_box: BBox) -> Polygon: - """ Convert a bounding box into a polygon with points at each corner of - that box. + """Convert a bounding box into a polygon with points at each corner of + that box. """ - coordinates = [(bounding_box.west, bounding_box.south), - (bounding_box.east, bounding_box.south), - (bounding_box.east, bounding_box.north), - (bounding_box.west, bounding_box.north), - (bounding_box.west, bounding_box.south)] + coordinates = [ + (bounding_box.west, bounding_box.south), + (bounding_box.east, bounding_box.south), + (bounding_box.east, bounding_box.north), + (bounding_box.west, bounding_box.north), + (bounding_box.west, bounding_box.south), + ] return Polygon(coordinates) -def get_resolved_features(geojson_content: Dict, - resolution: float) -> List[Coordinates]: - """ Parse GeoJSON read from a file. Once `shapely.geometry.shape` objects - have been created for all features, these features will be resolved - using the supplied resolution of the projected grid. +def get_resolved_features( + geojson_content: Dict, resolution: float +) -> List[Coordinates]: + """Parse GeoJSON read from a file. Once `shapely.geometry.shape` objects + have been created for all features, these features will be resolved + using the supplied resolution of the projected grid. - * The first condition will recognise a single GeoJSON geometry, using - the allowed values of the `type` attribute. - * The second condition will recognise a full GeoJSON feature, which - will include the `geometry` attribute. - * The third condition recognises feature collections, and will create a - `shapely.geometry.shape` object for each child feature. + * The first condition will recognise a single GeoJSON geometry, using + the allowed values of the `type` attribute. 
+ * The second condition will recognise a full GeoJSON feature, which + will include the `geometry` attribute. + * The third condition recognises feature collections, and will create a + `shapely.geometry.shape` object for each child feature. - Strictly, RFC7946 defines geometry types with capital letters, however, - this function converts any detected `type` attribute to an entirely - lowercase string, to avoid missing feature types due to unexpected - lowercase letters. + Strictly, RFC7946 defines geometry types with capital letters, however, + this function converts any detected `type` attribute to an entirely + lowercase string, to avoid missing feature types due to unexpected + lowercase letters. """ - feature_types = ('geometrycollection', 'linestring', 'point', 'polygon', - 'multilinestring', 'multipoint', 'multipolygon') + feature_types = ( + 'geometrycollection', + 'linestring', + 'point', + 'polygon', + 'multilinestring', + 'multipoint', + 'multipolygon', + ) if geojson_content.get('type', '').lower() in feature_types: - resolved_features = get_resolved_feature(shape(geojson_content), - resolution) + resolved_features = get_resolved_feature(shape(geojson_content), resolution) elif 'geometry' in geojson_content: resolved_features = get_resolved_feature( shape(geojson_content['geometry']), resolution ) elif 'features' in geojson_content: - resolved_features = flatten_list([ - get_resolved_feature(shape(feature['geometry']), resolution) - for feature in geojson_content['features'] - ]) + resolved_features = flatten_list( + [ + get_resolved_feature(shape(feature['geometry']), resolution) + for feature in geojson_content['features'] + ] + ) else: raise InvalidInputGeoJSON() return resolved_features -def get_resolved_feature(feature: Shape, - resolution: float) -> List[Coordinates]: - """ Take an input `shapely` feature, such as a GeoJSON Point, LineString, - Polygon or multiple of those options, and return a list of coordinates - on that feature at the supplied resolution. This resolution corresponds - to that of a projected grid. - - * For a Polygon, resolve each line segment on the exterior of the - Polygon. The holes within the polygon should be enclosed by the - exterior, and therefore should not contain an extreme point in - spatial extent. - * For a LineString resolve each line segment and return all points - along each segment. - * For a Point object return the input point. - * For a shape with multiple geometries, recursively call this function - on each sub-geometry, flattening the multiple lists of points into a - single list. - - Later processing will try to determine the extents from these points, - but won't require the list of coordinates to distinguish between input - subgeometries, so a flattened list of all coordinates is returned. +def get_resolved_feature(feature: Shape, resolution: float) -> List[Coordinates]: + """Take an input `shapely` feature, such as a GeoJSON Point, LineString, + Polygon or multiple of those options, and return a list of coordinates + on that feature at the supplied resolution. This resolution corresponds + to that of a projected grid. + + * For a Polygon, resolve each line segment on the exterior of the + Polygon. The holes within the polygon should be enclosed by the + exterior, and therefore should not contain an extreme point in + spatial extent. + * For a LineString resolve each line segment and return all points + along each segment. + * For a Point object return the input point. 
+ * For a shape with multiple geometries, recursively call this function + on each sub-geometry, flattening the multiple lists of points into a + single list. + + Later processing will try to determine the extents from these points, + but won't require the list of coordinates to distinguish between input + subgeometries, so a flattened list of all coordinates is returned. """ if isinstance(feature, Polygon): - resolved_points = get_resolved_geometry(list(feature.exterior.coords), - resolution) + resolved_points = get_resolved_geometry( + list(feature.exterior.coords), resolution + ) elif isinstance(feature, LineString): - resolved_points = get_resolved_geometry(list(feature.coords), - resolution, - is_closed=feature.is_closed) + resolved_points = get_resolved_geometry( + list(feature.coords), resolution, is_closed=feature.is_closed + ) elif isinstance(feature, Point): resolved_points = [(feature.x, feature.y)] elif isinstance(feature, get_args(MultiShape)): - resolved_points = flatten_list([ - get_resolved_feature(sub_geometry, resolution) - for sub_geometry in feature.geoms - ]) + resolved_points = flatten_list( + [ + get_resolved_feature(sub_geometry, resolution) + for sub_geometry in feature.geoms + ] + ) else: raise InvalidInputGeoJSON() return resolved_points -def get_resolved_geometry(geometry_points: List[Coordinates], - resolution: float, - is_closed: bool = True) -> List[Coordinates]: - """ Iterate through all pairs of consecutive points and ensure that, if - those points are further apart than the resolution of the input data, - additional points are placed along that edge at regular intervals. Each - line segment will have regular spacing, and will remain anchored at the - original start and end of the line segment. This means the spacing of - the points will have an upper bound of the supplied resolution, but may - be a shorter distance to account for non-integer multiples of the - resolution along the line. - - To avoid duplication of points, the last point of each line segment is - not retained, as this will match the first point of the next line - segment. For geometries that do not form a closed ring, - the final point of the geometry is appended to the full list of - resolved points to ensure all points are represented in the output. For - closed geometries, this is already present as the first returned point. +def get_resolved_geometry( + geometry_points: List[Coordinates], resolution: float, is_closed: bool = True +) -> List[Coordinates]: + """Iterate through all pairs of consecutive points and ensure that, if + those points are further apart than the resolution of the input data, + additional points are placed along that edge at regular intervals. Each + line segment will have regular spacing, and will remain anchored at the + original start and end of the line segment. This means the spacing of + the points will have an upper bound of the supplied resolution, but may + be a shorter distance to account for non-integer multiples of the + resolution along the line. + + To avoid duplication of points, the last point of each line segment is + not retained, as this will match the first point of the next line + segment. For geometries that do not form a closed ring, + the final point of the geometry is appended to the full list of + resolved points to ensure all points are represented in the output. For + closed geometries, this is already present as the first returned point. 
""" - new_points = [get_resolved_line(point_one, - geometry_points[point_one_index + 1], - resolution)[:-1] - for point_one_index, point_one - in enumerate(geometry_points[:-1])] + new_points = [ + get_resolved_line(point_one, geometry_points[point_one_index + 1], resolution)[ + :-1 + ] + for point_one_index, point_one in enumerate(geometry_points[:-1]) + ] if not is_closed: new_points.append([geometry_points[-1]]) @@ -340,16 +376,17 @@ def get_resolved_geometry(geometry_points: List[Coordinates], return flatten_list(new_points) -def get_resolved_line(point_one: Coordinates, point_two: Coordinates, - resolution: float) -> List[Coordinates]: - """ A function that takes two consecutive points from either an exterior - ring of a `shapely.geometry.Polygon` object or the coordinates of a - `LineString` object and places equally spaced points along that line - determined by the supplied geographic resolution. That resolution will - be determined by the gridded input data. +def get_resolved_line( + point_one: Coordinates, point_two: Coordinates, resolution: float +) -> List[Coordinates]: + """A function that takes two consecutive points from either an exterior + ring of a `shapely.geometry.Polygon` object or the coordinates of a + `LineString` object and places equally spaced points along that line + determined by the supplied geographic resolution. That resolution will + be determined by the gridded input data. - The resulting points will be appended to the rest of the ring, - ensuring the ring has points at a resolution of the gridded data. + The resulting points will be appended to the rest of the ring, + ensuring the ring has points at a resolution of the gridded data. """ distance = np.linalg.norm(np.array(point_two[:2]) - np.array(point_one[:2])) @@ -359,21 +396,24 @@ def get_resolved_line(point_one: Coordinates, point_two: Coordinates, return list(zip(new_x, new_y)) -def get_x_y_extents_from_geographic_points(points: List[Coordinates], - crs: CRS) -> Dict[str, float]: - """ Take an input list of (longitude, latitude) coordinates that define the - exterior of the input GeoJSON shape or bounding box, and project those - points to the target grid. Then return the minimum and maximum values - of those projected coordinates. +def get_x_y_extents_from_geographic_points( + points: List[Coordinates], crs: CRS +) -> Dict[str, float]: + """Take an input list of (longitude, latitude) coordinates that define the + exterior of the input GeoJSON shape or bounding box, and project those + points to the target grid. Then return the minimum and maximum values + of those projected coordinates. 
""" point_longitudes, point_latitudes = zip(*points) from_geo_transformer = Transformer.from_crs(4326, crs) - points_x, points_y = from_geo_transformer.transform( # pylint: disable=unpacking-non-sequence - point_latitudes, point_longitudes + points_x, points_y = ( # pylint: disable=unpacking-non-sequence + from_geo_transformer.transform(point_latitudes, point_longitudes) ) - return {'x_min': np.nanmin(points_x), - 'x_max': np.nanmax(points_x), - 'y_min': np.nanmin(points_y), - 'y_max': np.nanmax(points_y)} + return { + 'x_min': np.nanmin(points_x), + 'x_max': np.nanmax(points_x), + 'y_min': np.nanmin(points_y), + 'y_max': np.nanmax(points_y), + } diff --git a/hoss/spatial.py b/hoss/spatial.py index 7c3dfe1..91129fb 100644 --- a/hoss/spatial.py +++ b/hoss/spatial.py @@ -21,6 +21,7 @@ For example: [W, S, E, N] = [-20, -90, 20, 90] """ + from typing import List, Set from harmony.message import Message @@ -28,42 +29,54 @@ from numpy.ma.core import MaskedArray from varinfo import VarInfoFromDmr -from hoss.bbox_utilities import (BBox, get_harmony_message_bbox, - get_shape_file_geojson, get_geographic_bbox) -from hoss.dimension_utilities import (get_dimension_bounds, - get_dimension_extents, - get_dimension_index_range, IndexRange, - IndexRanges) -from hoss.projection_utilities import (get_projected_x_y_extents, - get_projected_x_y_variables, - get_variable_crs) - - -def get_spatial_index_ranges(required_variables: Set[str], - varinfo: VarInfoFromDmr, dimensions_path: str, - harmony_message: Message, - shape_file_path: str = None) -> IndexRanges: - """ Return a dictionary containing indices that correspond to the minimum - and maximum extents for all horizontal spatial coordinate variables - that support all end-user requested variables. This includes both - geographic and projected horizontal coordinates: - - index_ranges = {'/latitude': (12, 34), '/longitude': (56, 78), - '/x': (20, 42), '/y': (31, 53)} - - If geographic dimensions are present and only a shape file has been - specified, a minimally encompassing bounding box will be found in order - to determine the longitude and latitude extents. - - For projected grids, coordinate dimensions must be considered in x, y - pairs. The minimum and/or maximum values of geographically defined - shapes in the target projected grid may be midway along an exterior - edge of the shape, rather than a known coordinate vertex. For this - reason, a minimum grid resolution in geographic coordinates will be - determined for each projected coordinate variable pairs. The input - bounding box or shape file will be populated with additional points - around the exterior of the user-defined GeoJSON shape, to ensure the - correct extents are derived. +from hoss.bbox_utilities import ( + BBox, + get_harmony_message_bbox, + get_shape_file_geojson, + get_geographic_bbox, +) +from hoss.dimension_utilities import ( + get_dimension_bounds, + get_dimension_extents, + get_dimension_index_range, + IndexRange, + IndexRanges, +) +from hoss.projection_utilities import ( + get_projected_x_y_extents, + get_projected_x_y_variables, + get_variable_crs, +) + + +def get_spatial_index_ranges( + required_variables: Set[str], + varinfo: VarInfoFromDmr, + dimensions_path: str, + harmony_message: Message, + shape_file_path: str = None, +) -> IndexRanges: + """Return a dictionary containing indices that correspond to the minimum + and maximum extents for all horizontal spatial coordinate variables + that support all end-user requested variables. 
This includes both + geographic and projected horizontal coordinates: + + index_ranges = {'/latitude': (12, 34), '/longitude': (56, 78), + '/x': (20, 42), '/y': (31, 53)} + + If geographic dimensions are present and only a shape file has been + specified, a minimally encompassing bounding box will be found in order + to determine the longitude and latitude extents. + + For projected grids, coordinate dimensions must be considered in x, y + pairs. The minimum and/or maximum values of geographically defined + shapes in the target projected grid may be midway along an exterior + edge of the shape, rather than a known coordinate vertex. For this + reason, a minimum grid resolution in geographic coordinates will be + determined for each projected coordinate variable pairs. The input + bounding box or shape file will be populated with additional points + around the exterior of the user-defined GeoJSON shape, to ensure the + correct extents are derived. """ bounding_box = get_harmony_message_bbox(harmony_message) @@ -72,9 +85,7 @@ def get_spatial_index_ranges(required_variables: Set[str], geographic_dimensions = varinfo.get_geographic_spatial_dimensions( required_variables ) - projected_dimensions = varinfo.get_projected_spatial_dimensions( - required_variables - ) + projected_dimensions = varinfo.get_projected_spatial_dimensions(required_variables) non_spatial_variables = required_variables.difference( varinfo.get_spatial_dimensions(required_variables) ) @@ -94,36 +105,43 @@ def get_spatial_index_ranges(required_variables: Set[str], if len(projected_dimensions) > 0: for non_spatial_variable in non_spatial_variables: - index_ranges.update(get_projected_x_y_index_ranges( - non_spatial_variable, varinfo, dimensions_file, - index_ranges, bounding_box=bounding_box, - shape_file_path=shape_file_path - )) + index_ranges.update( + get_projected_x_y_index_ranges( + non_spatial_variable, + varinfo, + dimensions_file, + index_ranges, + bounding_box=bounding_box, + shape_file_path=shape_file_path, + ) + ) return index_ranges -def get_projected_x_y_index_ranges(non_spatial_variable: str, - varinfo: VarInfoFromDmr, - dimensions_file: Dataset, - index_ranges: IndexRanges, - bounding_box: BBox = None, - shape_file_path: str = None) -> IndexRanges: - """ This function returns a dictionary containing the minimum and maximum - index ranges for a pair of projection x and y coordinates, e.g.: - - index_ranges = {'/x': (20, 42), '/y': (31, 53)} - - First, the dimensions of the input, non-spatial variable are checked - for associated projection x and y coordinates. If these are present, - and they have not already been added to the `index_ranges` cache, the - extents of the input spatial subset are determined in these projected - coordinates. This requires the derivation of a minimum resolution of - the target grid in geographic coordinates. Points must be placed along - the exterior of the spatial subset shape. All points are then projected - from a geographic Coordinate Reference System (CRS) to the target grid - CRS. The minimum and maximum values are then derived from these - projected coordinate points. 
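To make the cached index ranges discussed above concrete, here is a hypothetical sketch of how such a dictionary feeds the OPeNDAP index notation built by `add_index_range` in hoss/dimension_utilities.py; the variable name is a stand-in.

```python
# Assumed output of the spatial index-range lookup for one granule:
index_ranges = {'/x': (20, 42), '/y': (31, 53)}

# A variable dimensioned (/y, /x) would then be requested with one
# "[min:max]" clause per dimension, in dimension order:
range_strings = [
    f'[{indices[0]}:{indices[1]}]'
    for indices in (index_ranges['/y'], index_ranges['/x'])
]
print('/science_variable' + ''.join(range_strings))
# /science_variable[31:53][20:42]
```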
+def get_projected_x_y_index_ranges( + non_spatial_variable: str, + varinfo: VarInfoFromDmr, + dimensions_file: Dataset, + index_ranges: IndexRanges, + bounding_box: BBox = None, + shape_file_path: str = None, +) -> IndexRanges: + """This function returns a dictionary containing the minimum and maximum + index ranges for a pair of projection x and y coordinates, e.g.: + + index_ranges = {'/x': (20, 42), '/y': (31, 53)} + + First, the dimensions of the input, non-spatial variable are checked + for associated projection x and y coordinates. If these are present, + and they have not already been added to the `index_ranges` cache, the + extents of the input spatial subset are determined in these projected + coordinates. This requires the derivation of a minimum resolution of + the target grid in geographic coordinates. Points must be placed along + the exterior of the spatial subset shape. All points are then projected + from a geographic Coordinate Reference System (CRS) to the target grid + CRS. The minimum and maximum values are then derived from these + projected coordinate points. """ projected_x, projected_y = get_projected_x_y_variables( @@ -131,52 +149,59 @@ def get_projected_x_y_index_ranges(non_spatial_variable: str, ) if ( - projected_x is not None and projected_y is not None - and not set((projected_x, projected_y)).issubset( - set(index_ranges.keys()) - ) + projected_x is not None + and projected_y is not None + and not set((projected_x, projected_y)).issubset(set(index_ranges.keys())) ): crs = get_variable_crs(non_spatial_variable, varinfo) x_y_extents = get_projected_x_y_extents( dimensions_file[projected_x][:], - dimensions_file[projected_y][:], crs, - shape_file=shape_file_path, bounding_box=bounding_box + dimensions_file[projected_y][:], + crs, + shape_file=shape_file_path, + bounding_box=bounding_box, ) x_bounds = get_dimension_bounds(projected_x, varinfo, dimensions_file) y_bounds = get_dimension_bounds(projected_y, varinfo, dimensions_file) x_index_ranges = get_dimension_index_range( - dimensions_file[projected_x][:], x_y_extents['x_min'], - x_y_extents['x_max'], bounds_values=x_bounds + dimensions_file[projected_x][:], + x_y_extents['x_min'], + x_y_extents['x_max'], + bounds_values=x_bounds, ) y_index_ranges = get_dimension_index_range( - dimensions_file[projected_y][:], x_y_extents['y_min'], - x_y_extents['y_max'], bounds_values=y_bounds + dimensions_file[projected_y][:], + x_y_extents['y_min'], + x_y_extents['y_max'], + bounds_values=y_bounds, ) - x_y_index_ranges = {projected_x: x_index_ranges, - projected_y: y_index_ranges} + x_y_index_ranges = {projected_x: x_index_ranges, projected_y: y_index_ranges} else: x_y_index_ranges = {} return x_y_index_ranges -def get_geographic_index_range(dimension: str, varinfo: VarInfoFromDmr, - dimensions_file: Dataset, - bounding_box: BBox) -> IndexRange: - """ Extract the indices that correspond to the minimum and maximum extents - for a specific geographic dimension (longitude or latitude). For - longitudes, it is assumed that the western extent should be considered - the minimum extent. If the bounding box crosses a longitude - discontinuity this will be later identified by the minimum extent index - being larger than the maximum extent index. +def get_geographic_index_range( + dimension: str, + varinfo: VarInfoFromDmr, + dimensions_file: Dataset, + bounding_box: BBox, +) -> IndexRange: + """Extract the indices that correspond to the minimum and maximum extents + for a specific geographic dimension (longitude or latitude). 
For +    longitudes, it is assumed that the western extent should be considered +    the minimum extent. If the bounding box crosses a longitude +    discontinuity, this will later be identified by the minimum extent index +    being larger than the maximum extent index. -        The return value from this function is an `IndexRange` tuple of format: -        (minimum_index, maximum_index). +    The return value from this function is an `IndexRange` tuple of format: +    (minimum_index, maximum_index). """ variable = varinfo.get_variable(dimension) @@ -202,44 +227,49 @@ def get_geographic_index_range(dimension: str, varinfo: VarInfoFromDmr, bounding_box, dimensions_file[dimension][:] ) -    return get_dimension_index_range(dimensions_file[dimension][:], -                                     minimum_extent, maximum_extent, -                                     bounds_values=bounds) +    return get_dimension_index_range( +        dimensions_file[dimension][:], +        minimum_extent, +        maximum_extent, +        bounds_values=bounds, +    ) -def get_bounding_box_longitudes(bounding_box: BBox, -                                longitude_array: MaskedArray) -> List[float]: -    """ Ensure the bounding box extents are compatible with the range of the -        longitude variable. The Harmony bounding box values are expressed in -        the range from -180 ≤ longitude (degrees east) ≤ 180, whereas some -        collections have grids with discontinuities at the Prime Meridian and -        others have sub-pixel wrap-around at the Antimeridian. +def get_bounding_box_longitudes( +    bounding_box: BBox, longitude_array: MaskedArray +) -> List[float]: +    """Ensure the bounding box extents are compatible with the range of the +    longitude variable. The Harmony bounding box values are expressed in +    the range from -180 ≤ longitude (degrees east) ≤ 180, whereas some +    collections have grids with discontinuities at the Prime Meridian and +    others have sub-pixel wrap-around at the Antimeridian. """ min_longitude, max_longitude = get_dimension_extents(longitude_array) -    western_box_extent = get_longitude_in_grid(min_longitude, max_longitude, -                                               bounding_box.west) -    eastern_box_extent = get_longitude_in_grid(min_longitude, max_longitude, -                                               bounding_box.east) +    western_box_extent = get_longitude_in_grid( +        min_longitude, max_longitude, bounding_box.west +    ) +    eastern_box_extent = get_longitude_in_grid( +        min_longitude, max_longitude, bounding_box.east +    ) return [western_box_extent, eastern_box_extent] -def get_longitude_in_grid(grid_min: float, grid_max: float, -                          longitude: float) -> float: -    """ Ensure that a longitude value from the bounding box extents is within -        the full longitude range of the grid. If it is not, check the same -        value +/- 360 degrees, to see if either of those is present in the -        grid. This function returns whichever of the three values lies -        within the grid. If none of these values are within the grid, then the -        original longitude value is returned. +def get_longitude_in_grid(grid_min: float, grid_max: float, longitude: float) -> float: +    """Ensure that a longitude value from the bounding box extents is within +    the full longitude range of the grid. If it is not, check the same +    value +/- 360 degrees, to see if either of those is present in the +    grid. This function returns whichever of the three values lies +    within the grid. If none of these values are within the grid, then the +    original longitude value is returned. -        This functionality is used for grids where the longitude values are not -        -180 ≤ longitude (degrees east) ≤ 180.
This includes: - * RSSMIF16D: 0 ≤ longitude (degrees east) ≤ 360. - * MERRA-2 products: -180.3125 ≤ longitude (degrees east) ≤ 179.6875. + * RSSMIF16D: 0 ≤ longitude (degrees east) ≤ 360. + * MERRA-2 products: -180.3125 ≤ longitude (degrees east) ≤ 179.6875. """ decremented_longitude = longitude - 360 diff --git a/hoss/subset.py b/hoss/subset.py index d30c9c8..fb5f740 100644 --- a/hoss/subset.py +++ b/hoss/subset.py @@ -4,6 +4,7 @@ `hoss.adapter.HossAdapter` class. """ + from logging import Logger from typing import List, Set @@ -15,68 +16,85 @@ from varinfo import VarInfoFromDmr from hoss.bbox_utilities import get_request_shape_file -from hoss.dimension_utilities import (add_index_range, get_fill_slice, - IndexRanges, is_index_subset, - get_requested_index_ranges, - prefetch_dimension_variables) +from hoss.dimension_utilities import ( + add_index_range, + get_fill_slice, + IndexRanges, + is_index_subset, + get_requested_index_ranges, + prefetch_dimension_variables, +) from hoss.spatial import get_spatial_index_ranges from hoss.temporal import get_temporal_index_ranges -from hoss.utilities import (download_url, format_variable_set_string, - get_opendap_nc4) - - -def subset_granule(opendap_url: str, harmony_source: Source, output_dir: str, - harmony_message: Message, logger: Logger, - config: Config) -> str: - """ This function is the main business logic for retrieving a variable, - spatial, temporal and/or named-dimension subset from OPeNDAP. - - Variable dependencies are extracted from a `varinfo.VarInfoFromDmr` - instance that is based on the `.dmr` file for the granule as obtained - from OPeNDAP. The full set of returned variables will include those - requested by the end-user, and additional variables required to support - those requested (e.g., grid dimension variables or CF-Convention - metadata references). - - When the input Harmony message specifies a bounding box, shape file or - named dimensions that require index-range subsetting, dimension - variables will first be retrieved in a "prefetch" request to OPeNDAP. - Then the bounding-box or shape file extents are converted to - index-ranges. Similar behaviour occurs when a temporal range is - requested by the end user, determining the indices of the temporal - dimension from the prefetch response. - - Once the required variables, and index-ranges if needed, are derived, - a request is made to OPeNDAP to retrieve only the requested data. +from hoss.utilities import download_url, format_variable_set_string, get_opendap_nc4 + + +def subset_granule( + opendap_url: str, + harmony_source: Source, + output_dir: str, + harmony_message: Message, + logger: Logger, + config: Config, +) -> str: + """This function is the main business logic for retrieving a variable, + spatial, temporal and/or named-dimension subset from OPeNDAP. + + Variable dependencies are extracted from a `varinfo.VarInfoFromDmr` + instance that is based on the `.dmr` file for the granule as obtained + from OPeNDAP. The full set of returned variables will include those + requested by the end-user, and additional variables required to support + those requested (e.g., grid dimension variables or CF-Convention + metadata references). + + When the input Harmony message specifies a bounding box, shape file or + named dimensions that require index-range subsetting, dimension + variables will first be retrieved in a "prefetch" request to OPeNDAP. + Then the bounding-box or shape file extents are converted to + index-ranges. 
Similar behaviour occurs when a temporal range is + requested by the end user, determining the indices of the temporal + dimension from the prefetch response. + + Once the required variables, and index-ranges if needed, are derived, + a request is made to OPeNDAP to retrieve only the requested data. """ # Determine if index range subsetting will be required: request_is_index_subset = is_index_subset(harmony_message) # Produce map of variable dependencies with `earthdata-varinfo` and `.dmr`. - varinfo = get_varinfo(opendap_url, output_dir, logger, - harmony_source.shortName, - harmony_message.accessToken, config) + varinfo = get_varinfo( + opendap_url, + output_dir, + logger, + harmony_source.shortName, + harmony_message.accessToken, + config, + ) # Obtain a list of all variables for the subset, including those used as # references by the requested variables. - required_variables = get_required_variables(varinfo, - harmony_source.variables, - request_is_index_subset, - logger) - logger.info('All required variables: ' - f'{format_variable_set_string(required_variables)}') + required_variables = get_required_variables( + varinfo, harmony_source.variables, request_is_index_subset, logger + ) + logger.info( + 'All required variables: ' f'{format_variable_set_string(required_variables)}' + ) # Define a cache to store all dimension index ranges (spatial, temporal): index_ranges = {} if request_is_index_subset: # Prefetch all dimension variables in full: - dimensions_path = prefetch_dimension_variables(opendap_url, varinfo, - required_variables, - output_dir, logger, - harmony_message.accessToken, - config) + dimensions_path = prefetch_dimension_variables( + opendap_url, + varinfo, + required_variables, + output_dir, + logger, + harmony_message.accessToken, + config, + ) # Note regarding precedence of user requests ... # We handle the general dimension request first, in case the @@ -88,10 +106,11 @@ def subset_granule(opendap_url: str, harmony_source: Source, output_dir: str, # dimension(s). This will convert the requested min and max # values to array indices in the proper order. Each item in # the dimension request is a list: [name, min, max] - index_ranges.update(get_requested_index_ranges(required_variables, - varinfo, - dimensions_path, - harmony_message)) + index_ranges.update( + get_requested_index_ranges( + required_variables, varinfo, dimensions_path, harmony_message + ) + ) if ( rgetattr(harmony_message, 'subset.bbox', None) is not None @@ -99,37 +118,48 @@ def subset_granule(opendap_url: str, harmony_source: Source, output_dir: str, ): # Update `index_ranges` cache with ranges for horizontal grid # dimension variables (geographic and projected). - shape_file_path = get_request_shape_file(harmony_message, - output_dir, logger, - config) - index_ranges.update(get_spatial_index_ranges(required_variables, - varinfo, - dimensions_path, - harmony_message, - shape_file_path)) + shape_file_path = get_request_shape_file( + harmony_message, output_dir, logger, config + ) + index_ranges.update( + get_spatial_index_ranges( + required_variables, + varinfo, + dimensions_path, + harmony_message, + shape_file_path, + ) + ) if harmony_message.temporal is not None: # Update `index_ranges` cache with ranges for temporal # variables. This will convert information from the temporal range # to array indices for each temporal dimension. 
- index_ranges.update(get_temporal_index_ranges(required_variables, - varinfo, - dimensions_path, - harmony_message)) + index_ranges.update( + get_temporal_index_ranges( + required_variables, varinfo, dimensions_path, harmony_message + ) + ) # Add any range indices to variable names for DAP4 constraint expression. variables_with_ranges = set( add_index_range(variable, varinfo, index_ranges) for variable in required_variables ) - logger.info('variables_with_ranges: ' - f'{format_variable_set_string(variables_with_ranges)}') + logger.info( + 'variables_with_ranges: ' f'{format_variable_set_string(variables_with_ranges)}' + ) # Retrieve OPeNDAP data including only the specified variables in the # specified ranges. - output_path = get_opendap_nc4(opendap_url, variables_with_ranges, - output_dir, logger, - harmony_message.accessToken, config) + output_path = get_opendap_nc4( + opendap_url, + variables_with_ranges, + output_dir, + logger, + harmony_message.accessToken, + config, + ) # Fill the data outside the requested ranges for variables that cross a # dimensional discontinuity (for example longitude and the anti-meridian). @@ -138,102 +168,132 @@ def subset_granule(opendap_url: str, harmony_source: Source, output_dir: str, return output_path -def get_varinfo(opendap_url: str, output_dir: str, logger: Logger, - collection_short_name: str, access_token: str, - config: Config) -> str: - """ Retrieve the `.dmr` from OPeNDAP and use `earthdata-varinfo` to - populate a representation of the granule that maps dependencies between - variables. +def get_varinfo( + opendap_url: str, + output_dir: str, + logger: Logger, + collection_short_name: str, + access_token: str, + config: Config, +) -> str: + """Retrieve the `.dmr` from OPeNDAP and use `earthdata-varinfo` to + populate a representation of the granule that maps dependencies between + variables. """ - dmr_path = download_url(f'{opendap_url}.dmr.xml', output_dir, logger, - access_token=access_token, config=config) - return VarInfoFromDmr(dmr_path, short_name=collection_short_name, - config_file='hoss/hoss_config.json') - - -def get_required_variables(varinfo: VarInfoFromDmr, - variables: List[HarmonyVariable], - request_is_index_subset: bool, - logger: Logger) -> Set[str]: - """ Iterate through all requested variables from the Harmony message and - extract their full paths. Then use the - `VarInfoFromDmr.get_required_variables` method to also return all those - variables that are required to support - - If index range subsetting is required, but no variables are specified - (e.g., all variables are requested) then the requested variables should - be set to all variables (science and non-science), so that index-range - subsets can be specified in a DAP4 constraint expression. + dmr_path = download_url( + f'{opendap_url}.dmr.xml', + output_dir, + logger, + access_token=access_token, + config=config, + ) + return VarInfoFromDmr( + dmr_path, short_name=collection_short_name, config_file='hoss/hoss_config.json' + ) + + +def get_required_variables( + varinfo: VarInfoFromDmr, + variables: List[HarmonyVariable], + request_is_index_subset: bool, + logger: Logger, +) -> Set[str]: + """Iterate through all requested variables from the Harmony message and + extract their full paths. 
Then use the +    `VarInfoFromDmr.get_required_variables` method to also return all those +    variables that are required to support those requested. + +    If index range subsetting is required, but no variables are specified +    (e.g., all variables are requested), then the requested variables should +    be set to all variables (science and non-science), so that index-range +    subsets can be specified in a DAP4 constraint expression. """ -    requested_variables = set(variable.fullPath -                              if variable.fullPath.startswith('/') -                              else f'/{variable.fullPath}' -                              for variable in variables) +    requested_variables = set( +        ( +            variable.fullPath +            if variable.fullPath.startswith('/') +            else f'/{variable.fullPath}' +        ) +        for variable in variables +    ) if request_is_index_subset and len(requested_variables) == 0: requested_variables = varinfo.get_science_variables().union( varinfo.get_metadata_variables() ) -    logger.info('Requested variables: ' -                f'{format_variable_set_string(requested_variables)}') +    logger.info( +        'Requested variables: ' f'{format_variable_set_string(requested_variables)}' +    ) return varinfo.get_required_variables(requested_variables) -def fill_variables(output_path: str, varinfo: VarInfoFromDmr, -                   required_variables: Set[str], -                   index_ranges: IndexRanges) -> None: -    """ Check the index ranges for all dimension variables. If the minimum -        index is greater than the maximum index in the subset range, then the -        requested dimension range crossed an edge of the grid (e.g. longitude), -        and must be filled in between those values. +def fill_variables( +    output_path: str, +    varinfo: VarInfoFromDmr, +    required_variables: Set[str], +    index_ranges: IndexRanges, +) -> None: +    """Check the index ranges for all dimension variables. If the minimum +    index is greater than the maximum index in the subset range, then the +    requested dimension range crossed an edge of the grid (e.g. longitude), +    and must be filled in between those values. -        Note - longitude variables themselves will not be filled, to ensure -        valid grid coordinates at all points of the science variables. +    Note - longitude variables themselves will not be filled, to ensure +    valid grid coordinates at all points of the science variables. """ -    fill_ranges = {dimension: index_range -                   for dimension, index_range -                   in index_ranges.items() -                   if index_range[0] > index_range[1]} +    fill_ranges = { +        dimension: index_range +        for dimension, index_range in index_ranges.items() +        if index_range[0] > index_range[1] +    } dimensions_to_fill = set(fill_ranges) if len(dimensions_to_fill) > 0: with Dataset(output_path, 'a', format='NETCDF4') as output_dataset: for variable_path in required_variables: -                fill_variable(output_dataset, fill_ranges, varinfo, -                              variable_path, dimensions_to_fill) - - -def fill_variable(output_dataset: Dataset, fill_ranges: IndexRanges, -                  varinfo: VarInfoFromDmr, variable_path: str, -                  dimensions_to_fill: Set[str]) -> None: -    """ Check if the variable has dimensions that require filling. If so, -        and if the variable is not the longitude itself, fill the data outside -        of the requested dimension range using the `numpy.ma.masked` constant. -        The dimension variables should not be filled to ensure there are valid -        grid-dimension values for all pixels in the grid. - -        Conditions for filling: - -        * Variable is not the longitude dimension (currently the only dimension -          we expect to cross a grid edge). -        * Variable has at least one grid-dimension that crosses a grid edge.
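The wrap-around filling that `fill_variables` and `fill_variable` describe can be illustrated with a short sketch, assuming `numpy`; `fill_slice_for` here is a hypothetical stand-in for HOSS's `get_fill_slice`, not the service implementation:

```python
# Illustration only (assumes numpy): when a dimension's index range has
# minimum > maximum, the subset wrapped around the grid edge, and the
# data between the two indices is masked. `fill_slice_for` mimics the
# intent of `get_fill_slice` but is not the HOSS implementation.
import numpy as np
from numpy.ma import masked

def fill_slice_for(index_range):
    """Return the slice of unwanted data for one dimension."""
    minimum, maximum = index_range
    if minimum > maximum:
        return slice(maximum + 1, minimum)  # region outside the request
    return slice(None)  # non-wrapped ranges need no filling

wind_speed = np.ma.ones((4, 10))  # e.g., (latitude, longitude)
longitude_range = (7, 2)          # wrapped: crosses the grid edge
wind_speed[:, fill_slice_for(longitude_range)] = masked
# Columns 3-6 are now masked; columns 0-2 and 7-9 retain their data, and
# the longitude variable itself would be left unfilled.
```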
+ fill_variable( + output_dataset, + fill_ranges, + varinfo, + variable_path, + dimensions_to_fill, + ) + + +def fill_variable( + output_dataset: Dataset, + fill_ranges: IndexRanges, + varinfo: VarInfoFromDmr, + variable_path: str, + dimensions_to_fill: Set[str], +) -> None: + """Check if the variable has dimensions that require filling. If so, + and if the variable is not the longitude itself, fill the data outside + of the requested dimension range using the `numpy.ma.masked` constant. + The dimension variables should not be filled to ensure there are valid + grid-dimension values for all pixels in the grid. + + Conditions for filling: + + * Variable is not the longitude dimension (currently the only dimension + we expect to cross a grid edge). + * Variable has at least one grid-dimension that crosses a grid edge. """ variable = varinfo.get_variable(variable_path) if ( - not variable.is_longitude() - and len(dimensions_to_fill.intersection(variable.dimensions)) > 0 + not variable.is_longitude() + and len(dimensions_to_fill.intersection(variable.dimensions)) > 0 ): fill_index_tuple = tuple( - get_fill_slice(dimension, fill_ranges) - for dimension in variable.dimensions + get_fill_slice(dimension, fill_ranges) for dimension in variable.dimensions ) output_dataset[variable_path][fill_index_tuple] = masked diff --git a/hoss/temporal.py b/hoss/temporal.py index de60087..a5a3559 100644 --- a/hoss/temporal.py +++ b/hoss/temporal.py @@ -7,6 +7,7 @@ be combined with any other index ranges (e.g., spatial). """ + from datetime import datetime, timedelta, timezone from typing import List, Set @@ -15,8 +16,11 @@ from netCDF4 import Dataset from varinfo import VarInfoFromDmr -from hoss.dimension_utilities import (get_dimension_bounds, - get_dimension_index_range, IndexRanges) +from hoss.dimension_utilities import ( + get_dimension_bounds, + get_dimension_index_range, + IndexRanges, +) from hoss.exceptions import UnsupportedTemporalUnits @@ -26,16 +30,19 @@ units_second = {'second', 'seconds', 'sec', 'secs', 's'} -def get_temporal_index_ranges(required_variables: Set[str], - varinfo: VarInfoFromDmr, dimensions_path: str, - harmony_message: Message) -> IndexRanges: - """ Iterate through the temporal dimension and extract the indices that - correspond to the minimum and maximum extents in that dimension. +def get_temporal_index_ranges( + required_variables: Set[str], + varinfo: VarInfoFromDmr, + dimensions_path: str, + harmony_message: Message, +) -> IndexRanges: + """Iterate through the temporal dimension and extract the indices that + correspond to the minimum and maximum extents in that dimension. 
- The return value from this function is a dictionary that contains the - index ranges for the time dimension, such as: + The return value from this function is a dictionary that contains the + index ranges for the time dimension, such as: - index_range = {'/time': [1, 5]} + index_range = {'/time': [1, 5]} """ index_ranges = {} @@ -58,17 +65,18 @@ def get_temporal_index_ranges(required_variables: Set[str], maximum_extent = (time_end - time_ref) / time_delta index_ranges[dimension] = get_dimension_index_range( - dimensions_file[dimension][:], minimum_extent, maximum_extent, - bounds_values=get_dimension_bounds(dimension, varinfo, - dimensions_file) + dimensions_file[dimension][:], + minimum_extent, + maximum_extent, + bounds_values=get_dimension_bounds(dimension, varinfo, dimensions_file), ) return index_ranges def get_datetime_with_timezone(timestring: str) -> datetime: - """ function to parse string to datetime, and ensure datetime is timezone - "aware". If a timezone is not supplied, it is assumed to be UTC. + """function to parse string to datetime, and ensure datetime is timezone + "aware". If a timezone is not supplied, it is assumed to be UTC. """ @@ -81,7 +89,7 @@ def get_datetime_with_timezone(timestring: str) -> datetime: def get_time_ref(units_time: str) -> List[datetime]: - """ Retrieve the reference time (epoch) and time step size. """ + """Retrieve the reference time (epoch) and time step size.""" unit, epoch_str = units_time.split(' since ') ref_time = get_datetime_with_timezone(epoch_str) diff --git a/hoss/utilities.py b/hoss/utilities.py index bde6ef0..4c0b9b0 100644 --- a/hoss/utilities.py +++ b/hoss/utilities.py @@ -3,6 +3,7 @@ allows finer-grained unit testing of each smaller part of functionality. """ + from logging import Logger from os import sep from os.path import splitext @@ -19,10 +20,10 @@ def get_file_mimetype(file_name: str) -> Tuple[Optional[str], Optional[str]]: - """ This function tries to infer the MIME type of a file string. If - the `mimetypes.guess_type` function cannot guess the MIME type of the - granule, a default value is returned, which assumes that the file is - a NetCDF-4 file. + """This function tries to infer the MIME type of a file string. If + the `mimetypes.guess_type` function cannot guess the MIME type of the + granule, a default value is returned, which assumes that the file is + a NetCDF-4 file. """ mimetype = mimetypes.guess_type(file_name, False) @@ -33,13 +34,19 @@ def get_file_mimetype(file_name: str) -> Tuple[Optional[str], Optional[str]]: return mimetype -def get_opendap_nc4(url: str, required_variables: Set[str], output_dir: str, - logger: Logger, access_token: str, config: Config) -> str: - """ Construct a semi-colon separated string of the required variables and - use as a constraint expression to retrieve those variables from - OPeNDAP. +def get_opendap_nc4( + url: str, + required_variables: Set[str], + output_dir: str, + logger: Logger, + access_token: str, + config: Config, +) -> str: + """Construct a semi-colon separated string of the required variables and + use as a constraint expression to retrieve those variables from + OPeNDAP. - Returns the path of the downloaded granule containing those variables. + Returns the path of the downloaded granule containing those variables. 
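As a worked example of the `(time - epoch) / time_delta` conversion performed in `get_temporal_index_ranges` above (the units string and timestamps are illustrative only, not from any real granule):

```python
# Worked example of the temporal conversion above; all values are
# illustrative. This mirrors parsing 'minutes since 2020-01-01 00:00:00'
# into a reference epoch and step size, as get_time_ref does.
from datetime import datetime, timedelta, timezone

time_ref = datetime(2020, 1, 1, tzinfo=timezone.utc)
time_delta = timedelta(minutes=1)

time_start = datetime(2020, 1, 1, 0, 30, tzinfo=timezone.utc)
time_end = datetime(2020, 1, 1, 2, 0, tzinfo=timezone.utc)

minimum_extent = (time_start - time_ref) / time_delta  # 30.0
maximum_extent = (time_end - time_ref) / time_delta    # 120.0
# These extents are then matched against the prefetched time values to
# yield an index range such as {'/time': (30, 120)}.
```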
""" constraint_expression = get_constraint_expression(required_variables) @@ -50,9 +57,14 @@ def get_opendap_nc4(url: str, required_variables: Set[str], output_dir: str, else: request_data = None - downloaded_nc4 = download_url(netcdf4_url, output_dir, logger, - access_token=access_token, config=config, - data=request_data) + downloaded_nc4 = download_url( + netcdf4_url, + output_dir, + logger, + access_token=access_token, + config=config, + data=request_data, + ) # Rename output file, to ensure repeated data downloads to OPeNDAP will be # respected by `harmony-service-lib-py`. @@ -60,21 +72,21 @@ def get_opendap_nc4(url: str, required_variables: Set[str], output_dir: str, def get_constraint_expression(variables: Set[str]) -> str: - """ Take a set of variables and return a URL encoded, semi-colon separated - DAP4 constraint expression to retrieve those variables. Each variable - may or may not specify their index ranges. + """Take a set of variables and return a URL encoded, semi-colon separated + DAP4 constraint expression to retrieve those variables. Each variable + may or may not specify their index ranges. """ return quote(';'.join(variables), safe='') def move_downloaded_nc4(output_dir: str, downloaded_file: str) -> str: - """ Change the basename of a NetCDF-4 file downloaded from OPeNDAP. The - `harmony-service-lib-py` produces a local filename that is a hex digest - of the requested URL only. If this filename is already present in the - local file system, `harmony-service-lib-py` assumes it does not need to - make another HTTP request, and just returns the constructed file path, - even if a POST request is being made with different parameters. + """Change the basename of a NetCDF-4 file downloaded from OPeNDAP. The + `harmony-service-lib-py` produces a local filename that is a hex digest + of the requested URL only. If this filename is already present in the + local file system, `harmony-service-lib-py` assumes it does not need to + make another HTTP request, and just returns the constructed file path, + even if a POST request is being made with different parameters. """ extension = splitext(downloaded_file)[1] or '.nc4' @@ -83,19 +95,24 @@ def move_downloaded_nc4(output_dir: str, downloaded_file: str) -> str: return new_filename -def download_url(url: str, destination: str, logger: Logger, - access_token: str = None, config: Config = None, - data=None) -> str: - """ Use built-in Harmony functionality to download from a URL. This is - expected to be used for obtaining the granule `.dmr`, a prefetch of - only dimensions and bounds variables, and the subsetted granule itself. +def download_url( + url: str, + destination: str, + logger: Logger, + access_token: str = None, + config: Config = None, + data=None, +) -> str: + """Use built-in Harmony functionality to download from a URL. This is + expected to be used for obtaining the granule `.dmr`, a prefetch of + only dimensions and bounds variables, and the subsetted granule itself. - OPeNDAP can return intermittent 500 errors. Retries will be performed - by inbuilt functionality in the `harmony-service-lib`. The OPeNDAP - errors are captured and re-raised as custom exceptions. + OPeNDAP can return intermittent 500 errors. Retries will be performed + by inbuilt functionality in the `harmony-service-lib`. The OPeNDAP + errors are captured and re-raised as custom exceptions. - The return value is the location in the file-store of the downloaded - content from the URL. 
+ The return value is the location in the file-store of the downloaded + content from the URL. """ logger.info(f'Downloading: {url}') @@ -105,12 +122,7 @@ def download_url(url: str, destination: str, logger: Logger, try: response = util_download( - url, - destination, - logger, - access_token=access_token, - data=data, - cfg=config + url, destination, logger, access_token=access_token, data=data, cfg=config ) except ForbiddenException as harmony_exception: raise UrlAccessFailed(url, 400) from harmony_exception @@ -123,25 +135,25 @@ def download_url(url: str, destination: str, logger: Logger, def format_variable_set_string(variable_set: Set[str]) -> str: - """ Take an input set of variable strings and return a string that does not - contain curly braces, for compatibility with Harmony logging. + """Take an input set of variable strings and return a string that does not + contain curly braces, for compatibility with Harmony logging. """ return ', '.join(variable_set) def format_dictionary_string(dictionary: Dict) -> str: - """ Take an input dictionary and return a string that does not contain - curly braces (assuming the dictionary is not nested, or doesn't contain - set values). + """Take an input dictionary and return a string that does not contain + curly braces (assuming the dictionary is not nested, or doesn't contain + set values). """ return '\n'.join([f'{key}: {value}' for key, value in dictionary.items()]) def get_value_or_default(value: Optional[float], default: float) -> float: - """ A helper function that will either return the value, if it is supplied, - or a default value if not. + """A helper function that will either return the value, if it is supplied, + or a default value if not. """ return value if value is not None else default diff --git a/tests/__init__.py b/tests/__init__.py index a2c1e57..681f36a 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,2 +1,3 @@ import os + os.environ['ENV'] = os.environ.get('ENV') or 'test' diff --git a/tests/test_adapter.py b/tests/test_adapter.py index 6ceff4c..929a6fb 100755 --- a/tests/test_adapter.py +++ b/tests/test_adapter.py @@ -5,6 +5,7 @@ requests were made to OPeNDAP. """ + from shutil import copy, rmtree from tempfile import mkdtemp from typing import Dict, Set @@ -25,10 +26,11 @@ class TestHossEndToEnd(TestCase): @classmethod def setUpClass(cls): - """ Test fixture that can be set once for all tests in the class. """ + """Test fixture that can be set once for all tests in the class.""" cls.granule_url = 'https://harmony.uat.earthdata.nasa.gov/opendap_url' - cls.input_stac = create_stac([Granule(cls.granule_url, None, - ['opendap', 'data'])]) + cls.input_stac = create_stac( + [Granule(cls.granule_url, None, ['opendap', 'data'])] + ) cls.atl03_variable = '/gt1r/geophys_corr/geoid' cls.gpm_variable = '/Grid/precipitationCal' cls.rssmif16d_variable = '/wind_speed' @@ -51,23 +53,24 @@ def setUpClass(cls): cls.atl16_dmr = file_handler.read() def setUp(self): - """ Have to mock mkdtemp, to know where to put mock .dmr content. """ + """Have to mock mkdtemp, to know where to put mock .dmr content.""" self.tmp_dir = mkdtemp() self.config = config(validate=False) def tearDown(self): rmtree(self.tmp_dir) - def assert_valid_request_data(self, request_data: Dict, - expected_variables: Set[str]): - """ Check the contents of the request data sent to the OPeNDAP server - when retrieving a NetCDF-4 file. 
This should ensure that a URL - encoded constraint expression was sent, and that all the expected - variables (potentially with index ranges) were included. + def assert_valid_request_data( + self, request_data: Dict, expected_variables: Set[str] + ): + """Check the contents of the request data sent to the OPeNDAP server + when retrieving a NetCDF-4 file. This should ensure that a URL + encoded constraint expression was sent, and that all the expected + variables (potentially with index ranges) were included. - This custom class method is used because the constraint expressions - are constructed from sets. The order of variables in the set, and - therefore the constraint expression string, cannot be guaranteed. + This custom class method is used because the constraint expressions + are constructed from sets. The order of variables in the set, and + therefore the constraint expression string, cannot be guaranteed. """ opendap_separator = '%3B' @@ -75,12 +78,12 @@ def assert_valid_request_data(self, request_data: Dict, requested_variables = set(request_data['dap4.ce'].split(opendap_separator)) self.assertSetEqual(requested_variables, expected_variables) - def assert_expected_output_catalog(self, catalog: Catalog, - expected_href: str, - expected_title: str): - """ Check the contents of the Harmony output STAC. It should have a - single data item, containing an asset with the supplied URL and - title. + def assert_expected_output_catalog( + self, catalog: Catalog, expected_href: str, expected_title: str + ): + """Check the contents of the Harmony output STAC. It should have a + single data item, containing an asset with the supplied URL and + title. """ items = list(catalog.get_items()) @@ -88,10 +91,12 @@ def assert_expected_output_catalog(self, catalog: Catalog, self.assertListEqual(list(items[0].assets.keys()), ['data']) self.assertDictEqual( items[0].assets['data'].to_dict(), - {'href': expected_href, - 'title': expected_title, - 'type': 'application/x-netcdf4', - 'roles': ['data']} + { + 'href': expected_href, + 'title': expected_title, + 'type': 'application/x-netcdf4', + 'roles': ['data'], + }, ) @patch('hoss.utilities.uuid4') @@ -99,12 +104,13 @@ def assert_expected_output_catalog(self, catalog: Catalog, @patch('shutil.rmtree') @patch('hoss.utilities.util_download') @patch('hoss.adapter.stage') - def test_non_spatial_end_to_end(self, mock_stage, mock_util_download, - mock_rmtree, mock_mkdtemp, mock_uuid): - """ Ensure HOSS will run end-to-end, only mocking the HTTP responses, - and the output interactions with Harmony. + def test_non_spatial_end_to_end( + self, mock_stage, mock_util_download, mock_rmtree, mock_mkdtemp, mock_uuid + ): + """Ensure HOSS will run end-to-end, only mocking the HTTP responses, + and the output interactions with Harmony. - This test should only perform a variable subset. + This test should only perform a variable subset. 
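A minimal sketch of the order-insensitive comparison `assert_valid_request_data` performs on the `dap4.ce` payload:

```python
# Minimal sketch of the order-insensitive check described above: the
# encoded expression is split on '%3B' (an encoded semi-colon) and
# compared as a set, because set iteration order is not guaranteed.
opendap_separator = '%3B'
request_data = {'dap4.ce': '%2Flongitude%3B%2Flatitude'}
requested_variables = set(request_data['dap4.ce'].split(opendap_separator))
assert requested_variables == {'%2Flatitude', '%2Flongitude'}
```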
""" expected_output_basename = 'opendap_url_gt1r_geophys_corr_geoid_subsetted.nc4' @@ -123,29 +129,37 @@ def test_non_spatial_end_to_end(self, mock_stage, mock_util_download, mock_util_download.side_effect = [dmr_path, downloaded_nc4_path] - message = Message({ - 'accessToken': 'fake-token', - 'callback': 'https://example.com/', - 'sources': [{ - 'collection': 'C1234567890-EEDTEST', - 'shortName': 'ATL03', - 'variables': [{'id': '', - 'name': self.atl03_variable, - 'fullPath': self.atl03_variable}]}], - 'stagingLocation': self.staging_location, - 'user': 'fhaise', - }) - - hoss = HossAdapter(message, config=config(False), - catalog=self.input_stac) + message = Message( + { + 'accessToken': 'fake-token', + 'callback': 'https://example.com/', + 'sources': [ + { + 'collection': 'C1234567890-EEDTEST', + 'shortName': 'ATL03', + 'variables': [ + { + 'id': '', + 'name': self.atl03_variable, + 'fullPath': self.atl03_variable, + } + ], + } + ], + 'stagingLocation': self.staging_location, + 'user': 'fhaise', + } + ) + + hoss = HossAdapter(message, config=config(False), catalog=self.input_stac) _, output_catalog = hoss.invoke() # Ensure that there is a single item in the output catalog with the # expected asset: - self.assert_expected_output_catalog(output_catalog, - expected_staged_url, - expected_output_basename) + self.assert_expected_output_catalog( + output_catalog, expected_staged_url, expected_output_basename + ) # Ensure the correct number of downloads were requested from OPeNDAP: # the first should be the `.dmr`. The second should be the required @@ -154,31 +168,49 @@ def test_non_spatial_end_to_end(self, mock_stage, mock_util_download, # their order cannot be guaranteed. Instead, `data` is matched to # `ANY`, and the constraint expression is tested separately. self.assertEqual(mock_util_download.call_count, 2) - mock_util_download.assert_has_calls([ - call(f'{self.granule_url}.dmr.xml', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=None, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - ]) + mock_util_download.assert_has_calls( + [ + call( + f'{self.granule_url}.dmr.xml', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=None, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + ] + ) # Ensure the constraint expression contains all the required variables. 
post_data = mock_util_download.call_args_list[1][1].get('data', {}) self.assert_valid_request_data( post_data, - {'%2Fgt1r%2Fgeolocation%2Fdelta_time', - '%2Fgt1r%2Fgeolocation%2Freference_photon_lon', - '%2Fgt1r%2Fgeolocation%2Fpodppd_flag', - '%2Fgt1r%2Fgeophys_corr%2Fdelta_time', - '%2Fgt1r%2Fgeolocation%2Freference_photon_lat', - '%2Fgt1r%2Fgeophys_corr%2Fgeoid'} + { + '%2Fgt1r%2Fgeolocation%2Fdelta_time', + '%2Fgt1r%2Fgeolocation%2Freference_photon_lon', + '%2Fgt1r%2Fgeolocation%2Fpodppd_flag', + '%2Fgt1r%2Fgeophys_corr%2Fdelta_time', + '%2Fgt1r%2Fgeolocation%2Freference_photon_lat', + '%2Fgt1r%2Fgeophys_corr%2Fgeoid', + }, ) # Ensure the output was staged with the expected file name - mock_stage.assert_called_once_with(f'{self.tmp_dir}/uuid.nc4', - expected_output_basename, - 'application/x-netcdf4', - location=self.staging_location, - logger=hoss.logger) + mock_stage.assert_called_once_with( + f'{self.tmp_dir}/uuid.nc4', + expected_output_basename, + 'application/x-netcdf4', + location=self.staging_location, + logger=hoss.logger, + ) mock_rmtree.assert_called_once_with(self.tmp_dir) @patch('hoss.dimension_utilities.get_fill_slice') @@ -187,13 +219,19 @@ def test_non_spatial_end_to_end(self, mock_stage, mock_util_download, @patch('shutil.rmtree') @patch('hoss.utilities.util_download') @patch('hoss.adapter.stage') - def test_geo_bbox_end_to_end(self, mock_stage, mock_util_download, - mock_rmtree, mock_mkdtemp, mock_uuid, - mock_get_fill_slice): - """ Ensure a request with a bounding box will be correctly processed - for a geographically gridded collection, requesting only the - expected variables, with index ranges corresponding to the bounding - box specified. + def test_geo_bbox_end_to_end( + self, + mock_stage, + mock_util_download, + mock_rmtree, + mock_mkdtemp, + mock_uuid, + mock_get_fill_slice, + ): + """Ensure a request with a bounding box will be correctly processed + for a geographically gridded collection, requesting only the + expected variables, with index ranges corresponding to the bounding + box specified. 
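The index ranges this test expects can be checked by hand, assuming the 0.25-degree RSSMIF16D grid layout used by the fixtures (cell centres at -89.875 to 89.875 latitude and 0.125 to 359.875 longitude). This simplified, centre-based filter reproduces the expected numbers, though HOSS itself also considers cell bounds:

```python
# Back-of-envelope check of the expected index ranges, assuming the
# fixtures' 0.25-degree grid with the cell centres noted above.
import numpy as np

latitudes = np.arange(-89.875, 90, 0.25)   # 720 ascending values
longitudes = np.arange(0.125, 360, 0.25)   # 1440 values

# Bounding box (-30, 45, -15, 60): longitudes wrap into the 0-360 grid.
lat_indices = np.nonzero((latitudes >= 45) & (latitudes <= 60))[0]
lon_indices = np.nonzero(
    (longitudes >= -30 + 360) & (longitudes <= -15 + 360)
)[0]

print(lat_indices.min(), lat_indices.max())  # 540 599
print(lon_indices.min(), lon_indices.max())  # 1320 1379
```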
""" expected_output_basename = 'opendap_url_wind_speed_subsetted.nc4' @@ -211,33 +249,40 @@ def test_geo_bbox_end_to_end(self, mock_stage, mock_util_download, all_variables_path = f'{self.tmp_dir}/variables.nc4' copy('tests/data/f16_ssmis_geo.nc', all_variables_path) - mock_util_download.side_effect = [dmr_path, dimensions_path, - all_variables_path] - - message = Message({ - 'accessToken': 'fake-token', - 'callback': 'https://example.com/', - 'sources': [{ - 'collection': 'C1234567890-EEDTEST', - 'shortName': 'RSSMIF16D', - 'variables': [{'id': '', - 'name': self.rssmif16d_variable, - 'fullPath': self.rssmif16d_variable}]}], - 'stagingLocation': self.staging_location, - 'subset': {'bbox': [-30, 45, -15, 60]}, - 'user': 'jlovell', - }) - - hoss = HossAdapter(message, config=config(False), - catalog=self.input_stac) + mock_util_download.side_effect = [dmr_path, dimensions_path, all_variables_path] + + message = Message( + { + 'accessToken': 'fake-token', + 'callback': 'https://example.com/', + 'sources': [ + { + 'collection': 'C1234567890-EEDTEST', + 'shortName': 'RSSMIF16D', + 'variables': [ + { + 'id': '', + 'name': self.rssmif16d_variable, + 'fullPath': self.rssmif16d_variable, + } + ], + } + ], + 'stagingLocation': self.staging_location, + 'subset': {'bbox': [-30, 45, -15, 60]}, + 'user': 'jlovell', + } + ) + + hoss = HossAdapter(message, config=config(False), catalog=self.input_stac) _, output_catalog = hoss.invoke() # Ensure that there is a single item in the output catalog with the # expected asset: - self.assert_expected_output_catalog(output_catalog, - expected_staged_url, - expected_output_basename) + self.assert_expected_output_catalog( + output_catalog, expected_staged_url, expected_output_basename + ) # Ensure the expected requests were made against OPeNDAP. # The first should be the `.dmr`. The second should fetch a NetCDF-4 @@ -248,14 +293,34 @@ def test_geo_bbox_end_to_end(self, mock_stage, mock_util_download, # Instead, `data` is matched to `ANY`, and the constraint expression is # tested separately. 
self.assertEqual(mock_util_download.call_count, 3) - mock_util_download.assert_has_calls([ - call(f'{self.granule_url}.dmr.xml', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=None, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - ]) + mock_util_download.assert_has_calls( + [ + call( + f'{self.granule_url}.dmr.xml', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=None, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + ] + ) # Ensure the constraint expression for dimensions data included only # geographic or temporal variables with no index ranges @@ -269,18 +334,22 @@ def test_geo_bbox_end_to_end(self, mock_stage, mock_util_download, index_range_data = mock_util_download.call_args_list[2][1].get('data', {}) self.assert_valid_request_data( index_range_data, - {'%2Ftime', - '%2Flatitude%5B540%3A599%5D', - '%2Flongitude%5B1320%3A1379%5D', - '%2Fwind_speed%5B%5D%5B540%3A599%5D%5B1320%3A1379%5D'} + { + '%2Ftime', + '%2Flatitude%5B540%3A599%5D', + '%2Flongitude%5B1320%3A1379%5D', + '%2Fwind_speed%5B%5D%5B540%3A599%5D%5B1320%3A1379%5D', + }, ) # Ensure the output was staged with the expected file name - mock_stage.assert_called_once_with(f'{self.tmp_dir}/uuid2.nc4', - expected_output_basename, - 'application/x-netcdf4', - location=self.staging_location, - logger=hoss.logger) + mock_stage.assert_called_once_with( + f'{self.tmp_dir}/uuid2.nc4', + expected_output_basename, + 'application/x-netcdf4', + location=self.staging_location, + logger=hoss.logger, + ) mock_rmtree.assert_called_once_with(self.tmp_dir) # Ensure no variables were filled @@ -292,16 +361,22 @@ def test_geo_bbox_end_to_end(self, mock_stage, mock_util_download, @patch('shutil.rmtree') @patch('hoss.utilities.util_download') @patch('hoss.adapter.stage') - def test_bbox_geo_descending_latitude(self, mock_stage, mock_util_download, - mock_rmtree, mock_mkdtemp, mock_uuid, - mock_get_fill_slice): - """ Ensure a request with a bounding box will be correctly processed, - for a geographically gridded collection, requesting only the - expected variables, with index ranges corresponding to the bounding - box specified. The latitude dimension returned from the geographic - dimensions request to OPeNDAP will be descending. This test is to - ensure the correct dimension indices are identified and the correct - DAP4 constraint expression is built. + def test_bbox_geo_descending_latitude( + self, + mock_stage, + mock_util_download, + mock_rmtree, + mock_mkdtemp, + mock_uuid, + mock_get_fill_slice, + ): + """Ensure a request with a bounding box will be correctly processed, + for a geographically gridded collection, requesting only the + expected variables, with index ranges corresponding to the bounding + box specified. The latitude dimension returned from the geographic + dimensions request to OPeNDAP will be descending. This test is to + ensure the correct dimension indices are identified and the correct + DAP4 constraint expression is built. 
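The same arithmetic, with the latitude values reversed to match this test's descending fixture (again a sketch under the assumed 0.25-degree grid, not HOSS code):

```python
# Same grid arithmetic as before, but with descending latitudes
# (89.875 down to -89.875), as this test's fixture provides:
import numpy as np

latitudes_descending = np.arange(89.875, -90, -0.25)
lat_indices = np.nonzero(
    (latitudes_descending >= 45) & (latitudes_descending <= 60)
)[0]
print(lat_indices.min(), lat_indices.max())  # 120 179
```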
""" expected_output_basename = 'opendap_url_wind_speed_subsetted.nc4' @@ -319,68 +394,100 @@ def test_bbox_geo_descending_latitude(self, mock_stage, mock_util_download, all_variables_path = f'{self.tmp_dir}/variables.nc4' copy('tests/data/f16_ssmis_geo_desc.nc', all_variables_path) - mock_util_download.side_effect = [dmr_path, dimensions_path, - all_variables_path] - - message = Message({ - 'accessToken': 'fake-token', - 'callback': 'https://example.com/', - 'sources': [{ - 'collection': 'C1234567890-EEDTEST', - 'shortName': 'RSSMIF16D', - 'variables': [{'id': '', - 'name': self.rssmif16d_variable, - 'fullPath': self.rssmif16d_variable}]}], - 'stagingLocation': self.staging_location, - 'subset': {'bbox': [-30, 45, -15, 60]}, - 'user': 'cduke', - }) - - hoss = HossAdapter(message, config=config(False), - catalog=self.input_stac) + mock_util_download.side_effect = [dmr_path, dimensions_path, all_variables_path] + + message = Message( + { + 'accessToken': 'fake-token', + 'callback': 'https://example.com/', + 'sources': [ + { + 'collection': 'C1234567890-EEDTEST', + 'shortName': 'RSSMIF16D', + 'variables': [ + { + 'id': '', + 'name': self.rssmif16d_variable, + 'fullPath': self.rssmif16d_variable, + } + ], + } + ], + 'stagingLocation': self.staging_location, + 'subset': {'bbox': [-30, 45, -15, 60]}, + 'user': 'cduke', + } + ) + + hoss = HossAdapter(message, config=config(False), catalog=self.input_stac) _, output_catalog = hoss.invoke() # Ensure that there is a single item in the output catalog with the # expected asset: - self.assert_expected_output_catalog(output_catalog, - expected_staged_url, - expected_output_basename) + self.assert_expected_output_catalog( + output_catalog, expected_staged_url, expected_output_basename + ) # Ensure the expected requests were made against OPeNDAP. # See related comment in self.test_geo_bbox_end_to_end self.assertEqual(mock_util_download.call_count, 3) - mock_util_download.assert_has_calls([ - call(f'{self.granule_url}.dmr.xml', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=None, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - ]) + mock_util_download.assert_has_calls( + [ + call( + f'{self.granule_url}.dmr.xml', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=None, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + ] + ) # Ensure the constraint expression for dimensions data included only # geographic or temporal variables with no index ranges dimensions_data = mock_util_download.call_args_list[1][1].get('data', {}) self.assert_valid_request_data( - dimensions_data, {'%2Flatitude', '%2Flongitude', '%2Ftime'}) + dimensions_data, {'%2Flatitude', '%2Flongitude', '%2Ftime'} + ) # Ensure the constraint expression contains all the required variables. 
# /wind_speed[][120:179][1320:1379], /time, /longitude[1320:1379] # /latitude[120:179] index_range_data = mock_util_download.call_args_list[2][1].get('data', {}) self.assert_valid_request_data( index_range_data, - {'%2Ftime', - '%2Flatitude%5B120%3A179%5D', - '%2Flongitude%5B1320%3A1379%5D', - '%2Fwind_speed%5B%5D%5B120%3A179%5D%5B1320%3A1379%5D'} + { + '%2Ftime', + '%2Flatitude%5B120%3A179%5D', + '%2Flongitude%5B1320%3A1379%5D', + '%2Fwind_speed%5B%5D%5B120%3A179%5D%5B1320%3A1379%5D', + }, ) # Ensure the output was staged with the expected file name - mock_stage.assert_called_once_with(f'{self.tmp_dir}/uuid2.nc4', - expected_output_basename, - 'application/x-netcdf4', - location=self.staging_location, - logger=hoss.logger) + mock_stage.assert_called_once_with( + f'{self.tmp_dir}/uuid2.nc4', + expected_output_basename, + 'application/x-netcdf4', + location=self.staging_location, + logger=hoss.logger, + ) mock_rmtree.assert_called_once_with(self.tmp_dir) # Ensure no variables were filled: @@ -391,13 +498,14 @@ def test_bbox_geo_descending_latitude(self, mock_stage, mock_util_download, @patch('shutil.rmtree') @patch('hoss.utilities.util_download') @patch('hoss.adapter.stage') - def test_geo_bbox_crossing_grid_edge(self, mock_stage, mock_util_download, - mock_rmtree, mock_mkdtemp, mock_uuid): - """ Ensure a request with a bounding box that crosses a longitude edge - (360 degrees east) requests the expected variables from OPeNDAP and - does so only in the expected latitude range. The full longitude - range should be requested for all variables, with filling applied - outside of the bounding box region. + def test_geo_bbox_crossing_grid_edge( + self, mock_stage, mock_util_download, mock_rmtree, mock_mkdtemp, mock_uuid + ): + """Ensure a request with a bounding box that crosses a longitude edge + (360 degrees east) requests the expected variables from OPeNDAP and + does so only in the expected latitude range. The full longitude + range should be requested for all variables, with filling applied + outside of the bounding box region. 
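Why this next test requests the full longitude range: the bounding box wraps across the grid's 0/360-degree edge, which the index arithmetic makes visible (a sketch under the same 0.25-degree grid assumption as above):

```python
# Sketch, under the same assumed 0.25-degree grid, of why this bounding
# box produces a wrapped longitude range:
import numpy as np

longitudes = np.arange(0.125, 360, 0.25)
west = -7.5 + 360  # 352.5: wrapped into the grid's 0-360 range
east = 7.5

west_index = np.nonzero(longitudes >= west)[0].min()  # 1410
east_index = np.nonzero(longitudes <= east)[0].max()  # 29

# The minimum index (1410) exceeds the maximum (29), so the range
# crosses the grid edge: HOSS requests the full longitude dimension and
# later masks columns 30..1409, which lie outside the request.
```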
""" expected_output_basename = 'opendap_url_wind_speed_subsetted.nc4' @@ -415,44 +523,71 @@ def test_geo_bbox_crossing_grid_edge(self, mock_stage, mock_util_download, unfilled_path = f'{self.tmp_dir}/variables.nc4' copy('tests/data/f16_ssmis_unfilled.nc', unfilled_path) - mock_util_download.side_effect = [dmr_path, dimensions_path, - unfilled_path] - - message = Message({ - 'accessToken': 'fake-token', - 'callback': 'https://example.com/', - 'sources': [{ - 'collection': 'C1234567890-EEDTEST', - 'shortName': 'RSSMIF16D', - 'variables': [{'id': '', - 'name': self.rssmif16d_variable, - 'fullPath': self.rssmif16d_variable}]}], - 'stagingLocation': self.staging_location, - 'subset': {'bbox': [-7.5, -60, 7.5, -45]}, - 'user': 'jswiggert', - }) - - hoss = HossAdapter(message, config=config(False), - catalog=self.input_stac) + mock_util_download.side_effect = [dmr_path, dimensions_path, unfilled_path] + + message = Message( + { + 'accessToken': 'fake-token', + 'callback': 'https://example.com/', + 'sources': [ + { + 'collection': 'C1234567890-EEDTEST', + 'shortName': 'RSSMIF16D', + 'variables': [ + { + 'id': '', + 'name': self.rssmif16d_variable, + 'fullPath': self.rssmif16d_variable, + } + ], + } + ], + 'stagingLocation': self.staging_location, + 'subset': {'bbox': [-7.5, -60, 7.5, -45]}, + 'user': 'jswiggert', + } + ) + + hoss = HossAdapter(message, config=config(False), catalog=self.input_stac) _, output_catalog = hoss.invoke() # Ensure that there is a single item in the output catalog with the # expected asset: - self.assert_expected_output_catalog(output_catalog, - expected_staged_url, - expected_output_basename) + self.assert_expected_output_catalog( + output_catalog, expected_staged_url, expected_output_basename + ) # Ensure the expected requests were made against OPeNDAP. 
# See related comment in self.test_geo_bbox_end_to_end self.assertEqual(mock_util_download.call_count, 3) - mock_util_download.assert_has_calls([ - call(f'{self.granule_url}.dmr.xml', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=None, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - ]) + mock_util_download.assert_has_calls( + [ + call( + f'{self.granule_url}.dmr.xml', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=None, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + ] + ) # Ensure the constraint expression for dimensions data included only # geographic or temporal variables with no index ranges @@ -466,18 +601,22 @@ def test_geo_bbox_crossing_grid_edge(self, mock_stage, mock_util_download, index_range_data = mock_util_download.call_args_list[2][1].get('data', {}) self.assert_valid_request_data( index_range_data, - {'%2Ftime', - '%2Flatitude%5B120%3A179%5D', - '%2Flongitude', - '%2Fwind_speed%5B%5D%5B120%3A179%5D%5B%5D'} + { + '%2Ftime', + '%2Flatitude%5B120%3A179%5D', + '%2Flongitude', + '%2Fwind_speed%5B%5D%5B120%3A179%5D%5B%5D', + }, ) # Ensure the output was staged with the expected file name - mock_stage.assert_called_once_with(f'{self.tmp_dir}/uuid2.nc4', - expected_output_basename, - 'application/x-netcdf4', - location=self.staging_location, - logger=hoss.logger) + mock_stage.assert_called_once_with( + f'{self.tmp_dir}/uuid2.nc4', + expected_output_basename, + 'application/x-netcdf4', + location=self.staging_location, + logger=hoss.logger, + ) mock_rmtree.assert_called_once_with(self.tmp_dir) # Ensure the final output was correctly filled (the unfilled file is @@ -487,8 +626,7 @@ def test_geo_bbox_crossing_grid_edge(self, mock_stage, mock_util_download, for variable_name, expected_variable in expected_output.variables.items(): self.assertIn(variable_name, actual_output.variables) - assert_array_equal(actual_output[variable_name][:], - expected_variable[:]) + assert_array_equal(actual_output[variable_name][:], expected_variable[:]) expected_output.close() actual_output.close() @@ -499,20 +637,27 @@ def test_geo_bbox_crossing_grid_edge(self, mock_stage, mock_util_download, @patch('shutil.rmtree') @patch('hoss.utilities.util_download') @patch('hoss.adapter.stage') - def test_geo_bbox(self, mock_stage, mock_util_download, mock_rmtree, - mock_mkdtemp, mock_uuid, mock_get_fill_slice): - """ Ensure requests with particular bounding box edge-cases return the - correct pixel ranges: - - * Single point, N=S, W=E, inside a pixel, retrieves that single - pixel. - * Single point, N=S, W=E, in corner of 4 pixels retrieves all 4 - surrounding pixels. - * Line, N=S, W < E, where the latitude is inside a pixel, retrieves - a single row of pixels. - * Line, N > S, W=E, where longitude is between pixels, retrieves - two columns of pixels, corresponding to those which touch the - line. 
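The pixel-snapping behaviour these edge cases exercise can be sketched with cell edges derived from the same assumed 0.25-degree cell centres; this is an illustration of the selection rule, not the HOSS bounds logic itself:

```python
# Sketch of the pixel selection these edge cases exercise, deriving
# cell edges from the assumed 0.25-degree cell centres: a point inside
# a pixel selects only that pixel, while a point exactly on a shared
# edge selects both neighbouring pixels.
import numpy as np

latitudes = np.arange(-89.875, 90, 0.25)
lower_edges = latitudes - 0.125
upper_edges = latitudes + 0.125

def touching_cells(value):
    """Indices of every cell whose closed extent contains `value`."""
    return np.nonzero((lower_edges <= value) & (value <= upper_edges))[0]

print(touching_cells(45.01))  # [540]      point inside one pixel
print(touching_cells(45.0))   # [539 540]  point on a pixel edge/corner
```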
+ def test_geo_bbox( + self, + mock_stage, + mock_util_download, + mock_rmtree, + mock_mkdtemp, + mock_uuid, + mock_get_fill_slice, + ): + """Ensure requests with particular bounding box edge-cases return the + correct pixel ranges: + + * Single point, N=S, W=E, inside a pixel, retrieves that single + pixel. + * Single point, N=S, W=E, in corner of 4 pixels retrieves all 4 + surrounding pixels. + * Line, N=S, W < E, where the latitude is inside a pixel, retrieves + a single row of pixels. + * Line, N > S, W=E, where longitude is between pixels, retrieves + two columns of pixels, corresponding to those which touch the + line. """ point_in_pixel = [-29.99, 45.01, -29.99, 45.01] @@ -521,42 +666,54 @@ def test_geo_bbox(self, mock_stage, mock_util_download, mock_rmtree, line_between_pixels = [-30, 45, -30, 60] range_point_in_pixel = { - '%2Ftime', '%2Flatitude%5B540%3A540%5D', + '%2Ftime', + '%2Flatitude%5B540%3A540%5D', '%2Flongitude%5B1320%3A1320%5D', - '%2Fwind_speed%5B%5D%5B540%3A540%5D%5B1320%3A1320%5D' + '%2Fwind_speed%5B%5D%5B540%3A540%5D%5B1320%3A1320%5D', } range_point_between_pixels = { - '%2Ftime', '%2Flatitude%5B539%3A540%5D', + '%2Ftime', + '%2Flatitude%5B539%3A540%5D', '%2Flongitude%5B1319%3A1320%5D', - '%2Fwind_speed%5B%5D%5B539%3A540%5D%5B1319%3A1320%5D' + '%2Fwind_speed%5B%5D%5B539%3A540%5D%5B1319%3A1320%5D', } range_line_in_pixels = { - '%2Ftime', '%2Flatitude%5B300%3A300%5D', + '%2Ftime', + '%2Flatitude%5B300%3A300%5D', '%2Flongitude%5B1320%3A1379%5D', - '%2Fwind_speed%5B%5D%5B300%3A300%5D%5B1320%3A1379%5D' + '%2Fwind_speed%5B%5D%5B300%3A300%5D%5B1320%3A1379%5D', } range_line_between_pixels = { - '%2Ftime', '%2Flatitude%5B540%3A599%5D', + '%2Ftime', + '%2Flatitude%5B540%3A599%5D', '%2Flongitude%5B1319%3A1320%5D', - '%2Fwind_speed%5B%5D%5B540%3A599%5D%5B1319%3A1320%5D' + '%2Fwind_speed%5B%5D%5B540%3A599%5D%5B1319%3A1320%5D', } - test_args = [['Point is inside single pixel', point_in_pixel, - range_point_in_pixel], - ['Point in corner of 4 pixels', point_between_pixels, - range_point_between_pixels], - ['Line through single row', line_in_pixels, - range_line_in_pixels], - ['Line between two columns', line_between_pixels, - range_line_between_pixels]] + test_args = [ + ['Point is inside single pixel', point_in_pixel, range_point_in_pixel], + [ + 'Point in corner of 4 pixels', + point_between_pixels, + range_point_between_pixels, + ], + ['Line through single row', line_in_pixels, range_line_in_pixels], + [ + 'Line between two columns', + line_between_pixels, + range_line_between_pixels, + ], + ] for description, bounding_box, expected_index_ranges in test_args: with self.subTest(description): expected_output_basename = 'opendap_url_wind_speed_subsetted.nc4' - expected_staged_url = f'{self.staging_location}{expected_output_basename}' + expected_staged_url = ( + f'{self.staging_location}{expected_output_basename}' + ) mock_uuid.side_effect = [Mock(hex='uuid'), Mock(hex='uuid2')] mock_mkdtemp.return_value = self.tmp_dir mock_stage.return_value = expected_staged_url @@ -569,58 +726,91 @@ def test_geo_bbox(self, mock_stage, mock_util_download, mock_rmtree, all_variables_path = f'{self.tmp_dir}/variables.nc4' copy('tests/data/f16_ssmis_geo.nc', all_variables_path) - mock_util_download.side_effect = [dmr_path, dimensions_path, - all_variables_path] + mock_util_download.side_effect = [ + dmr_path, + dimensions_path, + all_variables_path, + ] + + message = Message( + { + 'accessToken': 'fake-token', + 'callback': 'https://example.com/', + 'sources': [ + { + 'collection': 
'C1234567890-EEDTEST', + 'shortName': 'RSSMIF16D', + 'variables': [ + { + 'id': '', + 'name': self.rssmif16d_variable, + 'fullPath': self.rssmif16d_variable, + } + ], + } + ], + 'stagingLocation': self.staging_location, + 'subset': {'bbox': bounding_box}, + 'user': 'jaaron', + } + ) - message = Message({ - 'accessToken': 'fake-token', - 'callback': 'https://example.com/', - 'sources': [{ - 'collection': 'C1234567890-EEDTEST', - 'shortName': 'RSSMIF16D', - 'variables': [{'id': '', - 'name': self.rssmif16d_variable, - 'fullPath': self.rssmif16d_variable}]}], - 'stagingLocation': self.staging_location, - 'subset': {'bbox': bounding_box}, - 'user': 'jaaron', - }) - - hoss = HossAdapter(message, config=config(False), - catalog=self.input_stac) + hoss = HossAdapter( + message, config=config(False), catalog=self.input_stac + ) _, output_catalog = hoss.invoke() # Ensure that there is a single item in the output catalog with # the expected asset: - self.assert_expected_output_catalog(output_catalog, - expected_staged_url, - expected_output_basename) + self.assert_expected_output_catalog( + output_catalog, expected_staged_url, expected_output_basename + ) # Ensure the expected requests were made against OPeNDAP. # See related comment in self.test_geo_bbox_end_to_end self.assertEqual(mock_util_download.call_count, 3) - mock_util_download.assert_has_calls([ - call(f'{self.granule_url}.dmr.xml', self.tmp_dir, - hoss.logger, access_token=message.accessToken, - data=None, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, - hoss.logger, access_token=message.accessToken, - data=ANY, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, - hoss.logger, access_token=message.accessToken, - data=ANY, cfg=hoss.config), - ]) + mock_util_download.assert_has_calls( + [ + call( + f'{self.granule_url}.dmr.xml', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=None, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + ] + ) # Ensure the constraint expression for dimensions data included # only geographic or temporal variables with no index ranges - dimensions_data = mock_util_download.call_args_list[1][1].get('data', {}) + dimensions_data = mock_util_download.call_args_list[1][1].get( + 'data', {} + ) self.assert_valid_request_data( dimensions_data, {'%2Flatitude', '%2Flongitude', '%2Ftime'} ) # Ensure the constraint expression contains all the required variables. 
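+                # The expected names below are percent-encoded DAP4
+                # constraint variables ('%2F' = '/', '%5B' = '[',
+                # '%3A' = ':', '%5D' = ']'); for example,
+                # urllib.parse.unquote(
+                #     '%2Fwind_speed%5B%5D%5B540%3A540%5D%5B1320%3A1320%5D'
+                # ) returns '/wind_speed[][540:540][1320:1320]'.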
- index_range_data = mock_util_download.call_args_list[2][1].get('data', {}) - self.assert_valid_request_data(index_range_data, - expected_index_ranges) + index_range_data = mock_util_download.call_args_list[2][1].get( + 'data', {} + ) + self.assert_valid_request_data(index_range_data, expected_index_ranges) # Ensure the output was staged with the expected file name mock_stage.assert_called_once_with( @@ -628,7 +818,7 @@ def test_geo_bbox(self, mock_stage, mock_util_download, mock_rmtree, expected_output_basename, 'application/x-netcdf4', location=self.staging_location, - logger=hoss.logger + logger=hoss.logger, ) mock_rmtree.assert_called_once_with(self.tmp_dir) @@ -648,12 +838,18 @@ def test_geo_bbox(self, mock_stage, mock_util_download, mock_rmtree, @patch('shutil.rmtree') @patch('hoss.utilities.util_download') @patch('hoss.adapter.stage') - def test_spatial_bbox_no_variables(self, mock_stage, mock_util_download, - mock_rmtree, mock_mkdtemp, mock_uuid, - mock_get_fill_slice): - """ Ensure a request with a bounding box that does not specify any - variables will retrieve all variables, but limited to the range - specified by the bounding box. + def test_spatial_bbox_no_variables( + self, + mock_stage, + mock_util_download, + mock_rmtree, + mock_mkdtemp, + mock_uuid, + mock_get_fill_slice, + ): + """Ensure a request with a bounding box that does not specify any + variables will retrieve all variables, but limited to the range + specified by the bounding box. """ expected_output_basename = 'opendap_url_subsetted.nc4' @@ -671,40 +867,61 @@ def test_spatial_bbox_no_variables(self, mock_stage, mock_util_download, all_variables_path = f'{self.tmp_dir}/variables.nc4' copy('tests/data/f16_ssmis_geo_no_vars.nc', all_variables_path) - mock_util_download.side_effect = [dmr_path, dimensions_path, - all_variables_path] - - message = Message({ - 'accessToken': 'fake-token', - 'callback': 'https://example.com/', - 'sources': [{'collection': 'C1234567890-EEDTEST', - 'shortName': 'RSSMIF16D'}], - 'stagingLocation': self.staging_location, - 'subset': {'bbox': [-30, 45, -15, 60]}, - 'user': 'kerwinj', - }) - - hoss = HossAdapter(message, config=config(False), - catalog=self.input_stac) + mock_util_download.side_effect = [dmr_path, dimensions_path, all_variables_path] + + message = Message( + { + 'accessToken': 'fake-token', + 'callback': 'https://example.com/', + 'sources': [ + {'collection': 'C1234567890-EEDTEST', 'shortName': 'RSSMIF16D'} + ], + 'stagingLocation': self.staging_location, + 'subset': {'bbox': [-30, 45, -15, 60]}, + 'user': 'kerwinj', + } + ) + + hoss = HossAdapter(message, config=config(False), catalog=self.input_stac) _, output_catalog = hoss.invoke() # Ensure that there is a single item in the output catalog with the # expected asset: - self.assert_expected_output_catalog(output_catalog, - expected_staged_url, - expected_output_basename) + self.assert_expected_output_catalog( + output_catalog, expected_staged_url, expected_output_basename + ) # Ensure the expected requests were made against OPeNDAP. 
# See related comment in self.test_geo_bbox_end_to_end self.assertEqual(mock_util_download.call_count, 3) - mock_util_download.assert_has_calls([ - call(f'{self.granule_url}.dmr.xml', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=None, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - ]) + mock_util_download.assert_has_calls( + [ + call( + f'{self.granule_url}.dmr.xml', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=None, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + ] + ) # Ensure the constraint expression for dimensions data included only # geographic or temporal variables with no index ranges @@ -721,22 +938,26 @@ def test_spatial_bbox_no_variables(self, mock_stage, mock_util_download, index_range_data = mock_util_download.call_args_list[2][1].get('data', {}) self.assert_valid_request_data( index_range_data, - {'%2Ftime', - '%2Flatitude%5B540%3A599%5D', - '%2Flongitude%5B1320%3A1379%5D', - '%2Fatmosphere_cloud_liquid_water_content%5B%5D%5B540%3A599%5D%5B1320%3A1379%5D', - '%2Fatmosphere_water_vapor_content%5B%5D%5B540%3A599%5D%5B1320%3A1379%5D', - '%2Frainfall_rate%5B%5D%5B540%3A599%5D%5B1320%3A1379%5D', - '%2Fsst_dtime%5B%5D%5B540%3A599%5D%5B1320%3A1379%5D', - '%2Fwind_speed%5B%5D%5B540%3A599%5D%5B1320%3A1379%5D'} + { + '%2Ftime', + '%2Flatitude%5B540%3A599%5D', + '%2Flongitude%5B1320%3A1379%5D', + '%2Fatmosphere_cloud_liquid_water_content%5B%5D%5B540%3A599%5D%5B1320%3A1379%5D', + '%2Fatmosphere_water_vapor_content%5B%5D%5B540%3A599%5D%5B1320%3A1379%5D', + '%2Frainfall_rate%5B%5D%5B540%3A599%5D%5B1320%3A1379%5D', + '%2Fsst_dtime%5B%5D%5B540%3A599%5D%5B1320%3A1379%5D', + '%2Fwind_speed%5B%5D%5B540%3A599%5D%5B1320%3A1379%5D', + }, ) # Ensure the output was staged with the expected file name - mock_stage.assert_called_once_with(f'{self.tmp_dir}/uuid2.nc4', - expected_output_basename, - 'application/x-netcdf4', - location=self.staging_location, - logger=hoss.logger) + mock_stage.assert_called_once_with( + f'{self.tmp_dir}/uuid2.nc4', + expected_output_basename, + 'application/x-netcdf4', + location=self.staging_location, + logger=hoss.logger, + ) mock_rmtree.assert_called_once_with(self.tmp_dir) # Ensure no variables were filled: @@ -748,14 +969,20 @@ def test_spatial_bbox_no_variables(self, mock_stage, mock_util_download, @patch('shutil.rmtree') @patch('hoss.utilities.util_download') @patch('hoss.adapter.stage') - def test_temporal_end_to_end(self, mock_stage, mock_util_download, - mock_rmtree, mock_mkdtemp, mock_uuid, - mock_get_fill_slice): - """ Ensure a request with a temporal range will retrieve variables, - but limited to the range specified by the temporal range. - - The example granule has 24 hourly time slices, starting with - 2021-01-10T00:30:00. + def test_temporal_end_to_end( + self, + mock_stage, + mock_util_download, + mock_rmtree, + mock_mkdtemp, + mock_uuid, + mock_get_fill_slice, + ): + """Ensure a request with a temporal range will retrieve variables, + but limited to the range specified by the temporal range. 
+ + The example granule has 24 hourly time slices, starting with + 2021-01-10T00:30:00. """ expected_output_basename = 'opendap_url_PS_subsetted.nc4' @@ -773,69 +1000,94 @@ def test_temporal_end_to_end(self, mock_stage, mock_util_download, temporal_variables_path = f'{self.tmp_dir}/temporal_variables.nc4' copy('tests/data/M2T1NXSLV_temporal.nc4', temporal_variables_path) - mock_util_download.side_effect = [dmr_path, dimensions_path, - temporal_variables_path] - - message = Message({ - 'accessToken': 'fake-token', - 'callback': 'https://example.com/', - 'sources': [{ - 'collection': 'C1234567890-EEDTEST', - 'shortName': 'M2T1NXSLV', - 'variables': [{'id': '', - 'name': '/PS', - 'fullPath': '/PS'}]}], - 'stagingLocation': self.staging_location, - 'temporal': {'start': '2021-01-10T01:00:00', - 'end': '2021-01-10T03:00:00'}, - 'user': 'jyoung', - }) - - hoss = HossAdapter(message, config=config(False), - catalog=self.input_stac) + mock_util_download.side_effect = [ + dmr_path, + dimensions_path, + temporal_variables_path, + ] + + message = Message( + { + 'accessToken': 'fake-token', + 'callback': 'https://example.com/', + 'sources': [ + { + 'collection': 'C1234567890-EEDTEST', + 'shortName': 'M2T1NXSLV', + 'variables': [{'id': '', 'name': '/PS', 'fullPath': '/PS'}], + } + ], + 'stagingLocation': self.staging_location, + 'temporal': { + 'start': '2021-01-10T01:00:00', + 'end': '2021-01-10T03:00:00', + }, + 'user': 'jyoung', + } + ) + + hoss = HossAdapter(message, config=config(False), catalog=self.input_stac) _, output_catalog = hoss.invoke() # Ensure that there is a single item in the output catalog with the # expected asset: - self.assert_expected_output_catalog(output_catalog, - expected_staged_url, - expected_output_basename) + self.assert_expected_output_catalog( + output_catalog, expected_staged_url, expected_output_basename + ) # Ensure the expected requests were made against OPeNDAP. # See related comment in self.test_geo_bbox_end_to_end self.assertEqual(mock_util_download.call_count, 3) - mock_util_download.assert_has_calls([ - call(f'{self.granule_url}.dmr.xml', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=None, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - ]) + mock_util_download.assert_has_calls( + [ + call( + f'{self.granule_url}.dmr.xml', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=None, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + ] + ) # Ensure the constraint expression for dimensions data included only # geographic or temporal variables with no index ranges dimensions_data = mock_util_download.call_args_list[1][1].get('data', {}) - self.assert_valid_request_data(dimensions_data, - {'%2Flat', '%2Flon', '%2Ftime'}) + self.assert_valid_request_data(dimensions_data, {'%2Flat', '%2Flon', '%2Ftime'}) # Ensure the constraint expression contains all the required variables. 
# /PS[1:2][][], /time[1:2], /lon, /lat index_range_data = mock_util_download.call_args_list[2][1].get('data', {}) self.assert_valid_request_data( index_range_data, - {'%2Ftime%5B1%3A2%5D', - '%2Flat', - '%2Flon', - '%2FPS%5B1%3A2%5D%5B%5D%5B%5D'} + {'%2Ftime%5B1%3A2%5D', '%2Flat', '%2Flon', '%2FPS%5B1%3A2%5D%5B%5D%5B%5D'}, ) # Ensure the output was staged with the expected file name - mock_stage.assert_called_once_with(f'{self.tmp_dir}/uuid2.nc4', - expected_output_basename, - 'application/x-netcdf4', - location=self.staging_location, - logger=hoss.logger) + mock_stage.assert_called_once_with( + f'{self.tmp_dir}/uuid2.nc4', + expected_output_basename, + 'application/x-netcdf4', + location=self.staging_location, + logger=hoss.logger, + ) mock_rmtree.assert_called_once_with(self.tmp_dir) # Ensure no variables were filled @@ -847,18 +1099,24 @@ def test_temporal_end_to_end(self, mock_stage, mock_util_download, @patch('shutil.rmtree') @patch('hoss.utilities.util_download') @patch('hoss.adapter.stage') - def test_temporal_all_variables(self, mock_stage, mock_util_download, - mock_rmtree, mock_mkdtemp, mock_uuid, - mock_get_fill_slice): - """ Ensure a request with a temporal range and no specified variables - will retrieve the expected output. Note - because a temporal range - is specified, HOSS will need to perform an index range subset. This - means that the prefetch will still have to occur, and all variables - with the temporal grid dimension will need to include their index - ranges in the final DAP4 constraint expression. - - The example granule has 24 hourly time slices, starting with - 2021-01-10T00:30:00. + def test_temporal_all_variables( + self, + mock_stage, + mock_util_download, + mock_rmtree, + mock_mkdtemp, + mock_uuid, + mock_get_fill_slice, + ): + """Ensure a request with a temporal range and no specified variables + will retrieve the expected output. Note - because a temporal range + is specified, HOSS will need to perform an index range subset. This + means that the prefetch will still have to occur, and all variables + with the temporal grid dimension will need to include their index + ranges in the final DAP4 constraint expression. + + The example granule has 24 hourly time slices, starting with + 2021-01-10T00:30:00. 
""" expected_output_basename = 'opendap_url_subsetted.nc4' @@ -876,111 +1134,141 @@ def test_temporal_all_variables(self, mock_stage, mock_util_download, temporal_variables_path = f'{self.tmp_dir}/temporal_variables.nc4' copy('tests/data/M2T1NXSLV_temporal.nc4', temporal_variables_path) - mock_util_download.side_effect = [dmr_path, dimensions_path, - temporal_variables_path] - - message = Message({ - 'accessToken': 'fake-token', - 'callback': 'https://example.com/', - 'sources': [{'collection': 'C1234567890-EEDTEST', - 'shortName': 'M2T1NXSLV'}], - 'stagingLocation': self.staging_location, - 'subset': None, - 'temporal': {'start': '2021-01-10T01:00:00', - 'end': '2021-01-10T03:00:00'}, - 'user': 'jyoung', - }) - - hoss = HossAdapter(message, config=config(False), - catalog=self.input_stac) + mock_util_download.side_effect = [ + dmr_path, + dimensions_path, + temporal_variables_path, + ] + + message = Message( + { + 'accessToken': 'fake-token', + 'callback': 'https://example.com/', + 'sources': [ + {'collection': 'C1234567890-EEDTEST', 'shortName': 'M2T1NXSLV'} + ], + 'stagingLocation': self.staging_location, + 'subset': None, + 'temporal': { + 'start': '2021-01-10T01:00:00', + 'end': '2021-01-10T03:00:00', + }, + 'user': 'jyoung', + } + ) + + hoss = HossAdapter(message, config=config(False), catalog=self.input_stac) _, output_catalog = hoss.invoke() # Ensure that there is a single item in the output catalog with the # expected asset: - self.assert_expected_output_catalog(output_catalog, - expected_staged_url, - expected_output_basename) + self.assert_expected_output_catalog( + output_catalog, expected_staged_url, expected_output_basename + ) # Ensure the expected requests were made against OPeNDAP. # See related comment in self.test_geo_bbox_end_to_end self.assertEqual(mock_util_download.call_count, 3) - mock_util_download.assert_has_calls([ - call(f'{self.granule_url}.dmr.xml', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=None, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - ]) + mock_util_download.assert_has_calls( + [ + call( + f'{self.granule_url}.dmr.xml', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=None, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + ] + ) # Ensure the constraint expression for dimensions data included only # geographic or temporal variables with no index ranges dimensions_data = mock_util_download.call_args_list[1][1].get('data', {}) - self.assert_valid_request_data(dimensions_data, - {'%2Flat', '%2Flon', '%2Ftime'}) + self.assert_valid_request_data(dimensions_data, {'%2Flat', '%2Flon', '%2Ftime'}) # Ensure the constraint expression contains all the required variables. 
# /[1:2][][], /time[1:2], /lon, /lat index_range_data = mock_util_download.call_args_list[2][1].get('data', {}) self.assert_valid_request_data( index_range_data, - {'%2Ftime%5B1%3A2%5D', - '%2Flat', - '%2Flon', - '%2FCLDPRS%5B1%3A2%5D%5B%5D%5B%5D', - '%2FCLDTMP%5B1%3A2%5D%5B%5D%5B%5D', - '%2FDISPH%5B1%3A2%5D%5B%5D%5B%5D', - '%2FH1000%5B1%3A2%5D%5B%5D%5B%5D', - '%2FH250%5B1%3A2%5D%5B%5D%5B%5D', - '%2FH500%5B1%3A2%5D%5B%5D%5B%5D', - '%2FH850%5B1%3A2%5D%5B%5D%5B%5D', - '%2FPBLTOP%5B1%3A2%5D%5B%5D%5B%5D', - '%2FPS%5B1%3A2%5D%5B%5D%5B%5D', - '%2FOMEGA500%5B1%3A2%5D%5B%5D%5B%5D', - '%2FQ250%5B1%3A2%5D%5B%5D%5B%5D', - '%2FQ500%5B1%3A2%5D%5B%5D%5B%5D', - '%2FQ850%5B1%3A2%5D%5B%5D%5B%5D', - '%2FQV10M%5B1%3A2%5D%5B%5D%5B%5D', - '%2FQV2M%5B1%3A2%5D%5B%5D%5B%5D', - '%2FSLP%5B1%3A2%5D%5B%5D%5B%5D', - '%2FT10M%5B1%3A2%5D%5B%5D%5B%5D', - '%2FT250%5B1%3A2%5D%5B%5D%5B%5D', - '%2FT2M%5B1%3A2%5D%5B%5D%5B%5D', - '%2FT2MDEW%5B1%3A2%5D%5B%5D%5B%5D', - '%2FT2MWET%5B1%3A2%5D%5B%5D%5B%5D', - '%2FT500%5B1%3A2%5D%5B%5D%5B%5D', - '%2FT850%5B1%3A2%5D%5B%5D%5B%5D', - '%2FTO3%5B1%3A2%5D%5B%5D%5B%5D', - '%2FTOX%5B1%3A2%5D%5B%5D%5B%5D', - '%2FTQL%5B1%3A2%5D%5B%5D%5B%5D', - '%2FTQI%5B1%3A2%5D%5B%5D%5B%5D', - '%2FTQV%5B1%3A2%5D%5B%5D%5B%5D', - '%2FTROPPB%5B1%3A2%5D%5B%5D%5B%5D', - '%2FTROPPV%5B1%3A2%5D%5B%5D%5B%5D', - '%2FTROPQ%5B1%3A2%5D%5B%5D%5B%5D', - '%2FTROPT%5B1%3A2%5D%5B%5D%5B%5D', - '%2FTROPPT%5B1%3A2%5D%5B%5D%5B%5D', - '%2FTS%5B1%3A2%5D%5B%5D%5B%5D', - '%2FU10M%5B1%3A2%5D%5B%5D%5B%5D', - '%2FU250%5B1%3A2%5D%5B%5D%5B%5D', - '%2FU2M%5B1%3A2%5D%5B%5D%5B%5D', - '%2FU500%5B1%3A2%5D%5B%5D%5B%5D', - '%2FU50M%5B1%3A2%5D%5B%5D%5B%5D', - '%2FU850%5B1%3A2%5D%5B%5D%5B%5D', - '%2FV10M%5B1%3A2%5D%5B%5D%5B%5D', - '%2FV250%5B1%3A2%5D%5B%5D%5B%5D', - '%2FV2M%5B1%3A2%5D%5B%5D%5B%5D', - '%2FV500%5B1%3A2%5D%5B%5D%5B%5D', - '%2FV50M%5B1%3A2%5D%5B%5D%5B%5D', - '%2FV850%5B1%3A2%5D%5B%5D%5B%5D', - '%2FZLCL%5B1%3A2%5D%5B%5D%5B%5D'} + { + '%2Ftime%5B1%3A2%5D', + '%2Flat', + '%2Flon', + '%2FCLDPRS%5B1%3A2%5D%5B%5D%5B%5D', + '%2FCLDTMP%5B1%3A2%5D%5B%5D%5B%5D', + '%2FDISPH%5B1%3A2%5D%5B%5D%5B%5D', + '%2FH1000%5B1%3A2%5D%5B%5D%5B%5D', + '%2FH250%5B1%3A2%5D%5B%5D%5B%5D', + '%2FH500%5B1%3A2%5D%5B%5D%5B%5D', + '%2FH850%5B1%3A2%5D%5B%5D%5B%5D', + '%2FPBLTOP%5B1%3A2%5D%5B%5D%5B%5D', + '%2FPS%5B1%3A2%5D%5B%5D%5B%5D', + '%2FOMEGA500%5B1%3A2%5D%5B%5D%5B%5D', + '%2FQ250%5B1%3A2%5D%5B%5D%5B%5D', + '%2FQ500%5B1%3A2%5D%5B%5D%5B%5D', + '%2FQ850%5B1%3A2%5D%5B%5D%5B%5D', + '%2FQV10M%5B1%3A2%5D%5B%5D%5B%5D', + '%2FQV2M%5B1%3A2%5D%5B%5D%5B%5D', + '%2FSLP%5B1%3A2%5D%5B%5D%5B%5D', + '%2FT10M%5B1%3A2%5D%5B%5D%5B%5D', + '%2FT250%5B1%3A2%5D%5B%5D%5B%5D', + '%2FT2M%5B1%3A2%5D%5B%5D%5B%5D', + '%2FT2MDEW%5B1%3A2%5D%5B%5D%5B%5D', + '%2FT2MWET%5B1%3A2%5D%5B%5D%5B%5D', + '%2FT500%5B1%3A2%5D%5B%5D%5B%5D', + '%2FT850%5B1%3A2%5D%5B%5D%5B%5D', + '%2FTO3%5B1%3A2%5D%5B%5D%5B%5D', + '%2FTOX%5B1%3A2%5D%5B%5D%5B%5D', + '%2FTQL%5B1%3A2%5D%5B%5D%5B%5D', + '%2FTQI%5B1%3A2%5D%5B%5D%5B%5D', + '%2FTQV%5B1%3A2%5D%5B%5D%5B%5D', + '%2FTROPPB%5B1%3A2%5D%5B%5D%5B%5D', + '%2FTROPPV%5B1%3A2%5D%5B%5D%5B%5D', + '%2FTROPQ%5B1%3A2%5D%5B%5D%5B%5D', + '%2FTROPT%5B1%3A2%5D%5B%5D%5B%5D', + '%2FTROPPT%5B1%3A2%5D%5B%5D%5B%5D', + '%2FTS%5B1%3A2%5D%5B%5D%5B%5D', + '%2FU10M%5B1%3A2%5D%5B%5D%5B%5D', + '%2FU250%5B1%3A2%5D%5B%5D%5B%5D', + '%2FU2M%5B1%3A2%5D%5B%5D%5B%5D', + '%2FU500%5B1%3A2%5D%5B%5D%5B%5D', + '%2FU50M%5B1%3A2%5D%5B%5D%5B%5D', + '%2FU850%5B1%3A2%5D%5B%5D%5B%5D', + '%2FV10M%5B1%3A2%5D%5B%5D%5B%5D', + '%2FV250%5B1%3A2%5D%5B%5D%5B%5D', + '%2FV2M%5B1%3A2%5D%5B%5D%5B%5D', + 
'%2FV500%5B1%3A2%5D%5B%5D%5B%5D', + '%2FV50M%5B1%3A2%5D%5B%5D%5B%5D', + '%2FV850%5B1%3A2%5D%5B%5D%5B%5D', + '%2FZLCL%5B1%3A2%5D%5B%5D%5B%5D', + }, ) # Ensure the output was staged with the expected file name - mock_stage.assert_called_once_with(f'{self.tmp_dir}/uuid2.nc4', - expected_output_basename, - 'application/x-netcdf4', - location=self.staging_location, - logger=hoss.logger) + mock_stage.assert_called_once_with( + f'{self.tmp_dir}/uuid2.nc4', + expected_output_basename, + 'application/x-netcdf4', + location=self.staging_location, + logger=hoss.logger, + ) mock_rmtree.assert_called_once_with(self.tmp_dir) # Ensure no variables were filled @@ -992,12 +1280,18 @@ def test_temporal_all_variables(self, mock_stage, mock_util_download, @patch('shutil.rmtree') @patch('hoss.utilities.util_download') @patch('hoss.adapter.stage') - def test_bbox_temporal_end_to_end(self, mock_stage, mock_util_download, - mock_rmtree, mock_mkdtemp, mock_uuid, - mock_get_fill_slice): - """ Ensure a request with both a bounding box and a temporal range will - retrieve variables, but limited to the ranges specified by the - bounding box and the temporal range. + def test_bbox_temporal_end_to_end( + self, + mock_stage, + mock_util_download, + mock_rmtree, + mock_mkdtemp, + mock_uuid, + mock_get_fill_slice, + ): + """Ensure a request with both a bounding box and a temporal range will + retrieve variables, but limited to the ranges specified by the + bounding box and the temporal range. """ expected_output_basename = 'opendap_url_PS_subsetted.nc4' @@ -1015,69 +1309,95 @@ def test_bbox_temporal_end_to_end(self, mock_stage, mock_util_download, geo_temporal_path = f'{self.tmp_dir}/geo_temporal.nc4' copy('tests/data/M2T1NXSLV_temporal.nc4', geo_temporal_path) - mock_util_download.side_effect = [dmr_path, dimensions_path, - geo_temporal_path] - - message = Message({ - 'accessToken': 'fake-token', - 'callback': 'https://example.com/', - 'sources': [{ - 'collection': 'C1234567890-EEDTEST', - 'shortName': 'M2T1NXSLV', - 'variables': [{'id': '', - 'name': '/PS', - 'fullPath': '/PS'}]}], - 'stagingLocation': self.staging_location, - 'subset': {'bbox': [40, -30, 50, -20]}, - 'temporal': {'start': '2021-01-10T01:00:00', - 'end': '2021-01-10T03:00:00'}, - 'user': 'jyoung', - }) - - hoss = HossAdapter(message, config=config(False), - catalog=self.input_stac) + mock_util_download.side_effect = [dmr_path, dimensions_path, geo_temporal_path] + + message = Message( + { + 'accessToken': 'fake-token', + 'callback': 'https://example.com/', + 'sources': [ + { + 'collection': 'C1234567890-EEDTEST', + 'shortName': 'M2T1NXSLV', + 'variables': [{'id': '', 'name': '/PS', 'fullPath': '/PS'}], + } + ], + 'stagingLocation': self.staging_location, + 'subset': {'bbox': [40, -30, 50, -20]}, + 'temporal': { + 'start': '2021-01-10T01:00:00', + 'end': '2021-01-10T03:00:00', + }, + 'user': 'jyoung', + } + ) + + hoss = HossAdapter(message, config=config(False), catalog=self.input_stac) _, output_catalog = hoss.invoke() # Ensure that there is a single item in the output catalog with the # expected asset: - self.assert_expected_output_catalog(output_catalog, - expected_staged_url, - expected_output_basename) + self.assert_expected_output_catalog( + output_catalog, expected_staged_url, expected_output_basename + ) # Ensure the expected requests were made against OPeNDAP. 
# See related comment in self.test_geo_bbox_end_to_end self.assertEqual(mock_util_download.call_count, 3) - mock_util_download.assert_has_calls([ - call(f'{self.granule_url}.dmr.xml', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=None, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - ]) + mock_util_download.assert_has_calls( + [ + call( + f'{self.granule_url}.dmr.xml', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=None, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + ] + ) # Ensure the constraint expression for dimensions data included only # geographic or temporal variables with no index ranges dimensions_data = mock_util_download.call_args_list[1][1].get('data', {}) - self.assert_valid_request_data(dimensions_data, - {'%2Flat', '%2Flon', '%2Ftime'}) + self.assert_valid_request_data(dimensions_data, {'%2Flat', '%2Flon', '%2Ftime'}) # Ensure the constraint expression contains all the required variables. # /PS[1:2][120:140][352:368], /time[1:2], /lon[352:368], /lat[120:140] index_range_data = mock_util_download.call_args_list[2][1].get('data', {}) self.assert_valid_request_data( index_range_data, - {'%2Ftime%5B1%3A2%5D', - '%2Flat%5B120%3A140%5D', - '%2Flon%5B352%3A368%5D', - '%2FPS%5B1%3A2%5D%5B120%3A140%5D%5B352%3A368%5D'} + { + '%2Ftime%5B1%3A2%5D', + '%2Flat%5B120%3A140%5D', + '%2Flon%5B352%3A368%5D', + '%2FPS%5B1%3A2%5D%5B120%3A140%5D%5B352%3A368%5D', + }, ) # Ensure the output was staged with the expected file name - mock_stage.assert_called_once_with(f'{self.tmp_dir}/uuid2.nc4', - expected_output_basename, - 'application/x-netcdf4', - location=self.staging_location, - logger=hoss.logger) + mock_stage.assert_called_once_with( + f'{self.tmp_dir}/uuid2.nc4', + expected_output_basename, + 'application/x-netcdf4', + location=self.staging_location, + logger=hoss.logger, + ) mock_rmtree.assert_called_once_with(self.tmp_dir) # Ensure no variables were filled @@ -1090,14 +1410,20 @@ def test_bbox_temporal_end_to_end(self, mock_stage, mock_util_download, @patch('hoss.bbox_utilities.download') @patch('hoss.utilities.util_download') @patch('hoss.adapter.stage') - def test_geo_shapefile_end_to_end(self, mock_stage, mock_util_download, - mock_geojson_download, mock_rmtree, - mock_mkdtemp, mock_uuid, - mock_get_fill_slice): - """ Ensure a request with a shape file specified against a - geographically gridded collection will retrieve variables, but - limited to the ranges of a bounding box that encloses the specified - GeoJSON shape. + def test_geo_shapefile_end_to_end( + self, + mock_stage, + mock_util_download, + mock_geojson_download, + mock_rmtree, + mock_mkdtemp, + mock_uuid, + mock_get_fill_slice, + ): + """Ensure a request with a shape file specified against a + geographically gridded collection will retrieve variables, but + limited to the ranges of a bounding box that encloses the specified + GeoJSON shape. 
""" expected_output_basename = 'opendap_url_wind_speed_subsetted.nc4' @@ -1120,52 +1446,82 @@ def test_geo_shapefile_end_to_end(self, mock_stage, mock_util_download, shape_file_url = 'www.example.com/polygon.geo.json' mock_geojson_download.return_value = geojson_path - mock_util_download.side_effect = [dmr_path, dimensions_path, - all_variables_path] - - message = Message({ - 'accessToken': 'fake-token', - 'callback': 'https://example.com/', - 'sources': [{ - 'collection': 'C1234567890-EEDTEST', - 'shortName': 'RSSMIF16D', - 'variables': [{'id': '', - 'name': self.rssmif16d_variable, - 'fullPath': self.rssmif16d_variable}]}], - 'stagingLocation': self.staging_location, - 'subset': {'shape': {'href': shape_file_url, - 'type': 'application/geo+json'}}, - 'user': 'dscott', - }) - - hoss = HossAdapter(message, config=config(False), - catalog=self.input_stac) + mock_util_download.side_effect = [dmr_path, dimensions_path, all_variables_path] + + message = Message( + { + 'accessToken': 'fake-token', + 'callback': 'https://example.com/', + 'sources': [ + { + 'collection': 'C1234567890-EEDTEST', + 'shortName': 'RSSMIF16D', + 'variables': [ + { + 'id': '', + 'name': self.rssmif16d_variable, + 'fullPath': self.rssmif16d_variable, + } + ], + } + ], + 'stagingLocation': self.staging_location, + 'subset': { + 'shape': {'href': shape_file_url, 'type': 'application/geo+json'} + }, + 'user': 'dscott', + } + ) + + hoss = HossAdapter(message, config=config(False), catalog=self.input_stac) _, output_catalog = hoss.invoke() # Ensure that there is a single item in the output catalog with the # expected asset: - self.assert_expected_output_catalog(output_catalog, - expected_staged_url, - expected_output_basename) + self.assert_expected_output_catalog( + output_catalog, expected_staged_url, expected_output_basename + ) # Ensure the shape file in the Harmony message was downloaded: - mock_geojson_download.assert_called_once_with(shape_file_url, - self.tmp_dir, - logger=hoss.logger, - access_token=message.accessToken, - cfg=hoss.config) + mock_geojson_download.assert_called_once_with( + shape_file_url, + self.tmp_dir, + logger=hoss.logger, + access_token=message.accessToken, + cfg=hoss.config, + ) # Ensure the expected requests were made against OPeNDAP. 
# See related comment in self.test_geo_bbox_end_to_end self.assertEqual(mock_util_download.call_count, 3) - mock_util_download.assert_has_calls([ - call(f'{self.granule_url}.dmr.xml', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=None, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - ]) + mock_util_download.assert_has_calls( + [ + call( + f'{self.granule_url}.dmr.xml', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=None, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + ] + ) # Ensure the constraint expression for dimensions data included only # geographic or temporal variables with no index ranges @@ -1181,18 +1537,22 @@ def test_geo_shapefile_end_to_end(self, mock_stage, mock_util_download, index_range_data = mock_util_download.call_args_list[2][1].get('data', {}) self.assert_valid_request_data( index_range_data, - {'%2Ftime', - '%2Flatitude%5B508%3A527%5D', - '%2Flongitude%5B983%3A1003%5D', - '%2Fwind_speed%5B%5D%5B508%3A527%5D%5B983%3A1003%5D'} + { + '%2Ftime', + '%2Flatitude%5B508%3A527%5D', + '%2Flongitude%5B983%3A1003%5D', + '%2Fwind_speed%5B%5D%5B508%3A527%5D%5B983%3A1003%5D', + }, ) # Ensure the output was staged with the expected file name - mock_stage.assert_called_once_with(f'{self.tmp_dir}/uuid2.nc4', - expected_output_basename, - 'application/x-netcdf4', - location=self.staging_location, - logger=hoss.logger) + mock_stage.assert_called_once_with( + f'{self.tmp_dir}/uuid2.nc4', + expected_output_basename, + 'application/x-netcdf4', + location=self.staging_location, + logger=hoss.logger, + ) mock_rmtree.assert_called_once_with(self.tmp_dir) # Ensure no variables were filled @@ -1205,19 +1565,25 @@ def test_geo_shapefile_end_to_end(self, mock_stage, mock_util_download, @patch('hoss.bbox_utilities.download') @patch('hoss.utilities.util_download') @patch('hoss.adapter.stage') - def test_geo_shapefile_all_variables(self, mock_stage, mock_util_download, - mock_geojson_download, mock_rmtree, - mock_mkdtemp, mock_uuid, - mock_get_fill_slice): - """ Ensure an all variable request with a shape file specified will - retrieve all variables, but limited to the ranges of a bounding box - that encloses the specified GeoJSON shape. This request uses a - collection that is geographically gridded. - - Because a shape file is specified, index range subsetting will be - performed, so a prefetch request will be performed, and the final - DAP4 constraint expression will include all variables with index - ranges. + def test_geo_shapefile_all_variables( + self, + mock_stage, + mock_util_download, + mock_geojson_download, + mock_rmtree, + mock_mkdtemp, + mock_uuid, + mock_get_fill_slice, + ): + """Ensure an all variable request with a shape file specified will + retrieve all variables, but limited to the ranges of a bounding box + that encloses the specified GeoJSON shape. This request uses a + collection that is geographically gridded. 
+ + Because a shape file is specified, index range subsetting will be + performed, so a prefetch request will be performed, and the final + DAP4 constraint expression will include all variables with index + ranges. """ expected_output_basename = 'opendap_url_subsetted.nc4' @@ -1240,48 +1606,72 @@ def test_geo_shapefile_all_variables(self, mock_stage, mock_util_download, shape_file_url = 'www.example.com/polygon.geo.json' mock_geojson_download.return_value = geojson_path - mock_util_download.side_effect = [dmr_path, dimensions_path, - all_variables_path] - - message = Message({ - 'accessToken': 'fake-token', - 'callback': 'https://example.com/', - 'sources': [{'collection': 'C1234567890-EEDTEST', - 'shortName': 'RSSMIF16D'}], - 'stagingLocation': self.staging_location, - 'subset': {'shape': {'href': shape_file_url, - 'type': 'application/geo+json'}}, - 'user': 'dscott', - }) - - hoss = HossAdapter(message, config=config(False), - catalog=self.input_stac) + mock_util_download.side_effect = [dmr_path, dimensions_path, all_variables_path] + + message = Message( + { + 'accessToken': 'fake-token', + 'callback': 'https://example.com/', + 'sources': [ + {'collection': 'C1234567890-EEDTEST', 'shortName': 'RSSMIF16D'} + ], + 'stagingLocation': self.staging_location, + 'subset': { + 'shape': {'href': shape_file_url, 'type': 'application/geo+json'} + }, + 'user': 'dscott', + } + ) + + hoss = HossAdapter(message, config=config(False), catalog=self.input_stac) _, output_catalog = hoss.invoke() # Ensure that there is a single item in the output catalog with the # expected asset: - self.assert_expected_output_catalog(output_catalog, - expected_staged_url, - expected_output_basename) + self.assert_expected_output_catalog( + output_catalog, expected_staged_url, expected_output_basename + ) # Ensure the shape file in the Harmony message was downloaded: - mock_geojson_download.assert_called_once_with(shape_file_url, - self.tmp_dir, - logger=hoss.logger, - access_token=message.accessToken, - cfg=hoss.config) + mock_geojson_download.assert_called_once_with( + shape_file_url, + self.tmp_dir, + logger=hoss.logger, + access_token=message.accessToken, + cfg=hoss.config, + ) # Ensure the expected requests were made against OPeNDAP. 
# See related comment in self.test_geo_bbox_end_to_end self.assertEqual(mock_util_download.call_count, 3) - mock_util_download.assert_has_calls([ - call(f'{self.granule_url}.dmr.xml', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=None, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - ]) + mock_util_download.assert_has_calls( + [ + call( + f'{self.granule_url}.dmr.xml', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=None, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + ] + ) # Ensure the constraint expression for dimensions data included only # geographic or temporal variables with no index ranges @@ -1297,22 +1687,26 @@ def test_geo_shapefile_all_variables(self, mock_stage, mock_util_download, index_range_data = mock_util_download.call_args_list[2][1].get('data', {}) self.assert_valid_request_data( index_range_data, - {'%2Ftime', - '%2Flatitude%5B508%3A527%5D', - '%2Flongitude%5B983%3A1003%5D', - '%2Fatmosphere_cloud_liquid_water_content%5B%5D%5B508%3A527%5D%5B983%3A1003%5D', - '%2Fatmosphere_water_vapor_content%5B%5D%5B508%3A527%5D%5B983%3A1003%5D', - '%2Frainfall_rate%5B%5D%5B508%3A527%5D%5B983%3A1003%5D', - '%2Fsst_dtime%5B%5D%5B508%3A527%5D%5B983%3A1003%5D', - '%2Fwind_speed%5B%5D%5B508%3A527%5D%5B983%3A1003%5D'} + { + '%2Ftime', + '%2Flatitude%5B508%3A527%5D', + '%2Flongitude%5B983%3A1003%5D', + '%2Fatmosphere_cloud_liquid_water_content%5B%5D%5B508%3A527%5D%5B983%3A1003%5D', + '%2Fatmosphere_water_vapor_content%5B%5D%5B508%3A527%5D%5B983%3A1003%5D', + '%2Frainfall_rate%5B%5D%5B508%3A527%5D%5B983%3A1003%5D', + '%2Fsst_dtime%5B%5D%5B508%3A527%5D%5B983%3A1003%5D', + '%2Fwind_speed%5B%5D%5B508%3A527%5D%5B983%3A1003%5D', + }, ) # Ensure the output was staged with the expected file name - mock_stage.assert_called_once_with(f'{self.tmp_dir}/uuid2.nc4', - expected_output_basename, - 'application/x-netcdf4', - location=self.staging_location, - logger=hoss.logger) + mock_stage.assert_called_once_with( + f'{self.tmp_dir}/uuid2.nc4', + expected_output_basename, + 'application/x-netcdf4', + location=self.staging_location, + logger=hoss.logger, + ) mock_rmtree.assert_called_once_with(self.tmp_dir) # Ensure no variables were filled @@ -1325,13 +1719,19 @@ def test_geo_shapefile_all_variables(self, mock_stage, mock_util_download, @patch('hoss.bbox_utilities.download') @patch('hoss.utilities.util_download') @patch('hoss.adapter.stage') - def test_bbox_precedence_end_to_end(self, mock_stage, mock_util_download, - mock_geojson_download, mock_rmtree, - mock_mkdtemp, mock_uuid, - mock_get_fill_slice): - """ Ensure a request with a bounding box will be correctly processed, - requesting only the expected variables, with index ranges - corresponding to the bounding box specified. 
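+    # Note: this message supplies both 'bbox' and 'shape'; HOSS gives the
+    # bounding box precedence (see `hoss/subset.py`), so the expected index
+    # ranges below match the bounding box rather than the GeoJSON shape.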
+    def test_bbox_precedence_end_to_end(
+        self,
+        mock_stage,
+        mock_util_download,
+        mock_geojson_download,
+        mock_rmtree,
+        mock_mkdtemp,
+        mock_uuid,
+        mock_get_fill_slice,
+    ):
+        """Ensure a request with a bounding box will be correctly processed,
+        requesting only the expected variables, with index ranges
+        corresponding to the bounding box specified.

        """
        expected_output_basename = 'opendap_url_wind_speed_subsetted.nc4'

@@ -1354,56 +1754,86 @@ def test_bbox_precedence_end_to_end(self, mock_stage, mock_util_download,
        shape_file_url = 'www.example.com/polygon.geo.json'
        mock_geojson_download.return_value = geojson_path

-        mock_util_download.side_effect = [dmr_path, dimensions_path,
-                                          all_variables_path]
-
-        message = Message({
-            'accessToken': 'fake-token',
-            'callback': 'https://example.com/',
-            'sources': [{
-                'collection': 'C1234567890-EEDTEST',
-                'shortName': 'RSSMIF16D',
-                'variables': [{'id': '',
-                               'name': self.rssmif16d_variable,
-                               'fullPath': self.rssmif16d_variable}]}],
-            'stagingLocation': self.staging_location,
-            'subset': {'bbox': [-30, 45, -15, 60],
-                       'shape': {'href': shape_file_url,
-                                 'type': 'application/geo+json'}},
-            'user': 'aworden',
-        })
-
-        hoss = HossAdapter(message, config=config(False),
-                           catalog=self.input_stac)
+        mock_util_download.side_effect = [dmr_path, dimensions_path, all_variables_path]
+
+        message = Message(
+            {
+                'accessToken': 'fake-token',
+                'callback': 'https://example.com/',
+                'sources': [
+                    {
+                        'collection': 'C1234567890-EEDTEST',
+                        'shortName': 'RSSMIF16D',
+                        'variables': [
+                            {
+                                'id': '',
+                                'name': self.rssmif16d_variable,
+                                'fullPath': self.rssmif16d_variable,
+                            }
+                        ],
+                    }
+                ],
+                'stagingLocation': self.staging_location,
+                'subset': {
+                    'bbox': [-30, 45, -15, 60],
+                    'shape': {'href': shape_file_url, 'type': 'application/geo+json'},
+                },
+                'user': 'aworden',
+            }
+        )
+
+        hoss = HossAdapter(message, config=config(False), catalog=self.input_stac)
        _, output_catalog = hoss.invoke()

        # Ensure that there is a single item in the output catalog with the
        # expected asset:
-        self.assert_expected_output_catalog(output_catalog,
-                                            expected_staged_url,
-                                            expected_output_basename)
+        self.assert_expected_output_catalog(
+            output_catalog, expected_staged_url, expected_output_basename
+        )

        # Ensure the shape file in the Harmony message was downloaded (the
        # logic giving the bounding box precedence over the shape file occurs
        # in `hoss/subset.py`, after the shape file has already been
        # downloaded - however, that file will not be used).
-        mock_geojson_download.assert_called_once_with(shape_file_url,
-                                                      self.tmp_dir,
-                                                      logger=hoss.logger,
-                                                      access_token=message.accessToken,
-                                                      cfg=hoss.config)
+        mock_geojson_download.assert_called_once_with(
+            shape_file_url,
+            self.tmp_dir,
+            logger=hoss.logger,
+            access_token=message.accessToken,
+            cfg=hoss.config,
+        )

        # Ensure the expected requests were made against OPeNDAP. 
        # See related comment in self.test_geo_bbox_end_to_end
        self.assertEqual(mock_util_download.call_count, 3)
-        mock_util_download.assert_has_calls([
-            call(f'{self.granule_url}.dmr.xml', self.tmp_dir, hoss.logger,
-                 access_token=message.accessToken, data=None, cfg=hoss.config),
-            call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger,
-                 access_token=message.accessToken, data=ANY, cfg=hoss.config),
-            call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger,
-                 access_token=message.accessToken, data=ANY, cfg=hoss.config),
-        ])
+        mock_util_download.assert_has_calls(
+            [
+                call(
+                    f'{self.granule_url}.dmr.xml',
+                    self.tmp_dir,
+                    hoss.logger,
+                    access_token=message.accessToken,
+                    data=None,
+                    cfg=hoss.config,
+                ),
+                call(
+                    f'{self.granule_url}.dap.nc4',
+                    self.tmp_dir,
+                    hoss.logger,
+                    access_token=message.accessToken,
+                    data=ANY,
+                    cfg=hoss.config,
+                ),
+                call(
+                    f'{self.granule_url}.dap.nc4',
+                    self.tmp_dir,
+                    hoss.logger,
+                    access_token=message.accessToken,
+                    data=ANY,
+                    cfg=hoss.config,
+                ),
+            ]
+        )

        # Ensure the constraint expression for dimensions data included only
        # geographic or temporal variables with no index ranges
@@ -1419,18 +1849,22 @@ def test_bbox_precedence_end_to_end(self, mock_stage, mock_util_download,
        index_range_data = mock_util_download.call_args_list[2][1].get('data', {})
        self.assert_valid_request_data(
            index_range_data,
-            {'%2Ftime',
-             '%2Flatitude%5B540%3A599%5D',
-             '%2Flongitude%5B1320%3A1379%5D',
-             '%2Fwind_speed%5B%5D%5B540%3A599%5D%5B1320%3A1379%5D'}
+            {
+                '%2Ftime',
+                '%2Flatitude%5B540%3A599%5D',
+                '%2Flongitude%5B1320%3A1379%5D',
+                '%2Fwind_speed%5B%5D%5B540%3A599%5D%5B1320%3A1379%5D',
+            },
        )

        # Ensure the output was staged with the expected file name
-        mock_stage.assert_called_once_with(f'{self.tmp_dir}/uuid2.nc4',
-                                           expected_output_basename,
-                                           'application/x-netcdf4',
-                                           location=self.staging_location,
-                                           logger=hoss.logger)
+        mock_stage.assert_called_once_with(
+            f'{self.tmp_dir}/uuid2.nc4',
+            expected_output_basename,
+            'application/x-netcdf4',
+            location=self.staging_location,
+            logger=hoss.logger,
+        )
        mock_rmtree.assert_called_once_with(self.tmp_dir)

        # Ensure no variables were filled
@@ -1442,15 +1876,22 @@ def test_bbox_precedence_end_to_end(self, mock_stage, mock_util_download,
    @patch('shutil.rmtree')
    @patch('hoss.utilities.util_download')
    @patch('hoss.adapter.stage')
-    def test_geo_dimensions(self, mock_stage, mock_util_download, mock_rmtree,
-                            mock_mkdtemp, mock_uuid, mock_get_fill_slice):
-        """ Ensure a request with explicitly specified dimension extents will
-            be correctly processed, requesting only the expected variables,
-            with index ranges corresponding to the extents specified.
-
-            To minimise test data in the repository, this test uses geographic
-            dimension of latitude and longitude, but within the
-            `subset.dimensions` region of the inbound Harmony message.
+    def test_geo_dimensions(
+        self,
+        mock_stage,
+        mock_util_download,
+        mock_rmtree,
+        mock_mkdtemp,
+        mock_uuid,
+        mock_get_fill_slice,
+    ):
+        """Ensure a request with explicitly specified dimension extents will
+        be correctly processed, requesting only the expected variables,
+        with index ranges corresponding to the extents specified.
+
+        To minimise test data in the repository, this test uses the
+        geographic dimensions of latitude and longitude, specified within
+        the `subset.dimensions` region of the inbound Harmony message. 
""" expected_output_basename = 'opendap_url_wind_speed_subsetted.nc4' @@ -1468,47 +1909,76 @@ def test_geo_dimensions(self, mock_stage, mock_util_download, mock_rmtree, all_variables_path = f'{self.tmp_dir}/variables.nc4' copy('tests/data/f16_ssmis_geo.nc', all_variables_path) - mock_util_download.side_effect = [dmr_path, dimensions_path, - all_variables_path] - - message = Message({ - 'accessToken': 'fake-token', - 'callback': 'https://example.com/', - 'sources': [{ - 'collection': 'C1234567890-EEDTEST', - 'shortName': 'RSSMIF16D', - 'variables': [{'id': '', - 'name': self.rssmif16d_variable, - 'fullPath': self.rssmif16d_variable}]}], - 'stagingLocation': self.staging_location, - 'subset': {'dimensions': [ - {'name': 'latitude', 'min': 45, 'max': 60}, - {'name': 'longitude', 'min': 15, 'max': 30} - ]}, - 'user': 'blightyear', - }) - - hoss = HossAdapter(message, config=config(False), - catalog=self.input_stac) + mock_util_download.side_effect = [dmr_path, dimensions_path, all_variables_path] + + message = Message( + { + 'accessToken': 'fake-token', + 'callback': 'https://example.com/', + 'sources': [ + { + 'collection': 'C1234567890-EEDTEST', + 'shortName': 'RSSMIF16D', + 'variables': [ + { + 'id': '', + 'name': self.rssmif16d_variable, + 'fullPath': self.rssmif16d_variable, + } + ], + } + ], + 'stagingLocation': self.staging_location, + 'subset': { + 'dimensions': [ + {'name': 'latitude', 'min': 45, 'max': 60}, + {'name': 'longitude', 'min': 15, 'max': 30}, + ] + }, + 'user': 'blightyear', + } + ) + + hoss = HossAdapter(message, config=config(False), catalog=self.input_stac) _, output_catalog = hoss.invoke() # Ensure that there is a single item in the output catalog with the # expected asset: - self.assert_expected_output_catalog(output_catalog, - expected_staged_url, - expected_output_basename) + self.assert_expected_output_catalog( + output_catalog, expected_staged_url, expected_output_basename + ) # Ensure the expected requests were made against OPeNDAP. 
# See related comment in self.test_geo_bbox_end_to_end self.assertEqual(mock_util_download.call_count, 3) - mock_util_download.assert_has_calls([ - call(f'{self.granule_url}.dmr.xml', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=None, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - ]) + mock_util_download.assert_has_calls( + [ + call( + f'{self.granule_url}.dmr.xml', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=None, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + ] + ) # Ensure the constraint expression for dimensions data included only # geographic or temporal variables with no index ranges @@ -1522,18 +1992,22 @@ def test_geo_dimensions(self, mock_stage, mock_util_download, mock_rmtree, index_range_data = mock_util_download.call_args_list[2][1].get('data', {}) self.assert_valid_request_data( index_range_data, - {'%2Ftime', - '%2Flatitude%5B540%3A599%5D', - '%2Flongitude%5B60%3A119%5D', - '%2Fwind_speed%5B%5D%5B540%3A599%5D%5B60%3A119%5D'} + { + '%2Ftime', + '%2Flatitude%5B540%3A599%5D', + '%2Flongitude%5B60%3A119%5D', + '%2Fwind_speed%5B%5D%5B540%3A599%5D%5B60%3A119%5D', + }, ) # Ensure the output was staged with the expected file name - mock_stage.assert_called_once_with(f'{self.tmp_dir}/uuid2.nc4', - expected_output_basename, - 'application/x-netcdf4', - location=self.staging_location, - logger=hoss.logger) + mock_stage.assert_called_once_with( + f'{self.tmp_dir}/uuid2.nc4', + expected_output_basename, + 'application/x-netcdf4', + location=self.staging_location, + logger=hoss.logger, + ) mock_rmtree.assert_called_once_with(self.tmp_dir) # Ensure no variables were filled @@ -1545,13 +2019,19 @@ def test_geo_dimensions(self, mock_stage, mock_util_download, mock_rmtree, @patch('shutil.rmtree') @patch('hoss.utilities.util_download') @patch('hoss.adapter.stage') - def test_projected_grid_bbox(self, mock_stage, mock_util_download, - mock_rmtree, mock_mkdtemp, mock_uuid, - mock_get_fill_slice): - """ Make a request specifying a bounding box for a collection that is - gridded to a non-geographic projection. This example will use - ABoVE TVPRM, which uses an Albers Conical Equal Area projection - with data covering Alaska. + def test_projected_grid_bbox( + self, + mock_stage, + mock_util_download, + mock_rmtree, + mock_mkdtemp, + mock_uuid, + mock_get_fill_slice, + ): + """Make a request specifying a bounding box for a collection that is + gridded to a non-geographic projection. This example will use + ABoVE TVPRM, which uses an Albers Conical Equal Area projection + with data covering Alaska. 
""" expected_output_basename = 'opendap_url_NEE_subsetted.nc4' @@ -1568,44 +2048,65 @@ def test_projected_grid_bbox(self, mock_stage, mock_util_download, output_path = f'{self.tmp_dir}/ABoVE_TVPRM_bbox.nc4' copy('tests/data/ABoVE_TVPRM_prefetch.nc4', output_path) - mock_util_download.side_effect = [dmr_path, dimensions_path, - output_path] - - message = Message({ - 'accessToken': 'fake-token', - 'callback': 'https://example.com/', - 'sources': [{ - 'collection': 'C1234567890-EEDTEST', - 'shortName': 'NorthSlope_NEE_TVPRM_1920', - 'variables': [{'id': '', - 'name': '/NEE', - 'fullPath': '/NEE'}]}], - 'stagingLocation': self.staging_location, - 'subset': {'bbox': [-160, 68, -145, 70]}, - 'user': 'wfunk', - }) - - hoss = HossAdapter(message, config=config(False), - catalog=self.input_stac) + mock_util_download.side_effect = [dmr_path, dimensions_path, output_path] + + message = Message( + { + 'accessToken': 'fake-token', + 'callback': 'https://example.com/', + 'sources': [ + { + 'collection': 'C1234567890-EEDTEST', + 'shortName': 'NorthSlope_NEE_TVPRM_1920', + 'variables': [{'id': '', 'name': '/NEE', 'fullPath': '/NEE'}], + } + ], + 'stagingLocation': self.staging_location, + 'subset': {'bbox': [-160, 68, -145, 70]}, + 'user': 'wfunk', + } + ) + + hoss = HossAdapter(message, config=config(False), catalog=self.input_stac) _, output_catalog = hoss.invoke() # Ensure that there is a single item in the output catalog with the # expected asset: - self.assert_expected_output_catalog(output_catalog, - expected_staged_url, - expected_output_basename) + self.assert_expected_output_catalog( + output_catalog, expected_staged_url, expected_output_basename + ) # Ensure the expected requests were made against OPeNDAP. # See related comment in self.test_geo_bbox_end_to_end self.assertEqual(mock_util_download.call_count, 3) - mock_util_download.assert_has_calls([ - call(f'{self.granule_url}.dmr.xml', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=None, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - ]) + mock_util_download.assert_has_calls( + [ + call( + f'{self.granule_url}.dmr.xml', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=None, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + ] + ) # Ensure the constraint expression for dimensions data included only # spatial or temporal variables with no index ranges @@ -1618,20 +2119,24 @@ def test_projected_grid_bbox(self, mock_stage, mock_util_download, index_range_data = mock_util_download.call_args_list[2][1].get('data', {}) self.assert_valid_request_data( index_range_data, - {'%2Ftime', - '%2Ftime_bnds', - '%2Fcrs', - '%2Fx%5B37%3A56%5D', - '%2Fy%5B7%3A26%5D', - '%2FNEE%5B%5D%5B7%3A26%5D%5B37%3A56%5D'} + { + '%2Ftime', + '%2Ftime_bnds', + '%2Fcrs', + '%2Fx%5B37%3A56%5D', + '%2Fy%5B7%3A26%5D', + '%2FNEE%5B%5D%5B7%3A26%5D%5B37%3A56%5D', + }, ) # Ensure the output was staged with the expected file name - mock_stage.assert_called_once_with(f'{self.tmp_dir}/uuid2.nc4', - expected_output_basename, - 'application/x-netcdf4', - 
location=self.staging_location, - logger=hoss.logger) + mock_stage.assert_called_once_with( + f'{self.tmp_dir}/uuid2.nc4', + expected_output_basename, + 'application/x-netcdf4', + location=self.staging_location, + logger=hoss.logger, + ) mock_rmtree.assert_called_once_with(self.tmp_dir) # Ensure no variables were filled @@ -1644,13 +2149,20 @@ def test_projected_grid_bbox(self, mock_stage, mock_util_download, @patch('hoss.bbox_utilities.download') @patch('hoss.utilities.util_download') @patch('hoss.adapter.stage') - def test_projected_grid_shape(self, mock_stage, mock_util_download, - mock_geojson_download, mock_rmtree, - mock_mkdtemp, mock_uuid, mock_get_fill_slice): - """ Make a request specifying a shape file for a collection that is - gridded to a non-geographic projection. This example will use - ABoVE TVPRM, which uses an Albers Conical Equal Area projection - with data covering Alaska. + def test_projected_grid_shape( + self, + mock_stage, + mock_util_download, + mock_geojson_download, + mock_rmtree, + mock_mkdtemp, + mock_uuid, + mock_get_fill_slice, + ): + """Make a request specifying a shape file for a collection that is + gridded to a non-geographic projection. This example will use + ABoVE TVPRM, which uses an Albers Conical Equal Area projection + with data covering Alaska. """ expected_output_basename = 'opendap_url_NEE_subsetted.nc4' @@ -1673,45 +2185,67 @@ def test_projected_grid_shape(self, mock_stage, mock_util_download, output_path = f'{self.tmp_dir}/ABoVE_TVPRM_bbox.nc4' copy('tests/data/ABoVE_TVPRM_prefetch.nc4', output_path) - mock_util_download.side_effect = [dmr_path, dimensions_path, - output_path] - - message = Message({ - 'accessToken': 'fake-token', - 'callback': 'https://example.com/', - 'sources': [{ - 'collection': 'C1234567890-EEDTEST', - 'shortName': 'NorthSlope_NEE_TVPRM_1920', - 'variables': [{'id': '', - 'name': '/NEE', - 'fullPath': '/NEE'}]}], - 'stagingLocation': self.staging_location, - 'subset': {'shape': {'href': shape_file_url, - 'type': 'application/geo+json'}}, - 'user': 'wfunk', - }) - - hoss = HossAdapter(message, config=config(False), - catalog=self.input_stac) + mock_util_download.side_effect = [dmr_path, dimensions_path, output_path] + + message = Message( + { + 'accessToken': 'fake-token', + 'callback': 'https://example.com/', + 'sources': [ + { + 'collection': 'C1234567890-EEDTEST', + 'shortName': 'NorthSlope_NEE_TVPRM_1920', + 'variables': [{'id': '', 'name': '/NEE', 'fullPath': '/NEE'}], + } + ], + 'stagingLocation': self.staging_location, + 'subset': { + 'shape': {'href': shape_file_url, 'type': 'application/geo+json'} + }, + 'user': 'wfunk', + } + ) + + hoss = HossAdapter(message, config=config(False), catalog=self.input_stac) _, output_catalog = hoss.invoke() # Ensure that there is a single item in the output catalog with the # expected asset: - self.assert_expected_output_catalog(output_catalog, - expected_staged_url, - expected_output_basename) + self.assert_expected_output_catalog( + output_catalog, expected_staged_url, expected_output_basename + ) # Ensure the expected requests were made against OPeNDAP. 
# See related comment in self.test_geo_bbox_end_to_end self.assertEqual(mock_util_download.call_count, 3) - mock_util_download.assert_has_calls([ - call(f'{self.granule_url}.dmr.xml', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=None, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - ]) + mock_util_download.assert_has_calls( + [ + call( + f'{self.granule_url}.dmr.xml', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=None, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + ] + ) # Ensure the constraint expression for dimensions data included only # geographic or temporal variables with no index ranges @@ -1724,20 +2258,24 @@ def test_projected_grid_shape(self, mock_stage, mock_util_download, index_range_data = mock_util_download.call_args_list[2][1].get('data', {}) self.assert_valid_request_data( index_range_data, - {'%2Ftime', - '%2Ftime_bnds', - '%2Fcrs', - '%2Fx%5B37%3A56%5D', - '%2Fy%5B11%3A26%5D', - '%2FNEE%5B%5D%5B11%3A26%5D%5B37%3A56%5D'} + { + '%2Ftime', + '%2Ftime_bnds', + '%2Fcrs', + '%2Fx%5B37%3A56%5D', + '%2Fy%5B11%3A26%5D', + '%2FNEE%5B%5D%5B11%3A26%5D%5B37%3A56%5D', + }, ) # Ensure the output was staged with the expected file name - mock_stage.assert_called_once_with(f'{self.tmp_dir}/uuid2.nc4', - expected_output_basename, - 'application/x-netcdf4', - location='s3://example-bucket/', - logger=hoss.logger) + mock_stage.assert_called_once_with( + f'{self.tmp_dir}/uuid2.nc4', + expected_output_basename, + 'application/x-netcdf4', + location='s3://example-bucket/', + logger=hoss.logger, + ) mock_rmtree.assert_called_once_with(self.tmp_dir) # Ensure no variables were filled @@ -1749,20 +2287,26 @@ def test_projected_grid_shape(self, mock_stage, mock_util_download, @patch('shutil.rmtree') @patch('hoss.utilities.util_download') @patch('hoss.adapter.stage') - def test_bounds_end_to_end(self, mock_stage, mock_util_download, - mock_rmtree, mock_mkdtemp, mock_uuid, - mock_get_fill_slice): - """ Ensure a request with a bounding box and temporal range will be - correctly processed for a geographically gridded collection that - has bounds variables for each dimension. - - Note: Each GPM IMERGHH granule has a single time slice, so the full - range will be retrieved (e.g., /Grid/time[0:0] - - * -30.0 ≤ /Grid/lon[1500] ≤ -29.9 - * 45.0 ≤ /Grid/lat[1350] ≤ 45.1 - * -14.9 ≤ /Grid/lon[1649] ≤ -15.0 - * 59.9 ≤ /Grid/lat[1499] ≤ 60.0 + def test_bounds_end_to_end( + self, + mock_stage, + mock_util_download, + mock_rmtree, + mock_mkdtemp, + mock_uuid, + mock_get_fill_slice, + ): + """Ensure a request with a bounding box and temporal range will be + correctly processed for a geographically gridded collection that + has bounds variables for each dimension. 
+
+        Note: Each GPM IMERGHH granule has a single time slice, so the full
+        range will be retrieved (e.g., /Grid/time[0:0])
+
+        * -30.0 ≤ /Grid/lon[1500] ≤ -29.9
+        * 45.0 ≤ /Grid/lat[1350] ≤ 45.1
+        * -14.9 ≤ /Grid/lon[1649] ≤ -15.0
+        * 59.9 ≤ /Grid/lat[1499] ≤ 60.0
 
         """
         expected_output_basename = 'opendap_url_Grid_precipitationCal_subsetted.nc4'
@@ -1780,54 +2324,89 @@ def test_bounds_end_to_end(self, mock_stage, mock_util_download,
         all_variables_path = f'{self.tmp_dir}/variables.nc4'
         copy('tests/data/GPM_3IMERGHH_bounds.nc4', all_variables_path)
 
-        mock_util_download.side_effect = [dmr_path, dimensions_path,
-                                          all_variables_path]
-
-        message = Message({
-            'accessToken': 'fake-token',
-            'callback': 'https://example.com/',
-            'sources': [{
-                'collection': 'C1234567890-EEDTEST',
-                'shortName': 'GPM_3IMERGHH',
-                'variables': [{'id': '',
-                               'name': self.gpm_variable,
-                               'fullPath': self.gpm_variable}]}],
-            'stagingLocation': self.staging_location,
-            'subset': {'bbox': [-30, 45, -15, 60]},
-            'temporal': {'start': '2020-01-01T12:15:00',
-                         'end': '2020-01-01T12:45:00'},
-            'user': 'jlovell',
-        })
-
-        hoss = HossAdapter(message, config=config(False),
-                           catalog=self.input_stac)
+        mock_util_download.side_effect = [dmr_path, dimensions_path, all_variables_path]
+
+        message = Message(
+            {
+                'accessToken': 'fake-token',
+                'callback': 'https://example.com/',
+                'sources': [
+                    {
+                        'collection': 'C1234567890-EEDTEST',
+                        'shortName': 'GPM_3IMERGHH',
+                        'variables': [
+                            {
+                                'id': '',
+                                'name': self.gpm_variable,
+                                'fullPath': self.gpm_variable,
+                            }
+                        ],
+                    }
+                ],
+                'stagingLocation': self.staging_location,
+                'subset': {'bbox': [-30, 45, -15, 60]},
+                'temporal': {
+                    'start': '2020-01-01T12:15:00',
+                    'end': '2020-01-01T12:45:00',
+                },
+                'user': 'jlovell',
+            }
+        )
+
+        hoss = HossAdapter(message, config=config(False), catalog=self.input_stac)
         _, output_catalog = hoss.invoke()
 
         # Ensure that there is a single item in the output catalog with the
         # expected asset:
-        self.assert_expected_output_catalog(output_catalog,
-                                            expected_staged_url,
-                                            expected_output_basename)
+        self.assert_expected_output_catalog(
+            output_catalog, expected_staged_url, expected_output_basename
+        )
 
         # Ensure the expected requests were made against OPeNDAP.
         # See related comment in self.test_geo_bbox_end_to_end
         self.assertEqual(mock_util_download.call_count, 3)
-        mock_util_download.assert_has_calls([
-            call(f'{self.granule_url}.dmr.xml', self.tmp_dir, hoss.logger,
-                 access_token=message.accessToken, data=None, cfg=hoss.config),
-            call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger,
-                 access_token=message.accessToken, data=ANY, cfg=hoss.config),
-            call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger,
-                 access_token=message.accessToken, data=ANY, cfg=hoss.config),
-        ])
+        mock_util_download.assert_has_calls(
+            [
+                call(
+                    f'{self.granule_url}.dmr.xml',
+                    self.tmp_dir,
+                    hoss.logger,
+                    access_token=message.accessToken,
+                    data=None,
+                    cfg=hoss.config,
+                ),
+                call(
+                    f'{self.granule_url}.dap.nc4',
+                    self.tmp_dir,
+                    hoss.logger,
+                    access_token=message.accessToken,
+                    data=ANY,
+                    cfg=hoss.config,
+                ),
+                call(
+                    f'{self.granule_url}.dap.nc4',
+                    self.tmp_dir,
+                    hoss.logger,
+                    access_token=message.accessToken,
+                    data=ANY,
+                    cfg=hoss.config,
+                ),
+            ]
+        )
 
         # Ensure the constraint expression for dimensions data included only
         # dimension variables and their associated bounds variables.
dimensions_data = mock_util_download.call_args_list[1][1].get('data', {}) self.assert_valid_request_data( - dimensions_data, {'%2FGrid%2Flat', '%2FGrid%2Flat_bnds', - '%2FGrid%2Flon', '%2FGrid%2Flon_bnds', - '%2FGrid%2Ftime', '%2FGrid%2Ftime_bnds'} + dimensions_data, + { + '%2FGrid%2Flat', + '%2FGrid%2Flat_bnds', + '%2FGrid%2Flon', + '%2FGrid%2Flon_bnds', + '%2FGrid%2Ftime', + '%2FGrid%2Ftime_bnds', + }, ) # Ensure the constraint expression contains all the required variables. # /Grid/precipitationCal[0:0][1500:1649][1350:1499], @@ -1837,21 +2416,25 @@ def test_bounds_end_to_end(self, mock_stage, mock_util_download, index_range_data = mock_util_download.call_args_list[2][1].get('data', {}) self.assert_valid_request_data( index_range_data, - {'%2FGrid%2Flat%5B1350%3A1499%5D', - '%2FGrid%2Flat_bnds%5B1350%3A1499%5D%5B%5D', - '%2FGrid%2Flon%5B1500%3A1649%5D', - '%2FGrid%2Flon_bnds%5B1500%3A1649%5D%5B%5D', - '%2FGrid%2Ftime%5B0%3A0%5D', - '%2FGrid%2Ftime_bnds%5B0%3A0%5D%5B%5D', - '%2FGrid%2FprecipitationCal%5B0%3A0%5D%5B1500%3A1649%5D%5B1350%3A1499%5D'} + { + '%2FGrid%2Flat%5B1350%3A1499%5D', + '%2FGrid%2Flat_bnds%5B1350%3A1499%5D%5B%5D', + '%2FGrid%2Flon%5B1500%3A1649%5D', + '%2FGrid%2Flon_bnds%5B1500%3A1649%5D%5B%5D', + '%2FGrid%2Ftime%5B0%3A0%5D', + '%2FGrid%2Ftime_bnds%5B0%3A0%5D%5B%5D', + '%2FGrid%2FprecipitationCal%5B0%3A0%5D%5B1500%3A1649%5D%5B1350%3A1499%5D', + }, ) # Ensure the output was staged with the expected file name - mock_stage.assert_called_once_with(f'{self.tmp_dir}/uuid2.nc4', - expected_output_basename, - 'application/x-netcdf4', - location=self.staging_location, - logger=hoss.logger) + mock_stage.assert_called_once_with( + f'{self.tmp_dir}/uuid2.nc4', + expected_output_basename, + 'application/x-netcdf4', + location=self.staging_location, + logger=hoss.logger, + ) mock_rmtree.assert_called_once_with(self.tmp_dir) # Ensure no variables were filled @@ -1863,28 +2446,31 @@ def test_bounds_end_to_end(self, mock_stage, mock_util_download, @patch('shutil.rmtree') @patch('hoss.utilities.util_download') @patch('hoss.adapter.stage') - def test_requested_dimensions_bounds_end_to_end(self, mock_stage, - mock_util_download, - mock_rmtree, mock_mkdtemp, - mock_uuid, - mock_get_fill_slice): - """ Ensure a request with a spatial range specified by variable names, - not just subset=lat(), subset=lon(), will be correctly processed - for a geographically gridded collection that has bounds variables - for each dimension. - - Note: Each GPM IMERGHH granule has a single time slice, so the full - range will be retrieved (e.g., /Grid/time[0:0] - - * -30.0 ≤ /Grid/lon[1500] ≤ -29.9 - * 45.0 ≤ /Grid/lat[1350] ≤ 45.1 - * -14.9 ≤ /Grid/lon[1649] ≤ -15.0 - * 59.9 ≤ /Grid/lat[1499] ≤ 60.0 + def test_requested_dimensions_bounds_end_to_end( + self, + mock_stage, + mock_util_download, + mock_rmtree, + mock_mkdtemp, + mock_uuid, + mock_get_fill_slice, + ): + """Ensure a request with a spatial range specified by variable names, + not just subset=lat(), subset=lon(), will be correctly processed + for a geographically gridded collection that has bounds variables + for each dimension. 
+
+        Note: Each GPM IMERGHH granule has a single time slice, so the full
+        range will be retrieved (e.g., /Grid/time[0:0])
+
+        * -30.0 ≤ /Grid/lon[1500] ≤ -29.9
+        * 45.0 ≤ /Grid/lat[1350] ≤ 45.1
+        * -14.9 ≤ /Grid/lon[1649] ≤ -15.0
+        * 59.9 ≤ /Grid/lat[1499] ≤ 60.0
 
         """
         expected_output_basename = 'opendap_url_Grid_precipitationCal_subsetted.nc4'
-        expected_staged_url = ''.join([self.staging_location,
-                                       expected_output_basename])
+        expected_staged_url = ''.join([self.staging_location, expected_output_basename])
 
         mock_uuid.side_effect = [Mock(hex='uuid'), Mock(hex='uuid2')]
         mock_mkdtemp.return_value = self.tmp_dir
@@ -1898,57 +2484,94 @@ def test_requested_dimensions_bounds_end_to_end(self, mock_stage,
         all_variables_path = f'{self.tmp_dir}/variables.nc4'
         copy('tests/data/GPM_3IMERGHH_bounds.nc4', all_variables_path)
 
-        mock_util_download.side_effect = [dmr_path, dimensions_path,
-                                          all_variables_path]
-
-        message = Message({
-            'accessToken': 'fake-token',
-            'callback': 'https://example.com/',
-            'sources': [{
-                'collection': 'C1234567890-EEDTEST',
-                'shortName': 'GPM_3IMERGHH',
-                'variables': [{'id': '',
-                               'name': self.gpm_variable,
-                               'fullPath': self.gpm_variable}]}],
-            'stagingLocation': self.staging_location,
-            'subset': {'dimensions': [
-                {'name': '/Grid/lat', 'min': 45, 'max': 60},
-                {'name': '/Grid/lon', 'min': -30, 'max': -15},
-            ]},
-            'temporal': {'start': '2020-01-01T12:15:00',
-                         'end': '2020-01-01T12:45:00'},
-            'user': 'jlovell',
-        })
-
-        hoss = HossAdapter(message, config=config(False),
-                           catalog=self.input_stac)
+        mock_util_download.side_effect = [dmr_path, dimensions_path, all_variables_path]
+
+        message = Message(
+            {
+                'accessToken': 'fake-token',
+                'callback': 'https://example.com/',
+                'sources': [
+                    {
+                        'collection': 'C1234567890-EEDTEST',
+                        'shortName': 'GPM_3IMERGHH',
+                        'variables': [
+                            {
+                                'id': '',
+                                'name': self.gpm_variable,
+                                'fullPath': self.gpm_variable,
+                            }
+                        ],
+                    }
+                ],
+                'stagingLocation': self.staging_location,
+                'subset': {
+                    'dimensions': [
+                        {'name': '/Grid/lat', 'min': 45, 'max': 60},
+                        {'name': '/Grid/lon', 'min': -30, 'max': -15},
+                    ]
+                },
+                'temporal': {
+                    'start': '2020-01-01T12:15:00',
+                    'end': '2020-01-01T12:45:00',
+                },
+                'user': 'jlovell',
+            }
+        )
+
+        hoss = HossAdapter(message, config=config(False), catalog=self.input_stac)
         _, output_catalog = hoss.invoke()
 
         # Ensure that there is a single item in the output catalog with the
         # expected asset:
-        self.assert_expected_output_catalog(output_catalog,
-                                            expected_staged_url,
-                                            expected_output_basename)
+        self.assert_expected_output_catalog(
+            output_catalog, expected_staged_url, expected_output_basename
+        )
 
         # Ensure the expected requests were made against OPeNDAP.
# See related comment in self.test_geo_bbox_end_to_end self.assertEqual(mock_util_download.call_count, 3) - mock_util_download.assert_has_calls([ - call(f'{self.granule_url}.dmr.xml', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=None, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - ]) + mock_util_download.assert_has_calls( + [ + call( + f'{self.granule_url}.dmr.xml', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=None, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + ] + ) # Ensure the constraint expression for dimensions data included only # dimension variables and their associated bounds variables. dimensions_data = mock_util_download.call_args_list[1][1].get('data', {}) self.assert_valid_request_data( - dimensions_data, {'%2FGrid%2Flat', '%2FGrid%2Flat_bnds', - '%2FGrid%2Flon', '%2FGrid%2Flon_bnds', - '%2FGrid%2Ftime', '%2FGrid%2Ftime_bnds'} + dimensions_data, + { + '%2FGrid%2Flat', + '%2FGrid%2Flat_bnds', + '%2FGrid%2Flon', + '%2FGrid%2Flon_bnds', + '%2FGrid%2Ftime', + '%2FGrid%2Ftime_bnds', + }, ) # Ensure the constraint expression contains all the required variables. # /Grid/precipitationCal[0:0][1500:1649][1350:1499], @@ -1958,21 +2581,25 @@ def test_requested_dimensions_bounds_end_to_end(self, mock_stage, index_range_data = mock_util_download.call_args_list[2][1].get('data', {}) self.assert_valid_request_data( index_range_data, - {'%2FGrid%2Flat%5B1350%3A1499%5D', - '%2FGrid%2Flat_bnds%5B1350%3A1499%5D%5B%5D', - '%2FGrid%2Flon%5B1500%3A1649%5D', - '%2FGrid%2Flon_bnds%5B1500%3A1649%5D%5B%5D', - '%2FGrid%2Ftime%5B0%3A0%5D', - '%2FGrid%2Ftime_bnds%5B0%3A0%5D%5B%5D', - '%2FGrid%2FprecipitationCal%5B0%3A0%5D%5B1500%3A1649%5D%5B1350%3A1499%5D'} + { + '%2FGrid%2Flat%5B1350%3A1499%5D', + '%2FGrid%2Flat_bnds%5B1350%3A1499%5D%5B%5D', + '%2FGrid%2Flon%5B1500%3A1649%5D', + '%2FGrid%2Flon_bnds%5B1500%3A1649%5D%5B%5D', + '%2FGrid%2Ftime%5B0%3A0%5D', + '%2FGrid%2Ftime_bnds%5B0%3A0%5D%5B%5D', + '%2FGrid%2FprecipitationCal%5B0%3A0%5D%5B1500%3A1649%5D%5B1350%3A1499%5D', + }, ) # Ensure the output was staged with the expected file name - mock_stage.assert_called_once_with(f'{self.tmp_dir}/uuid2.nc4', - expected_output_basename, - 'application/x-netcdf4', - location=self.staging_location, - logger=hoss.logger) + mock_stage.assert_called_once_with( + f'{self.tmp_dir}/uuid2.nc4', + expected_output_basename, + 'application/x-netcdf4', + location=self.staging_location, + logger=hoss.logger, + ) mock_rmtree.assert_called_once_with(self.tmp_dir) # Ensure no variables were filled @@ -1982,32 +2609,41 @@ def test_requested_dimensions_bounds_end_to_end(self, mock_stage, @patch('shutil.rmtree') @patch('hoss.subset.download_url') @patch('hoss.adapter.stage') - def test_exception_handling(self, mock_stage, mock_download_subset, - mock_rmtree, mock_mkdtemp): - """ Ensure that if an exception is raised during processing, this - causes a HarmonyException to be raised, to allow for informative - logging. 
+ def test_exception_handling( + self, mock_stage, mock_download_subset, mock_rmtree, mock_mkdtemp + ): + """Ensure that if an exception is raised during processing, this + causes a HarmonyException to be raised, to allow for informative + logging. """ mock_mkdtemp.return_value = self.tmp_dir mock_download_subset.side_effect = Exception('Random error') - message = Message({ - 'accessToken': 'fake-token', - 'callback': 'https://example.com/', - 'sources': [{ - 'collection': 'C1234567890-EEDTEST', - 'shortName': 'ATL03', - 'variables': [{'id': '', - 'name': self.atl03_variable, - 'fullPath': self.atl03_variable}]}], - 'stagingLocation': self.staging_location, - 'user': 'kmattingly', - }) + message = Message( + { + 'accessToken': 'fake-token', + 'callback': 'https://example.com/', + 'sources': [ + { + 'collection': 'C1234567890-EEDTEST', + 'shortName': 'ATL03', + 'variables': [ + { + 'id': '', + 'name': self.atl03_variable, + 'fullPath': self.atl03_variable, + } + ], + } + ], + 'stagingLocation': self.staging_location, + 'user': 'kmattingly', + } + ) with self.assertRaises(HarmonyException): - hoss = HossAdapter(message, config=config(False), - catalog=self.input_stac) + hoss = HossAdapter(message, config=config(False), catalog=self.input_stac) hoss.invoke() mock_stage.assert_not_called() @@ -2019,15 +2655,19 @@ def test_exception_handling(self, mock_stage, mock_download_subset, @patch('shutil.rmtree') @patch('hoss.utilities.util_download') @patch('hoss.adapter.stage') - def test_edge_aligned_no_bounds_end_to_end(self, mock_stage, - mock_util_download, - mock_rmtree, mock_mkdtemp, - mock_uuid, - mock_get_fill_slice): - """ Ensure a request for a collection that contains dimension variables - with edge-aligned grid cells is correctly processed regardless of - whether or not a bounds variable associated with that dimension - variable exists. + def test_edge_aligned_no_bounds_end_to_end( + self, + mock_stage, + mock_util_download, + mock_rmtree, + mock_mkdtemp, + mock_uuid, + mock_get_fill_slice, + ): + """Ensure a request for a collection that contains dimension variables + with edge-aligned grid cells is correctly processed regardless of + whether or not a bounds variable associated with that dimension + variable exists. 
""" expected_output_basename = 'opendap_url_global_asr_obs_grid_subsetted.nc4' @@ -2045,50 +2685,76 @@ def test_edge_aligned_no_bounds_end_to_end(self, mock_stage, all_variables_path = f'{self.tmp_dir}/variables.nc4' copy('tests/data/ATL16_variables.nc4', all_variables_path) - mock_util_download.side_effect = [dmr_path, dimensions_path, - all_variables_path] - - message = Message({ - 'accessToken': 'fake-token', - 'callback': 'https://example.com/', - 'sources': [{ - 'collection': 'C1238589498-EEDTEST', - 'shortName': 'ATL16', - 'variables': [{'id': '', - 'name': self.atl16_variable, - 'fullPath': self.atl16_variable}]}], - 'stagingLocation': self.staging_location, - 'subset': {'bbox': [77, 71.25, 88, 74.75]}, - 'user': 'sride', - }) + mock_util_download.side_effect = [dmr_path, dimensions_path, all_variables_path] + + message = Message( + { + 'accessToken': 'fake-token', + 'callback': 'https://example.com/', + 'sources': [ + { + 'collection': 'C1238589498-EEDTEST', + 'shortName': 'ATL16', + 'variables': [ + { + 'id': '', + 'name': self.atl16_variable, + 'fullPath': self.atl16_variable, + } + ], + } + ], + 'stagingLocation': self.staging_location, + 'subset': {'bbox': [77, 71.25, 88, 74.75]}, + 'user': 'sride', + } + ) hoss = HossAdapter(message, config=config(False), catalog=self.input_stac) _, output_catalog = hoss.invoke() # Ensure that there is a single item in the output catalog with the # expected asset: - self.assert_expected_output_catalog(output_catalog, - expected_staged_url, - expected_output_basename) + self.assert_expected_output_catalog( + output_catalog, expected_staged_url, expected_output_basename + ) # Ensure the expected requests were made against OPeNDAP. self.assertEqual(mock_util_download.call_count, 3) - mock_util_download.assert_has_calls([ - call(f'{self.granule_url}.dmr.xml', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=None, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - call(f'{self.granule_url}.dap.nc4', self.tmp_dir, hoss.logger, - access_token=message.accessToken, data=ANY, cfg=hoss.config), - ]) + mock_util_download.assert_has_calls( + [ + call( + f'{self.granule_url}.dmr.xml', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=None, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + call( + f'{self.granule_url}.dap.nc4', + self.tmp_dir, + hoss.logger, + access_token=message.accessToken, + data=ANY, + cfg=hoss.config, + ), + ] + ) # Ensure the constraint expression for dimensions data included only # dimension variables and their associated bounds variables. dimensions_data = mock_util_download.call_args_list[1][1].get('data', {}) self.assert_valid_request_data( - dimensions_data, - {'%2Fglobal_grid_lat', - '%2Fglobal_grid_lon'} + dimensions_data, {'%2Fglobal_grid_lat', '%2Fglobal_grid_lon'} ) # Ensure the constraint expression contains all the required variables. 
@@ -2105,17 +2771,21 @@ def test_edge_aligned_no_bounds_end_to_end(self, mock_stage,
         index_range_data = mock_util_download.call_args_list[2][1].get('data', {})
         self.assert_valid_request_data(
             index_range_data,
-            {'%2Fglobal_asr_obs_grid%5B53%3A54%5D%5B85%3A89%5D',
-             '%2Fglobal_grid_lat%5B53%3A54%5D',
-             '%2Fglobal_grid_lon%5B85%3A89%5D'}
+            {
+                '%2Fglobal_asr_obs_grid%5B53%3A54%5D%5B85%3A89%5D',
+                '%2Fglobal_grid_lat%5B53%3A54%5D',
+                '%2Fglobal_grid_lon%5B85%3A89%5D',
+            },
         )
 
         # Ensure the output was staged with the expected file name
-        mock_stage.assert_called_once_with(f'{self.tmp_dir}/uuid2.nc4',
-                                           expected_output_basename,
-                                           'application/x-netcdf4',
-                                           location=self.staging_location,
-                                           logger=hoss.logger)
+        mock_stage.assert_called_once_with(
+            f'{self.tmp_dir}/uuid2.nc4',
+            expected_output_basename,
+            'application/x-netcdf4',
+            location=self.staging_location,
+            logger=hoss.logger,
+        )
         mock_rmtree.assert_called_once_with(self.tmp_dir)
 
diff --git a/tests/test_code_format.py b/tests/test_code_format.py
index 3693c06..56d6689 100644
--- a/tests/test_code_format.py
+++ b/tests/test_code_format.py
@@ -5,26 +5,29 @@
 
 
 class TestCodeFormat(TestCase):
-    """ This test class should ensure all Harmony service Python code adheres
-    to standard Python code styling.
+    """This test class should ensure all Harmony service Python code adheres
+    to standard Python code styling.
 
-        Ignored errors and warning:
+    Ignored errors and warnings:
 
-        * E501: Line length, which defaults to 80 characters. This is a
-          preferred feature of the code, but not always easily achieved.
-        * W503: Break before binary operator. Have to ignore one of W503 or
-          W504 to allow for breaking of some long lines. PEP8 suggests
-          breaking the line before a binary operatore is more "Pythonic".
+    * E501: Line length, which defaults to 80 characters. This is a
+      preferred feature of the code, but not always easily achieved.
+    * W503: Break before binary operator. Have to ignore one of W503 or
+      W504 to allow for breaking of some long lines. PEP8 suggests
+      breaking the line before a binary operator is more "Pythonic".
+    * E203, E701: This repository uses black code formatting, which deviates
+      from PEP8 for these errors.
 
     """
+
     @classmethod
     def setUpClass(cls):
         cls.python_files = Path('hoss').rglob('*.py')
 
     def test_pycodestyle_adherence(self):
-        """ Ensure all code in the `hoss` directory adheres to PEP8
-        defined standard.
+        """Ensure all code in the `hoss` directory adheres to the PEP8
+        defined standard.
 
         """
-        style_guide = StyleGuide(ignore=['E501', 'W503'])
+        style_guide = StyleGuide(ignore=['E501', 'W503', 'E203', 'E701'])
         results = style_guide.check_files(self.python_files)
         self.assertEqual(results.total_errors, 0, 'Found code style issues.')
diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py
index a2c1e57..681f36a 100644
--- a/tests/unit/__init__.py
+++ b/tests/unit/__init__.py
@@ -1,2 +1,3 @@
 import os
+
 os.environ['ENV'] = os.environ.get('ENV') or 'test'
diff --git a/tests/unit/test_adapter.py b/tests/unit/test_adapter.py
index 760b3a8..32d6701 100644
--- a/tests/unit/test_adapter.py
+++ b/tests/unit/test_adapter.py
@@ -15,52 +15,63 @@
 
 @patch('hoss.adapter.subset_granule')
 @patch('hoss.adapter.stage')
 class TestAdapter(TestCase):
-    """ Test the HossAdapter class for basic functionality including:
+    """Test the HossAdapter class for basic functionality including:
 
-        - Synchronous vs asynchronous behaviour.
-        - Basic message validation.
+    - Synchronous vs asynchronous behaviour.
+    - Basic message validation.
""" @classmethod def setUpClass(cls): - cls.operations = {'is_variable_subset': True, - 'is_regridded': False, - 'is_subsetted': False} + cls.operations = { + 'is_variable_subset': True, + 'is_regridded': False, + 'is_subsetted': False, + } cls.africa_granule_url = '/home/tests/data/africa.nc' - cls.africa_stac = create_stac([Granule(cls.africa_granule_url, None, - ['opendap', 'data'])]) + cls.africa_stac = create_stac( + [Granule(cls.africa_granule_url, None, ['opendap', 'data'])] + ) def setUp(self): self.config = config(validate=False) self.process_item_spy = spy_on(HossAdapter.process_item) - def create_message(self, collection_id: str, collection_short_name: str, - variable_list: List[str], user: str, - is_synchronous: Optional[bool] = None, - bounding_box: Optional[List[float]] = None, - temporal_range: Optional[Dict[str, str]] = None, - shape_file: Optional[str] = None, - dimensions: Optional[List[Dict]] = None) -> Message: - """ Create a Harmony Message object with the requested attributes. """ + def create_message( + self, + collection_id: str, + collection_short_name: str, + variable_list: List[str], + user: str, + is_synchronous: Optional[bool] = None, + bounding_box: Optional[List[float]] = None, + temporal_range: Optional[Dict[str, str]] = None, + shape_file: Optional[str] = None, + dimensions: Optional[List[Dict]] = None, + ) -> Message: + """Create a Harmony Message object with the requested attributes.""" variables = [{'name': variable} for variable in variable_list] message_content = { - 'sources': [{'collection': collection_id, - 'shortName': collection_short_name, - 'variables': variables}], + 'sources': [ + { + 'collection': collection_id, + 'shortName': collection_short_name, + 'variables': variables, + } + ], 'user': user, 'callback': 'https://example.com/', 'stagingLocation': 's3://example-bucket/', 'accessToken': 'xyzzy', - 'subset': {'bbox': bounding_box, 'dimensions': dimensions, - 'shape': None}, - 'temporal': temporal_range + 'subset': {'bbox': bounding_box, 'dimensions': dimensions, 'shape': None}, + 'temporal': temporal_range, } if shape_file is not None: message_content['subset']['shape'] = { 'href': shape_file, - 'type': 'application/geo+json' + 'type': 'application/geo+json', } if is_synchronous is not None: @@ -68,166 +79,182 @@ def create_message(self, collection_id: str, collection_short_name: str, return Message(json.dumps(message_content)) - def test_temporal_request(self, mock_stage, mock_subset_granule, - mock_get_mimetype): - """ A request that specifies a temporal range should result in a - temporal subset. + def test_temporal_request(self, mock_stage, mock_subset_granule, mock_get_mimetype): + """A request that specifies a temporal range should result in a + temporal subset. 
""" mock_subset_granule.return_value = '/path/to/output.nc' mock_get_mimetype.return_value = ('application/x-netcdf4', None) - temporal_range = {'start': '2021-01-01T00:00:00', - 'end': '2021-01-02T00:00:00'} + temporal_range = {'start': '2021-01-01T00:00:00', 'end': '2021-01-02T00:00:00'} collection_short_name = 'harmony_example_l2' - message = self.create_message('C1233860183-EEDTEST', - collection_short_name, - ['alpha_var', 'blue_var'], - 'mcollins', - bounding_box=None, - temporal_range=temporal_range) + message = self.create_message( + 'C1233860183-EEDTEST', + collection_short_name, + ['alpha_var', 'blue_var'], + 'mcollins', + bounding_box=None, + temporal_range=temporal_range, + ) - hoss = HossAdapter(message, config=self.config, - catalog=self.africa_stac) + hoss = HossAdapter(message, config=self.config, catalog=self.africa_stac) with patch.object(HossAdapter, 'process_item', self.process_item_spy): hoss.invoke() - mock_subset_granule.assert_called_once_with(self.africa_granule_url, - message.sources[0], - ANY, - hoss.message, - hoss.logger, - hoss.config) + mock_subset_granule.assert_called_once_with( + self.africa_granule_url, + message.sources[0], + ANY, + hoss.message, + hoss.logger, + hoss.config, + ) mock_get_mimetype.assert_called_once_with('/path/to/output.nc') - mock_stage.assert_called_once_with('/path/to/output.nc', - 'africa_subsetted.nc4', - 'application/x-netcdf4', - location='s3://example-bucket/', - logger=hoss.logger) - - def test_synchronous_request(self, - mock_stage, - mock_subset_granule, - mock_get_mimetype): - """ A request that specifies `isSynchronous = True` should complete - for a single granule. It should call the `subset_granule` function, - and then indicate the request completed. + mock_stage.assert_called_once_with( + '/path/to/output.nc', + 'africa_subsetted.nc4', + 'application/x-netcdf4', + location='s3://example-bucket/', + logger=hoss.logger, + ) + + def test_synchronous_request( + self, mock_stage, mock_subset_granule, mock_get_mimetype + ): + """A request that specifies `isSynchronous = True` should complete + for a single granule. It should call the `subset_granule` function, + and then indicate the request completed. 
""" mock_subset_granule.return_value = '/path/to/output.nc' mock_get_mimetype.return_value = ('application/x-netcdf4', None) collection_short_name = 'harmony_example_l2' - message = self.create_message('C1233860183-EEDTEST', - collection_short_name, - ['alpha_var', 'blue_var'], - 'narmstrong', - True) + message = self.create_message( + 'C1233860183-EEDTEST', + collection_short_name, + ['alpha_var', 'blue_var'], + 'narmstrong', + True, + ) - hoss = HossAdapter(message, config=self.config, - catalog=self.africa_stac) + hoss = HossAdapter(message, config=self.config, catalog=self.africa_stac) with patch.object(HossAdapter, 'process_item', self.process_item_spy): hoss.invoke() - mock_subset_granule.assert_called_once_with(self.africa_granule_url, - message.sources[0], - ANY, - hoss.message, - hoss.logger, - hoss.config) + mock_subset_granule.assert_called_once_with( + self.africa_granule_url, + message.sources[0], + ANY, + hoss.message, + hoss.logger, + hoss.config, + ) mock_get_mimetype.assert_called_once_with('/path/to/output.nc') - mock_stage.assert_called_once_with('/path/to/output.nc', - 'africa_subsetted.nc4', - 'application/x-netcdf4', - location='s3://example-bucket/', - logger=hoss.logger) - - def test_asynchronous_request(self, - mock_stage, - mock_subset_granule, - mock_get_mimetype): - """ A request that specified `isSynchronous = False` should complete - for a single granule. It should call the `subset_granule` function, - and then indicate the request completed. + mock_stage.assert_called_once_with( + '/path/to/output.nc', + 'africa_subsetted.nc4', + 'application/x-netcdf4', + location='s3://example-bucket/', + logger=hoss.logger, + ) + + def test_asynchronous_request( + self, mock_stage, mock_subset_granule, mock_get_mimetype + ): + """A request that specified `isSynchronous = False` should complete + for a single granule. It should call the `subset_granule` function, + and then indicate the request completed. """ mock_subset_granule.return_value = '/path/to/output.nc' mock_get_mimetype.return_value = ('application/x-netcdf4', None) collection_short_name = 'harmony_example_l2' - message = self.create_message('C1233860183-EEDTEST', - collection_short_name, - ['alpha_var', 'blue_var'], - 'ealdrin', - False) + message = self.create_message( + 'C1233860183-EEDTEST', + collection_short_name, + ['alpha_var', 'blue_var'], + 'ealdrin', + False, + ) - hoss = HossAdapter(message, config=self.config, - catalog=self.africa_stac) + hoss = HossAdapter(message, config=self.config, catalog=self.africa_stac) with patch.object(HossAdapter, 'process_item', self.process_item_spy): hoss.invoke() - mock_subset_granule.assert_called_once_with(self.africa_granule_url, - message.sources[0], - ANY, - hoss.message, - hoss.logger, - hoss.config) + mock_subset_granule.assert_called_once_with( + self.africa_granule_url, + message.sources[0], + ANY, + hoss.message, + hoss.logger, + hoss.config, + ) mock_get_mimetype.assert_called_once_with('/path/to/output.nc') - mock_stage.assert_called_once_with('/path/to/output.nc', - 'africa_subsetted.nc4', - 'application/x-netcdf4', - location='s3://example-bucket/', - logger=hoss.logger) - - def test_unspecified_synchronous_request(self, - mock_stage, - mock_subset_granule, - mock_get_mimetype): - """ A request the does not specify `isSynchronous` should default to - synchronous behaviour. The `subset_granule` function should be - called. Then the request should complete. 
+        mock_stage.assert_called_once_with(
+            '/path/to/output.nc',
+            'africa_subsetted.nc4',
+            'application/x-netcdf4',
+            location='s3://example-bucket/',
+            logger=hoss.logger,
+        )
+
+    def test_unspecified_synchronous_request(
+        self, mock_stage, mock_subset_granule, mock_get_mimetype
+    ):
+        """A request that does not specify `isSynchronous` should default to
+        synchronous behaviour. The `subset_granule` function should be
+        called. Then the request should complete.
 
         """
         mock_subset_granule.return_value = '/path/to/output.nc'
         mock_get_mimetype.return_value = ('application/x-netcdf4', None)
 
         collection_short_name = 'harmony_example_l2'
-        message = self.create_message('C1233860183-EEDTEST',
-                                      collection_short_name,
-                                      ['alpha_var', 'blue_var'],
-                                      'mcollins')
+        message = self.create_message(
+            'C1233860183-EEDTEST',
+            collection_short_name,
+            ['alpha_var', 'blue_var'],
+            'mcollins',
+        )
 
-        hoss = HossAdapter(message, config=self.config,
-                           catalog=self.africa_stac)
+        hoss = HossAdapter(message, config=self.config, catalog=self.africa_stac)
 
         with patch.object(HossAdapter, 'process_item', self.process_item_spy):
             hoss.invoke()
 
-        mock_subset_granule.assert_called_once_with(self.africa_granule_url,
-                                                    message.sources[0],
-                                                    ANY,
-                                                    hoss.message,
-                                                    hoss.logger,
-                                                    hoss.config)
+        mock_subset_granule.assert_called_once_with(
+            self.africa_granule_url,
+            message.sources[0],
+            ANY,
+            hoss.message,
+            hoss.logger,
+            hoss.config,
+        )
         mock_get_mimetype.assert_called_once_with('/path/to/output.nc')
-        mock_stage.assert_called_once_with('/path/to/output.nc',
-                                           'africa_subsetted.nc4',
-                                           'application/x-netcdf4',
-                                           location='s3://example-bucket/',
-                                           logger=hoss.logger)
+        mock_stage.assert_called_once_with(
+            '/path/to/output.nc',
+            'africa_subsetted.nc4',
+            'application/x-netcdf4',
+            location='s3://example-bucket/',
+            logger=hoss.logger,
+        )
 
-    def test_hoss_bbox_request(self, mock_stage, mock_subset_granule,
-                               mock_get_mimetype):
-        """ A request that specifies a bounding box should result in a both a
-        variable and a bounding box spatial subset being made.
+    def test_hoss_bbox_request(
+        self, mock_stage, mock_subset_granule, mock_get_mimetype
+    ):
+        """A request that specifies a bounding box should result in both a
+        variable and a bounding box spatial subset being made.
""" mock_subset_granule.return_value = '/path/to/output.nc' @@ -235,36 +262,42 @@ def test_hoss_bbox_request(self, mock_stage, mock_subset_granule, bounding_box = BBox(-20, -10, 20, 30) collection_short_name = 'harmony_example_l2' - message = self.create_message('C1233860183-EEDTEST', - collection_short_name, - ['alpha_var', 'blue_var'], - 'mcollins', - bounding_box=bounding_box) + message = self.create_message( + 'C1233860183-EEDTEST', + collection_short_name, + ['alpha_var', 'blue_var'], + 'mcollins', + bounding_box=bounding_box, + ) - hoss = HossAdapter(message, config=self.config, - catalog=self.africa_stac) + hoss = HossAdapter(message, config=self.config, catalog=self.africa_stac) with patch.object(HossAdapter, 'process_item', self.process_item_spy): hoss.invoke() - mock_subset_granule.assert_called_once_with(self.africa_granule_url, - message.sources[0], - ANY, - hoss.message, - hoss.logger, - hoss.config) + mock_subset_granule.assert_called_once_with( + self.africa_granule_url, + message.sources[0], + ANY, + hoss.message, + hoss.logger, + hoss.config, + ) mock_get_mimetype.assert_called_once_with('/path/to/output.nc') - mock_stage.assert_called_once_with('/path/to/output.nc', - 'africa_subsetted.nc4', - 'application/x-netcdf4', - location='s3://example-bucket/', - logger=hoss.logger) + mock_stage.assert_called_once_with( + '/path/to/output.nc', + 'africa_subsetted.nc4', + 'application/x-netcdf4', + location='s3://example-bucket/', + logger=hoss.logger, + ) - def test_hoss_shape_file_request(self, mock_stage, mock_subset_granule, - mock_get_mimetype): - """ A request that specifies a shape file should result in a both a - variable and a spatial subset being made. + def test_hoss_shape_file_request( + self, mock_stage, mock_subset_granule, mock_get_mimetype + ): + """A request that specifies a shape file should result in a both a + variable and a spatial subset being made. 
""" collection_short_name = 'harmony_example_l2' @@ -272,41 +305,47 @@ def test_hoss_shape_file_request(self, mock_stage, mock_subset_granule, mock_subset_granule.return_value = '/path/to/output.nc' mock_get_mimetype.return_value = ('application/x-netcdf4', None) - message = self.create_message('C1233860183-EEDTEST', - collection_short_name, - ['alpha_var', 'blue_var'], - 'mcollins', - shape_file=shape_file_url) + message = self.create_message( + 'C1233860183-EEDTEST', + collection_short_name, + ['alpha_var', 'blue_var'], + 'mcollins', + shape_file=shape_file_url, + ) - hoss = HossAdapter(message, config=self.config, - catalog=self.africa_stac) + hoss = HossAdapter(message, config=self.config, catalog=self.africa_stac) with patch.object(HossAdapter, 'process_item', self.process_item_spy): hoss.invoke() - mock_subset_granule.assert_called_once_with(self.africa_granule_url, - message.sources[0], - ANY, - hoss.message, - hoss.logger, - hoss.config) + mock_subset_granule.assert_called_once_with( + self.africa_granule_url, + message.sources[0], + ANY, + hoss.message, + hoss.logger, + hoss.config, + ) mock_get_mimetype.assert_called_once_with('/path/to/output.nc') - mock_stage.assert_called_once_with('/path/to/output.nc', - 'africa_subsetted.nc4', - 'application/x-netcdf4', - location='s3://example-bucket/', - logger=hoss.logger) - - def test_hoss_named_dimension(self, mock_stage, mock_subset_granule, - mock_get_mimetype): - """ A request with a message that specifies a named dimension within a - granule, with a specific range of data, should have that - information extracted from the input message and passed along to - the `subset_granule` function. - - This unit test refers to a file that is not actually stored in the - repository, as it would be large. + mock_stage.assert_called_once_with( + '/path/to/output.nc', + 'africa_subsetted.nc4', + 'application/x-netcdf4', + location='s3://example-bucket/', + logger=hoss.logger, + ) + + def test_hoss_named_dimension( + self, mock_stage, mock_subset_granule, mock_get_mimetype + ): + """A request with a message that specifies a named dimension within a + granule, with a specific range of data, should have that + information extracted from the input message and passed along to + the `subset_granule` function. + + This unit test refers to a file that is not actually stored in the + repository, as it would be large. 
""" collection_short_name = 'M2I3NPASM' @@ -314,98 +353,103 @@ def test_hoss_named_dimension(self, mock_stage, mock_subset_granule, mock_subset_granule.return_value = '/path/to/output.nc' mock_get_mimetype.return_value = ('application/x-netcdf4', None) - message = self.create_message('C1245663527-EEDTEST', - collection_short_name, - ['H1000'], - 'dbowman', - dimensions=[{'name': 'lev', - 'min': 800, - 'max': 1000}]) - input_stac = create_stac([Granule(granule_url, None, - ['opendap', 'data'])]) + message = self.create_message( + 'C1245663527-EEDTEST', + collection_short_name, + ['H1000'], + 'dbowman', + dimensions=[{'name': 'lev', 'min': 800, 'max': 1000}], + ) + input_stac = create_stac([Granule(granule_url, None, ['opendap', 'data'])]) - hoss = HossAdapter(message, config=self.config, - catalog=input_stac) + hoss = HossAdapter(message, config=self.config, catalog=input_stac) with patch.object(HossAdapter, 'process_item', self.process_item_spy): hoss.invoke() - mock_subset_granule.assert_called_once_with(granule_url, - message.sources[0], - ANY, - hoss.message, - hoss.logger, - hoss.config) + mock_subset_granule.assert_called_once_with( + granule_url, message.sources[0], ANY, hoss.message, hoss.logger, hoss.config + ) mock_get_mimetype.assert_called_once_with('/path/to/output.nc') - mock_stage.assert_called_once_with('/path/to/output.nc', - 'M2I3NPASM_H1000_subsetted.nc4', - 'application/x-netcdf4', - location='s3://example-bucket/', - logger=hoss.logger) + mock_stage.assert_called_once_with( + '/path/to/output.nc', + 'M2I3NPASM_H1000_subsetted.nc4', + 'application/x-netcdf4', + location='s3://example-bucket/', + logger=hoss.logger, + ) - def test_missing_granules(self, - mock_stage, - mock_subset_granule, - mock_get_mimetype): - """ A request with no specified granules in an inbound Harmony message - should raise an exception. + def test_missing_granules(self, mock_stage, mock_subset_granule, mock_get_mimetype): + """A request with no specified granules in an inbound Harmony message + should raise an exception. """ collection_short_name = 'harmony_example_l2' mock_subset_granule.return_value = '/path/to/output.nc' mock_get_mimetype.return_value = ('application/x-netcdf4', None) - message = self.create_message('C1233860183-EEDTEST', - collection_short_name, - ['alpha_var', 'blue_var'], - 'pconrad', - False) + message = self.create_message( + 'C1233860183-EEDTEST', + collection_short_name, + ['alpha_var', 'blue_var'], + 'pconrad', + False, + ) - hoss = HossAdapter(message, config=self.config, - catalog=create_stac([])) + hoss = HossAdapter(message, config=self.config, catalog=create_stac([])) with self.assertRaises(Exception) as context_manager: with patch.object(HossAdapter, 'process_item', self.process_item_spy): hoss.invoke() - self.assertEqual(str(context_manager.exception), - 'No granules specified for variable subsetting') + self.assertEqual( + str(context_manager.exception), + 'No granules specified for variable subsetting', + ) mock_subset_granule.assert_not_called() mock_get_mimetype.assert_not_called() mock_stage.assert_not_called() - def test_asynchronous_multiple_granules(self, - mock_stage, - mock_subset_granule, - mock_get_mimetype): - """ A request for asynchronous processing, with multiple granules - specified should be successful, and call `subset_granule` for each - input granule. 
+    def test_asynchronous_multiple_granules(
+        self, mock_stage, mock_subset_granule, mock_get_mimetype
+    ):
+        """A request for asynchronous processing, with multiple granules
+        specified, should be successful and call `subset_granule` for each
+        input granule.
 
         """
         output_paths = ['/path/to/output1.nc', '/path/to/output2.nc']
-        output_filenames = ['africa_subsetted.nc4',
-                            'f16_ssmis_20200102v7_subsetted.nc4']
+        output_filenames = [
+            'africa_subsetted.nc4',
+            'f16_ssmis_20200102v7_subsetted.nc4',
+        ]
 
         mock_subset_granule.side_effect = output_paths
         mock_get_mimetype.return_value = ('application/x-netcdf4', None)
 
         collection_short_name = 'harmony_example_l2'
-        message = self.create_message('C1233860183-EEDTEST',
-                                      collection_short_name,
-                                      ['alpha_var', 'blue_var'], 'abean',
-                                      False)
-        input_stac = create_stac([
-            Granule(self.africa_granule_url, None, ['opendap', 'data']),
-            Granule('/home/tests/data/f16_ssmis_20200102v7.nc', None,
-                    ['opendap', 'data'])
-        ])
-
-        hoss = HossAdapter(message, config=self.config,
-                           catalog=input_stac)
+        message = self.create_message(
+            'C1233860183-EEDTEST',
+            collection_short_name,
+            ['alpha_var', 'blue_var'],
+            'abean',
+            False,
+        )
+        input_stac = create_stac(
+            [
+                Granule(self.africa_granule_url, None, ['opendap', 'data']),
+                Granule(
+                    '/home/tests/data/f16_ssmis_20200102v7.nc',
+                    None,
+                    ['opendap', 'data'],
+                ),
+            ]
+        )
+
+        hoss = HossAdapter(message, config=self.config, catalog=input_stac)
 
         with patch.object(HossAdapter, 'process_item', self.process_item_spy):
             hoss.invoke()
 
@@ -413,55 +457,60 @@ def test_asynchronous_multiple_granules(self,
         granules = hoss.message.granules
 
         for index, granule in enumerate(granules):
-            mock_subset_granule.assert_any_call(granule.url,
-                                                message.sources[0],
-                                                ANY,
-                                                hoss.message,
-                                                hoss.logger,
-                                                self.config)
+            mock_subset_granule.assert_any_call(
+                granule.url,
+                message.sources[0],
+                ANY,
+                hoss.message,
+                hoss.logger,
+                self.config,
+            )
             mock_get_mimetype.assert_any_call(output_paths[index])
-            mock_stage.assert_any_call(output_paths[index],
-                                       output_filenames[index],
-                                       'application/x-netcdf4',
-                                       location=message.stagingLocation,
-                                       logger=hoss.logger)
-
-    def test_missing_variables(self,
-                               mock_stage,
-                               mock_subset_granule,
-                               mock_get_mimetype):
-        """ Ensure that if no variables are specified for a source, the service
-        will not raise an exception, and that the variables specified to
-        the `subset_granule` function is an empty list. The output of that
-        function should be staged by Harmony.
+            mock_stage.assert_any_call(
+                output_paths[index],
+                output_filenames[index],
+                'application/x-netcdf4',
+                location=message.stagingLocation,
+                logger=hoss.logger,
+            )
+
+    def test_missing_variables(
+        self, mock_stage, mock_subset_granule, mock_get_mimetype
+    ):
+        """Ensure that if no variables are specified for a source, the service
+        will not raise an exception, and that the variables passed to
+        the `subset_granule` function are an empty list. The output of that
+        function should be staged by Harmony.
""" mock_subset_granule.return_value = '/path/to/output.nc' mock_get_mimetype.return_value = ('application/x-netcdf4', None) collection_short_name = 'harmony_example_l2' - message = self.create_message('C1233860183-EEDTEST', - collection_short_name, - [], - 'jlovell') + message = self.create_message( + 'C1233860183-EEDTEST', collection_short_name, [], 'jlovell' + ) - hoss = HossAdapter(message, config=self.config, - catalog=self.africa_stac) + hoss = HossAdapter(message, config=self.config, catalog=self.africa_stac) with patch.object(HossAdapter, 'process_item', self.process_item_spy): hoss.invoke() - mock_subset_granule.assert_called_once_with(self.africa_granule_url, - message.sources[0], - ANY, - hoss.message, - hoss.logger, - hoss.config) + mock_subset_granule.assert_called_once_with( + self.africa_granule_url, + message.sources[0], + ANY, + hoss.message, + hoss.logger, + hoss.config, + ) mock_get_mimetype.assert_called_once_with('/path/to/output.nc') - mock_stage.assert_called_once_with('/path/to/output.nc', - 'africa.nc4', - 'application/x-netcdf4', - location='s3://example-bucket/', - logger=hoss.logger) + mock_stage.assert_called_once_with( + '/path/to/output.nc', + 'africa.nc4', + 'application/x-netcdf4', + location='s3://example-bucket/', + logger=hoss.logger, + ) diff --git a/tests/unit/test_bbox_utilities.py b/tests/unit/test_bbox_utilities.py index b7e862c..8b8946c 100644 --- a/tests/unit/test_bbox_utilities.py +++ b/tests/unit/test_bbox_utilities.py @@ -6,6 +6,7 @@ encloses the shape. """ + from logging import getLogger from os.path import join as path_join from unittest import TestCase @@ -15,29 +16,34 @@ from harmony.message import Message from harmony.util import config -from hoss.bbox_utilities import (aggregate_all_geometries, - aggregate_geometry_coordinates, BBox, - bbox_in_longitude_range, - crosses_antimeridian, - flatten_list, - get_bounding_box_lon_lat, - get_antimeridian_bbox, - get_antimeridian_geometry_bbox, - get_contiguous_bbox, - get_geographic_bbox, - get_harmony_message_bbox, - get_latitude_range, - get_request_shape_file, - get_shape_file_geojson, - is_list_of_coordinates, is_single_point) +from hoss.bbox_utilities import ( + aggregate_all_geometries, + aggregate_geometry_coordinates, + BBox, + bbox_in_longitude_range, + crosses_antimeridian, + flatten_list, + get_bounding_box_lon_lat, + get_antimeridian_bbox, + get_antimeridian_geometry_bbox, + get_contiguous_bbox, + get_geographic_bbox, + get_harmony_message_bbox, + get_latitude_range, + get_request_shape_file, + get_shape_file_geojson, + is_list_of_coordinates, + is_single_point, +) from hoss.exceptions import InvalidInputGeoJSON, UnsupportedShapeFileFormat class TestBBoxUtilities(TestCase): - """ A class for testing functions in the `hoss.bbox_utilities` - module. + """A class for testing functions in the `hoss.bbox_utilities` + module. """ + @classmethod def setUpClass(cls): cls.config = config(validate=False) @@ -53,7 +59,7 @@ def setUpClass(cls): @staticmethod def read_geojson(geojson_basename: str): - """ A helper function to extract GeoJSON from a supplied file path. 
""" + """A helper function to extract GeoJSON from a supplied file path.""" geojson_path = path_join('tests/geojson_examples', geojson_basename) with open(geojson_path, 'r', encoding='utf-8') as file_handler: @@ -62,20 +68,25 @@ def read_geojson(geojson_basename: str): return geojson_content def test_get_harmony_message_bbox(self): - """ Ensure a BBox object is returned from an input Harmony message if - there is a bounding box included in that message. + """Ensure a BBox object is returned from an input Harmony message if + there is a bounding box included in that message. """ with self.subTest('There is a bounding box in the message.'): message = Message({'subset': {'bbox': [1, 2, 3, 4]}}) - self.assertTupleEqual(get_harmony_message_bbox(message), - BBox(1, 2, 3, 4)) + self.assertTupleEqual(get_harmony_message_bbox(message), BBox(1, 2, 3, 4)) with self.subTest('There is a shape file in the message, but no bbox'): - message = Message({ - 'subset': {'shape': {'href': 'www.example.com/shape.geo.json', - 'type': 'application/geo_json'}} - }) + message = Message( + { + 'subset': { + 'shape': { + 'href': 'www.example.com/shape.geo.json', + 'type': 'application/geo_json', + } + } + } + ) self.assertIsNone(get_harmony_message_bbox(message)) with self.subTest('There is no subset attribute to the message.'): @@ -83,10 +94,8 @@ def test_get_harmony_message_bbox(self): self.assertIsNone(get_harmony_message_bbox(message)) def test_get_shape_file_geojson(self): - """ Ensure that a local GeoJSON file is correctly read. """ - read_geojson = get_shape_file_geojson( - 'tests/geojson_examples/point.geo.json' - ) + """Ensure that a local GeoJSON file is correctly read.""" + read_geojson = get_shape_file_geojson('tests/geojson_examples/point.geo.json') self.assertDictEqual(read_geojson, self.point_geojson) # Ensure that both files aren't just empty @@ -94,10 +103,10 @@ def test_get_shape_file_geojson(self): @patch('hoss.bbox_utilities.download') def test_get_request_shape_file(self, mock_download): - """ Ensure that a shape file is returned if present in an input Harmony - message. If the shape file MIME type is incorrect, an exception - should be raised. If no shape file is present, then the function - should return None. + """Ensure that a shape file is returned if present in an input Harmony + message. If the shape file MIME type is incorrect, an exception + should be raised. If no shape file is present, then the function + should return None. 
""" access_token = 'UUDDLRLRBA' @@ -108,181 +117,206 @@ def test_get_request_shape_file(self, mock_download): mock_download.return_value = local_shape_file_path with self.subTest('Shape file provided'): - message = Message({ - 'accessToken': access_token, - 'subset': {'shape': {'href': shape_file_url, - 'type': 'application/geo+json'}} - }) - - self.assertEqual(get_request_shape_file(message, local_dir, - self.logger, self.config), - local_shape_file_path) - - mock_download.assert_called_once_with(shape_file_url, local_dir, - logger=self.logger, - access_token=access_token, - cfg=self.config) + message = Message( + { + 'accessToken': access_token, + 'subset': { + 'shape': { + 'href': shape_file_url, + 'type': 'application/geo+json', + } + }, + } + ) + + self.assertEqual( + get_request_shape_file(message, local_dir, self.logger, self.config), + local_shape_file_path, + ) + + mock_download.assert_called_once_with( + shape_file_url, + local_dir, + logger=self.logger, + access_token=access_token, + cfg=self.config, + ) mock_download.reset_mock() with self.subTest('Shape file has wrong MIME type'): - message = Message({ - 'accessToken': access_token, - 'subset': {'shape': {'href': shape_file_url, 'type': 'bad'}} - }) + message = Message( + { + 'accessToken': access_token, + 'subset': {'shape': {'href': shape_file_url, 'type': 'bad'}}, + } + ) with self.assertRaises(UnsupportedShapeFileFormat): - get_request_shape_file(message, local_dir, self.logger, - self.config) + get_request_shape_file(message, local_dir, self.logger, self.config) mock_download.assert_not_called() with self.subTest('No shape file in message'): - message = Message({ - 'accessToken': access_token, - 'subset': {'bbox': [10, 20, 30, 40]} - }) + message = Message( + {'accessToken': access_token, 'subset': {'bbox': [10, 20, 30, 40]}} + ) - self.assertIsNone(get_request_shape_file(message, local_dir, - self.logger, self.config)) + self.assertIsNone( + get_request_shape_file(message, local_dir, self.logger, self.config) + ) mock_download.assert_not_called() with self.subTest('No subset property in message'): message = Message({'accessToken': access_token}) - self.assertIsNone(get_request_shape_file(message, local_dir, - self.logger, self.config)) + self.assertIsNone( + get_request_shape_file(message, local_dir, self.logger, self.config) + ) mock_download.assert_not_called() def test_get_geographic_bbox_antimeridian_combinations(self): - """ Ensure that the correct bounding box is extracted for Features that - cross the antimeridian: - - * An antimeridian crossing feature. - * An antimeridian crossing feature and a nearby non-antimeridian - crossing feature to the east (should extend the antimeridian - bounding box eastwards to retrieve the least data). - * An antimeridian crossing feature and a nearby non-antimeridian - crossing feature to the west (should extend the antimeridian - bounding box westwards to retrieve the least data). - * An antimeridian crossing feature and a non-antimeridian crossing - feature that lies entirely between the antimeridian and the - western extent of the antimeridian crossing feature. The returned - bounding box longitude extents should just be that of the - antimeridian crossing feature. - * An antimeridian crossing feature and a non-antimeridian crossing - feature that lies entirely between the antimeridian and the - eastern extent of the antimeridian crossing feature. The returned - bounding box longitude extents should just be those of the - antimeridian crossing feature. 
+        """Ensure that the correct bounding box is extracted for Features that
+        cross the antimeridian:
+
+        * An antimeridian crossing feature.
+        * An antimeridian crossing feature and a nearby non-antimeridian
+          crossing feature to the east (should extend the antimeridian
+          bounding box eastwards to retrieve the least data).
+        * An antimeridian crossing feature and a nearby non-antimeridian
+          crossing feature to the west (should extend the antimeridian
+          bounding box westwards to retrieve the least data).
+        * An antimeridian crossing feature and a non-antimeridian crossing
+          feature that lies entirely between the antimeridian and the
+          western extent of the antimeridian crossing feature. The returned
+          bounding box longitude extents should just be those of the
+          antimeridian crossing feature.
+        * An antimeridian crossing feature and a non-antimeridian crossing
+          feature that lies entirely between the antimeridian and the
+          eastern extent of the antimeridian crossing feature. The returned
+          bounding box longitude extents should just be those of the
+          antimeridian crossing feature.
 
         """
         test_args = [
             ['antimeridian_only.geo.json', BBox(175.0, 37.0, -176.0, 44.0)],
             ['antimeridian_west.geo.json', BBox(160.0, 37.0, -176.0, 55.0)],
             ['antimeridian_east.geo.json', BBox(175.0, 22.0, -160.0, 44.0)],
-            ['antimeridian_within_west.geo.json',
-             BBox(175.0, 37.0, -176.0, 44.0)],
-            ['antimeridian_within_east.geo.json',
-             BBox(175.0, 37.0, -176.0, 44.0)],
+            ['antimeridian_within_west.geo.json', BBox(175.0, 37.0, -176.0, 44.0)],
+            ['antimeridian_within_east.geo.json', BBox(175.0, 37.0, -176.0, 44.0)],
         ]
 
         for geojson_basename, expected_bounding_box in test_args:
             with self.subTest(geojson_basename):
                 geojson = self.read_geojson(geojson_basename)
-                self.assertTupleEqual(get_geographic_bbox(geojson),
-                                      expected_bounding_box)
+                self.assertTupleEqual(
+                    get_geographic_bbox(geojson), expected_bounding_box
+                )
 
     @patch('hoss.bbox_utilities.aggregate_all_geometries')
-    def test_get_geographic_bbox_geojson_has_bbox(self,
-                                                  mock_aggregate_all_geometries):
-        """ Ensure that, if present, the optional GeoJSON "bbox" attribute is
-        used. This will mean that further parsing of the "coordinates" is
-        not undertaken.
+    def test_get_geographic_bbox_geojson_has_bbox(self, mock_aggregate_all_geometries):
+        """Ensure that, if present, the optional GeoJSON "bbox" attribute is
+        used. This will mean that further parsing of the "coordinates" is
+        not undertaken.
 
         """
         bbox_geojson = self.read_geojson('polygon_with_bbox.geo.json')
 
-        self.assertTupleEqual(get_geographic_bbox(bbox_geojson),
-                              BBox(-114.05, 37.0, -109.04, 42.0))
+        self.assertTupleEqual(
+            get_geographic_bbox(bbox_geojson), BBox(-114.05, 37.0, -109.04, 42.0)
+        )
 
         # Because the bounding box was retrieved from the "bbox" attribute,
         # the function returns before it can call anything else.
         mock_aggregate_all_geometries.assert_not_called()
 
     def test_get_geographic_bbox_geojson_types(self):
-        """ Ensure that the correct bounding box is extracted for Features of
-        each of the core GeoJSON geometry types.
+        """Ensure that the correct bounding box is extracted for Features of
+        each of the core GeoJSON geometry types.
""" test_args = [ ['Point', self.point_geojson, BBox(2.295, 48.874, 2.295, 48.874)], - ['MultiPoint', self.multipoint_geojson, - BBox(-0.142, 51.501, -0.076, 51.508)], - ['LineString', self.linestring_geojson, - BBox(-80.519, 38.471, -75.696, 39.724)], - ['MultiLineString', self.multilinestring_geojson, - BBox(-3.194, 51.502, -0.128, 55.953)], - ['Polygon', self.polygon_geojson, - BBox(-114.05, 37.0, -109.04, 42.0)], - ['MultiPolygon', self.multipolygon_geojson, - BBox(-111.05, 37.0, -102.05, 45.0)], - ['GeometryCollection', self.geometrycollection_geojson, - BBox(-80.519, 38.471, -75.565, 39.724)], + [ + 'MultiPoint', + self.multipoint_geojson, + BBox(-0.142, 51.501, -0.076, 51.508), + ], + [ + 'LineString', + self.linestring_geojson, + BBox(-80.519, 38.471, -75.696, 39.724), + ], + [ + 'MultiLineString', + self.multilinestring_geojson, + BBox(-3.194, 51.502, -0.128, 55.953), + ], + ['Polygon', self.polygon_geojson, BBox(-114.05, 37.0, -109.04, 42.0)], + [ + 'MultiPolygon', + self.multipolygon_geojson, + BBox(-111.05, 37.0, -102.05, 45.0), + ], + [ + 'GeometryCollection', + self.geometrycollection_geojson, + BBox(-80.519, 38.471, -75.565, 39.724), + ], ] for description, geojson, expected_bounding_box in test_args: with self.subTest(description): - self.assertTupleEqual(get_geographic_bbox(geojson), - expected_bounding_box) + self.assertTupleEqual( + get_geographic_bbox(geojson), expected_bounding_box + ) def test_get_contiguous_bbox(self): - """ Ensure the aggregated longitudes and latitudes of one or more - GeoJSON geometries that do not cross the antimeridian can be - correctly combined to form a single bounding box. + """Ensure the aggregated longitudes and latitudes of one or more + GeoJSON geometries that do not cross the antimeridian can be + correctly combined to form a single bounding box. """ # The input coordinates are aggregated: # [(lon_0, lon_1, ..., lon_N), (lat_0, lat_1, ..., lat_N)] - point_coordinates = [(4, ), (6, )] + point_coordinates = [(4,), (6,)] linestring_coordinates = [(-10, 10), (-20, 20)] polygon_coordinates = [(30, 35, 35, 30, 30), (30, 30, 40, 40, 30)] with self.subTest('Point geometry'): - self.assertTupleEqual(get_contiguous_bbox([point_coordinates]), - BBox(4, 6, 4, 6)) + self.assertTupleEqual( + get_contiguous_bbox([point_coordinates]), BBox(4, 6, 4, 6) + ) with self.subTest('Single geometry'): self.assertTupleEqual( - get_contiguous_bbox([linestring_coordinates]), - BBox(-10, -20, 10, 20) + get_contiguous_bbox([linestring_coordinates]), BBox(-10, -20, 10, 20) ) with self.subTest('Multiple geometries'): self.assertTupleEqual( get_contiguous_bbox([linestring_coordinates, polygon_coordinates]), - BBox(-10, -20, 35, 40) + BBox(-10, -20, 35, 40), ) with self.subTest('Feature crossing antimeridian returns None'): self.assertIsNone(get_contiguous_bbox([[(170, -170), (10, 20)]])) def test_get_antimeridian_bbox(self): - """ Ensure the aggregated longitudes and latitudes of one or more - GeoJSON geometries crossing the antimeridian can be correctly - combined to form a single bounding box. + """Ensure the aggregated longitudes and latitudes of one or more + GeoJSON geometries crossing the antimeridian can be correctly + combined to form a single bounding box. - Because these features cross the antimeridian, the bounding box - will have a western extent that is greater than the eastern extent. + Because these features cross the antimeridian, the bounding box + will have a western extent that is greater than the eastern extent. 
""" # The input coordinates are aggregated: # [(lon_0, lon_1, ..., lon_N), (lat_0, lat_1, ..., lat_N)] - point_coordinates = [(0, ), (0, )] + point_coordinates = [(0,), (0,)] linestring_coordinates = [(160, -170), (-20, 20)] - polygon_coordinates = [(165, -165, -165, 165, 165), - (30, 30, 40, 40, 30)] + polygon_coordinates = [(165, -165, -165, 165, 165), (30, 30, 40, 40, 30)] with self.subTest('Point returns None'): self.assertIsNone(get_antimeridian_bbox([point_coordinates])) @@ -293,54 +327,51 @@ def test_get_antimeridian_bbox(self): with self.subTest('Single geometry'): self.assertTupleEqual( get_antimeridian_bbox([linestring_coordinates]), - BBox(160, -20, -170, 20) + BBox(160, -20, -170, 20), ) with self.subTest('Multiple geometries'): self.assertTupleEqual( - get_antimeridian_bbox([linestring_coordinates, - polygon_coordinates]), - BBox(160, -20, -165, 40) + get_antimeridian_bbox([linestring_coordinates, polygon_coordinates]), + BBox(160, -20, -165, 40), ) def test_get_antimeridian_geometry_bbox(self): - """ Ensure the aggregated longitudes and latitudes of one or more - GeoJSON geometries crossing the antimeridian can be correctly - combined to form a single bounding box. + """Ensure the aggregated longitudes and latitudes of one or more + GeoJSON geometries crossing the antimeridian can be correctly + combined to form a single bounding box. - Because these features cross the antimeridian, the bounding box - will have a western extent that is greater than the eastern extent. + Because these features cross the antimeridian, the bounding box + will have a western extent that is greater than the eastern extent. """ # The input coordinates are aggregated: # [(lon_0, lon_1, ..., lon_N), (lat_0, lat_1, ..., lat_N)] linestring_coordinates = [(160, -170), (-20, 20)] - polygon_coordinates = [(165, -165, -165, 165, 165), - (30, 30, 40, 40, 30)] + polygon_coordinates = [(165, -165, -165, 165, 165), (30, 30, 40, 40, 30)] test_args = [ ['LineString', linestring_coordinates, BBox(160, -20, -170, 20)], - ['Polygon', polygon_coordinates, BBox(165, 30, -165, 40)] + ['Polygon', polygon_coordinates, BBox(165, 30, -165, 40)], ] for description, coordinates, expected_bbox in test_args: with self.subTest(description): self.assertTupleEqual( - get_antimeridian_geometry_bbox(coordinates[0], - coordinates[1]), - expected_bbox + get_antimeridian_geometry_bbox(coordinates[0], coordinates[1]), + expected_bbox, ) def test_get_latitude_range(self): - """ Ensure that the broadest latitude range is extracted from a - combination of those bounding boxes that cross the antimeridian - and those that don't. The inputs to this function will include one - or both of: + """Ensure that the broadest latitude range is extracted from a + combination of those bounding boxes that cross the antimeridian + and those that don't. The inputs to this function will include one + or both of: - * A bounding box encapsulating all GeoJSON features that do not - cross the antimeridian. - * A bounding box encapsulating all GeoJSON features that do cross - the antimeridian. + * A bounding box encapsulating all GeoJSON features that do not + cross the antimeridian. + * A bounding box encapsulating all GeoJSON features that do cross + the antimeridian. 
""" antimeridian_bbox = BBox(170, -20, -170, 20) @@ -353,27 +384,42 @@ def test_get_latitude_range(self): test_args = [ ['Contiguous bbox only', north_bbox, None, (30, 50)], ['Antimeridian bbox only', None, antimeridian_bbox, (-20, 20)], - ['Contiguous north of antimeridian', north_bbox, antimeridian_bbox, - (-20, 50)], - ['Contiguous south of antimeridian', south_bbox, antimeridian_bbox, - (-60, 20)], - ['Overlapping bboxes', overlapping_bbox, antimeridian_bbox, - (-20, 30)], - ['Contiguous range contains antimeridian', taller_bbox, - antimeridian_bbox, (-30, 30)], - ['Contiguous range contained by antimeridian', shorter_bbox, - antimeridian_bbox, (-20, 20)] + [ + 'Contiguous north of antimeridian', + north_bbox, + antimeridian_bbox, + (-20, 50), + ], + [ + 'Contiguous south of antimeridian', + south_bbox, + antimeridian_bbox, + (-60, 20), + ], + ['Overlapping bboxes', overlapping_bbox, antimeridian_bbox, (-20, 30)], + [ + 'Contiguous range contains antimeridian', + taller_bbox, + antimeridian_bbox, + (-30, 30), + ], + [ + 'Contiguous range contained by antimeridian', + shorter_bbox, + antimeridian_bbox, + (-20, 20), + ], ] for description, contiguous_bbox, am_bbox, expected_range in test_args: with self.subTest(description): - self.assertTupleEqual(get_latitude_range(contiguous_bbox, - am_bbox), - expected_range) + self.assertTupleEqual( + get_latitude_range(contiguous_bbox, am_bbox), expected_range + ) def test_bbox_in_longitude_range(self): - """ Ensure that the function correctly identifies when a bounding box - lies entirely in the supplied longitude range. + """Ensure that the function correctly identifies when a bounding box + lies entirely in the supplied longitude range. """ bounding_box = BBox(30, 10, 40, 20) @@ -388,29 +434,27 @@ def test_bbox_in_longitude_range(self): self.assertFalse(bbox_in_longitude_range(bounding_box, 25, 35)) def test_aggregate_all_geometries(self): - """ Ensure that GeoJSON objects can all be aggregated if: + """Ensure that GeoJSON objects can all be aggregated if: - * Only coordinates are supplied in the input. - * The input is a Geometry (e.g., Point, etc) - * The input is a GeometryCollection type. - * The input is a Feature. - * The input is a Feature containing a GeometryCollection. - * The input is a FeatureCollection. - * The input is a FeatureCollection with multiple features. + * Only coordinates are supplied in the input. + * The input is a Geometry (e.g., Point, etc) + * The input is a GeometryCollection type. + * The input is a Feature. + * The input is a Feature containing a GeometryCollection. + * The input is a FeatureCollection. + * The input is a FeatureCollection with multiple features. 
""" - point_output = [[(2.295, ), (48.874, )]] + point_output = [[(2.295,), (48.874,)]] geometrycollection_output = [ - [(-75.565, ), (39.662, )], - [(-75.696, -75.795, -80.519), (38.471, 39.716, 39.724)] + [(-75.565,), (39.662,)], + [(-75.696, -75.795, -80.519), (38.471, 39.716, 39.724)], ] with self.subTest('Point geometry'): self.assertListEqual( - aggregate_all_geometries( - self.point_geojson['features'][0]['geometry'] - ), - point_output + aggregate_all_geometries(self.point_geojson['features'][0]['geometry']), + point_output, ) with self.subTest('GeometryCollection geometry'): @@ -418,13 +462,13 @@ def test_aggregate_all_geometries(self): aggregate_all_geometries( self.geometrycollection_geojson['features'][0]['geometry'] ), - geometrycollection_output + geometrycollection_output, ) with self.subTest('Point Feature'): self.assertListEqual( aggregate_all_geometries(self.point_geojson['features'][0]), - point_output + point_output, ) with self.subTest('GeometryCollection Feature'): @@ -432,19 +476,20 @@ def test_aggregate_all_geometries(self): aggregate_all_geometries( self.geometrycollection_geojson['features'][0] ), - geometrycollection_output + geometrycollection_output, ) with self.subTest('Point FeatureCollection'): - self.assertListEqual(aggregate_all_geometries(self.point_geojson), - point_output) + self.assertListEqual( + aggregate_all_geometries(self.point_geojson), point_output + ) with self.subTest('FeatureCollection with multiple Features'): # The features in multi_feature.geo.json match those in # geometrycollection.geo.json self.assertListEqual( aggregate_all_geometries(self.multi_features_geojson), - geometrycollection_output + geometrycollection_output, ) with self.subTest('Bad GeoJSON raises exception'): @@ -452,93 +497,127 @@ def test_aggregate_all_geometries(self): aggregate_all_geometries({'bad': 'input'}) def test_aggregate_geometry_coordinates(self): - """ Ensure that different types of GeoJSON objects (Point, LineString, - Polygon, etc) can have their coordinates grouped from lists of - [longitude, latitude (and possibly vertical)] points to ordered, - separate lists of each coordinate type. + """Ensure that different types of GeoJSON objects (Point, LineString, + Polygon, etc) can have their coordinates grouped from lists of + [longitude, latitude (and possibly vertical)] points to ordered, + separate lists of each coordinate type. 
""" test_args = [ - ['Point', self.point_geojson, [[(2.295, ), (48.874, )]]], - ['MultiPoint', self.multipoint_geojson, [[(-0.076, -0.142), - (51.508, 51.501)]]], - ['LineString', self.linestring_geojson, - [[(-75.696, -75.795, -80.519), (38.471, 39.716, 39.724)]]], - ['MultiLineString', self.multilinestring_geojson, - [[(-3.194, -3.181, -3.174), (55.949, 55.951, 55.953)], - [(-0.140, -0.128), (51.502, 51.507)]]], - ['Polygon', self.polygon_geojson, - [[(-114.05, -114.05, -109.04, -109.04, -111.05, -111.05, -114.05), - (42.0, 37.0, 37.0, 41.0, 41.0, 42.0, 42.0)]]], - ['MultiPolygon', self.multipolygon_geojson, - [[(-109.05, -109.05, -102.05, -102.05, -109.05), - (41.0, 37.0, 37.0, 41.0, 41.0)], - [(-111.05, -111.05, -104.05, -104.05, -111.05), - (45.0, 41.0, 41.0, 45.0, 45.0)]]], + ['Point', self.point_geojson, [[(2.295,), (48.874,)]]], + [ + 'MultiPoint', + self.multipoint_geojson, + [[(-0.076, -0.142), (51.508, 51.501)]], + ], + [ + 'LineString', + self.linestring_geojson, + [[(-75.696, -75.795, -80.519), (38.471, 39.716, 39.724)]], + ], + [ + 'MultiLineString', + self.multilinestring_geojson, + [ + [(-3.194, -3.181, -3.174), (55.949, 55.951, 55.953)], + [(-0.140, -0.128), (51.502, 51.507)], + ], + ], + [ + 'Polygon', + self.polygon_geojson, + [ + [ + (-114.05, -114.05, -109.04, -109.04, -111.05, -111.05, -114.05), + (42.0, 37.0, 37.0, 41.0, 41.0, 42.0, 42.0), + ] + ], + ], + [ + 'MultiPolygon', + self.multipolygon_geojson, + [ + [ + (-109.05, -109.05, -102.05, -102.05, -109.05), + (41.0, 37.0, 37.0, 41.0, 41.0), + ], + [ + (-111.05, -111.05, -104.05, -104.05, -111.05), + (45.0, 41.0, 41.0, 45.0, 45.0), + ], + ], + ], ] for description, geojson, expected_output in test_args: with self.subTest(description): coordinates = geojson['features'][0]['geometry']['coordinates'] self.assertListEqual( - aggregate_geometry_coordinates(coordinates), - expected_output + aggregate_geometry_coordinates(coordinates), expected_output ) def test_is_list_of_coordinates(self): - """ Ensure a list of coordiantes can be correctly recognised, and that - other inputs are note incorrectly considered a list of coordinates. + """Ensure a list of coordiantes can be correctly recognised, and that + other inputs are note incorrectly considered a list of coordinates. """ - test_args = [['List of horizontal coordinates', [[1, 2], [3, 4]]], - ['List of vertical coordinates', [[1, 2, 3], [4, 5, 6]]]] + test_args = [ + ['List of horizontal coordinates', [[1, 2], [3, 4]]], + ['List of vertical coordinates', [[1, 2, 3], [4, 5, 6]]], + ] for description, test_input in test_args: with self.subTest(description): self.assertTrue(is_list_of_coordinates(test_input)) - test_args = [['Input is not a list', 1.0], - ['Input elements are not coordinates', [1, 2]], - ['Coordinates item has wrong number of elements', [[1]]], - ['Input is too nested', [[[1.0, 2.0]]]]] + test_args = [ + ['Input is not a list', 1.0], + ['Input elements are not coordinates', [1, 2]], + ['Coordinates item has wrong number of elements', [[1]]], + ['Input is too nested', [[[1.0, 2.0]]]], + ] for description, test_input in test_args: with self.subTest(description): self.assertFalse(is_list_of_coordinates(test_input)) def test_is_single_point(self): - """ Ensure a single coordinate can be correctly recognised, and that - other inputs are not incorrectly considered a coordinate pair. + """Ensure a single coordinate can be correctly recognised, and that + other inputs are not incorrectly considered a coordinate pair. 
""" - test_args = [['Only horizontal coordinates', [-120.0, 20.0]], - ['Vertical coordinate included', [-120.0, 20.0, 300.0]]] + test_args = [ + ['Only horizontal coordinates', [-120.0, 20.0]], + ['Vertical coordinate included', [-120.0, 20.0, 300.0]], + ] for description, test_input in test_args: with self.subTest(description): self.assertTrue(is_single_point(test_input)) - test_args = [['Wrong number of list elements', [1.0]], - ['Input not a list', 1.0], - ['List contains a nested list', [[[-120.0, 20.0]]]]] + test_args = [ + ['Wrong number of list elements', [1.0]], + ['Input not a list', 1.0], + ['List contains a nested list', [[[-120.0, 20.0]]]], + ] for description, test_input in test_args: with self.subTest('A non coordinate type returns False'): self.assertFalse(is_single_point(test_input)) def test_flatten_list(self): - """ Ensure a list of lists is flattened by only one level. """ - self.assertListEqual(flatten_list([[1, 2], [3, 4], [5, 6]]), - [1, 2, 3, 4, 5, 6]) + """Ensure a list of lists is flattened by only one level.""" + self.assertListEqual(flatten_list([[1, 2], [3, 4], [5, 6]]), [1, 2, 3, 4, 5, 6]) - self.assertListEqual(flatten_list([[[1, 2], [3, 4]], [[5, 6]]]), - [[1, 2], [3, 4], [5, 6]]) + self.assertListEqual( + flatten_list([[[1, 2], [3, 4]], [[5, 6]]]), [[1, 2], [3, 4], [5, 6]] + ) def test_crosses_antimeridian(self): - """ Ensure that antimeridian crossing is correctly identified from an - ordered tuple of longitudes. Note, this relies on assuming a - separation between consecutive points over a certain threshold - indicates antimeridian crossing, which may not always be accurate. + """Ensure that antimeridian crossing is correctly identified from an + ordered tuple of longitudes. Note, this relies on assuming a + separation between consecutive points over a certain threshold + indicates antimeridian crossing, which may not always be accurate. """ with self.subTest('Longitudes do cross antimeridian.'): @@ -548,21 +627,22 @@ def test_crosses_antimeridian(self): self.assertFalse(crosses_antimeridian((140, 175, 150, 140))) def test_get_bounding_bbox_lon_lat(self): - """ Ensure the horizontal components of a GeoJSON bounding box - attribute can be correctly extracted, whether that bounding box - contains only horizontal coordinates or also vertical components. + """Ensure the horizontal components of a GeoJSON bounding box + attribute can be correctly extracted, whether that bounding box + contains only horizontal coordinates or also vertical components. 
""" expected_bounding_box = BBox(-10, -5, 10, 15) with self.subTest('Bounding box only has horizontal coordinates'): - self.assertTupleEqual(get_bounding_box_lon_lat([-10, -5, 10, 15]), - expected_bounding_box) + self.assertTupleEqual( + get_bounding_box_lon_lat([-10, -5, 10, 15]), expected_bounding_box + ) with self.subTest('Bounding box also has vertical coordinates'): self.assertTupleEqual( get_bounding_box_lon_lat([-10, -5, 20, 10, 15, 30]), - expected_bounding_box + expected_bounding_box, ) with self.subTest('Incorrect format raises exception'): diff --git a/tests/unit/test_dimension_utilities.py b/tests/unit/test_dimension_utilities.py index 2dcfa0b..51ac018 100644 --- a/tests/unit/test_dimension_utilities.py +++ b/tests/unit/test_dimension_utilities.py @@ -13,67 +13,94 @@ from varinfo import VarInfoFromDmr import numpy as np -from hoss.dimension_utilities import (add_index_range, get_dimension_bounds, - get_dimension_extents, - get_dimension_index_range, - get_dimension_indices_from_bounds, - get_dimension_indices_from_values, - get_fill_slice, - get_requested_index_ranges, - is_almost_in, is_dimension_ascending, - is_index_subset, - prefetch_dimension_variables, - add_bounds_variables, - needs_bounds, - get_bounds_array, - write_bounds) +from hoss.dimension_utilities import ( + add_index_range, + get_dimension_bounds, + get_dimension_extents, + get_dimension_index_range, + get_dimension_indices_from_bounds, + get_dimension_indices_from_values, + get_fill_slice, + get_requested_index_ranges, + is_almost_in, + is_dimension_ascending, + is_index_subset, + prefetch_dimension_variables, + add_bounds_variables, + needs_bounds, + get_bounds_array, + write_bounds, +) from hoss.exceptions import InvalidNamedDimension, InvalidRequestedRange class TestDimensionUtilities(TestCase): - """ A class for testing functions in the `hoss.dimension_utilities` - module. + """A class for testing functions in the `hoss.dimension_utilities` + module. """ + @classmethod def setUpClass(cls): - """ Create fixtures that can be reused for all tests. 
""" + """Create fixtures that can be reused for all tests.""" cls.config = config(validate=False) cls.logger = getLogger('tests') cls.varinfo = VarInfoFromDmr( 'tests/data/rssmif16d_example.dmr', - config_file='tests/data/test_subsetter_config.json' + config_file='tests/data/test_subsetter_config.json', ) cls.ascending_dimension = masked_array(np.linspace(0, 200, 101)) cls.descending_dimension = masked_array(np.linspace(200, 0, 101)) - cls.varinfo_with_bounds = VarInfoFromDmr( - 'tests/data/GPM_3IMERGHH_example.dmr' + cls.varinfo_with_bounds = VarInfoFromDmr('tests/data/GPM_3IMERGHH_example.dmr') + cls.bounds_array = np.array( + [ + [90.0, 89.0], + [89.0, 88.0], + [88.0, 87.0], + [87.0, 86.0], + [86.0, 85.0], + [85.0, 84.0], + [84.0, 83.0], + [83.0, 82.0], + [82.0, 81.0], + [81.0, 80.0], + [80.0, 79.0], + [79.0, 78.0], + [78.0, 77.0], + [77.0, 76.0], + [76.0, 75.0], + [75.0, 74.0], + [74.0, 73.0], + [73.0, 72.0], + [72.0, 71.0], + [71.0, 70.0], + [70.0, 69.0], + [69.0, 68.0], + [68.0, 67.0], + [67.0, 66.0], + [66.0, 65.0], + [65.0, 64.0], + [64.0, 63.0], + [63.0, 62.0], + [62.0, 61.0], + [61.0, 60.0], + ] ) - cls.bounds_array = np.array([ - [90.0, 89.0], [89.0, 88.0], [88.0, 87.0], [87.0, 86.0], - [86.0, 85.0], [85.0, 84.0], [84.0, 83.0], [83.0, 82.0], - [82.0, 81.0], [81.0, 80.0], [80.0, 79.0], [79.0, 78.0], - [78.0, 77.0], [77.0, 76.0], [76.0, 75.0], [75.0, 74.0], - [74.0, 73.0], [73.0, 72.0], [72.0, 71.0], [71.0, 70.0], - [70.0, 69.0], [69.0, 68.0], [68.0, 67.0], [67.0, 66.0], - [66.0, 65.0], [65.0, 64.0], [64.0, 63.0], [63.0, 62.0], - [62.0, 61.0], [61.0, 60.0] - ]) def setUp(self): - """ Create fixtures that should be unique per test. """ + """Create fixtures that should be unique per test.""" self.temp_dir = mkdtemp() def tearDown(self): - """ Remove per-test fixtures. """ + """Remove per-test fixtures.""" if exists(self.temp_dir): rmtree(self.temp_dir) def test_is_dimension_ascending(self): - """ Ensure that a dimension variable is correctly identified as - ascending or descending. This should be immune to having a few - fill values, particularly in the first and last element in the - array. + """Ensure that a dimension variable is correctly identified as + ascending or descending. This should be immune to having a few + fill values, particularly in the first and last element in the + array. 
""" # Create a mask that will mask the first and last element of an array @@ -81,10 +108,8 @@ def test_is_dimension_ascending(self): mask[0] = 1 mask[-1] = 1 - ascending_masked = masked_array(data=self.ascending_dimension.data, - mask=mask) - descending_masked = masked_array(data=self.descending_dimension.data, - mask=mask) + ascending_masked = masked_array(data=self.ascending_dimension.data, mask=mask) + descending_masked = masked_array(data=self.descending_dimension.data, mask=mask) single_element = masked_array(data=np.array([1])) test_args = [ @@ -92,88 +117,96 @@ def test_is_dimension_ascending(self): ['Ascending masked dimension returns True', ascending_masked, True], ['Single element array returns True', single_element, True], ['Descending dimension returns False', self.descending_dimension, False], - ['Descending masked dimension returns False', descending_masked, False] + ['Descending masked dimension returns False', descending_masked, False], ] for description, dimension, expected_result in test_args: with self.subTest(description): - self.assertEqual(is_dimension_ascending(dimension), - expected_result) + self.assertEqual(is_dimension_ascending(dimension), expected_result) @patch('hoss.dimension_utilities.get_dimension_indices_from_values') def test_get_dimension_index_range(self, mock_get_indices_from_values): - """ Ensure that the dimension variable is correctly determined to be - ascending or descending, such that `get_dimension_min_max_indices` - is called with the correct ordering of minimum and maximum values. - This function should also handle when either the minimum or maximum - requested value is unspecified, indicating that the beginning or - end of the array should be used accordingly. + """Ensure that the dimension variable is correctly determined to be + ascending or descending, such that `get_dimension_min_max_indices` + is called with the correct ordering of minimum and maximum values. + This function should also handle when either the minimum or maximum + requested value is unspecified, indicating that the beginning or + end of the array should be used accordingly. 
- data_ascending[20] = data_descending[80] = 40.0 - data_ascending[87] = data_descending[13] = 174.0 + data_ascending[20] = data_descending[80] = 40.0 + data_ascending[87] = data_descending[13] = 174.0 """ requested_min_value = 39.0 requested_max_value = 174.3 with self.subTest('Ascending, minimum and maximum extents specified'): - get_dimension_index_range(self.ascending_dimension, - requested_min_value, requested_max_value) + get_dimension_index_range( + self.ascending_dimension, requested_min_value, requested_max_value + ) mock_get_indices_from_values.called_once_with( - self.ascending_dimension, requested_min_value, - requested_max_value + self.ascending_dimension, requested_min_value, requested_max_value ) mock_get_indices_from_values.reset_mock() with self.subTest('Ascending, only minimum extent specified'): - get_dimension_index_range(self.ascending_dimension, - requested_min_value, None) + get_dimension_index_range( + self.ascending_dimension, requested_min_value, None + ) mock_get_indices_from_values.called_once_with( - self.ascending_dimension, requested_min_value, - self.ascending_dimension[:][-1] + self.ascending_dimension, + requested_min_value, + self.ascending_dimension[:][-1], ) mock_get_indices_from_values.reset_mock() with self.subTest('Ascending, only maximum extent specified'): - get_dimension_index_range(self.ascending_dimension, None, - requested_max_value) + get_dimension_index_range( + self.ascending_dimension, None, requested_max_value + ) mock_get_indices_from_values.called_once_with( - self.ascending_dimension, self.ascending_dimension[:][0], - requested_max_value + self.ascending_dimension, + self.ascending_dimension[:][0], + requested_max_value, ) mock_get_indices_from_values.reset_mock() with self.subTest('Descending, minimum and maximum extents specified'): - get_dimension_index_range(self.descending_dimension, - requested_min_value, requested_max_value) + get_dimension_index_range( + self.descending_dimension, requested_min_value, requested_max_value + ) mock_get_indices_from_values.called_once_with( - self.descending_dimension, requested_max_value, - requested_min_value + self.descending_dimension, requested_max_value, requested_min_value ) mock_get_indices_from_values.reset_mock() with self.subTest('Descending, only minimum extent specified'): - get_dimension_index_range(self.descending_dimension, - requested_min_value, None) + get_dimension_index_range( + self.descending_dimension, requested_min_value, None + ) mock_get_indices_from_values.called_once_with( - self.descending_dimension, self.descending_dimension[:][0], - requested_min_value + self.descending_dimension, + self.descending_dimension[:][0], + requested_min_value, ) mock_get_indices_from_values.reset_mock() with self.subTest('Descending, only maximum extent specified'): - get_dimension_index_range(self.descending_dimension, None, - requested_max_value) + get_dimension_index_range( + self.descending_dimension, None, requested_max_value + ) mock_get_indices_from_values.called_once_with( - self.descending_dimension, requested_max_value, - self.descending_dimension[:][-1] + self.descending_dimension, + requested_max_value, + self.descending_dimension[:][-1], ) mock_get_indices_from_values.reset_mock() @patch('hoss.dimension_utilities.get_dimension_indices_from_values') - def test_get_dimension_index_range_requested_zero_values(self, - mock_get_indices_from_values): - """ Ensure that a 0 is treated correctly, and not interpreted as a - False boolean value. 
+ def test_get_dimension_index_range_requested_zero_values( + self, mock_get_indices_from_values + ): + """Ensure that a 0 is treated correctly, and not interpreted as a + False boolean value. """ with self.subTest('Ascending dimension values, min = 0'): @@ -205,96 +238,105 @@ def test_get_dimension_index_range_requested_zero_values(self, mock_get_indices_from_values.reset_mock() def test_get_dimension_indices_from_indices(self): - """ Ensure the expected index values are retrieved for the minimum and - maximum values of an expected range. This should correspond to the - nearest integer, to ensure partial pixels are included in a - bounding box spatial subset. List elements must be integers for - later array slicing. + """Ensure the expected index values are retrieved for the minimum and + maximum values of an expected range. This should correspond to the + nearest integer, to ensure partial pixels are included in a + bounding box spatial subset. List elements must be integers for + later array slicing. - data_ascending[20] = data_descending[80] = 40.0 - data_ascending[87] = data_descending[13] = 174.0 + data_ascending[20] = data_descending[80] = 40.0 + data_ascending[87] = data_descending[13] = 174.0 - This test should also ensure that extent values exactly halfway - between pixels should not include the outer pixel. + This test should also ensure that extent values exactly halfway + between pixels should not include the outer pixel. """ test_args = [ ['Ascending dimension', self.ascending_dimension, 39, 174.3, (20, 87)], ['Descending dimension', self.descending_dimension, 174.3, 39, (13, 80)], ['Ascending halfway between', self.ascending_dimension, 39, 175, (20, 87)], - ['Descending halfway between', self.descending_dimension, 175, 39, (13, 80)], + [ + 'Descending halfway between', + self.descending_dimension, + 175, + 39, + (13, 80), + ], ['Single point inside pixel', self.ascending_dimension, 10, 10, (5, 5)], ['Single point on pixel edges', self.ascending_dimension, 9, 9, (4, 5)], ] - for description, dimension, min_extent, max_extent, expected_results in test_args: + for ( + description, + dimension, + min_extent, + max_extent, + expected_results, + ) in test_args: with self.subTest(description): - results = get_dimension_indices_from_values(dimension, - min_extent, - max_extent) + results = get_dimension_indices_from_values( + dimension, min_extent, max_extent + ) self.assertIsInstance(results[0], int) self.assertIsInstance(results[1], int) self.assertTupleEqual(results, expected_results) def test_add_index_range(self): - """ Ensure the correct combinations of index ranges are added as - suffixes to the input variable based upon that variable's dimensions. + """Ensure the correct combinations of index ranges are added as + suffixes to the input variable based upon that variable's dimensions. - If a dimension range has the lower index > upper index, that - indicates the bounding box crosses the edge of the grid. In this - instance, the full range of the variable should be retrieved. + If a dimension range has the lower index > upper index, that + indicates the bounding box crosses the edge of the grid. In this + instance, the full range of the variable should be retrieved. 
- The order of indices in RSSMIF16D is: (time, latitude, longitude) + The order of indices in RSSMIF16D is: (time, latitude, longitude) """ with self.subTest('No index constraints'): index_ranges = {} - self.assertEqual(add_index_range('/sst_dtime', self.varinfo, - index_ranges), - '/sst_dtime') + self.assertEqual( + add_index_range('/sst_dtime', self.varinfo, index_ranges), '/sst_dtime' + ) with self.subTest('With index constraints'): index_ranges = {'/latitude': [12, 34], '/longitude': [45, 56]} - self.assertEqual(add_index_range('/sst_dtime', self.varinfo, - index_ranges), - '/sst_dtime[][12:34][45:56]') + self.assertEqual( + add_index_range('/sst_dtime', self.varinfo, index_ranges), + '/sst_dtime[][12:34][45:56]', + ) with self.subTest('With a longitude crossing discontinuity'): index_ranges = {'/latitude': [12, 34], '/longitude': [56, 5]} - self.assertEqual(add_index_range('/sst_dtime', self.varinfo, - index_ranges), - '/sst_dtime[][12:34][]') + self.assertEqual( + add_index_range('/sst_dtime', self.varinfo, index_ranges), + '/sst_dtime[][12:34][]', + ) def test_get_fill_slice(self): - """ Ensure that a slice object is correctly formed for a requested - dimension. + """Ensure that a slice object is correctly formed for a requested + dimension. """ fill_ranges = {'/longitude': [200, 15]} with self.subTest('An unfilled dimension returns slice(None).'): - self.assertEqual( - get_fill_slice('/time', fill_ranges), - slice(None) - ) + self.assertEqual(get_fill_slice('/time', fill_ranges), slice(None)) with self.subTest('A filled dimension returns slice(start, stop).'): - self.assertEqual( - get_fill_slice('/longitude', fill_ranges), - slice(16, 200) - ) + self.assertEqual(get_fill_slice('/longitude', fill_ranges), slice(16, 200)) @patch('hoss.dimension_utilities.add_bounds_variables') @patch('hoss.dimension_utilities.get_opendap_nc4') - def test_prefetch_dimension_variables(self, mock_get_opendap_nc4, - mock_add_bounds_variables): - """ Ensure that when a list of required variables is specified, a - request to OPeNDAP will be sent requesting only those that are - grid-dimension variables (both spatial and temporal). + def test_prefetch_dimension_variables( + self, mock_get_opendap_nc4, mock_add_bounds_variables + ): + """Ensure that when a list of required variables is specified, a + request to OPeNDAP will be sent requesting only those that are + grid-dimension variables (both spatial and temporal). - At this point only spatial dimensions will be included in a - prefetch request. + At this point only spatial dimensions will be included in a + prefetch request. 
""" prefetch_path = 'prefetch.nc4' @@ -303,47 +345,56 @@ def test_prefetch_dimension_variables(self, mock_get_opendap_nc4, access_token = 'access' output_dir = 'tests/output' url = 'https://url_to_opendap_granule' - required_variables = {'/latitude', '/longitude', '/time', - '/wind_speed'} + required_variables = {'/latitude', '/longitude', '/time', '/wind_speed'} required_dimensions = {'/latitude', '/longitude', '/time'} - self.assertEqual(prefetch_dimension_variables(url, self.varinfo, - required_variables, - output_dir, - self.logger, - access_token, - self.config), - prefetch_path) + self.assertEqual( + prefetch_dimension_variables( + url, + self.varinfo, + required_variables, + output_dir, + self.logger, + access_token, + self.config, + ), + prefetch_path, + ) - mock_get_opendap_nc4.assert_called_once_with(url, required_dimensions, - output_dir, self.logger, - access_token, self.config) + mock_get_opendap_nc4.assert_called_once_with( + url, required_dimensions, output_dir, self.logger, access_token, self.config + ) - mock_add_bounds_variables.assert_called_once_with(prefetch_path, - required_dimensions, - self.varinfo, self.logger) + mock_add_bounds_variables.assert_called_once_with( + prefetch_path, required_dimensions, self.varinfo, self.logger + ) @patch('hoss.dimension_utilities.needs_bounds') @patch('hoss.dimension_utilities.write_bounds') def test_add_bounds_variables(self, mock_write_bounds, mock_needs_bounds): - """ Ensure that `write_bounds` is called when it's needed, - and that it's not called when it's not needed. + """Ensure that `write_bounds` is called when it's needed, + and that it's not called when it's not needed. """ prefetch_dataset_name = 'tests/data/ATL16_prefetch.nc4' - varinfo_prefetch = VarInfoFromDmr( - 'tests/data/ATL16_prefetch.dmr' - ) - required_dimensions = {'/npolar_grid_lat', '/npolar_grid_lon', - '/spolar_grid_lat', '/spolar_grid_lon', - '/global_grid_lat', '/global_grid_lon'} + varinfo_prefetch = VarInfoFromDmr('tests/data/ATL16_prefetch.dmr') + required_dimensions = { + '/npolar_grid_lat', + '/npolar_grid_lon', + '/spolar_grid_lat', + '/spolar_grid_lon', + '/global_grid_lat', + '/global_grid_lon', + } with self.subTest('Bounds need to be written'): mock_needs_bounds.return_value = True - add_bounds_variables(prefetch_dataset_name, - required_dimensions, - varinfo_prefetch, - self.logger) + add_bounds_variables( + prefetch_dataset_name, + required_dimensions, + varinfo_prefetch, + self.logger, + ) self.assertEqual(mock_write_bounds.call_count, 6) mock_needs_bounds.reset_mock() @@ -351,49 +402,53 @@ def test_add_bounds_variables(self, mock_write_bounds, mock_needs_bounds): with self.subTest('Bounds should not be written'): mock_needs_bounds.return_value = False - add_bounds_variables(prefetch_dataset_name, - required_dimensions, - varinfo_prefetch, - self.logger) + add_bounds_variables( + prefetch_dataset_name, + required_dimensions, + varinfo_prefetch, + self.logger, + ) mock_write_bounds.assert_not_called() def test_needs_bounds(self): - """ Ensure that the correct boolean value is returned for four - different cases: - - 1) False - cell_alignment[edge] attribute exists and - bounds variable already exists. - 2) False - cell_alignment[edge] attribute does not exist and - bounds variable already exists. - 3) True - cell_alignment[edge] attribute exists and - bounds variable does not exist. - 4) False - cell_alignment[edge] attribute does not exist and - bounds variable does not exist. 
+ """Ensure that the correct boolean value is returned for four + different cases: + + 1) False - cell_alignment[edge] attribute exists and + bounds variable already exists. + 2) False - cell_alignment[edge] attribute does not exist and + bounds variable already exists. + 3) True - cell_alignment[edge] attribute exists and + bounds variable does not exist. + 4) False - cell_alignment[edge] attribute does not exist and + bounds variable does not exist. """ - varinfo_bounds = VarInfoFromDmr( - 'tests/data/ATL16_prefetch_bnds.dmr' - ) + varinfo_bounds = VarInfoFromDmr('tests/data/ATL16_prefetch_bnds.dmr') with self.subTest('Variable has cell alignment and bounds'): - self.assertFalse(needs_bounds(varinfo_bounds.get_variable( - '/variable_edge_has_bnds'))) + self.assertFalse( + needs_bounds(varinfo_bounds.get_variable('/variable_edge_has_bnds')) + ) with self.subTest('Variable has no cell alignment and has bounds'): - self.assertFalse(needs_bounds(varinfo_bounds.get_variable( - '/variable_no_edge_has_bnds'))) + self.assertFalse( + needs_bounds(varinfo_bounds.get_variable('/variable_no_edge_has_bnds')) + ) with self.subTest('Variable has cell alignment and no bounds'): - self.assertTrue(needs_bounds(varinfo_bounds.get_variable( - '/variable_edge_no_bnds'))) + self.assertTrue( + needs_bounds(varinfo_bounds.get_variable('/variable_edge_no_bnds')) + ) with self.subTest('Variable has no cell alignment and no bounds'): - self.assertFalse(needs_bounds(varinfo_bounds.get_variable( - '/variable_no_edge_no_bnds'))) + self.assertFalse( + needs_bounds(varinfo_bounds.get_variable('/variable_no_edge_no_bnds')) + ) def test_get_bounds_array(self): - """ Ensure that the expected bounds array is created given - the input dimension variable values. + """Ensure that the expected bounds array is created given + the input dimension variable values. """ prefetch_dataset = Dataset('tests/data/ATL16_prefetch.nc4', 'r') @@ -401,14 +456,14 @@ def test_get_bounds_array(self): expected_bounds_array = self.bounds_array - assert_array_equal(get_bounds_array(prefetch_dataset, - dimension_path), - expected_bounds_array) + assert_array_equal( + get_bounds_array(prefetch_dataset, dimension_path), expected_bounds_array + ) def test_write_bounds(self): - """ Ensure that bounds data array is written to the dimension - dataset, both when the dimension variable is in the root group - and in a nested group. + """Ensure that bounds data array is written to the dimension + dataset, both when the dimension variable is in the root group + and in a nested group. """ varinfo_prefetch = VarInfoFromDmr('tests/data/ATL16_prefetch_group.dmr') @@ -420,7 +475,8 @@ def test_write_bounds(self): with self.subTest('Dimension variable is in the root group'): root_variable_full_path = '/npolar_grid_lat' root_varinfo_variable = varinfo_prefetch.get_variable( - root_variable_full_path) + root_variable_full_path + ) root_variable_name = 'npolar_grid_lat' root_bounds_name = root_variable_name + '_bnds' @@ -429,27 +485,32 @@ def test_write_bounds(self): # Check that bounds variable was written to the root group. self.assertTrue(prefetch_dataset.variables[root_bounds_name]) - resulting_bounds_root_data = prefetch_dataset.variables[ - root_bounds_name][:] + resulting_bounds_root_data = prefetch_dataset.variables[root_bounds_name][:] - assert_array_equal(resulting_bounds_root_data, - expected_bounds_data) + assert_array_equal(resulting_bounds_root_data, expected_bounds_data) # Check that varinfo variable has 'bounds' attribute. 
- self.assertEqual(root_varinfo_variable.attributes['bounds'], - root_bounds_name) + self.assertEqual( + root_varinfo_variable.attributes['bounds'], root_bounds_name + ) # Check that NetCDF4 dimension variable has 'bounds' attribute. - self.assertEqual(prefetch_dataset.variables[ - root_variable_name].__dict__.get('bounds'), - root_bounds_name) + self.assertEqual( + prefetch_dataset.variables[root_variable_name].__dict__.get('bounds'), + root_bounds_name, + ) # Check that VariableFromDmr has 'bounds' reference in # the references dictionary. - self.assertEqual(root_varinfo_variable.references['bounds'], - {root_bounds_name, }) + self.assertEqual( + root_varinfo_variable.references['bounds'], + { + root_bounds_name, + }, + ) with self.subTest('Dimension variable is in a nested group'): nested_variable_full_path = '/group1/group2/zelda' nested_varinfo_variable = varinfo_prefetch.get_variable( - nested_variable_full_path) + nested_variable_full_path + ) nested_variable_name = 'zelda' nested_group_path = '/group1/group2' nested_group = prefetch_dataset[nested_group_path] @@ -460,27 +521,31 @@ def test_write_bounds(self): # Check that bounds variable exists in the nested group. self.assertTrue(nested_group.variables[nested_bounds_name]) - resulting_bounds_nested_data = nested_group.variables[ - nested_bounds_name][:] - assert_array_equal(resulting_bounds_nested_data, - expected_bounds_data) + resulting_bounds_nested_data = nested_group.variables[nested_bounds_name][:] + assert_array_equal(resulting_bounds_nested_data, expected_bounds_data) # Check that varinfo variable has 'bounds' attribute. - self.assertEqual(nested_varinfo_variable.attributes['bounds'], - nested_bounds_name) + self.assertEqual( + nested_varinfo_variable.attributes['bounds'], nested_bounds_name + ) # Check that NetCDF4 dimension variable has 'bounds' attribute. - self.assertEqual(nested_group.variables[ - nested_variable_name].__dict__.get('bounds'), - nested_bounds_name) + self.assertEqual( + nested_group.variables[nested_variable_name].__dict__.get('bounds'), + nested_bounds_name, + ) # Check that VariableFromDmr 'has bounds' reference in # the references dictionary. - self.assertEqual(nested_varinfo_variable.references['bounds'], - {nested_bounds_name, }) + self.assertEqual( + nested_varinfo_variable.references['bounds'], + { + nested_bounds_name, + }, + ) @patch('hoss.dimension_utilities.get_opendap_nc4') def test_prefetch_dimensions_with_bounds(self, mock_get_opendap_nc4): - """ Ensure that a variable which has dimensions with `bounds` metadata - retrieves both the dimension variables and the bounds variables to - which their metadata refers. + """Ensure that a variable which has dimensions with `bounds` metadata + retrieves both the dimension variables and the bounds variables to + which their metadata refers. 
""" prefetch_path = 'prefetch.nc4' @@ -488,39 +553,55 @@ def test_prefetch_dimensions_with_bounds(self, mock_get_opendap_nc4): access_token = 'access' url = 'https://url_to_opendap_granule' - required_variables = {'/Grid/precipitationCal', '/Grid/lat', - '/Grid/lon', '/Grid/time'} - dimensions_and_bounds = {'/Grid/lat', '/Grid/lat_bnds', '/Grid/lon', - '/Grid/lon_bnds', '/Grid/time', - '/Grid/time_bnds'} - - self.assertEqual(prefetch_dimension_variables(url, - self.varinfo_with_bounds, - required_variables, - self.temp_dir, - self.logger, - access_token, - self.config), - prefetch_path) - - mock_get_opendap_nc4.assert_called_once_with(url, - dimensions_and_bounds, - self.temp_dir, - self.logger, access_token, - self.config) + required_variables = { + '/Grid/precipitationCal', + '/Grid/lat', + '/Grid/lon', + '/Grid/time', + } + dimensions_and_bounds = { + '/Grid/lat', + '/Grid/lat_bnds', + '/Grid/lon', + '/Grid/lon_bnds', + '/Grid/time', + '/Grid/time_bnds', + } + + self.assertEqual( + prefetch_dimension_variables( + url, + self.varinfo_with_bounds, + required_variables, + self.temp_dir, + self.logger, + access_token, + self.config, + ), + prefetch_path, + ) + + mock_get_opendap_nc4.assert_called_once_with( + url, + dimensions_and_bounds, + self.temp_dir, + self.logger, + access_token, + self.config, + ) def test_get_dimension_extents(self): - """ Ensure that the expected dimension extents are retrieved. + """Ensure that the expected dimension extents are retrieved. - The three grids below correspond to longitude dimensions of three - collections used with HOSS: + The three grids below correspond to longitude dimensions of three + collections used with HOSS: - * GPM: -180 ≤ longitude (degrees east) ≤ 180. - * RSSMIF16D: 0 ≤ longitude (degrees east) ≤ 360. - * MERRA-2: -180.3125 ≤ longitude (degrees east) ≤ 179.6875. + * GPM: -180 ≤ longitude (degrees east) ≤ 180. + * RSSMIF16D: 0 ≤ longitude (degrees east) ≤ 360. + * MERRA-2: -180.3125 ≤ longitude (degrees east) ≤ 179.6875. - These represent fully wrapped longitudes (GPM), fully unwrapped - longitudes (RSSMIF16D) and partially wrapped longitudes (MERRA-2). + These represent fully wrapped longitudes (GPM), fully unwrapped + longitudes (RSSMIF16D) and partially wrapped longitudes (MERRA-2). """ gpm_lons = np.linspace(-179.950, 179.950, 3600) @@ -530,82 +611,91 @@ def test_get_dimension_extents(self): test_args = [ ['Fully wrapped dimension', gpm_lons, -180, 180], ['Fully unwrapped dimension', rss_lons, 0, 360], - ['Partially wrapped dimension', merra_lons, -180.3125, 179.6875] + ['Partially wrapped dimension', merra_lons, -180.3125, 179.6875], ] for description, dim_array, expected_min, expected_max in test_args: with self.subTest(description): np.testing.assert_almost_equal( - get_dimension_extents(dim_array), - (expected_min, expected_max) + get_dimension_extents(dim_array), (expected_min, expected_max) ) def test_is_index_subset(self): - """ Ensure the function correctly determines when a HOSS request will - be an index subset (i.e., bounding box, shape file or temporal). + """Ensure the function correctly determines when a HOSS request will + be an index subset (i.e., bounding box, shape file or temporal). 
""" bounding_box = [10, 20, 30, 40] - shape_file = {'href': 'path/to/shape.geo.json', - 'type': 'application/geo+json'} - temporal_range = {'start': '2021-01-01T01:30:00', - 'end': '2021-01-01T02:00:00'} + shape_file = {'href': 'path/to/shape.geo.json', 'type': 'application/geo+json'} + temporal_range = {'start': '2021-01-01T01:30:00', 'end': '2021-01-01T02:00:00'} dimensions = [{'name': 'lev', 'min': 800, 'max': 900}] with self.subTest('Bounding box subset only'): - self.assertTrue(is_index_subset(Message({ - 'subset': {'bbox': bounding_box} - }))) + self.assertTrue( + is_index_subset(Message({'subset': {'bbox': bounding_box}})) + ) with self.subTest('Named dimensions subset only'): - self.assertTrue(is_index_subset(Message({ - 'subset': {'dimensions': dimensions} - }))) + self.assertTrue( + is_index_subset(Message({'subset': {'dimensions': dimensions}})) + ) with self.subTest('Shape file only'): - self.assertTrue(is_index_subset(Message({ - 'subset': {'shape': shape_file} - }))) + self.assertTrue(is_index_subset(Message({'subset': {'shape': shape_file}}))) with self.subTest('Temporal subset only'): - self.assertTrue(is_index_subset( - Message({'temporal': temporal_range})) - ) + self.assertTrue(is_index_subset(Message({'temporal': temporal_range}))) with self.subTest('Bounding box and temporal'): - self.assertTrue(is_index_subset(Message({ - 'subset': {'bbox': bounding_box}, - 'temporal': temporal_range, - }))) + self.assertTrue( + is_index_subset( + Message( + { + 'subset': {'bbox': bounding_box}, + 'temporal': temporal_range, + } + ) + ) + ) with self.subTest('Shape file and temporal'): - self.assertTrue(is_index_subset(Message({ - 'subset': {'shape': shape_file}, - 'temporal': temporal_range, - }))) + self.assertTrue( + is_index_subset( + Message( + { + 'subset': {'shape': shape_file}, + 'temporal': temporal_range, + } + ) + ) + ) with self.subTest('Bounding box and named dimension'): - self.assertTrue(is_index_subset(Message({ - 'subset': {'bbox': bounding_box, 'dimensions': dimensions} - }))) + self.assertTrue( + is_index_subset( + Message( + {'subset': {'bbox': bounding_box, 'dimensions': dimensions}} + ) + ) + ) with self.subTest('Not an index range subset'): self.assertFalse(is_index_subset(Message({}))) def test_get_requested_index_ranges(self): - """ Ensure the function correctly retrieves all index ranges from - explicitly named dimensions. + """Ensure the function correctly retrieves all index ranges from + explicitly named dimensions. - This test will use the `latitude` and `longitude` variables in the - RSSMIF16D example files. + This test will use the `latitude` and `longitude` variables in the + RSSMIF16D example files. - If one extent is not specified, the returned index range should - extend to either the first or last element (depending on whether - the omitted extent is a maximum or a minimum and whether the - dimension array is ascending or descending). + If one extent is not specified, the returned index range should + extend to either the first or last element (depending on whether + the omitted extent is a maximum or a minimum and whether the + dimension array is ascending or descending). - f16_ssmis_lat_lon_desc.nc has a descending latitude dimension - array. + f16_ssmis_lat_lon_desc.nc has a descending latitude dimension + array. 
""" ascending_file = 'tests/data/f16_ssmis_lat_lon.nc' @@ -615,221 +705,239 @@ def test_get_requested_index_ranges(self): with self.subTest('Ascending dimension'): # 20.0 ≤ latitude[440] ≤ 20.25, 29.75 ≤ latitude[479] ≤ 30.0 - harmony_message = Message({ - 'subset': { - 'dimensions': [{'name': '/latitude', 'min': 20, 'max': 30}] + harmony_message = Message( + { + 'subset': { + 'dimensions': [{'name': '/latitude', 'min': 20, 'max': 30}] + } } - }) + ) self.assertDictEqual( - get_requested_index_ranges(required_variables, self.varinfo, - ascending_file, harmony_message), - {'/latitude': (440, 479)} + get_requested_index_ranges( + required_variables, self.varinfo, ascending_file, harmony_message + ), + {'/latitude': (440, 479)}, ) with self.subTest('Multiple ascending dimensions'): # 20.0 ≤ latitude[440] ≤ 20.25, 29.75 ≤ latitude[479] ≤ 30.0 # 140.0 ≤ longitude[560] ≤ 140.25, 149.75 ≤ longitude[599] ≤ 150.0 - harmony_message = Message({ - 'subset': { - 'dimensions': [{'name': '/latitude', 'min': 20, 'max': 30}, - {'name': '/longitude', 'min': 140, 'max': 150}] + harmony_message = Message( + { + 'subset': { + 'dimensions': [ + {'name': '/latitude', 'min': 20, 'max': 30}, + {'name': '/longitude', 'min': 140, 'max': 150}, + ] + } } - }) + ) self.assertDictEqual( - get_requested_index_ranges(required_variables, self.varinfo, - ascending_file, harmony_message), - {'/latitude': (440, 479), '/longitude': (560, 599)} + get_requested_index_ranges( + required_variables, self.varinfo, ascending_file, harmony_message + ), + {'/latitude': (440, 479), '/longitude': (560, 599)}, ) with self.subTest('Descending dimension'): # 30.0 ≥ latitude[240] ≥ 29.75, 20.25 ≥ latitude[279] ≥ 20.0 - harmony_message = Message({ - 'subset': { - 'dimensions': [{'name': '/latitude', 'min': 20, 'max': 30}] + harmony_message = Message( + { + 'subset': { + 'dimensions': [{'name': '/latitude', 'min': 20, 'max': 30}] + } } - }) + ) self.assertDictEqual( - get_requested_index_ranges(required_variables, self.varinfo, - descending_file, harmony_message), - {'/latitude': (240, 279)} + get_requested_index_ranges( + required_variables, self.varinfo, descending_file, harmony_message + ), + {'/latitude': (240, 279)}, ) with self.subTest('Dimension has no leading slash'): # 20.0 ≤ latitude[440] ≤ 20.25, 29.75 ≤ latitude[479] ≤ 30.0 - harmony_message = Message({ - 'subset': { - 'dimensions': [{'name': 'latitude', 'min': 20, 'max': 30}] - } - }) + harmony_message = Message( + {'subset': {'dimensions': [{'name': 'latitude', 'min': 20, 'max': 30}]}} + ) self.assertDictEqual( - get_requested_index_ranges(required_variables, self.varinfo, - ascending_file, harmony_message), - {'/latitude': (440, 479)} + get_requested_index_ranges( + required_variables, self.varinfo, ascending_file, harmony_message + ), + {'/latitude': (440, 479)}, ) with self.subTest('Unspecified minimum value'): # 29.75 ≤ latitude[479] ≤ 30.0 - harmony_message = Message({ - 'subset': {'dimensions': [{'name': '/latitude', 'max': 30}]} - }) + harmony_message = Message( + {'subset': {'dimensions': [{'name': '/latitude', 'max': 30}]}} + ) self.assertDictEqual( - get_requested_index_ranges(required_variables, self.varinfo, - ascending_file, harmony_message), - {'/latitude': (0, 479)} + get_requested_index_ranges( + required_variables, self.varinfo, ascending_file, harmony_message + ), + {'/latitude': (0, 479)}, ) with self.subTest('Unspecified maximum value'): # 20.0 ≤ latitude[440] ≤ 20.25, 179.75 ≤ latitude[719] ≤ 180.0 - harmony_message = Message({ - 'subset': {'dimensions': 
[{'name': '/latitude', 'min': 20}]} - }) + harmony_message = Message( + {'subset': {'dimensions': [{'name': '/latitude', 'min': 20}]}} + ) self.assertDictEqual( - get_requested_index_ranges(required_variables, self.varinfo, - ascending_file, harmony_message), - {'/latitude': (440, 719)} + get_requested_index_ranges( + required_variables, self.varinfo, ascending_file, harmony_message + ), + {'/latitude': (440, 719)}, ) with self.subTest('Descending, unspecified minimum value'): # 30.0 ≥ latitude[240] ≥ 29.75, 0.25 ≥ latitude[719] ≥ 0.0 - harmony_message = Message({ - 'subset': {'dimensions': [{'name': '/latitude', 'max': 30}]} - }) + harmony_message = Message( + {'subset': {'dimensions': [{'name': '/latitude', 'max': 30}]}} + ) self.assertDictEqual( - get_requested_index_ranges(required_variables, self.varinfo, - descending_file, harmony_message), - {'/latitude': (240, 719)} + get_requested_index_ranges( + required_variables, self.varinfo, descending_file, harmony_message + ), + {'/latitude': (240, 719)}, ) with self.subTest('Descending, unspecified maximum value'): # 20.25 ≥ latitude[279] ≥ 20.0 - harmony_message = Message({ - 'subset': {'dimensions': [{'name': '/latitude', 'min': 20}]} - }) + harmony_message = Message( + {'subset': {'dimensions': [{'name': '/latitude', 'min': 20}]}} + ) self.assertDictEqual( - get_requested_index_ranges(required_variables, self.varinfo, - descending_file, harmony_message), - {'/latitude': (0, 279)} + get_requested_index_ranges( + required_variables, self.varinfo, descending_file, harmony_message + ), + {'/latitude': (0, 279)}, ) with self.subTest('Unrecognised dimension'): # Check for a non-existent named dimension - harmony_message = Message({ - 'subset': { - 'dimensions': [{'name': '/FooBar', 'min': None, 'max': 10}] + harmony_message = Message( + { + 'subset': { + 'dimensions': [{'name': '/FooBar', 'min': None, 'max': 10}] + } } - }) + ) with self.assertRaises(InvalidNamedDimension): - get_requested_index_ranges(required_variables, self.varinfo, - descending_file, harmony_message), + get_requested_index_ranges( + required_variables, self.varinfo, descending_file, harmony_message + ), @patch('hoss.dimension_utilities.get_dimension_index_range') - def test_get_requested_index_ranges_bounds(self, - mock_get_dimension_index_range): - """ Ensure that if bounds are present for a dimension, they are used - as an argument in the call to get_dimension_index_range. + def test_get_requested_index_ranges_bounds(self, mock_get_dimension_index_range): + """Ensure that if bounds are present for a dimension, they are used + as an argument in the call to get_dimension_index_range. 
""" mock_get_dimension_index_range.return_value = (2000, 2049) - gpm_varinfo = VarInfoFromDmr('tests/data/GPM_3IMERGHH_example.dmr', - short_name='GPM_3IMERGHH') + gpm_varinfo = VarInfoFromDmr( + 'tests/data/GPM_3IMERGHH_example.dmr', short_name='GPM_3IMERGHH' + ) gpm_prefetch_path = 'tests/data/GPM_3IMERGHH_prefetch.nc4' - harmony_message = Message({'subset': { - 'dimensions': [{'name': '/Grid/lon', 'min': 20, 'max': 25}] - }}) + harmony_message = Message( + {'subset': {'dimensions': [{'name': '/Grid/lon', 'min': 20, 'max': 25}]}} + ) self.assertDictEqual( - get_requested_index_ranges({'/Grid/lon'}, gpm_varinfo, - gpm_prefetch_path, harmony_message), - {'/Grid/lon': (2000, 2049)} + get_requested_index_ranges( + {'/Grid/lon'}, gpm_varinfo, gpm_prefetch_path, harmony_message + ), + {'/Grid/lon': (2000, 2049)}, + ) + mock_get_dimension_index_range.assert_called_once_with( + ANY, 20, 25, bounds_values=ANY ) - mock_get_dimension_index_range.assert_called_once_with(ANY, 20, 25, - bounds_values=ANY) with Dataset(gpm_prefetch_path) as prefetch: assert_array_equal( mock_get_dimension_index_range.call_args_list[0][0][0], - prefetch['/Grid/lon'][:] + prefetch['/Grid/lon'][:], ) assert_array_equal( mock_get_dimension_index_range.call_args_list[0][1]['bounds_values'], - prefetch['/Grid/lon_bnds'][:] + prefetch['/Grid/lon_bnds'][:], ) @patch('hoss.dimension_utilities.get_dimension_indices_from_bounds') @patch('hoss.dimension_utilities.get_dimension_indices_from_values') - def test_get_dimension_index_range_bounds(self, - mock_get_indices_from_values, - mock_get_indices_from_bounds): - """ Ensure that the correct branch of the code is used depending on - whether bounds are specified or not. + def test_get_dimension_index_range_bounds( + self, mock_get_indices_from_values, mock_get_indices_from_bounds + ): + """Ensure that the correct branch of the code is used depending on + whether bounds are specified or not. - Also ensure that the minimum and maximum requested extent are - always in ascending order in calls to - `get_dimension_indices_from_bounds`, regardless of if the - dimension is ascending or descending. + Also ensure that the minimum and maximum requested extent are + always in ascending order in calls to + `get_dimension_indices_from_bounds`, regardless of if the + dimension is ascending or descending. 
""" dimension_values = np.ma.MaskedArray(np.linspace(0.5, 9.5, 10)) lower_bounds = np.linspace(0, 9, 10) upper_bounds = np.linspace(1, 10, 10) - dimension_bounds = np.ma.MaskedArray(np.array([lower_bounds, - upper_bounds]).T) + dimension_bounds = np.ma.MaskedArray(np.array([lower_bounds, upper_bounds]).T) with self.subTest('No bounds are specified'): get_dimension_index_range(dimension_values, 2.3, 4.6) mock_get_indices_from_values.assert_called_once_with(ANY, 2.3, 4.6) assert_array_equal( - mock_get_indices_from_values.call_args_list[0][0][0], - dimension_values + mock_get_indices_from_values.call_args_list[0][0][0], dimension_values ) mock_get_indices_from_values.reset_mock() mock_get_indices_from_bounds.assert_not_called() with self.subTest('Bounds are specified'): - get_dimension_index_range(dimension_values, 2.3, 4.6, - dimension_bounds) + get_dimension_index_range(dimension_values, 2.3, 4.6, dimension_bounds) mock_get_indices_from_values.assert_not_called() mock_get_indices_from_bounds.assert_called_once_with(ANY, 2.3, 4.6) assert_array_equal( - mock_get_indices_from_bounds.call_args_list[0][0][0], - dimension_bounds + mock_get_indices_from_bounds.call_args_list[0][0][0], dimension_bounds ) mock_get_indices_from_bounds.reset_mock() with self.subTest('Bounds are specified, descending dimension'): - get_dimension_index_range(np.flip(dimension_values), 2.3, 4.6, - np.flip(dimension_bounds)) + get_dimension_index_range( + np.flip(dimension_values), 2.3, 4.6, np.flip(dimension_bounds) + ) mock_get_indices_from_values.assert_not_called() mock_get_indices_from_bounds.assert_called_once_with(ANY, 2.3, 4.6) assert_array_equal( mock_get_indices_from_bounds.call_args_list[0][0][0], - np.flip(dimension_bounds) + np.flip(dimension_bounds), ) mock_get_indices_from_bounds.reset_mock() def test_get_dimension_bounds(self): - """ Ensure that if a dimension variable has a `bounds` metadata - attribute, the values in the associated bounds variable are - returned. Ensure graceful handling if the dimension variable lacks - bounds metadata, or the referred to bounds variable is absent from - the NetCDF-4 dataset. + """Ensure that if a dimension variable has a `bounds` metadata + attribute, the values in the associated bounds variable are + returned. Ensure graceful handling if the dimension variable lacks + bounds metadata, or the referred to bounds variable is absent from + the NetCDF-4 dataset. 
""" with self.subTest('Bounds are retrieved'): with Dataset('tests/data/GPM_3IMERGHH_prefetch.nc4') as dataset: assert_array_equal( - get_dimension_bounds('/Grid/lat', self.varinfo_with_bounds, - dataset), - dataset['/Grid/lat_bnds'][:] + get_dimension_bounds( + '/Grid/lat', self.varinfo_with_bounds, dataset + ), + dataset['/Grid/lat_bnds'][:], ) with self.subTest('Variable has no bounds, None is returned'): with Dataset('tests/data/f16_ssmis_lat_lon.nc') as dataset: - self.assertIsNone(get_dimension_bounds('/latitude', - self.varinfo, dataset)) + self.assertIsNone( + get_dimension_bounds('/latitude', self.varinfo, dataset) + ) with self.subTest('Incorrect bounds metadata, None is returned'): prefetch_bad_bounds = f'{self.temp_dir}/f16_ssmis_lat_lon.nc' @@ -837,60 +945,54 @@ def test_get_dimension_bounds(self): with Dataset(prefetch_bad_bounds, 'r+') as dataset: dataset['/latitude'].setncattr('bounds', '/does_not_exist') - self.assertIsNone(get_dimension_bounds('/latitude', - self.varinfo, dataset)) + self.assertIsNone( + get_dimension_bounds('/latitude', self.varinfo, dataset) + ) def test_get_dimension_indices_from_bounds(self): - """ Ensure that the correct index ranges are retrieved for a variety - of requested dimension ranges, including values that lie within - pixels and others on the boundary between two adjacent pixels. + """Ensure that the correct index ranges are retrieved for a variety + of requested dimension ranges, including values that lie within + pixels and others on the boundary between two adjacent pixels. """ - ascending_bounds = np.array([[0, 10], [10, 20], [20, 30], [30, 40], - [40, 50]]) - descending_bounds = np.array([[0, -10], [-10, -20], [-20, -30], - [-30, -40], [-40, -50]]) + ascending_bounds = np.array([[0, 10], [10, 20], [20, 30], [30, 40], [40, 50]]) + descending_bounds = np.array( + [[0, -10], [-10, -20], [-20, -30], [-30, -40], [-40, -50]] + ) with self.subTest('Ascending dimension, values within pixels'): self.assertTupleEqual( - get_dimension_indices_from_bounds(ascending_bounds, 5, 15), - (0, 1) + get_dimension_indices_from_bounds(ascending_bounds, 5, 15), (0, 1) ) with self.subTest('Ascending dimension, min_value on pixel edge'): self.assertTupleEqual( - get_dimension_indices_from_bounds(ascending_bounds, 10, 15), - (1, 1) + get_dimension_indices_from_bounds(ascending_bounds, 10, 15), (1, 1) ) with self.subTest('Ascending dimension, max_value on pixel edge'): self.assertTupleEqual( - get_dimension_indices_from_bounds(ascending_bounds, 5, 20), - (0, 1) + get_dimension_indices_from_bounds(ascending_bounds, 5, 20), (0, 1) ) with self.subTest('Ascending dimension, min=max on pixel edge'): self.assertTupleEqual( - get_dimension_indices_from_bounds(ascending_bounds, 20, 20), - (1, 2) + get_dimension_indices_from_bounds(ascending_bounds, 20, 20), (1, 2) ) with self.subTest('Ascending dimension, min=max within a pixel'): self.assertTupleEqual( - get_dimension_indices_from_bounds(ascending_bounds, 15, 15), - (1, 1) + get_dimension_indices_from_bounds(ascending_bounds, 15, 15), (1, 1) ) with self.subTest('Ascending dimension, min_value < lowest bounds'): self.assertTupleEqual( - get_dimension_indices_from_bounds(ascending_bounds, -10, 15), - (0, 1) + get_dimension_indices_from_bounds(ascending_bounds, -10, 15), (0, 1) ) with self.subTest('Ascending dimension, max_value > highest bound'): self.assertTupleEqual( - get_dimension_indices_from_bounds(ascending_bounds, 45, 55), - (4, 4) + get_dimension_indices_from_bounds(ascending_bounds, 45, 55), (4, 4) ) with 
self.subTest('Ascending dimension, max_value < lowest bound'): @@ -903,44 +1005,37 @@ def test_get_dimension_indices_from_bounds(self): with self.subTest('Descending dimension, values within pixels'): self.assertTupleEqual( - get_dimension_indices_from_bounds(descending_bounds, -15, -5), - (0, 1) + get_dimension_indices_from_bounds(descending_bounds, -15, -5), (0, 1) ) with self.subTest('Descending dimension, max_value on pixel edge'): self.assertTupleEqual( - get_dimension_indices_from_bounds(descending_bounds, -15, -10), - (1, 1) + get_dimension_indices_from_bounds(descending_bounds, -15, -10), (1, 1) ) with self.subTest('Descending dimension, min_value on pixel edge'): self.assertTupleEqual( - get_dimension_indices_from_bounds(descending_bounds, -20, -5), - (0, 1) + get_dimension_indices_from_bounds(descending_bounds, -20, -5), (0, 1) ) with self.subTest('Descending dimension, min=max on pixel edge'): self.assertTupleEqual( - get_dimension_indices_from_bounds(descending_bounds, -20, -20), - (1, 2) + get_dimension_indices_from_bounds(descending_bounds, -20, -20), (1, 2) ) with self.subTest('Descending dimension, min=max within a pixel'): self.assertTupleEqual( - get_dimension_indices_from_bounds(descending_bounds, -15, -15), - (1, 1) + get_dimension_indices_from_bounds(descending_bounds, -15, -15), (1, 1) ) with self.subTest('Descending dimension, max_value > highest bounds'): self.assertTupleEqual( - get_dimension_indices_from_bounds(descending_bounds, -15, 10), - (0, 1) + get_dimension_indices_from_bounds(descending_bounds, -15, 10), (0, 1) ) with self.subTest('Descending dimension, min_value > lowest bound'): self.assertTupleEqual( - get_dimension_indices_from_bounds(descending_bounds, -55, -45), - (4, 4) + get_dimension_indices_from_bounds(descending_bounds, -55, -45), (4, 4) ) with self.subTest('Descending dimension, min_value > highest bound'): @@ -952,8 +1047,8 @@ def test_get_dimension_indices_from_bounds(self): get_dimension_indices_from_bounds(descending_bounds, -65, -55) def test_is_almost_in(self): - """ Ensure that only values within an acceptable tolerance of data are - determined to have nearby values within the input array. + """Ensure that only values within an acceptable tolerance of data are + determined to have nearby values within the input array. """ test_array = np.linspace(0, 1, 1001) @@ -962,7 +1057,7 @@ def test_is_almost_in(self): ['0.1, value in test_array', test_array, 0.1], ['0.01, value in test_array ', test_array, 0.01], ['0.001, value in test_array', test_array, 0.001], - ['0.0000001, below tolerance rounds to zero', test_array, 0.0000001] + ['0.0000001, below tolerance rounds to zero', test_array, 0.0000001], ] false_tests = [ ['0.0001 - not in array, above tolerance', test_array, 0.0001], diff --git a/tests/unit/test_projection_utilities.py b/tests/unit/test_projection_utilities.py index f3d5884..2c3a955 100644 --- a/tests/unit/test_projection_utilities.py +++ b/tests/unit/test_projection_utilities.py @@ -4,6 +4,7 @@ collections that have projected grids. 
""" + from os.path import join as path_join from shutil import rmtree from tempfile import mkdtemp @@ -17,28 +18,36 @@ import numpy as np from hoss.bbox_utilities import BBox -from hoss.exceptions import (InvalidInputGeoJSON, MissingGridMappingMetadata, - MissingGridMappingVariable, - MissingSpatialSubsetInformation) -from hoss.projection_utilities import (get_bbox_polygon, get_grid_lat_lons, - get_geographic_resolution, - get_projected_x_y_extents, - get_projected_x_y_variables, - get_resolved_feature, - get_resolved_features, - get_resolved_geojson, - get_resolved_geometry, - get_resolved_line, get_variable_crs, - get_x_y_extents_from_geographic_points, - is_projection_x_dimension, - is_projection_y_dimension) +from hoss.exceptions import ( + InvalidInputGeoJSON, + MissingGridMappingMetadata, + MissingGridMappingVariable, + MissingSpatialSubsetInformation, +) +from hoss.projection_utilities import ( + get_bbox_polygon, + get_grid_lat_lons, + get_geographic_resolution, + get_projected_x_y_extents, + get_projected_x_y_variables, + get_resolved_feature, + get_resolved_features, + get_resolved_geojson, + get_resolved_geometry, + get_resolved_line, + get_variable_crs, + get_x_y_extents_from_geographic_points, + is_projection_x_dimension, + is_projection_y_dimension, +) class TestProjectionUtilities(TestCase): - """ A class for testing functions in the `hoss.projection_utilities` - module. + """A class for testing functions in the `hoss.projection_utilities` + module. """ + @classmethod def setUpClass(cls): # Set up GeoJSON fixtures (both as raw GeoJSON and parsed shapely objects) @@ -66,16 +75,16 @@ def tearDown(self): @staticmethod def read_geojson(geojson_base_name: str): - """ A helper function to extract GeoJSON from a supplied file path. """ + """A helper function to extract GeoJSON from a supplied file path.""" with open(f'tests/geojson_examples/{geojson_base_name}', 'r') as file_handler: geojson_content = json.load(file_handler) return geojson_content def test_get_variable_crs(self): - """ Ensure a `pyproj.CRS` object can be instantiated via the reference - in a variable. Alternatively, if the `grid_mapping` attribute is - absent, or erroneous, ensure the expected exceptions are raised. + """Ensure a `pyproj.CRS` object can be instantiated via the reference + in a variable. Alternatively, if the `grid_mapping` attribute is + absent, or erroneous, ensure the expected exceptions are raised. 
""" sample_dmr = ( @@ -143,102 +152,121 @@ def test_get_variable_crs(self): varinfo = VarInfoFromDmr(dmr_path) - expected_crs = CRS.from_cf({ - 'false_easting': 0.0, - 'false_northing': 0.0, - 'latitude_of_projection_origin': 40.0, - 'longitude_of_central_meridian': -96.0, - 'standard_parallel': [50.0, 70.0], - 'long_name': 'CRS definition', - 'longitude_of_prime_meridian': 0.0, - 'semi_major_axis': 6378137.0, - 'inverse_flattening': 298.25722210100002, - 'grid_mapping_name': 'albers_conical_equal_area' - }) + expected_crs = CRS.from_cf( + { + 'false_easting': 0.0, + 'false_northing': 0.0, + 'latitude_of_projection_origin': 40.0, + 'longitude_of_central_meridian': -96.0, + 'standard_parallel': [50.0, 70.0], + 'long_name': 'CRS definition', + 'longitude_of_prime_meridian': 0.0, + 'semi_major_axis': 6378137.0, + 'inverse_flattening': 298.25722210100002, + 'grid_mapping_name': 'albers_conical_equal_area', + } + ) with self.subTest('Variable with "grid_mapping" gets expected CRS'): - actual_crs = get_variable_crs('/variable_with_grid_mapping', - varinfo) + actual_crs = get_variable_crs('/variable_with_grid_mapping', varinfo) self.assertEqual(actual_crs, expected_crs) with self.subTest('Variable has no "grid_mapping" attribute'): with self.assertRaises(MissingGridMappingMetadata) as context: get_variable_crs('/variable_without_grid_mapping', varinfo) - self.assertEqual(context.exception.message, - 'Projected variable "/variable_without_grid_mapping"' - ' does not have an associated "grid_mapping" ' - 'metadata attribute.') + self.assertEqual( + context.exception.message, + 'Projected variable "/variable_without_grid_mapping"' + ' does not have an associated "grid_mapping" ' + 'metadata attribute.', + ) with self.subTest('"grid_mapping" points to non-existent variable'): with self.assertRaises(MissingGridMappingVariable) as context: get_variable_crs('/variable_with_bad_grid_mapping', varinfo) - self.assertEqual(context.exception.message, - 'Grid mapping variable "/non_existent_crs" ' - 'referred to by variable ' - '"/variable_with_bad_grid_mapping" is not ' - 'present in granule .dmr file.') + self.assertEqual( + context.exception.message, + 'Grid mapping variable "/non_existent_crs" ' + 'referred to by variable ' + '"/variable_with_bad_grid_mapping" is not ' + 'present in granule .dmr file.', + ) def test_get_projected_x_y_extents(self): - """ Ensure that the expected values for the x and y dimension extents - are recovered for a known projected grid and requested input. + """Ensure that the expected values for the x and y dimension extents + are recovered for a known projected grid and requested input. - The dimension values used below mimic one of the ABoVE TVPRM - granules. Both the bounding box and the shape file used are - identical shapes, just expressed either as a bounding box or a - GeoJSON polygon. They should therefore return the same extents. + The dimension values used below mimic one of the ABoVE TVPRM + granules. Both the bounding box and the shape file used are + identical shapes, just expressed either as a bounding box or a + GeoJSON polygon. They should therefore return the same extents. 
""" x_values = np.linspace(-3385020, -1255020, 72) y_values = np.linspace(4625000, 3575000, 36) - crs = CRS.from_cf({'false_easting': 0.0, - 'false_northing': 0.0, - 'latitude_of_projection_origin': 40.0, - 'longitude_of_central_meridian': -96.0, - 'standard_parallel': [50.0, 70.0], - 'long_name': 'CRS definition', - 'longitude_of_prime_meridian': 0.0, - 'semi_major_axis': 6378137.0, - 'inverse_flattening': 298.257222101, - 'grid_mapping_name': 'albers_conical_equal_area'}) + crs = CRS.from_cf( + { + 'false_easting': 0.0, + 'false_northing': 0.0, + 'latitude_of_projection_origin': 40.0, + 'longitude_of_central_meridian': -96.0, + 'standard_parallel': [50.0, 70.0], + 'long_name': 'CRS definition', + 'longitude_of_prime_meridian': 0.0, + 'semi_major_axis': 6378137.0, + 'inverse_flattening': 298.257222101, + 'grid_mapping_name': 'albers_conical_equal_area', + } + ) bounding_box = BBox(-160, 68, -145, 70) - polygon = {'type': 'Polygon', - 'coordinates': [[(bounding_box.west, bounding_box.south), - (bounding_box.east, bounding_box.south), - (bounding_box.east, bounding_box.north), - (bounding_box.west, bounding_box.north), - (bounding_box.west, bounding_box.south)]]} + polygon = { + 'type': 'Polygon', + 'coordinates': [ + [ + (bounding_box.west, bounding_box.south), + (bounding_box.east, bounding_box.south), + (bounding_box.east, bounding_box.north), + (bounding_box.west, bounding_box.north), + (bounding_box.west, bounding_box.south), + ] + ], + } polygon_path = path_join(self.temp_dir, 'bbox_poly.geo.json') with open(polygon_path, 'w', encoding='utf-8') as file_handler: json.dump(polygon, file_handler, indent=4) - expected_output = {'x_min': -2273166.953240025, - 'x_max': -1709569.3224678137, - 'y_min': 3832621.3156695124, - 'y_max': 4425654.159834823} + expected_output = { + 'x_min': -2273166.953240025, + 'x_max': -1709569.3224678137, + 'y_min': 3832621.3156695124, + 'y_max': 4425654.159834823, + } with self.subTest('Bounding box input'): self.assertDictEqual( - get_projected_x_y_extents(x_values, y_values, crs, - bounding_box=bounding_box), - expected_output + get_projected_x_y_extents( + x_values, y_values, crs, bounding_box=bounding_box + ), + expected_output, ) with self.subTest('Shape file input'): self.assertDictEqual( - get_projected_x_y_extents(x_values, y_values, crs, - shape_file=polygon_path), - expected_output + get_projected_x_y_extents( + x_values, y_values, crs, shape_file=polygon_path + ), + expected_output, ) def test_get_projected_x_y_variables(self): - """ Ensure that the `standard_name` metadata attribute can be parsed - via `VarInfoFromDmr` for all dimenions of a specifed variable. If - no dimensions have either an x or y coordinate, the corresponding - return value should be `None`. + """Ensure that the `standard_name` metadata attribute can be parsed + via `VarInfoFromDmr` for all dimenions of a specifed variable. If + no dimensions have either an x or y coordinate, the corresponding + return value should be `None`. """ sample_dmr = ( @@ -334,11 +362,11 @@ def test_get_projected_x_y_variables(self): self.assertIsNone(actual_y) def test_is_projection_x_dimension(self): - """ Ensure that a dimension variable is correctly identified as being - an x-dimension if it has the expected `standard_name`. This - function must also handle absent dimensions, for cases such as the - `nv`, `latv` or `lonv` dimensions that do not have corresponding - variables in a granule. 
+        """Ensure that a dimension variable is correctly identified as being
+        an x-dimension if it has the expected `standard_name`. This
+        function must also handle absent dimensions, for cases such as the
+        `nv`, `latv` or `lonv` dimensions that do not have corresponding
+        variables in a granule.

         """
         sample_dmr = (
@@ -376,11 +404,11 @@ def test_is_projection_x_dimension(self):
             self.assertFalse(is_projection_x_dimension(varinfo, '/missing'))

     def test_is_projection_y_variable(self):
-        """ Ensure that a dimension variable is correctly identified as being
-            an y-dimension if it has the expected `standard_name`. This
-            function must also handle absent dimensions, for cases such as the
-            `nv`, `latv` or `lonv` dimensions that do not have corresponding
-            variables in a granule.
+        """Ensure that a dimension variable is correctly identified as being
+        a y-dimension if it has the expected `standard_name`. This
+        function must also handle absent dimensions, for cases such as the
+        `nv`, `latv` or `lonv` dimensions that do not have corresponding
+        variables in a granule.

         """
         sample_dmr = (
@@ -418,11 +446,11 @@ def test_is_projection_y_variable(self):
             self.assertFalse(is_projection_y_dimension(varinfo, '/missing'))

     def test_get_grid_lat_lons(self):
-        """ Ensure that a grid of projected values is correctly converted to
-            longitude and latitude values. The inputs include 1-D arrays for
-            the x and y dimensions, whilst the output are 2-D grids of latitude
-            and longitude that correspond to all grid points defined by the
-            combinations of x and y coordinates.
+        """Ensure that a grid of projected values is correctly converted to
+        longitude and latitude values. The inputs include 1-D arrays for
+        the x and y dimensions, whilst the outputs are 2-D grids of latitude
+        and longitude that correspond to all grid points defined by the
+        combinations of x and y coordinates.

         """
         x_values = np.array([1513760.59366167, 1048141.65434399])
@@ -437,47 +465,58 @@ def test_get_grid_lat_lons(self):
         np.testing.assert_almost_equal(actual_lons, expected_lons)

     def test_get_geographic_resolution(self):
-        """ Ensure the calculated resolution is the minimum Euclidean distance
-            between diagonally adjacent pixels.
+        """Ensure the calculated resolution is the minimum Euclidean distance
+        between diagonally adjacent pixels.

-            The example coordinates below have the shortest diagonal difference
-            between (10, 10) and (15, 15), resulting in a resolution of
-            (5^2 + 5^2)^0.5 = 50^0.5 ~= 7.07.
+        The example coordinates below have the shortest diagonal difference
+        between (10, 10) and (15, 15), resulting in a resolution of
+        (5^2 + 5^2)^0.5 = 50^0.5 ~= 7.07.

         """
         latitudes = np.array([[10, 10, 10], [15, 15, 15], [25, 25, 25]])
         longitudes = np.array([[10, 15, 25], [10, 15, 25], [10, 15, 25]])
         expected_resolution = 7.071

-        self.assertAlmostEqual(get_geographic_resolution(longitudes, latitudes),
-                               expected_resolution, places=3)
+        self.assertAlmostEqual(
+            get_geographic_resolution(longitudes, latitudes),
+            expected_resolution,
+            places=3,
+        )

     @patch('hoss.projection_utilities.get_bbox_polygon')
     @patch('hoss.projection_utilities.get_resolved_feature')
     @patch('hoss.projection_utilities.get_resolved_features')
-    def test_get_resolved_geojson(self, mock_get_resolved_features,
-                                  mock_get_resolved_feature,
-                                  mock_get_bbox_polygon):
-        """ Ensure that a GeoJSON shape or bounding box is correctly resolved
-            using the correct functionality (bounding box versus shape file).
+ def test_get_resolved_geojson( + self, + mock_get_resolved_features, + mock_get_resolved_feature, + mock_get_bbox_polygon, + ): + """Ensure that a GeoJSON shape or bounding box is correctly resolved + using the correct functionality (bounding box versus shape file). """ resolution = 0.1 shape_file = f'tests/geojson_examples/{self.polygon_file_name}' bounding_box = BBox(0, 10, 20, 30) - bounding_box_polygon = Polygon([(0, 10), (20, 10), (20, 30), (0, 30), - (0, 10)]) + bounding_box_polygon = Polygon([(0, 10), (20, 10), (20, 30), (0, 30), (0, 10)]) resolved_feature = [(0, 10), (20, 10), (20, 30), (0, 30)] - resolved_features = [(-114.05, 42.0), (-114.05, 37.0), (-109.04, 37.0), - (-109.04, 41.0), (-111.05, 41.0)] + resolved_features = [ + (-114.05, 42.0), + (-114.05, 37.0), + (-109.04, 37.0), + (-109.04, 41.0), + (-111.05, 41.0), + ] mock_get_resolved_features.return_value = resolved_features mock_get_resolved_feature.return_value = resolved_feature mock_get_bbox_polygon.return_value = bounding_box_polygon with self.subTest('Shape file is specified and used'): - self.assertListEqual(get_resolved_geojson(resolution, - shape_file=shape_file), - resolved_features) + self.assertListEqual( + get_resolved_geojson(resolution, shape_file=shape_file), + resolved_features, + ) mock_get_resolved_features.assert_called_once_with( self.polygon_geojson, resolution ) @@ -489,7 +528,7 @@ def test_get_resolved_geojson(self, mock_get_resolved_features, with self.subTest('Bounding box is specified and used'): self.assertListEqual( get_resolved_geojson(resolution, bounding_box=bounding_box), - resolved_feature + resolved_feature, ) mock_get_resolved_features.assert_not_called() mock_get_resolved_feature.assert_called_once_with( @@ -502,9 +541,10 @@ def test_get_resolved_geojson(self, mock_get_resolved_features, with self.subTest('Bounding box is used when both are specified'): self.assertListEqual( - get_resolved_geojson(resolution, shape_file=shape_file, - bounding_box=bounding_box), - resolved_feature + get_resolved_geojson( + resolution, shape_file=shape_file, bounding_box=bounding_box + ), + resolved_feature, ) mock_get_resolved_feature.assert_called_once_with( bounding_box_polygon, resolution @@ -522,54 +562,63 @@ def test_get_resolved_geojson(self, mock_get_resolved_features, mock_get_resolved_feature.assert_not_called() def test_get_bbox_polygon(self): - """ Ensure a polygon is constructed from the input bounding box. It - should only have an exterior set of points, and those should only - be combinations of the West, South, East and North coordinates of - the input bounding box. + """Ensure a polygon is constructed from the input bounding box. It + should only have an exterior set of points, and those should only + be combinations of the West, South, East and North coordinates of + the input bounding box. 
""" bounding_box = BBox(0, 10, 20, 30) - expected_bounding_box_polygon = Polygon([(0, 10), (20, 10), (20, 30), - (0, 30), (0, 10)]) + expected_bounding_box_polygon = Polygon( + [(0, 10), (20, 10), (20, 30), (0, 30), (0, 10)] + ) bounding_box_result = get_bbox_polygon(bounding_box) self.assertEqual(bounding_box_result, expected_bounding_box_polygon) self.assertListEqual(list(bounding_box_result.interiors), []) @patch('hoss.projection_utilities.get_resolved_feature') def test_get_resolved_features(self, mock_get_resolved_feature): - """ Ensure that the parsed GeoJSON content can be correctly sent to - `get_resolved_feature`, depending on if the content is a GeoJSON - Geometry, Feature or FeatureCollection. If the object does not - conform to the expected GeoJSON schema, and exception will be - raised. + """Ensure that the parsed GeoJSON content can be correctly sent to + `get_resolved_feature`, depending on if the content is a GeoJSON + Geometry, Feature or FeatureCollection. If the object does not + conform to the expected GeoJSON schema, and exception will be + raised. """ resolution = 2.0 - resolved_linestring = [(-75.696, 38.471), (-75.795, 39.716), - (-77.370, 39.719), (-78.944, 39.721), - (-80.519, 39.724)] + resolved_linestring = [ + (-75.696, 38.471), + (-75.795, 39.716), + (-77.370, 39.719), + (-78.944, 39.721), + (-80.519, 39.724), + ] with self.subTest('A Geometry input is passed directly through'): mock_get_resolved_feature.return_value = resolved_linestring self.assertListEqual( - get_resolved_features(self.linestring_geojson['features'][0]['geometry'], - resolution), - resolved_linestring + get_resolved_features( + self.linestring_geojson['features'][0]['geometry'], resolution + ), + resolved_linestring, + ) + mock_get_resolved_feature.assert_called_once_with( + self.linestring, resolution ) - mock_get_resolved_feature.assert_called_once_with(self.linestring, - resolution) mock_get_resolved_feature.reset_mock() with self.subTest('A Feature input uses its Geometry attribute'): mock_get_resolved_feature.return_value = resolved_linestring self.assertListEqual( - get_resolved_features(self.linestring_geojson['features'][0], - resolution), - resolved_linestring + get_resolved_features( + self.linestring_geojson['features'][0], resolution + ), + resolved_linestring, + ) + mock_get_resolved_feature.assert_called_once_with( + self.linestring, resolution ) - mock_get_resolved_feature.assert_called_once_with(self.linestring, - resolution) mock_get_resolved_feature.reset_mock() @@ -577,19 +626,30 @@ def test_get_resolved_features(self, mock_get_resolved_feature): multi_feature_geojson = self.read_geojson('multi_feature.geo.json') first_shape = shape(multi_feature_geojson['features'][0]['geometry']) second_shape = shape(multi_feature_geojson['features'][1]['geometry']) - multi_feature_side_effect = [[(-75.565, 39.662)], - [(-75.696, 38.471), (-75.795, 39.716), - (-77.370, 39.718), (-78.944, 39.721), - (-80.519, 39.724)]] - resolved_multi_feature = [(-75.565, 39.662), (-75.696, 38.471), - (-75.795, 39.716), (-77.370, 39.718), - (-78.944, 39.721), (-80.519, 39.724)] + multi_feature_side_effect = [ + [(-75.565, 39.662)], + [ + (-75.696, 38.471), + (-75.795, 39.716), + (-77.370, 39.718), + (-78.944, 39.721), + (-80.519, 39.724), + ], + ] + resolved_multi_feature = [ + (-75.565, 39.662), + (-75.696, 38.471), + (-75.795, 39.716), + (-77.370, 39.718), + (-78.944, 39.721), + (-80.519, 39.724), + ] with self.subTest('A FeatureCollection uses the Geometry of each Feature'): 
mock_get_resolved_feature.side_effect = multi_feature_side_effect
             self.assertListEqual(
                 get_resolved_features(multi_feature_geojson, resolution),
-                resolved_multi_feature
+                resolved_multi_feature,
             )
             self.assertEqual(mock_get_resolved_feature.call_count, 2)
             mock_get_resolved_feature.assert_has_calls(
@@ -604,61 +664,102 @@ def test_get_resolved_features(self, mock_get_resolved_feature):

     @patch('hoss.projection_utilities.get_resolved_geometry')
     def test_get_resolved_feature(self, mock_get_resolved_geometry):
-        """ Ensure that GeoJSON features with various geometry types are
-            correctly handled to produce a list of points at the specified
-            resolution.
+        """Ensure that GeoJSON features with various geometry types are
+        correctly handled to produce a list of points at the specified
+        resolution.

-            Single geometry features (Point, Line, Polygon) should be handled
-            with a single call to `get_resolved_feature`.
+        Single geometry features (Point, LineString, Polygon) should be
+        handled with a single call to `get_resolved_feature`.

-            Multi geometry features (MultiPoint, Line, Polygon,
-            GeometryCollection) should recursively call this function and
-            flatten the resulting list of lists of coordinates.
+        Multi geometry features (MultiPoint, MultiLineString, MultiPolygon,
+        GeometryCollection) should recursively call this function and
+        flatten the resulting list of lists of coordinates.

-            Any other geometry type will not be recognised and will raise an
-            exception.
+        Any other geometry type will not be recognised and will raise an
+        exception.

-            Mock return values for `get_resolved_geometry` are rounded to 2 or
-            3 decimal places as appropriate, but are otherwise accurate.
+        Mock return values for `get_resolved_geometry` are rounded to 2 or
+        3 decimal places as appropriate, but are otherwise accurate.
""" resolution = 2.0 - resolved_polygon = [(-114.05, 42.0), (-114.05, 40.33), - (-114.05, 38.67), (-114.05, 37.0), (-112.38, 37.0), - (-110.71, 37.0), (-109.04, 37.0), (-109.04, 39.0), - (-109.04, 41.0), (-110.045, 41.0), (-111.05, 41.0), - (-111.05, 42.0), (-112.55, 42.0)] - resolved_linestring = [(-75.696, 38.471), (-75.795, 39.716), - (-77.370, 39.719), (-78.944, 39.721), - (-80.519, 39.724)] - - mlinestring_side_effect = [[(-3.194, 55.949), (-3.181, 55.951), - (-3.174, 55.953)], - [(-0.14, 51.502), (-0.128, 51.507)]] - resolved_mlinestring = [(-3.194, 55.949), (-3.181, 55.951), - (-3.174, 55.953), (-0.14, 51.502), - (-0.128, 51.507)] + resolved_polygon = [ + (-114.05, 42.0), + (-114.05, 40.33), + (-114.05, 38.67), + (-114.05, 37.0), + (-112.38, 37.0), + (-110.71, 37.0), + (-109.04, 37.0), + (-109.04, 39.0), + (-109.04, 41.0), + (-110.045, 41.0), + (-111.05, 41.0), + (-111.05, 42.0), + (-112.55, 42.0), + ] + resolved_linestring = [ + (-75.696, 38.471), + (-75.795, 39.716), + (-77.370, 39.719), + (-78.944, 39.721), + (-80.519, 39.724), + ] + + mlinestring_side_effect = [ + [(-3.194, 55.949), (-3.181, 55.951), (-3.174, 55.953)], + [(-0.14, 51.502), (-0.128, 51.507)], + ] + resolved_mlinestring = [ + (-3.194, 55.949), + (-3.181, 55.951), + (-3.174, 55.953), + (-0.14, 51.502), + (-0.128, 51.507), + ] resolved_multi_point = [(-0.076, 51.508), (-0.142, 51.501)] - mpolygon_side_effect = [[(-109.05, 41.0), (-109.05, 39.0), - (-109.05, 37), (-105.55, 37.0), (-103.8, 37.0), - (-102.05, 37.0), (-102.05, 39.0), - (-102.05, 41.0), (-103.8, 41.0), - (-105.55, 41.0), (-107.3, 41.0)]] + mpolygon_side_effect = [ + [ + (-109.05, 41.0), + (-109.05, 39.0), + (-109.05, 37), + (-105.55, 37.0), + (-103.8, 37.0), + (-102.05, 37.0), + (-102.05, 39.0), + (-102.05, 41.0), + (-103.8, 41.0), + (-105.55, 41.0), + (-107.3, 41.0), + ] + ] resolved_mpolygon = mpolygon_side_effect[0] - geom_coll_side_effect = [[(-75.696, 38.471), (-75.795, 39.716), - (-77.370, 39.718), (-78.944, 39.721), - (-80.519, 39.724)]] - resolved_geom_collection = [(-75.565, 39.662), (-75.696, 38.471), - (-75.795, 39.716), (-77.370, 39.718), - (-78.944, 39.721), (-80.519, 39.724)] + geom_coll_side_effect = [ + [ + (-75.696, 38.471), + (-75.795, 39.716), + (-77.370, 39.718), + (-78.944, 39.721), + (-80.519, 39.724), + ] + ] + resolved_geom_collection = [ + (-75.565, 39.662), + (-75.696, 38.471), + (-75.795, 39.716), + (-77.370, 39.718), + (-78.944, 39.721), + (-80.519, 39.724), + ] with self.subTest('Polygon'): mock_get_resolved_geometry.return_value = resolved_polygon - self.assertListEqual(get_resolved_feature(self.polygon, resolution), - resolved_polygon) + self.assertListEqual( + get_resolved_feature(self.polygon, resolution), resolved_polygon + ) mock_get_resolved_geometry.assert_called_once_with( list(self.polygon.exterior.coords), resolution ) @@ -667,8 +768,9 @@ def test_get_resolved_feature(self, mock_get_resolved_geometry): with self.subTest('LineString'): mock_get_resolved_geometry.return_value = resolved_linestring - self.assertListEqual(get_resolved_feature(self.linestring, resolution), - resolved_linestring) + self.assertListEqual( + get_resolved_feature(self.linestring, resolution), resolved_linestring + ) mock_get_resolved_geometry.assert_called_once_with( list(self.linestring.coords), resolution, is_closed=False ) @@ -676,52 +778,64 @@ def test_get_resolved_feature(self, mock_get_resolved_geometry): mock_get_resolved_geometry.reset_mock() with self.subTest('Point'): - self.assertListEqual(get_resolved_feature(self.point, 
resolution), - [(self.point.x, self.point.y)]) + self.assertListEqual( + get_resolved_feature(self.point, resolution), + [(self.point.x, self.point.y)], + ) mock_get_resolved_geometry.assert_not_called() with self.subTest('MultiPolygon'): mock_get_resolved_geometry.side_effect = mpolygon_side_effect - self.assertListEqual(get_resolved_feature(self.multi_polygon, - resolution), - resolved_mpolygon) + self.assertListEqual( + get_resolved_feature(self.multi_polygon, resolution), resolved_mpolygon + ) mock_get_resolved_geometry.assert_called_once_with( - list(self.multi_polygon.geoms[0].exterior.coords), resolution, + list(self.multi_polygon.geoms[0].exterior.coords), + resolution, ) mock_get_resolved_geometry.reset_mock() with self.subTest('MultiLineString'): mock_get_resolved_geometry.side_effect = mlinestring_side_effect - self.assertListEqual(get_resolved_feature(self.multi_linestring, - resolution), - resolved_mlinestring) + self.assertListEqual( + get_resolved_feature(self.multi_linestring, resolution), + resolved_mlinestring, + ) self.assertEqual(mock_get_resolved_geometry.call_count, 2) - mock_get_resolved_geometry.assert_has_calls([ - call(list(self.multi_linestring.geoms[0].coords), resolution, - is_closed=False), - call(list(self.multi_linestring.geoms[1].coords), resolution, - is_closed=False) - ]) + mock_get_resolved_geometry.assert_has_calls( + [ + call( + list(self.multi_linestring.geoms[0].coords), + resolution, + is_closed=False, + ), + call( + list(self.multi_linestring.geoms[1].coords), + resolution, + is_closed=False, + ), + ] + ) mock_get_resolved_geometry.reset_mock() with self.subTest('MultiPoint'): - self.assertListEqual(get_resolved_feature(self.multi_point, - resolution), - resolved_multi_point) + self.assertListEqual( + get_resolved_feature(self.multi_point, resolution), resolved_multi_point + ) mock_get_resolved_geometry.assert_not_called() with self.subTest('GeometryCollection'): # Contains a Point and a LineString, the point will not need to # call `get_resolved_geometry`. mock_get_resolved_geometry.side_effect = geom_coll_side_effect - self.assertListEqual(get_resolved_feature(self.geometry_coll, - resolution), - resolved_geom_collection) + self.assertListEqual( + get_resolved_feature(self.geometry_coll, resolution), + resolved_geom_collection, + ) mock_get_resolved_geometry.assert_called_once_with( - list(self.geometry_coll.geoms[1].coords), resolution, - is_closed=False + list(self.geometry_coll.geoms[1].coords), resolution, is_closed=False ) mock_get_resolved_geometry.reset_mock() @@ -731,70 +845,88 @@ def test_get_resolved_feature(self, mock_get_resolved_geometry): get_resolved_feature('not_geojson_shape', resolution) def test_get_resolved_geometry(self): - """ Ensure that a set of input points are updated to the specified - resolution. Specific test cases include whether the input forms a - closed loop or not. + """Ensure that a set of input points are updated to the specified + resolution. Specific test cases include whether the input forms a + closed loop or not. 
""" - input_geometry = [(1.0, 1.0), (1.0, 1.5), (2.0, 1.5), (2.0, 1.0), - (1.0, 1.0)] + input_geometry = [(1.0, 1.0), (1.0, 1.5), (2.0, 1.5), (2.0, 1.0), (1.0, 1.0)] resolution = 0.5 - output_open_geometry = [(1.0, 1.0), (1.0, 1.5), (1.5, 1.5), (2.0, 1.5), - (2.0, 1.0), (1.5, 1.0), (1.0, 1.0)] + output_open_geometry = [ + (1.0, 1.0), + (1.0, 1.5), + (1.5, 1.5), + (2.0, 1.5), + (2.0, 1.0), + (1.5, 1.0), + (1.0, 1.0), + ] output_closed_geometry = output_open_geometry[:-1] test_args = [ ['Open geometry includes the final point.', False, output_open_geometry], - ['Closed geometry excludes final point.', True, output_closed_geometry] + ['Closed geometry excludes final point.', True, output_closed_geometry], ] for description, is_closed, expected_geometry in test_args: with self.subTest(description): - self.assertListEqual(get_resolved_geometry(input_geometry, - resolution, - is_closed=is_closed), - expected_geometry) + self.assertListEqual( + get_resolved_geometry( + input_geometry, resolution, is_closed=is_closed + ), + expected_geometry, + ) def test_get_resolved_line(self): - """ Ensure that a line, defined by its two end-points, will be - converted so that there are evenly spaced points separated by, - at most, the resolution supplied to the function. + """Ensure that a line, defined by its two end-points, will be + converted so that there are evenly spaced points separated by, + at most, the resolution supplied to the function. - Note, in the first test, the distance between each point is 2.83, - resulting from the smallest number of points possible being placed - on the line at a distance of no greater than the requested - resolution (3). + Note, in the first test, the distance between each point is 2.83, + resulting from the smallest number of points possible being placed + on the line at a distance of no greater than the requested + resolution (3). """ test_args = [ - ['Line needs additional points', (0, 0), (10, 10), 3, - [(0, 0), (2, 2), (4, 4), (6, 6), (8, 8), (10, 10)]], - ['Resolution bigger than line', (0, 0), (1, 1), 2, - [(0, 0), (1, 1)]], - ['Line flat in one dimension', (0, 0), (0, 10), 5, - [(0, 0), (0, 5), (0, 10)]] + [ + 'Line needs additional points', + (0, 0), + (10, 10), + 3, + [(0, 0), (2, 2), (4, 4), (6, 6), (8, 8), (10, 10)], + ], + ['Resolution bigger than line', (0, 0), (1, 1), 2, [(0, 0), (1, 1)]], + [ + 'Line flat in one dimension', + (0, 0), + (0, 10), + 5, + [(0, 0), (0, 5), (0, 10)], + ], ] for description, point_one, point_two, resolution, expected_output in test_args: with self.subTest(description): - self.assertListEqual(get_resolved_line(point_one, point_two, - resolution), - expected_output) + self.assertListEqual( + get_resolved_line(point_one, point_two, resolution), expected_output + ) def test_get_x_y_extents_from_geographic_points(self): - """ Ensure that a list of coordinates is transformed to a specified - projection, and that the expected extents in the projected x and y - dimensions are returned. + """Ensure that a list of coordinates is transformed to a specified + projection, and that the expected extents in the projected x and y + dimensions are returned. 
""" points = [(-180, 75), (-90, 75), (0, 75), (90, 75)] crs = CRS.from_epsg(6931) - expected_x_y_extents = {'x_min': -1670250.0136418417, - 'x_max': 1670250.0136418417, - 'y_min': -1670250.0136418417, - 'y_max': 1670250.0136418417} + expected_x_y_extents = { + 'x_min': -1670250.0136418417, + 'x_max': 1670250.0136418417, + 'y_min': -1670250.0136418417, + 'y_max': 1670250.0136418417, + } self.assertDictEqual( - get_x_y_extents_from_geographic_points(points, crs), - expected_x_y_extents + get_x_y_extents_from_geographic_points(points, crs), expected_x_y_extents ) diff --git a/tests/unit/test_spatial.py b/tests/unit/test_spatial.py index ea53250..9f55ad4 100644 --- a/tests/unit/test_spatial.py +++ b/tests/unit/test_spatial.py @@ -11,20 +11,23 @@ import numpy as np from hoss.bbox_utilities import BBox -from hoss.spatial import (get_bounding_box_longitudes, - get_geographic_index_range, - get_projected_x_y_index_ranges, - get_longitude_in_grid, - get_spatial_index_ranges) +from hoss.spatial import ( + get_bounding_box_longitudes, + get_geographic_index_range, + get_projected_x_y_index_ranges, + get_longitude_in_grid, + get_spatial_index_ranges, +) class TestSpatial(TestCase): - """ A class for testing functions in the hoss.spatial module. """ + """A class for testing functions in the hoss.spatial module.""" + @classmethod def setUpClass(cls): cls.varinfo = VarInfoFromDmr( 'tests/data/rssmif16d_example.dmr', - config_file='tests/data/test_subsetter_config.json' + config_file='tests/data/test_subsetter_config.json', ) cls.test_dir = 'tests/output' @@ -35,55 +38,53 @@ def tearDown(self): rmtree(self.test_dir) def test_get_spatial_index_ranges_projected(self): - """ Ensure that correct index ranges can be calculated for an ABoVE - TVPRM granule. This granule has variables that use a grid with an - Albers Conic Equal Area projection, in the Alaska region. + """Ensure that correct index ranges can be calculated for an ABoVE + TVPRM granule. This granule has variables that use a grid with an + Albers Conic Equal Area projection, in the Alaska region. """ harmony_message = Message({'subset': {'bbox': [-160, 68, -145, 70]}}) above_varinfo = VarInfoFromDmr('tests/data/ABoVE_TVPRM_example.dmr') self.assertDictEqual( - get_spatial_index_ranges({'/NEE', '/x', '/y', '/time'}, - above_varinfo, - 'tests/data/ABoVE_TVPRM_prefetch.nc4', - harmony_message), - {'/x': (37, 56), '/y': (7, 26)} + get_spatial_index_ranges( + {'/NEE', '/x', '/y', '/time'}, + above_varinfo, + 'tests/data/ABoVE_TVPRM_prefetch.nc4', + harmony_message, + ), + {'/x': (37, 56), '/y': (7, 26)}, ) def test_get_spatial_index_ranges_geographic(self): - """ Ensure that correct index ranges can be calculated for: + """Ensure that correct index ranges can be calculated for: - - Latitude dimensions - - Longitude dimensions (continuous ranges) - - Longitude dimensions (bounding box crossing grid edge) - - Latitude dimension (descending) - - Longitude dimension (descending, not crossing grid edge) - - Values that are exactly halfway between pixels. + - Latitude dimensions + - Longitude dimensions (continuous ranges) + - Longitude dimensions (bounding box crossing grid edge) + - Latitude dimension (descending) + - Longitude dimension (descending, not crossing grid edge) + - Values that are exactly halfway between pixels. - This test will use the valid range of the RSSMIF16D collection, - such that 0 ≤ longitude (degrees east) ≤ 360. + This test will use the valid range of the RSSMIF16D collection, + such that 0 ≤ longitude (degrees east) ≤ 360. 
""" test_file_name = f'{self.test_dir}/test.nc' - harmony_message_ints = Message({ - 'subset': {'bbox': [160, 45, 200, 85]} - }) - harmony_message_floats = Message({ - 'subset': {'bbox': [160.1, 44.9, 200.1, 84.9]} - }) + harmony_message_ints = Message({'subset': {'bbox': [160, 45, 200, 85]}}) + harmony_message_floats = Message( + {'subset': {'bbox': [160.1, 44.9, 200.1, 84.9]}} + ) with Dataset(test_file_name, 'w', format='NETCDF4') as test_file: test_file.createDimension('latitude', size=180) test_file.createDimension('longitude', size=360) - test_file.createVariable('latitude', float, - dimensions=('latitude', )) + test_file.createVariable('latitude', float, dimensions=('latitude',)) test_file['latitude'][:] = np.linspace(-89.5, 89.5, 180) test_file['latitude'].setncatts({'units': 'degrees_north'}) - test_file.createVariable('longitude', float, - dimensions=('longitude', )) + test_file.createVariable('longitude', float, dimensions=('longitude',)) test_file['longitude'][:] = np.linspace(0.5, 359.5, 360) test_file['longitude'].setncatts({'units': 'degrees_east'}) @@ -93,9 +94,10 @@ def test_get_spatial_index_ranges_geographic(self): # latitude[174] = 84.5, latitude[175] = 85.5: # Northern extent = 85 => index = 174 (max index so round down) self.assertDictEqual( - get_spatial_index_ranges({'/latitude'}, self.varinfo, - test_file_name, harmony_message_ints), - {'/latitude': (135, 174)} + get_spatial_index_ranges( + {'/latitude'}, self.varinfo, test_file_name, harmony_message_ints + ), + {'/latitude': (135, 174)}, ) with self.subTest('Latitude dimension, not halfway between pixels'): @@ -104,10 +106,10 @@ def test_get_spatial_index_ranges_geographic(self): # latitude[174] = 84.5, latitude[175] = 85.5: # Northern extent = 84.9 => index = 174 self.assertDictEqual( - get_spatial_index_ranges({'/latitude'}, self.varinfo, - test_file_name, - harmony_message_floats), - {'/latitude': (134, 174)} + get_spatial_index_ranges( + {'/latitude'}, self.varinfo, test_file_name, harmony_message_floats + ), + {'/latitude': (134, 174)}, ) with self.subTest('Longitude dimension, bounding box within grid'): @@ -116,9 +118,10 @@ def test_get_spatial_index_ranges_geographic(self): # longitude[199] = 199.5, longitude[200] = 200.5: # Eastern extent = 200 => index = 199 (max index so round down) self.assertDictEqual( - get_spatial_index_ranges({'/longitude'}, self.varinfo, - test_file_name, harmony_message_ints), - {'/longitude': (160, 199)} + get_spatial_index_ranges( + {'/longitude'}, self.varinfo, test_file_name, harmony_message_ints + ), + {'/longitude': (160, 199)}, ) with self.subTest('Longitude, bounding box crosses grid edge'): @@ -126,27 +129,26 @@ def test_get_spatial_index_ranges_geographic(self): # Western longitude = -20 => 340 => index = 340 (min index, so round up) # longitude[19] = 19.5, longitude[20] = 20.5: # Eastern longitude = 20 => index 19 (max index, so round down) - harmony_message_crossing = Message({ - 'subset': {'bbox': [-20, 45, 20, 85]} - }) + harmony_message_crossing = Message({'subset': {'bbox': [-20, 45, 20, 85]}}) self.assertDictEqual( - get_spatial_index_ranges({'/longitude'}, self.varinfo, - test_file_name, - harmony_message_crossing), - {'/longitude': (340, 19)} + get_spatial_index_ranges( + {'/longitude'}, + self.varinfo, + test_file_name, + harmony_message_crossing, + ), + {'/longitude': (340, 19)}, ) with Dataset(test_file_name, 'w', format='NETCDF4') as test_file: test_file.createDimension('latitude', size=180) test_file.createDimension('longitude', size=360) - 
test_file.createVariable('latitude', float, - dimensions=('latitude', )) + test_file.createVariable('latitude', float, dimensions=('latitude',)) test_file['latitude'][:] = np.linspace(89.5, -89.5, 180) test_file['latitude'].setncatts({'units': 'degrees_north'}) - test_file.createVariable('longitude', float, - dimensions=('longitude', )) + test_file.createVariable('longitude', float, dimensions=('longitude',)) test_file['longitude'][:] = np.linspace(359.5, 0.5, 360) test_file['longitude'].setncatts({'units': 'degrees_east'}) @@ -156,10 +158,13 @@ def test_get_spatial_index_ranges_geographic(self): # longitude[159] = 200.5, longitude[160] = 199.5, lon = 200.1 => 159 # longitude[199] = 160.5, longitude[200] = 159.5, lon = 160.1 => 199 self.assertDictEqual( - get_spatial_index_ranges({'/latitude', '/longitude'}, - self.varinfo, test_file_name, - harmony_message_floats), - {'/latitude': (5, 45), '/longitude': (159, 199)} + get_spatial_index_ranges( + {'/latitude', '/longitude'}, + self.varinfo, + test_file_name, + harmony_message_floats, + ), + {'/latitude': (5, 45), '/longitude': (159, 199)}, ) with self.subTest('Descending dimensions, halfway between pixels'): @@ -168,23 +173,27 @@ def test_get_spatial_index_ranges_geographic(self): # longitude[159] = 200.5, longitude[160] = 199.5, lon = 200 => index = 160 # longitude[199] = 160.5, longitude[200] = 159.5, lon = 160 => index = 199 self.assertDictEqual( - get_spatial_index_ranges({'/latitude', '/longitude'}, - self.varinfo, test_file_name, - harmony_message_ints), - {'/latitude': (5, 44), '/longitude': (160, 199)} + get_spatial_index_ranges( + {'/latitude', '/longitude'}, + self.varinfo, + test_file_name, + harmony_message_ints, + ), + {'/latitude': (5, 44), '/longitude': (160, 199)}, ) @patch('hoss.spatial.get_dimension_index_range') @patch('hoss.spatial.get_projected_x_y_extents') - def test_get_projected_x_y_index_ranges(self, mock_get_x_y_extents, - mock_get_dimension_index_range): - """ Ensure that x and y index ranges are only requested when there are - projected grid dimensions, and the values have not already been - calculated. + def test_get_projected_x_y_index_ranges( + self, mock_get_x_y_extents, mock_get_dimension_index_range + ): + """Ensure that x and y index ranges are only requested when there are + projected grid dimensions, and the values have not already been + calculated. - The example used in this test is for the ABoVE TVPRM collection, - which uses an Albers Conical Equal Area CRS for a projected grid, - with data in Alaska. + The example used in this test is for the ABoVE TVPRM collection, + which uses an Albers Conical Equal Area CRS for a projected grid, + with data in Alaska. 
""" above_varinfo = VarInfoFromDmr('tests/data/ABoVE_TVPRM_example.dmr') @@ -192,21 +201,27 @@ def test_get_projected_x_y_index_ranges(self, mock_get_x_y_extents, expected_index_ranges = {'/x': (37, 56), '/y': (7, 26)} bbox = BBox(-160, 68, -145, 70) - crs = CRS.from_cf({'false_easting': 0.0, - 'false_northing': 0.0, - 'latitude_of_projection_origin': 40.0, - 'longitude_of_central_meridian': -96.0, - 'standard_parallel': [50.0, 70.0], - 'long_name': 'CRS definition', - 'longitude_of_prime_meridian': 0.0, - 'semi_major_axis': 6378137.0, - 'inverse_flattening': 298.257222101, - 'grid_mapping_name': 'albers_conical_equal_area'}) - - x_y_extents = {'x_min': -2273166.953240025, - 'x_max': -1709569.3224678137, - 'y_min': 3832621.3156695124, - 'y_max': 4425654.159834823} + crs = CRS.from_cf( + { + 'false_easting': 0.0, + 'false_northing': 0.0, + 'latitude_of_projection_origin': 40.0, + 'longitude_of_central_meridian': -96.0, + 'standard_parallel': [50.0, 70.0], + 'long_name': 'CRS definition', + 'longitude_of_prime_meridian': 0.0, + 'semi_major_axis': 6378137.0, + 'inverse_flattening': 298.257222101, + 'grid_mapping_name': 'albers_conical_equal_area', + } + ) + + x_y_extents = { + 'x_min': -2273166.953240025, + 'x_max': -1709569.3224678137, + 'y_min': 3832621.3156695124, + 'y_max': 4425654.159834823, + } mock_get_x_y_extents.return_value = x_y_extents @@ -216,17 +231,17 @@ def test_get_projected_x_y_index_ranges(self, mock_get_x_y_extents, with self.subTest('Projected grid gets expected dimension ranges'): with Dataset(above_file_path, 'r') as above_prefetch: self.assertDictEqual( - get_projected_x_y_index_ranges('/NEE', above_varinfo, - above_prefetch, {}, - bounding_box=bbox), - expected_index_ranges + get_projected_x_y_index_ranges( + '/NEE', above_varinfo, above_prefetch, {}, bounding_box=bbox + ), + expected_index_ranges, ) # Assertions don't like direct comparisons of numpy arrays, so # have to extract the call arguments and compare those - mock_get_x_y_extents.assert_called_once_with(ANY, ANY, crs, - shape_file=None, - bounding_box=bbox) + mock_get_x_y_extents.assert_called_once_with( + ANY, ANY, crs, shape_file=None, bounding_box=bbox + ) actual_x_values = mock_get_x_y_extents.call_args_list[0][0][0] actual_y_values = mock_get_x_y_extents.call_args_list[0][0][1] @@ -235,19 +250,29 @@ def test_get_projected_x_y_index_ranges(self, mock_get_x_y_extents, assert_array_equal(actual_y_values, above_prefetch['/y'][:]) self.assertEqual(mock_get_dimension_index_range.call_count, 2) - mock_get_dimension_index_range.assert_has_calls([ - call(ANY, x_y_extents['x_min'], x_y_extents['x_max'], - bounds_values=None), - call(ANY, x_y_extents['y_min'], x_y_extents['y_max'], - bounds_values=None) - ]) + mock_get_dimension_index_range.assert_has_calls( + [ + call( + ANY, + x_y_extents['x_min'], + x_y_extents['x_max'], + bounds_values=None, + ), + call( + ANY, + x_y_extents['y_min'], + x_y_extents['y_max'], + bounds_values=None, + ), + ] + ) assert_array_equal( mock_get_dimension_index_range.call_args_list[0][0][0], - above_prefetch['/x'][:] + above_prefetch['/x'][:], ) assert_array_equal( mock_get_dimension_index_range.call_args_list[1][0][0], - above_prefetch['/y'][:] + above_prefetch['/y'][:], ) mock_get_x_y_extents.reset_mock() @@ -256,10 +281,10 @@ def test_get_projected_x_y_index_ranges(self, mock_get_x_y_extents, with self.subTest('Non projected grid not try to get index ranges'): with Dataset(above_file_path, 'r') as above_prefetch: self.assertDictEqual( - get_projected_x_y_index_ranges('/x', 
above_varinfo,
-                                                   above_prefetch, {},
-                                                   bounding_box=bbox),
-                    {}
+                    get_projected_x_y_index_ranges(
+                        '/x', above_varinfo, above_prefetch, {}, bounding_box=bbox
+                    ),
+                    {},
                 )

             mock_get_x_y_extents.assert_not_called()
@@ -268,11 +293,14 @@ def test_get_projected_x_y_index_ranges(self, mock_get_x_y_extents,
         with self.subTest('Function does not rederive known index ranges'):
             with Dataset(above_file_path, 'r') as above_prefetch:
                 self.assertDictEqual(
-                    get_projected_x_y_index_ranges('/NEE', above_varinfo,
-                                                   above_prefetch,
-                                                   expected_index_ranges,
-                                                   bounding_box=bbox),
-                    {}
+                    get_projected_x_y_index_ranges(
+                        '/NEE',
+                        above_varinfo,
+                        above_prefetch,
+                        expected_index_ranges,
+                        bounding_box=bbox,
+                    ),
+                    {},
                 )

             mock_get_x_y_extents.assert_not_called()
@@ -280,11 +308,11 @@ def test_get_projected_x_y_index_ranges(self, mock_get_x_y_extents,

     @patch('hoss.spatial.get_dimension_index_range')
     def test_get_geographic_index_range(self, mock_get_dimension_index_range):
-        """ Ensure both a latitude and longitude variable is correctly handled.
+        """Ensure that both latitude and longitude variables are correctly handled.

-            The numpy arrays cannot be compared directly as part of the
-            `unittest.mock.Mock.assert_called_once_with`, and so require the
-            use of `numpy.testing.assert_array_equal`.
+        The numpy arrays cannot be compared directly as part of
+        `unittest.mock.Mock.assert_called_once_with`, and so require the
+        use of `numpy.testing.assert_array_equal`.

         """
         bounding_box = BBox(10, 20, 30, 40)
@@ -292,116 +320,122 @@ def test_get_geographic_index_range(self, mock_get_dimension_index_range):

         with self.subTest('Latitude variable'):
             with Dataset('tests/data/f16_ssmis_lat_lon.nc', 'r') as prefetch:
-                self.assertTupleEqual(get_geographic_index_range('/latitude',
-                                                                 self.varinfo,
-                                                                 prefetch,
-                                                                 bounding_box),
-                                      (1, 2))
+                self.assertTupleEqual(
+                    get_geographic_index_range(
+                        '/latitude', self.varinfo, prefetch, bounding_box
+                    ),
+                    (1, 2),
+                )

                 mock_get_dimension_index_range.assert_called_once_with(
-                    ANY, bounding_box.south, bounding_box.north,
-                    bounds_values=None
+                    ANY, bounding_box.south, bounding_box.north, bounds_values=None
                 )
                 assert_array_equal(
                     mock_get_dimension_index_range.call_args_list[0][0][0],
-                    prefetch['/latitude'][:]
+                    prefetch['/latitude'][:],
                 )

             mock_get_dimension_index_range.reset_mock()

         with self.subTest('Longitude variable'):
             with Dataset('tests/data/f16_ssmis_lat_lon.nc', 'r') as prefetch:
-                self.assertEqual(get_geographic_index_range('/longitude',
-                                                            self.varinfo,
-                                                            prefetch,
-                                                            bounding_box),
-                                 (1, 2))
+                self.assertEqual(
+                    get_geographic_index_range(
+                        '/longitude', self.varinfo, prefetch, bounding_box
+                    ),
+                    (1, 2),
+                )

                 mock_get_dimension_index_range.assert_called_once_with(
-                    ANY, bounding_box.west, bounding_box.east,
-                    bounds_values=None
+                    ANY, bounding_box.west, bounding_box.east, bounds_values=None
                 )
                 assert_array_equal(
                     mock_get_dimension_index_range.call_args_list[0][0][0],
-                    prefetch['/longitude'][:]
+                    prefetch['/longitude'][:],
                 )

             mock_get_dimension_index_range.reset_mock()

     @patch('hoss.spatial.get_dimension_index_range')
-    def test_get_geographic_index_range_bounds(self,
-                                               mock_get_dimension_index_range):
-        """ Ensure the expected bounds values can be extracted for a variable
-            that has the appropriate metadata, and that these bounds values are
-            used in the call to `get_dimension_index_range`.
+ def test_get_geographic_index_range_bounds(self, mock_get_dimension_index_range): + """Ensure the expected bounds values can be extracted for a variable + that has the appropriate metadata, and that these bounds values are + used in the call to `get_dimension_index_range`. """ - gpm_varinfo = VarInfoFromDmr('tests/data/GPM_3IMERGHH_example.dmr', - short_name='GPM_3IMERGHH') + gpm_varinfo = VarInfoFromDmr( + 'tests/data/GPM_3IMERGHH_example.dmr', short_name='GPM_3IMERGHH' + ) bounding_box = BBox(10, 20, 30, 40) mock_get_dimension_index_range.return_value = (1, 2) with self.subTest('Latitude variable with bounds'): with Dataset('tests/data/GPM_3IMERGHH_prefetch.nc4', 'r') as prefetch: - self.assertEqual(get_geographic_index_range('/Grid/lat', - gpm_varinfo, - prefetch, - bounding_box), - (1, 2)) + self.assertEqual( + get_geographic_index_range( + '/Grid/lat', gpm_varinfo, prefetch, bounding_box + ), + (1, 2), + ) mock_get_dimension_index_range.assert_called_once_with( - ANY, bounding_box.south, bounding_box.north, - bounds_values=ANY + ANY, bounding_box.south, bounding_box.north, bounds_values=ANY ) assert_array_equal( mock_get_dimension_index_range.call_args_list[0][0][0], - prefetch['/Grid/lat'][:] + prefetch['/Grid/lat'][:], ) assert_array_equal( - mock_get_dimension_index_range.call_args_list[0][1]['bounds_values'], - prefetch['/Grid/lat_bnds'][:] + mock_get_dimension_index_range.call_args_list[0][1][ + 'bounds_values' + ], + prefetch['/Grid/lat_bnds'][:], ) mock_get_dimension_index_range.reset_mock() with self.subTest('Longitude variable with bounds'): with Dataset('tests/data/GPM_3IMERGHH_prefetch.nc4', 'r') as prefetch: - self.assertEqual(get_geographic_index_range('/Grid/lon', - gpm_varinfo, - prefetch, - bounding_box), - (1, 2)) + self.assertEqual( + get_geographic_index_range( + '/Grid/lon', gpm_varinfo, prefetch, bounding_box + ), + (1, 2), + ) mock_get_dimension_index_range.assert_called_once_with( - ANY, bounding_box.west, bounding_box.east, - bounds_values=ANY + ANY, bounding_box.west, bounding_box.east, bounds_values=ANY ) assert_array_equal( mock_get_dimension_index_range.call_args_list[0][0][0], - prefetch['/Grid/lon'][:] + prefetch['/Grid/lon'][:], ) assert_array_equal( - mock_get_dimension_index_range.call_args_list[0][1]['bounds_values'], - prefetch['/Grid/lon_bnds'][:] + mock_get_dimension_index_range.call_args_list[0][1][ + 'bounds_values' + ], + prefetch['/Grid/lon_bnds'][:], ) mock_get_dimension_index_range.reset_mock() def test_get_bounding_box_longitudes(self): - """ Ensure the western and eastern extents of a bounding box are - converted to the correct range according to the range of the - longitude variable. + """Ensure the western and eastern extents of a bounding box are + converted to the correct range according to the range of the + longitude variable. - If the variable range is -180 ≤ longitude (degrees) < 180, then the - bounding box values should remain unconverted. If the variable - range is 0 ≤ longitude (degrees) < 360, then the bounding box - values should be converted to this range. + If the variable range is -180 ≤ longitude (degrees) < 180, then the + bounding box values should remain unconverted. If the variable + range is 0 ≤ longitude (degrees) < 360, then the bounding box + values should be converted to this range. 
""" bounding_box = BBox(-150, -15, -120, 15) - test_args = [['-180 ≤ lon (deg) < 180', -180, 180, [-150, -120]], - ['0 ≤ lon (deg) < 360', 0, 360, [210, 240]]] + test_args = [ + ['-180 ≤ lon (deg) < 180', -180, 180, [-150, -120]], + ['0 ≤ lon (deg) < 360', 0, 360, [210, 240]], + ] for description, valid_min, valid_max, results in test_args: with self.subTest(description): @@ -411,24 +445,27 @@ def test_get_bounding_box_longitudes(self): partially_wrapped_longitudes = np.linspace(-180, 179.375, 576) - test_args = [['W = -180, E = -140', -180, -140, [-180, -140]], - ['W = 0, E = 179.6875', 0, 179.6875, [0, 179.6875]], - ['W = 179.688, E = 180', 179.688, 180, [-180.312, -180]]] + test_args = [ + ['W = -180, E = -140', -180, -140, [-180, -140]], + ['W = 0, E = 179.6875', 0, 179.6875, [0, 179.6875]], + ['W = 179.688, E = 180', 179.688, 180, [-180.312, -180]], + ] for description, bbox_west, bbox_east, expected_output in test_args: with self.subTest(f'Partial wrapping: {description}'): input_bounding_box = BBox(bbox_west, -15, bbox_east, 15) self.assertListEqual( - get_bounding_box_longitudes(input_bounding_box, - partially_wrapped_longitudes), - expected_output + get_bounding_box_longitudes( + input_bounding_box, partially_wrapped_longitudes + ), + expected_output, ) def test_get_longitude_in_grid(self): - """ Ensure a longitude value is retrieved, where possible, that is - within the given grid. For example, if longitude = -10 degrees east - and the grid 0 ≤ longitude (degrees east) ≤ 360, the resulting - value should be 190 degrees east. + """Ensure a longitude value is retrieved, where possible, that is + within the given grid. For example, if longitude = -10 degrees east + and the grid 0 ≤ longitude (degrees east) ≤ 360, the resulting + value should be 190 degrees east. """ rss_min, rss_max = (0, 360) @@ -461,5 +498,5 @@ def test_get_longitude_in_grid(self): with self.subTest(test): self.assertEqual( get_longitude_in_grid(grid_min, grid_max, input_lon), - expected_output + expected_output, ) diff --git a/tests/unit/test_subset.py b/tests/unit/test_subset.py index bedbb33..51699bc 100644 --- a/tests/unit/test_subset.py +++ b/tests/unit/test_subset.py @@ -10,16 +10,21 @@ from varinfo import VarInfoFromDmr import numpy as np -from hoss.subset import (fill_variables, fill_variable, - get_required_variables, get_varinfo, subset_granule) +from hoss.subset import ( + fill_variables, + fill_variable, + get_required_variables, + get_varinfo, + subset_granule, +) class TestSubset(TestCase): - """ Test the module that performs subsetting on a single granule. """ + """Test the module that performs subsetting on a single granule.""" @classmethod def setUpClass(cls): - """ Define test assets that can be shared between tests. 
""" + """Define test assets that can be shared between tests.""" cls.access_token = 'access' cls.bounding_box = [40, -30, 50, -20] cls.config = config(validate=False) @@ -27,23 +32,28 @@ def setUpClass(cls): cls.granule_url = 'https://harmony.earthdata.nasa.gov/bucket/rssmif16d' cls.logger = Logger('tests') cls.output_path = 'f16_ssmis_subset.nc4' - cls.required_variables = {'/latitude', '/longitude', '/time', - '/rainfall_rate'} - cls.harmony_source = Source({ - 'collection': 'C1234567890-PROV', - 'shortName': cls.collection_short_name, - 'variables': [{'id': 'V1238395077-EEDTEST', - 'name': '/rainfall_rate', - 'fullPath': '/rainfall_rate'}] - }) + cls.required_variables = {'/latitude', '/longitude', '/time', '/rainfall_rate'} + cls.harmony_source = Source( + { + 'collection': 'C1234567890-PROV', + 'shortName': cls.collection_short_name, + 'variables': [ + { + 'id': 'V1238395077-EEDTEST', + 'name': '/rainfall_rate', + 'fullPath': '/rainfall_rate', + } + ], + } + ) cls.varinfo = VarInfoFromDmr('tests/data/rssmif16d_example.dmr') def setUp(self): - """ Define test assets that should not be shared between tests. """ + """Define test assets that should not be shared between tests.""" self.output_dir = mkdtemp() def tearDown(self): - """ Clean-up to perform between every test. """ + """Clean-up to perform between every test.""" shutil.rmtree(self.output_dir) @patch('hoss.subset.fill_variables') @@ -53,44 +63,57 @@ def tearDown(self): @patch('hoss.subset.get_spatial_index_ranges') @patch('hoss.subset.prefetch_dimension_variables') @patch('hoss.subset.get_varinfo') - def test_subset_granule_not_geo(self, mock_get_varinfo, - mock_prefetch_dimensions, - mock_get_spatial_index_ranges, - mock_get_temporal_index_ranges, - mock_get_requested_index_ranges, - mock_get_opendap_nc4, mock_fill_variables): - """ Ensure a request to extract only a variable subset runs without - error. Because no bounding box and no temporal range is specified - in this request, the prefetch dimension utility functionality, the - HOSS functionality in `hoss.spatial.py` and the functionality in - `hoss.temporal.py` should not be called. + def test_subset_granule_not_geo( + self, + mock_get_varinfo, + mock_prefetch_dimensions, + mock_get_spatial_index_ranges, + mock_get_temporal_index_ranges, + mock_get_requested_index_ranges, + mock_get_opendap_nc4, + mock_fill_variables, + ): + """Ensure a request to extract only a variable subset runs without + error. Because no bounding box and no temporal range is specified + in this request, the prefetch dimension utility functionality, the + HOSS functionality in `hoss.spatial.py` and the functionality in + `hoss.temporal.py` should not be called. 
""" harmony_message = Message({'accessToken': self.access_token}) mock_get_varinfo.return_value = self.varinfo mock_get_opendap_nc4.return_value = self.output_path - output_path = subset_granule(self.granule_url, self.harmony_source, - self.output_dir, harmony_message, - self.logger, self.config) + output_path = subset_granule( + self.granule_url, + self.harmony_source, + self.output_dir, + harmony_message, + self.logger, + self.config, + ) self.assertEqual(output_path, self.output_path) - mock_get_varinfo.assert_called_once_with(self.granule_url, - self.output_dir, self.logger, - self.collection_short_name, - self.access_token, - self.config) - mock_get_opendap_nc4.assert_called_once_with(self.granule_url, - self.required_variables, - self.output_dir, - self.logger, - self.access_token, - self.config) - mock_fill_variables.assert_called_once_with(self.output_path, - self.varinfo, - self.required_variables, - {}) + mock_get_varinfo.assert_called_once_with( + self.granule_url, + self.output_dir, + self.logger, + self.collection_short_name, + self.access_token, + self.config, + ) + mock_get_opendap_nc4.assert_called_once_with( + self.granule_url, + self.required_variables, + self.output_dir, + self.logger, + self.access_token, + self.config, + ) + mock_fill_variables.assert_called_once_with( + self.output_path, self.varinfo, self.required_variables, {} + ) mock_prefetch_dimensions.assert_not_called() mock_get_spatial_index_ranges.assert_not_called() @@ -104,69 +127,88 @@ def test_subset_granule_not_geo(self, mock_get_varinfo, @patch('hoss.subset.get_spatial_index_ranges') @patch('hoss.subset.prefetch_dimension_variables') @patch('hoss.subset.get_varinfo') - def test_subset_granule_geo(self, mock_get_varinfo, - mock_prefetch_dimensions, - mock_get_spatial_index_ranges, - mock_get_temporal_index_ranges, - mock_get_requested_index_ranges, - mock_get_opendap_nc4, mock_fill_variables): - """ Ensure a request to extract both a variable and spatial subset runs - without error. Because a bounding box is specified in this request, - the prefetch dimension utility functionality and the HOSS - functionality in `hoss.spatial.py` should be called. However, - because there is no specified `temporal_range`, the functionality - in `hoss.temporal.py` should not be called. + def test_subset_granule_geo( + self, + mock_get_varinfo, + mock_prefetch_dimensions, + mock_get_spatial_index_ranges, + mock_get_temporal_index_ranges, + mock_get_requested_index_ranges, + mock_get_opendap_nc4, + mock_fill_variables, + ): + """Ensure a request to extract both a variable and spatial subset runs + without error. Because a bounding box is specified in this request, + the prefetch dimension utility functionality and the HOSS + functionality in `hoss.spatial.py` should be called. However, + because there is no specified `temporal_range`, the functionality + in `hoss.temporal.py` should not be called. 
""" - harmony_message = Message({'accessToken': self.access_token, - 'subset': {'bbox': self.bounding_box}}) + harmony_message = Message( + {'accessToken': self.access_token, 'subset': {'bbox': self.bounding_box}} + ) index_ranges = {'/latitude': (240, 279), '/longitude': (160, 199)} prefetch_path = 'prefetch.nc4' - variables_with_ranges = {'/latitude[240:279]', '/longitude[160:199]', - '/rainfall_rate[][240:279][160:199]', '/time'} + variables_with_ranges = { + '/latitude[240:279]', + '/longitude[160:199]', + '/rainfall_rate[][240:279][160:199]', + '/time', + } mock_get_varinfo.return_value = self.varinfo mock_prefetch_dimensions.return_value = prefetch_path mock_get_spatial_index_ranges.return_value = index_ranges mock_get_opendap_nc4.return_value = self.output_path - output_path = subset_granule(self.granule_url, self.harmony_source, - self.output_dir, harmony_message, - self.logger, self.config) + output_path = subset_granule( + self.granule_url, + self.harmony_source, + self.output_dir, + harmony_message, + self.logger, + self.config, + ) self.assertEqual(output_path, self.output_path) - mock_get_varinfo.assert_called_once_with(self.granule_url, - self.output_dir, self.logger, - self.collection_short_name, - self.access_token, - self.config) - - mock_prefetch_dimensions.assert_called_once_with(self.granule_url, - self.varinfo, - self.required_variables, - self.output_dir, - self.logger, - self.access_token, - self.config) + mock_get_varinfo.assert_called_once_with( + self.granule_url, + self.output_dir, + self.logger, + self.collection_short_name, + self.access_token, + self.config, + ) + + mock_prefetch_dimensions.assert_called_once_with( + self.granule_url, + self.varinfo, + self.required_variables, + self.output_dir, + self.logger, + self.access_token, + self.config, + ) mock_get_temporal_index_ranges.assert_not_called() mock_get_spatial_index_ranges.assert_called_once_with( - self.required_variables, self.varinfo, prefetch_path, - harmony_message, None + self.required_variables, self.varinfo, prefetch_path, harmony_message, None ) mock_get_requested_index_ranges.assert_not_called() - mock_get_opendap_nc4.assert_called_once_with(self.granule_url, - variables_with_ranges, - self.output_dir, - self.logger, - self.access_token, - self.config) - - mock_fill_variables.assert_called_once_with(self.output_path, - self.varinfo, - self.required_variables, - index_ranges) + mock_get_opendap_nc4.assert_called_once_with( + self.granule_url, + variables_with_ranges, + self.output_dir, + self.logger, + self.access_token, + self.config, + ) + + mock_fill_variables.assert_called_once_with( + self.output_path, self.varinfo, self.required_variables, index_ranges + ) @patch('hoss.subset.fill_variables') @patch('hoss.subset.get_opendap_nc4') @@ -175,51 +217,67 @@ def test_subset_granule_geo(self, mock_get_varinfo, @patch('hoss.subset.get_spatial_index_ranges') @patch('hoss.subset.prefetch_dimension_variables') @patch('hoss.subset.get_varinfo') - def test_subset_non_geo_no_variables(self, mock_get_varinfo, - mock_prefetch_dimensions, - mock_get_spatial_index_ranges, - mock_get_temporal_index_ranges, - mock_get_requested_index_ranges, - mock_get_opendap_nc4, - mock_fill_variables): - """ Ensure a request without a bounding box and without any specified - variables will produce a request to OPeNDAP that does not specify - any variables. This will default to retrieving the full NetCDF-4 - file from OPeNDAP. 
The prefetch dimension functionality and the - HOSS functionality in both `hoss.spatial.py` and - `hoss.temporal.py` should not be called. + def test_subset_non_geo_no_variables( + self, + mock_get_varinfo, + mock_prefetch_dimensions, + mock_get_spatial_index_ranges, + mock_get_temporal_index_ranges, + mock_get_requested_index_ranges, + mock_get_opendap_nc4, + mock_fill_variables, + ): + """Ensure a request without a bounding box and without any specified + variables will produce a request to OPeNDAP that does not specify + any variables. This will default to retrieving the full NetCDF-4 + file from OPeNDAP. The prefetch dimension functionality and the + HOSS functionality in both `hoss.spatial.py` and + `hoss.temporal.py` should not be called. """ - harmony_source = Source({'collection': 'C1234567890-EEDTEST', - 'shortName': self.collection_short_name}) + harmony_source = Source( + { + 'collection': 'C1234567890-EEDTEST', + 'shortName': self.collection_short_name, + } + ) harmony_message = Message({'accessToken': self.access_token}) expected_variables = set() index_ranges = {} mock_get_varinfo.return_value = self.varinfo mock_get_opendap_nc4.return_value = self.output_path - output_path = subset_granule(self.granule_url, harmony_source, - self.output_dir, harmony_message, - self.logger, self.config) + output_path = subset_granule( + self.granule_url, + harmony_source, + self.output_dir, + harmony_message, + self.logger, + self.config, + ) self.assertEqual(output_path, self.output_path) - mock_get_varinfo.assert_called_once_with(self.granule_url, - self.output_dir, self.logger, - self.collection_short_name, - self.access_token, - self.config) - - mock_get_opendap_nc4.assert_called_once_with(self.granule_url, - expected_variables, - self.output_dir, - self.logger, - self.access_token, - self.config) - - mock_fill_variables.assert_called_once_with(self.output_path, - self.varinfo, - expected_variables, - index_ranges) + mock_get_varinfo.assert_called_once_with( + self.granule_url, + self.output_dir, + self.logger, + self.collection_short_name, + self.access_token, + self.config, + ) + + mock_get_opendap_nc4.assert_called_once_with( + self.granule_url, + expected_variables, + self.output_dir, + self.logger, + self.access_token, + self.config, + ) + + mock_fill_variables.assert_called_once_with( + self.output_path, self.varinfo, expected_variables, index_ranges + ) mock_prefetch_dimensions.assert_not_called() mock_get_spatial_index_ranges.assert_not_called() @@ -233,42 +291,58 @@ def test_subset_non_geo_no_variables(self, mock_get_varinfo, @patch('hoss.subset.get_spatial_index_ranges') @patch('hoss.subset.prefetch_dimension_variables') @patch('hoss.subset.get_varinfo') - def test_subset_geo_no_variables(self, mock_get_varinfo, - mock_prefetch_dimensions, - mock_get_spatial_index_ranges, - mock_get_temporal_index_ranges, - mock_get_requested_index_ranges, - mock_get_opendap_nc4, - mock_fill_variables): - """ Ensure a request with a bounding box, but without any specified - variables will consider all science and metadata variables as the - requested variables. This situation will arise if a user requests - all variables. HOSS will need to explicitly list all the variables - it retrieves as the DAP4 constraint expression will need to specify - index ranges for all geographically gridded variables. Both the - prefetch dimension functionality and the HOSS functionality in - `hoss.spatial.py` should be called. 
However, because there is no - specified `temporal_range`, the functionality in `hoss.temporal.py` - should not be called. + def test_subset_geo_no_variables( + self, + mock_get_varinfo, + mock_prefetch_dimensions, + mock_get_spatial_index_ranges, + mock_get_temporal_index_ranges, + mock_get_requested_index_ranges, + mock_get_opendap_nc4, + mock_fill_variables, + ): + """Ensure a request with a bounding box, but without any specified + variables will consider all science and metadata variables as the + requested variables. This situation will arise if a user requests + all variables. HOSS will need to explicitly list all the variables + it retrieves as the DAP4 constraint expression will need to specify + index ranges for all geographically gridded variables. Both the + prefetch dimension functionality and the HOSS functionality in + `hoss.spatial.py` should be called. However, because there is no + specified `temporal_range`, the functionality in `hoss.temporal.py` + should not be called. """ - harmony_source = Source({'collection': 'C1234567890-EEDTEST', - 'shortName': self.collection_short_name}) - harmony_message = Message({'accessToken': self.access_token, - 'subset': {'bbox': self.bounding_box}}) - expected_variables = {'/atmosphere_cloud_liquid_water_content', - '/atmosphere_water_vapor_content', - '/latitude', '/longitude', '/rainfall_rate', - '/sst_dtime', '/time', '/wind_speed'} + harmony_source = Source( + { + 'collection': 'C1234567890-EEDTEST', + 'shortName': self.collection_short_name, + } + ) + harmony_message = Message( + {'accessToken': self.access_token, 'subset': {'bbox': self.bounding_box}} + ) + expected_variables = { + '/atmosphere_cloud_liquid_water_content', + '/atmosphere_water_vapor_content', + '/latitude', + '/longitude', + '/rainfall_rate', + '/sst_dtime', + '/time', + '/wind_speed', + } index_ranges = {'/latitude': (240, 279), '/longitude': (160, 199)} prefetch_path = 'prefetch.nc4' variables_with_ranges = { '/atmosphere_cloud_liquid_water_content[][240:279][160:199]', '/atmosphere_water_vapor_content[][240:279][160:199]', - '/latitude[240:279]', '/longitude[160:199]', + '/latitude[240:279]', + '/longitude[160:199]', '/rainfall_rate[][240:279][160:199]', - '/sst_dtime[][240:279][160:199]', '/time', - '/wind_speed[][240:279][160:199]' + '/sst_dtime[][240:279][160:199]', + '/time', + '/wind_speed[][240:279][160:199]', } mock_get_varinfo.return_value = self.varinfo @@ -276,43 +350,53 @@ def test_subset_geo_no_variables(self, mock_get_varinfo, mock_get_spatial_index_ranges.return_value = index_ranges mock_get_opendap_nc4.return_value = self.output_path - output_path = subset_granule(self.granule_url, harmony_source, - self.output_dir, harmony_message, - self.logger, self.config) + output_path = subset_granule( + self.granule_url, + harmony_source, + self.output_dir, + harmony_message, + self.logger, + self.config, + ) self.assertEqual(output_path, self.output_path) - mock_get_varinfo.assert_called_once_with(self.granule_url, - self.output_dir, self.logger, - self.collection_short_name, - self.access_token, - self.config) - - mock_prefetch_dimensions.assert_called_once_with(self.granule_url, - self.varinfo, - expected_variables, - self.output_dir, - self.logger, - self.access_token, - self.config) + mock_get_varinfo.assert_called_once_with( + self.granule_url, + self.output_dir, + self.logger, + self.collection_short_name, + self.access_token, + self.config, + ) + + mock_prefetch_dimensions.assert_called_once_with( + self.granule_url, + self.varinfo, + 
expected_variables, + self.output_dir, + self.logger, + self.access_token, + self.config, + ) mock_get_temporal_index_ranges.assert_not_called() mock_get_spatial_index_ranges.assert_called_once_with( - expected_variables, self.varinfo, prefetch_path, harmony_message, - None + expected_variables, self.varinfo, prefetch_path, harmony_message, None ) mock_get_requested_index_ranges.assert_not_called() - mock_get_opendap_nc4.assert_called_once_with(self.granule_url, - variables_with_ranges, - self.output_dir, - self.logger, - self.access_token, - self.config) - - mock_fill_variables.assert_called_once_with(self.output_path, - self.varinfo, - expected_variables, - index_ranges) + mock_get_opendap_nc4.assert_called_once_with( + self.granule_url, + variables_with_ranges, + self.output_dir, + self.logger, + self.access_token, + self.config, + ) + + mock_fill_variables.assert_called_once_with( + self.output_path, self.varinfo, expected_variables, index_ranges + ) @patch('hoss.subset.fill_variables') @patch('hoss.subset.get_opendap_nc4') @@ -321,37 +405,54 @@ def test_subset_geo_no_variables(self, mock_get_varinfo, @patch('hoss.subset.get_spatial_index_ranges') @patch('hoss.subset.prefetch_dimension_variables') @patch('hoss.subset.get_varinfo') - def test_subset_non_variable_dimensions(self, mock_get_varinfo, - mock_prefetch_dimensions, - mock_get_spatial_index_ranges, - mock_get_temporal_index_ranges, - mock_get_requested_index_ranges, - mock_get_opendap_nc4, - mock_fill_variables): - """ Ensure a request with a bounding box, without specified variables, - will not include non-variable dimensions in the DAP4 constraint - expression of the final request to OPeNDAP - - In the GPM_3IMERGHH data, the specific dimensions that should not - be included in the required variables are `latv`, `lonv` and `nv`. - These are size-only dimensions for the `lat_bnds`, `lon_bnds` and - `time_bnds` variables. + def test_subset_non_variable_dimensions( + self, + mock_get_varinfo, + mock_prefetch_dimensions, + mock_get_spatial_index_ranges, + mock_get_temporal_index_ranges, + mock_get_requested_index_ranges, + mock_get_opendap_nc4, + mock_fill_variables, + ): + """Ensure a request with a bounding box, without specified variables, + will not include non-variable dimensions in the DAP4 constraint + expression of the final request to OPeNDAP + + In the GPM_3IMERGHH data, the specific dimensions that should not + be included in the required variables are `latv`, `lonv` and `nv`. + These are size-only dimensions for the `lat_bnds`, `lon_bnds` and + `time_bnds` variables. 
""" - harmony_source = Source({'collection': 'C1234567890-EEDTEST', - 'shortName': self.collection_short_name}) - harmony_message = Message({'accessToken': self.access_token, - 'subset': {'bbox': self.bounding_box}}) + harmony_source = Source( + { + 'collection': 'C1234567890-EEDTEST', + 'shortName': self.collection_short_name, + } + ) + harmony_message = Message( + {'accessToken': self.access_token, 'subset': {'bbox': self.bounding_box}} + ) url = 'https://harmony.earthdata.nasa.gov/bucket/GPM' varinfo = VarInfoFromDmr('tests/data/GPM_3IMERGHH_example.dmr') expected_variables = { - '/Grid/HQobservationTime', '/Grid/HQprecipitation', - '/Grid/HQprecipSource', '/Grid/IRkalmanFilterWeight', - '/Grid/IRprecipitation', '/Grid/lat', '/Grid/lat_bnds', - '/Grid/lon', '/Grid/lon_bnds', '/Grid/precipitationCal', - '/Grid/precipitationQualityIndex', '/Grid/precipitationUncal', - '/Grid/probabilityLiquidPrecipitation', '/Grid/randomError', - '/Grid/time', '/Grid/time_bnds' + '/Grid/HQobservationTime', + '/Grid/HQprecipitation', + '/Grid/HQprecipSource', + '/Grid/IRkalmanFilterWeight', + '/Grid/IRprecipitation', + '/Grid/lat', + '/Grid/lat_bnds', + '/Grid/lon', + '/Grid/lon_bnds', + '/Grid/precipitationCal', + '/Grid/precipitationQualityIndex', + '/Grid/precipitationUncal', + '/Grid/probabilityLiquidPrecipitation', + '/Grid/randomError', + '/Grid/time', + '/Grid/time_bnds', } prefetch_path = 'GPM_prefetch.nc' @@ -364,14 +465,17 @@ def test_subset_non_variable_dimensions(self, mock_get_varinfo, '/Grid/HQprecipSource[][2200:2299][600:699]', '/Grid/IRkalmanFilterWeight[][2200:2299][600:699]', '/Grid/IRprecipitation[][2200:2299][600:699]', - '/Grid/lat[600:699]', '/Grid/lat_bnds[600:699][]', - '/Grid/lon[2200:2299]', '/Grid/lon_bnds[2200:2299][]', + '/Grid/lat[600:699]', + '/Grid/lat_bnds[600:699][]', + '/Grid/lon[2200:2299]', + '/Grid/lon_bnds[2200:2299][]', '/Grid/precipitationCal[][2200:2299][600:699]', '/Grid/precipitationQualityIndex[][2200:2299][600:699]', '/Grid/precipitationUncal[][2200:2299][600:699]', '/Grid/probabilityLiquidPrecipitation[][2200:2299][600:699]', '/Grid/randomError[][2200:2299][600:699]', - '/Grid/time', '/Grid/time_bnds' + '/Grid/time', + '/Grid/time_bnds', } mock_get_varinfo.return_value = varinfo @@ -379,41 +483,53 @@ def test_subset_non_variable_dimensions(self, mock_get_varinfo, mock_get_spatial_index_ranges.return_value = index_ranges mock_get_opendap_nc4.return_value = expected_output_path - output_path = subset_granule(url, harmony_source, self.output_dir, - harmony_message, self.logger, self.config) + output_path = subset_granule( + url, + harmony_source, + self.output_dir, + harmony_message, + self.logger, + self.config, + ) self.assertEqual(output_path, expected_output_path) - mock_get_varinfo.assert_called_once_with(url, self.output_dir, - self.logger, - self.collection_short_name, - self.access_token, - self.config) - - mock_prefetch_dimensions.assert_called_once_with(url, varinfo, - expected_variables, - self.output_dir, - self.logger, - self.access_token, - self.config) + mock_get_varinfo.assert_called_once_with( + url, + self.output_dir, + self.logger, + self.collection_short_name, + self.access_token, + self.config, + ) + + mock_prefetch_dimensions.assert_called_once_with( + url, + varinfo, + expected_variables, + self.output_dir, + self.logger, + self.access_token, + self.config, + ) mock_get_temporal_index_ranges.assert_not_called() mock_get_spatial_index_ranges.assert_called_once_with( - expected_variables, varinfo, prefetch_path, harmony_message, - None + 
expected_variables, varinfo, prefetch_path, harmony_message, None
         )
         mock_get_requested_index_ranges.assert_not_called()

-        mock_get_opendap_nc4.assert_called_once_with(url,
-                                                     variables_with_ranges,
-                                                     self.output_dir,
-                                                     self.logger,
-                                                     self.access_token,
-                                                     self.config)
-
-        mock_fill_variables.assert_called_once_with(expected_output_path,
-                                                    varinfo,
-                                                    expected_variables,
-                                                    index_ranges)
+        mock_get_opendap_nc4.assert_called_once_with(
+            url,
+            variables_with_ranges,
+            self.output_dir,
+            self.logger,
+            self.access_token,
+            self.config,
+        )
+
+        mock_fill_variables.assert_called_once_with(
+            expected_output_path, varinfo, expected_variables, index_ranges
+        )

     @patch('hoss.subset.fill_variables')
     @patch('hoss.subset.get_opendap_nc4')
@@ -422,30 +538,36 @@ def test_subset_non_variable_dimensions(self, mock_get_varinfo,
     @patch('hoss.subset.get_spatial_index_ranges')
     @patch('hoss.subset.prefetch_dimension_variables')
     @patch('hoss.subset.get_varinfo')
-    def test_subset_bounds_reference(self, mock_get_varinfo,
-                                     mock_prefetch_dimensions,
-                                     mock_get_spatial_index_ranges,
-                                     mock_get_temporal_index_ranges,
-                                     mock_get_requested_index_ranges,
-                                     mock_get_opendap_nc4,
-                                     mock_fill_variables):
-        """ Ensure a request with a bounding box, specifying variables that
-            have references in a `bounds` attribute also consider the variables
-            referred to in the `bounds` attribute as required.
-
-            In the GPM_3IMERGHH data, the `lat`, `lon` and `time` variables
-            have `lat_bnds`, `lon_bnds` and `time_bnds`, respectively.
+    def test_subset_bounds_reference(
+        self,
+        mock_get_varinfo,
+        mock_prefetch_dimensions,
+        mock_get_spatial_index_ranges,
+        mock_get_temporal_index_ranges,
+        mock_get_requested_index_ranges,
+        mock_get_opendap_nc4,
+        mock_fill_variables,
+    ):
+        """Ensure a request with a bounding box, specifying variables that
+        have references in a `bounds` attribute, also considers the variables
+        referred to in the `bounds` attribute as required.
+
+        In the GPM_3IMERGHH data, the `lat`, `lon` and `time` variables
+        have `lat_bnds`, `lon_bnds` and `time_bnds`, respectively.
""" - harmony_source = Source({ - 'collection': 'C1234567890-EEDTEST', - 'shortName': self.collection_short_name, - 'variables': [{'fullPath': '/Grid/lon', - 'id': 'V123-EEDTEST', - 'name': '/Grid/lon'}] - }) - harmony_message = Message({'accessToken': self.access_token, - 'subset': {'bbox': self.bounding_box}}) + harmony_source = Source( + { + 'collection': 'C1234567890-EEDTEST', + 'shortName': self.collection_short_name, + 'variables': [ + {'fullPath': '/Grid/lon', 'id': 'V123-EEDTEST', 'name': '/Grid/lon'} + ], + } + ) + harmony_message = Message( + {'accessToken': self.access_token, 'subset': {'bbox': self.bounding_box}} + ) url = 'https://harmony.earthdata.nasa.gov/bucket/GPM' varinfo = VarInfoFromDmr('tests/data/GPM_3IMERGHH_example.dmr') @@ -455,30 +577,41 @@ def test_subset_bounds_reference(self, mock_get_varinfo, index_ranges = {'/Grid/lon': (2200, 2299)} expected_output_path = 'GPM_3IMERGHH_subset.nc4' - variables_with_ranges = {'/Grid/lon[2200:2299]', - '/Grid/lon_bnds[2200:2299][]'} + variables_with_ranges = {'/Grid/lon[2200:2299]', '/Grid/lon_bnds[2200:2299][]'} mock_get_varinfo.return_value = varinfo mock_prefetch_dimensions.return_value = prefetch_path mock_get_spatial_index_ranges.return_value = index_ranges mock_get_opendap_nc4.return_value = expected_output_path - output_path = subset_granule(url, harmony_source, self.output_dir, - harmony_message, self.logger, self.config) + output_path = subset_granule( + url, + harmony_source, + self.output_dir, + harmony_message, + self.logger, + self.config, + ) self.assertIn('GPM_3IMERGHH_subset.nc4', output_path) - mock_get_varinfo.assert_called_once_with(url, self.output_dir, - self.logger, - self.collection_short_name, - self.access_token, - self.config) - - mock_prefetch_dimensions.assert_called_once_with(url, varinfo, - expected_variables, - self.output_dir, - self.logger, - self.access_token, - self.config) + mock_get_varinfo.assert_called_once_with( + url, + self.output_dir, + self.logger, + self.collection_short_name, + self.access_token, + self.config, + ) + + mock_prefetch_dimensions.assert_called_once_with( + url, + varinfo, + expected_variables, + self.output_dir, + self.logger, + self.access_token, + self.config, + ) mock_get_temporal_index_ranges.assert_not_called() mock_get_spatial_index_ranges.assert_called_once_with( @@ -486,17 +619,18 @@ def test_subset_bounds_reference(self, mock_get_varinfo, ) mock_get_requested_index_ranges.assert_not_called() - mock_get_opendap_nc4.assert_called_once_with(url, - variables_with_ranges, - self.output_dir, - self.logger, - self.access_token, - self.config) - - mock_fill_variables.assert_called_once_with(expected_output_path, - varinfo, - expected_variables, - index_ranges) + mock_get_opendap_nc4.assert_called_once_with( + url, + variables_with_ranges, + self.output_dir, + self.logger, + self.access_token, + self.config, + ) + + mock_fill_variables.assert_called_once_with( + expected_output_path, varinfo, expected_variables, index_ranges + ) @patch('hoss.subset.fill_variables') @patch('hoss.subset.get_opendap_nc4') @@ -505,29 +639,41 @@ def test_subset_bounds_reference(self, mock_get_varinfo, @patch('hoss.subset.get_spatial_index_ranges') @patch('hoss.subset.prefetch_dimension_variables') @patch('hoss.subset.get_varinfo') - def test_subset_temporal(self, mock_get_varinfo, mock_prefetch_dimensions, - mock_get_spatial_index_ranges, - mock_get_temporal_index_ranges, - mock_get_requested_index_ranges, - mock_get_opendap_nc4, mock_fill_variables): - """ Ensure a request with a temporal 
range constructs an OPeNDAP - request that contains index range values for only the temporal - dimension of the data. + def test_subset_temporal( + self, + mock_get_varinfo, + mock_prefetch_dimensions, + mock_get_spatial_index_ranges, + mock_get_temporal_index_ranges, + mock_get_requested_index_ranges, + mock_get_opendap_nc4, + mock_fill_variables, + ): + """Ensure a request with a temporal range constructs an OPeNDAP + request that contains index range values for only the temporal + dimension of the data. """ url = 'https://harmony.earthdata.nasa.gov/bucket/M2T1NXSLV' - harmony_source = Source({'collection': 'C1234567890-PROVIDER', - 'shortName': self.collection_short_name, - 'variables': [{'fullPath': '/PS', - 'id': 'V123-EEDTEST', - 'name': '/PS'}]}) - harmony_message = Message({ - 'accessToken': self.access_token, - 'temporal': {'start': '2021-01-10T01:00:00', - 'end': '2021-01-10T03:00:00'} - }) - varinfo = VarInfoFromDmr('tests/data/M2T1NXSLV_example.dmr', - config_file='hoss/hoss_config.json') + harmony_source = Source( + { + 'collection': 'C1234567890-PROVIDER', + 'shortName': self.collection_short_name, + 'variables': [{'fullPath': '/PS', 'id': 'V123-EEDTEST', 'name': '/PS'}], + } + ) + harmony_message = Message( + { + 'accessToken': self.access_token, + 'temporal': { + 'start': '2021-01-10T01:00:00', + 'end': '2021-01-10T03:00:00', + }, + } + ) + varinfo = VarInfoFromDmr( + 'tests/data/M2T1NXSLV_example.dmr', config_file='hoss/hoss_config.json' + ) expected_variables = {'/PS', '/lat', '/lon', '/time'} @@ -542,22 +688,34 @@ def test_subset_temporal(self, mock_get_varinfo, mock_prefetch_dimensions, mock_get_temporal_index_ranges.return_value = index_ranges mock_get_opendap_nc4.return_value = expected_output_path - output_path = subset_granule(url, harmony_source, self.output_dir, - harmony_message, self.logger, self.config) + output_path = subset_granule( + url, + harmony_source, + self.output_dir, + harmony_message, + self.logger, + self.config, + ) self.assertIn('M2T1NXSLV_subset.nc4', output_path) - mock_get_varinfo.assert_called_once_with(url, self.output_dir, - self.logger, - self.collection_short_name, - self.access_token, - self.config) - - mock_prefetch_dimensions.assert_called_once_with(url, varinfo, - expected_variables, - self.output_dir, - self.logger, - self.access_token, - self.config) + mock_get_varinfo.assert_called_once_with( + url, + self.output_dir, + self.logger, + self.collection_short_name, + self.access_token, + self.config, + ) + + mock_prefetch_dimensions.assert_called_once_with( + url, + varinfo, + expected_variables, + self.output_dir, + self.logger, + self.access_token, + self.config, + ) mock_get_spatial_index_ranges.assert_not_called() mock_get_temporal_index_ranges.assert_called_once_with( @@ -565,17 +723,18 @@ def test_subset_temporal(self, mock_get_varinfo, mock_prefetch_dimensions, ) mock_get_requested_index_ranges.assert_not_called() - mock_get_opendap_nc4.assert_called_once_with(url, - variables_with_ranges, - self.output_dir, - self.logger, - self.access_token, - self.config) - - mock_fill_variables.assert_called_once_with(expected_output_path, - varinfo, - expected_variables, - index_ranges) + mock_get_opendap_nc4.assert_called_once_with( + url, + variables_with_ranges, + self.output_dir, + self.logger, + self.access_token, + self.config, + ) + + mock_fill_variables.assert_called_once_with( + expected_output_path, varinfo, expected_variables, index_ranges + ) @patch('hoss.subset.fill_variables') @patch('hoss.subset.get_opendap_nc4') @@ -584,31 
+743,42 @@ def test_subset_temporal(self, mock_get_varinfo, mock_prefetch_dimensions, @patch('hoss.subset.get_spatial_index_ranges') @patch('hoss.subset.prefetch_dimension_variables') @patch('hoss.subset.get_varinfo') - def test_subset_geo_temporal(self, mock_get_varinfo, - mock_prefetch_dimensions, - mock_get_spatial_index_ranges, - mock_get_temporal_index_ranges, - mock_get_requested_index_ranges, - mock_get_opendap_nc4, mock_fill_variables): - """ Ensure a request with a temporal range and a bounding box - constructs an OPeNDAP request that contains index range values for - both the geographic and the temporal dimensions of the data. + def test_subset_geo_temporal( + self, + mock_get_varinfo, + mock_prefetch_dimensions, + mock_get_spatial_index_ranges, + mock_get_temporal_index_ranges, + mock_get_requested_index_ranges, + mock_get_opendap_nc4, + mock_fill_variables, + ): + """Ensure a request with a temporal range and a bounding box + constructs an OPeNDAP request that contains index range values for + both the geographic and the temporal dimensions of the data. """ url = 'https://harmony.earthdata.nasa.gov/bucket/M2T1NXSLV' - harmony_source = Source({'collection': 'C1234567890-EEDTEST', - 'shortName': self.collection_short_name, - 'variables': [{'fullPath': '/PS', - 'id': 'V123-EEDTEST', - 'name': '/PS'}]}) - harmony_message = Message({ - 'accessToken': self.access_token, - 'subset': {'bbox': self.bounding_box}, - 'temporal': {'start': '2021-01-10T01:00:00', - 'end': '2021-01-10T03:00:00'} - }) - varinfo = VarInfoFromDmr('tests/data/M2T1NXSLV_example.dmr', - config_file='hoss/hoss_config.json') + harmony_source = Source( + { + 'collection': 'C1234567890-EEDTEST', + 'shortName': self.collection_short_name, + 'variables': [{'fullPath': '/PS', 'id': 'V123-EEDTEST', 'name': '/PS'}], + } + ) + harmony_message = Message( + { + 'accessToken': self.access_token, + 'subset': {'bbox': self.bounding_box}, + 'temporal': { + 'start': '2021-01-10T01:00:00', + 'end': '2021-01-10T03:00:00', + }, + } + ) + varinfo = VarInfoFromDmr( + 'tests/data/M2T1NXSLV_example.dmr', config_file='hoss/hoss_config.json' + ) expected_variables = {'/PS', '/lat', '/lon', '/time'} @@ -619,8 +789,12 @@ def test_subset_geo_temporal(self, mock_get_varinfo, all_index_ranges.update(temporal_index_ranges) expected_output_path = 'M2T1NXSLV_subset.nc4' - variables_with_ranges = {'/PS[1:2][120:140][352:368]', '/lat[120:140]', - '/lon[352:368]', '/time[1:2]'} + variables_with_ranges = { + '/PS[1:2][120:140][352:368]', + '/lat[120:140]', + '/lon[352:368]', + '/time[1:2]', + } mock_get_varinfo.return_value = varinfo mock_prefetch_dimensions.return_value = prefetch_path @@ -628,22 +802,34 @@ def test_subset_geo_temporal(self, mock_get_varinfo, mock_get_spatial_index_ranges.return_value = geo_index_ranges mock_get_opendap_nc4.return_value = expected_output_path - output_path = subset_granule(url, harmony_source, self.output_dir, - harmony_message, self.logger, self.config) + output_path = subset_granule( + url, + harmony_source, + self.output_dir, + harmony_message, + self.logger, + self.config, + ) self.assertIn('M2T1NXSLV_subset.nc4', output_path) - mock_get_varinfo.assert_called_once_with(url, self.output_dir, - self.logger, - self.collection_short_name, - self.access_token, - self.config) - - mock_prefetch_dimensions.assert_called_once_with(url, varinfo, - expected_variables, - self.output_dir, - self.logger, - self.access_token, - self.config) + mock_get_varinfo.assert_called_once_with( + url, + self.output_dir, + self.logger, + 
self.collection_short_name, + self.access_token, + self.config, + ) + + mock_prefetch_dimensions.assert_called_once_with( + url, + varinfo, + expected_variables, + self.output_dir, + self.logger, + self.access_token, + self.config, + ) mock_get_spatial_index_ranges.assert_called_once_with( expected_variables, varinfo, prefetch_path, harmony_message, None @@ -654,17 +840,18 @@ def test_subset_geo_temporal(self, mock_get_varinfo, ) mock_get_requested_index_ranges.assert_not_called() - mock_get_opendap_nc4.assert_called_once_with(url, - variables_with_ranges, - self.output_dir, - self.logger, - self.access_token, - self.config) - - mock_fill_variables.assert_called_once_with(expected_output_path, - varinfo, - expected_variables, - all_index_ranges) + mock_get_opendap_nc4.assert_called_once_with( + url, + variables_with_ranges, + self.output_dir, + self.logger, + self.access_token, + self.config, + ) + + mock_fill_variables.assert_called_once_with( + expected_output_path, varinfo, expected_variables, all_index_ranges + ) @patch('hoss.subset.fill_variables') @patch('hoss.subset.get_opendap_nc4') @@ -674,82 +861,108 @@ def test_subset_geo_temporal(self, mock_get_varinfo, @patch('hoss.subset.get_request_shape_file') @patch('hoss.subset.prefetch_dimension_variables') @patch('hoss.subset.get_varinfo') - def test_subset_granule_shape(self, mock_get_varinfo, - mock_prefetch_dimensions, - mock_get_request_shape_file, - mock_get_spatial_index_ranges, - mock_get_temporal_index_ranges, - mock_get_requested_index_ranges, - mock_get_opendap_nc4, mock_fill_variables): - """ Ensure a request to extract both a variable and spatial subset runs - without error. This request will have specified a shape file rather - than a bounding box, which should be passed along to the - `get_spatial_index_ranges` function. The prefetch dimension utility - functionality and the HOSS functionality in `hoss.spatial.py` - should be called. However, because there is no specified - `temporal_range`, the functionality in `hoss.temporal.py` should - not be called. + def test_subset_granule_shape( + self, + mock_get_varinfo, + mock_prefetch_dimensions, + mock_get_request_shape_file, + mock_get_spatial_index_ranges, + mock_get_temporal_index_ranges, + mock_get_requested_index_ranges, + mock_get_opendap_nc4, + mock_fill_variables, + ): + """Ensure a request to extract both a variable and spatial subset runs + without error. This request will have specified a shape file rather + than a bounding box, which should be passed along to the + `get_spatial_index_ranges` function. The prefetch dimension utility + functionality and the HOSS functionality in `hoss.spatial.py` + should be called. However, because there is no specified + `temporal_range`, the functionality in `hoss.temporal.py` should + not be called. 
""" shape_file_path = 'tests/geojson_examples/polygon.geo.json' mock_get_request_shape_file.return_value = shape_file_path - harmony_message = Message({ - 'accessToken': self.access_token, - 'subset': {'shape': {'href': 'https://example.com/polygon.geo.json', - 'type': 'application/geo+json'}} - }) + harmony_message = Message( + { + 'accessToken': self.access_token, + 'subset': { + 'shape': { + 'href': 'https://example.com/polygon.geo.json', + 'type': 'application/geo+json', + } + }, + } + ) index_ranges = {'/latitude': (508, 527), '/longitude': (983, 1003)} prefetch_path = 'prefetch.nc4' - variables_with_ranges = {'/latitude[508:527]', '/longitude[983:1003]', - '/rainfall_rate[][508:527][983:1003]', - '/time'} + variables_with_ranges = { + '/latitude[508:527]', + '/longitude[983:1003]', + '/rainfall_rate[][508:527][983:1003]', + '/time', + } mock_get_varinfo.return_value = self.varinfo mock_prefetch_dimensions.return_value = prefetch_path mock_get_spatial_index_ranges.return_value = index_ranges mock_get_opendap_nc4.return_value = self.output_path - output_path = subset_granule(self.granule_url, self.harmony_source, - self.output_dir, harmony_message, - self.logger, self.config) + output_path = subset_granule( + self.granule_url, + self.harmony_source, + self.output_dir, + harmony_message, + self.logger, + self.config, + ) self.assertEqual(output_path, self.output_path) - mock_get_varinfo.assert_called_once_with(self.granule_url, - self.output_dir, self.logger, - self.collection_short_name, - self.access_token, - self.config) - - mock_prefetch_dimensions.assert_called_once_with(self.granule_url, - self.varinfo, - self.required_variables, - self.output_dir, - self.logger, - self.access_token, - self.config) + mock_get_varinfo.assert_called_once_with( + self.granule_url, + self.output_dir, + self.logger, + self.collection_short_name, + self.access_token, + self.config, + ) + + mock_prefetch_dimensions.assert_called_once_with( + self.granule_url, + self.varinfo, + self.required_variables, + self.output_dir, + self.logger, + self.access_token, + self.config, + ) mock_get_temporal_index_ranges.assert_not_called() - mock_get_request_shape_file.assert_called_once_with(harmony_message, - self.output_dir, - self.logger, - self.config) + mock_get_request_shape_file.assert_called_once_with( + harmony_message, self.output_dir, self.logger, self.config + ) mock_get_spatial_index_ranges.assert_called_once_with( - self.required_variables, self.varinfo, prefetch_path, - harmony_message, shape_file_path + self.required_variables, + self.varinfo, + prefetch_path, + harmony_message, + shape_file_path, ) mock_get_requested_index_ranges.assert_not_called() - mock_get_opendap_nc4.assert_called_once_with(self.granule_url, - variables_with_ranges, - self.output_dir, - self.logger, - self.access_token, - self.config) - - mock_fill_variables.assert_called_once_with(self.output_path, - self.varinfo, - self.required_variables, - index_ranges) + mock_get_opendap_nc4.assert_called_once_with( + self.granule_url, + variables_with_ranges, + self.output_dir, + self.logger, + self.access_token, + self.config, + ) + + mock_fill_variables.assert_called_once_with( + self.output_path, self.varinfo, self.required_variables, index_ranges + ) @patch('hoss.subset.fill_variables') @patch('hoss.subset.get_opendap_nc4') @@ -759,86 +972,110 @@ def test_subset_granule_shape(self, mock_get_varinfo, @patch('hoss.subset.get_request_shape_file') @patch('hoss.subset.prefetch_dimension_variables') @patch('hoss.subset.get_varinfo') - def 
test_subset_granule_shape_and_bbox(self, mock_get_varinfo, - mock_prefetch_dimensions, - mock_get_request_shape_file, - mock_get_spatial_index_ranges, - mock_get_temporal_index_ranges, - mock_get_requested_index_ranges, - mock_get_opendap_nc4, - mock_fill_variables): - """ Ensure a request to extract both a variable and spatial subset runs - without error. This request will have specified both a bounding box - and a shape file, both of which will be passed along to - `get_spatial_index_ranges`, so that it can determine which to use. - The prefetch dimension utility functionality and the HOSS - functionality in `hoss.spatial.py` should be called. However, - because there is no specified `temporal_range`, the functionality - in `hoss.temporal.py` should not be called. + def test_subset_granule_shape_and_bbox( + self, + mock_get_varinfo, + mock_prefetch_dimensions, + mock_get_request_shape_file, + mock_get_spatial_index_ranges, + mock_get_temporal_index_ranges, + mock_get_requested_index_ranges, + mock_get_opendap_nc4, + mock_fill_variables, + ): + """Ensure a request to extract both a variable and spatial subset runs + without error. This request will have specified both a bounding box + and a shape file, both of which will be passed along to + `get_spatial_index_ranges`, so that it can determine which to use. + The prefetch dimension utility functionality and the HOSS + functionality in `hoss.spatial.py` should be called. However, + because there is no specified `temporal_range`, the functionality + in `hoss.temporal.py` should not be called. """ shape_file_path = 'tests/geojson_examples/polygon.geo.json' mock_get_request_shape_file.return_value = shape_file_path - harmony_message = Message({ - 'accessToken': self.access_token, - 'subset': { - 'bbox': self.bounding_box, - 'shape': {'href': 'https://example.com/polygon.geo.json', - 'type': 'application/geo+json'} + harmony_message = Message( + { + 'accessToken': self.access_token, + 'subset': { + 'bbox': self.bounding_box, + 'shape': { + 'href': 'https://example.com/polygon.geo.json', + 'type': 'application/geo+json', + }, + }, } - }) + ) index_ranges = {'/latitude': (240, 279), '/longitude': (160, 199)} prefetch_path = 'prefetch.nc4' - variables_with_ranges = {'/latitude[240:279]', '/longitude[160:199]', - '/rainfall_rate[][240:279][160:199]', '/time'} + variables_with_ranges = { + '/latitude[240:279]', + '/longitude[160:199]', + '/rainfall_rate[][240:279][160:199]', + '/time', + } mock_get_varinfo.return_value = self.varinfo mock_prefetch_dimensions.return_value = prefetch_path mock_get_spatial_index_ranges.return_value = index_ranges mock_get_opendap_nc4.return_value = self.output_path - output_path = subset_granule(self.granule_url, self.harmony_source, - self.output_dir, harmony_message, - self.logger, self.config) + output_path = subset_granule( + self.granule_url, + self.harmony_source, + self.output_dir, + harmony_message, + self.logger, + self.config, + ) self.assertEqual(output_path, self.output_path) - mock_get_varinfo.assert_called_once_with(self.granule_url, - self.output_dir, self.logger, - self.collection_short_name, - self.access_token, - self.config) - - mock_prefetch_dimensions.assert_called_once_with(self.granule_url, - self.varinfo, - self.required_variables, - self.output_dir, - self.logger, - self.access_token, - self.config) + mock_get_varinfo.assert_called_once_with( + self.granule_url, + self.output_dir, + self.logger, + self.collection_short_name, + self.access_token, + self.config, + ) + + 
mock_prefetch_dimensions.assert_called_once_with(
+            self.granule_url,
+            self.varinfo,
+            self.required_variables,
+            self.output_dir,
+            self.logger,
+            self.access_token,
+            self.config,
+        )

         mock_get_temporal_index_ranges.assert_not_called()
-        mock_get_request_shape_file.assert_called_once_with(harmony_message,
-                                                            self.output_dir,
-                                                            self.logger,
-                                                            self.config)
+        mock_get_request_shape_file.assert_called_once_with(
+            harmony_message, self.output_dir, self.logger, self.config
+        )
         mock_get_spatial_index_ranges.assert_called_once_with(
-            self.required_variables, self.varinfo, prefetch_path,
-            harmony_message, shape_file_path
+            self.required_variables,
+            self.varinfo,
+            prefetch_path,
+            harmony_message,
+            shape_file_path,
         )
         mock_get_requested_index_ranges.assert_not_called()

-        mock_get_opendap_nc4.assert_called_once_with(self.granule_url,
-                                                     variables_with_ranges,
-                                                     self.output_dir,
-                                                     self.logger,
-                                                     self.access_token,
-                                                     self.config)
-
-        mock_fill_variables.assert_called_once_with(self.output_path,
-                                                    self.varinfo,
-                                                    self.required_variables,
-                                                    index_ranges)
+        mock_get_opendap_nc4.assert_called_once_with(
+            self.granule_url,
+            variables_with_ranges,
+            self.output_dir,
+            self.logger,
+            self.access_token,
+            self.config,
+        )
+
+        mock_fill_variables.assert_called_once_with(
+            self.output_path, self.varinfo, self.required_variables, index_ranges
+        )

     @patch('hoss.subset.fill_variables')
     @patch('hoss.subset.get_opendap_nc4')
@@ -847,201 +1084,271 @@ def test_subset_granule_shape_and_bbox(self, mock_get_varinfo,
     @patch('hoss.subset.get_spatial_index_ranges')
     @patch('hoss.subset.prefetch_dimension_variables')
     @patch('hoss.subset.get_varinfo')
-    def test_subset_granule_geo_named(self, mock_get_varinfo,
-                                      mock_prefetch_dimensions,
-                                      mock_get_spatial_index_ranges,
-                                      mock_get_temporal_index_ranges,
-                                      mock_get_requested_index_ranges,
-                                      mock_get_opendap_nc4,
-                                      mock_fill_variables):
-        """ Ensure a request to extract both a variable and named dimension
-            subset runs without error. Because a dimension is specified in this
-            request, the prefetch dimension utility functionality and the HOSS
-            functionality in `hoss.spatial.py` should be called. However,
-            because there is no specified `temporal_range`, the functionality
-            in `hoss.temporal.py` should not be called.
-
-            This test will use spatial dimensions, but explicitly naming them
-            instead of using a bounding box.
+    def test_subset_granule_geo_named(
+        self,
+        mock_get_varinfo,
+        mock_prefetch_dimensions,
+        mock_get_spatial_index_ranges,
+        mock_get_temporal_index_ranges,
+        mock_get_requested_index_ranges,
+        mock_get_opendap_nc4,
+        mock_fill_variables,
+    ):
+        """Ensure a request to extract both a variable and named dimension
        subset runs without error. Because a dimension is specified in this
        request, the prefetch dimension utility functionality and the HOSS
        functionality in `hoss.spatial.py` should be called. However,
        because there is no specified `temporal_range`, the functionality
        in `hoss.temporal.py` should not be called.

        This test will use spatial dimensions, but explicitly naming them
        instead of using a bounding box.
""" - harmony_message = Message({ - 'accessToken': self.access_token, - 'subset': { - 'dimensions': [{'name': '/latitude', 'min': -30, 'max': -20}, - {'name': '/longitude', 'min': 40, 'max': 50}] + harmony_message = Message( + { + 'accessToken': self.access_token, + 'subset': { + 'dimensions': [ + {'name': '/latitude', 'min': -30, 'max': -20}, + {'name': '/longitude', 'min': 40, 'max': 50}, + ] + }, } - }) + ) index_ranges = {'/latitude': (240, 279), '/longitude': (160, 199)} prefetch_path = 'prefetch.nc4' - variables_with_ranges = {'/latitude[240:279]', '/longitude[160:199]', - '/rainfall_rate[][240:279][160:199]', '/time'} + variables_with_ranges = { + '/latitude[240:279]', + '/longitude[160:199]', + '/rainfall_rate[][240:279][160:199]', + '/time', + } mock_get_varinfo.return_value = self.varinfo mock_prefetch_dimensions.return_value = prefetch_path mock_get_requested_index_ranges.return_value = index_ranges mock_get_opendap_nc4.return_value = self.output_path - output_path = subset_granule(self.granule_url, self.harmony_source, - self.output_dir, harmony_message, - self.logger, self.config) + output_path = subset_granule( + self.granule_url, + self.harmony_source, + self.output_dir, + harmony_message, + self.logger, + self.config, + ) self.assertEqual(output_path, self.output_path) - mock_get_varinfo.assert_called_once_with(self.granule_url, - self.output_dir, self.logger, - self.collection_short_name, - self.access_token, - self.config) - - mock_prefetch_dimensions.assert_called_once_with(self.granule_url, - self.varinfo, - self.required_variables, - self.output_dir, - self.logger, - self.access_token, - self.config) + mock_get_varinfo.assert_called_once_with( + self.granule_url, + self.output_dir, + self.logger, + self.collection_short_name, + self.access_token, + self.config, + ) + + mock_prefetch_dimensions.assert_called_once_with( + self.granule_url, + self.varinfo, + self.required_variables, + self.output_dir, + self.logger, + self.access_token, + self.config, + ) mock_get_temporal_index_ranges.assert_not_called() mock_get_spatial_index_ranges.assert_not_called() mock_get_requested_index_ranges.assert_called_once_with( - self.required_variables, self.varinfo, prefetch_path, - harmony_message + self.required_variables, self.varinfo, prefetch_path, harmony_message ) - mock_get_opendap_nc4.assert_called_once_with(self.granule_url, - variables_with_ranges, - self.output_dir, - self.logger, - self.access_token, - self.config) + mock_get_opendap_nc4.assert_called_once_with( + self.granule_url, + variables_with_ranges, + self.output_dir, + self.logger, + self.access_token, + self.config, + ) - mock_fill_variables.assert_called_once_with(self.output_path, - self.varinfo, - self.required_variables, - index_ranges) + mock_fill_variables.assert_called_once_with( + self.output_path, self.varinfo, self.required_variables, index_ranges + ) @patch('hoss.subset.download_url') def test_get_varinfo(self, mock_download_url): - """ Ensure a request is made to OPeNDAP to retrieve the `.dmr` and - that a `VarInfoFromDmr` instance can be created from that - downloaded file. + """Ensure a request is made to OPeNDAP to retrieve the `.dmr` and + that a `VarInfoFromDmr` instance can be created from that + downloaded file. 
""" - dmr_path = shutil.copy('tests/data/rssmif16d_example.dmr', - f'{self.output_dir}/rssmif16d_example.dmr') + dmr_path = shutil.copy( + 'tests/data/rssmif16d_example.dmr', + f'{self.output_dir}/rssmif16d_example.dmr', + ) mock_download_url.return_value = dmr_path - varinfo = get_varinfo(self.granule_url, self.output_dir, self.logger, - self.collection_short_name, self.access_token, - self.config) + varinfo = get_varinfo( + self.granule_url, + self.output_dir, + self.logger, + self.collection_short_name, + self.access_token, + self.config, + ) self.assertIsInstance(varinfo, VarInfoFromDmr) - self.assertSetEqual(set(varinfo.variables.keys()), - {'/atmosphere_cloud_liquid_water_content', - '/atmosphere_water_vapor_content', '/latitude', - '/longitude', '/rainfall_rate', '/sst_dtime', - '/time', '/wind_speed'}) + self.assertSetEqual( + set(varinfo.variables.keys()), + { + '/atmosphere_cloud_liquid_water_content', + '/atmosphere_water_vapor_content', + '/latitude', + '/longitude', + '/rainfall_rate', + '/sst_dtime', + '/time', + '/wind_speed', + }, + ) def test_get_required_variables(self): - """ Ensure that all requested variables are extracted from the list of - variables in the Harmony message. Alternatively, if no variables - are specified, all variables in the `.dmr` should be returned. - - After the requested variables have been identified, the return - value should also include all those variables that support those - requested (e.g., dimensions, coordinates, etc). - - * Test case 1: variables in message - the variable paths should be - extracted. - * Test case 2: variables in message, some without leading slash - - the variables paths should be extracted with a - slash prepended to each. - * Test case 3: variables in message, index ranges required (e.g., - for bounding box, shape file or temporal subset) - - the same variable paths from the message should be - extracted. - * Test case 4: variables not in message, no index ranges required - - the output should be an empty set (straight - variable subset, all variables from OPeNDAP). - * Test case 5: variables not in message, index ranges required - (e.g., for bounding box, shape file or temporal - subset) - the return value should include all - non-dimension variables from the `VarInfoFromDmr` - instance. + """Ensure that all requested variables are extracted from the list of + variables in the Harmony message. Alternatively, if no variables + are specified, all variables in the `.dmr` should be returned. + + After the requested variables have been identified, the return + value should also include all those variables that support those + requested (e.g., dimensions, coordinates, etc). + + * Test case 1: variables in message - the variable paths should be + extracted. + * Test case 2: variables in message, some without leading slash - + the variables paths should be extracted with a + slash prepended to each. + * Test case 3: variables in message, index ranges required (e.g., + for bounding box, shape file or temporal subset) + - the same variable paths from the message should be + extracted. + * Test case 4: variables not in message, no index ranges required + - the output should be an empty set (straight + variable subset, all variables from OPeNDAP). + * Test case 5: variables not in message, index ranges required + (e.g., for bounding box, shape file or temporal + subset) - the return value should include all + non-dimension variables from the `VarInfoFromDmr` + instance. 
""" - all_variables = {'/atmosphere_cloud_liquid_water_content', - '/atmosphere_water_vapor_content', '/latitude', - '/longitude', '/rainfall_rate', '/sst_dtime', '/time', - '/wind_speed'} + all_variables = { + '/atmosphere_cloud_liquid_water_content', + '/atmosphere_water_vapor_content', + '/latitude', + '/longitude', + '/rainfall_rate', + '/sst_dtime', + '/time', + '/wind_speed', + } with self.subTest('Variables specified, no index range subset:'): - harmony_variables = [HarmonyVariable({'fullPath': '/rainfall_rate', - 'id': 'V1234-PROVIDER', - 'name': '/rainfall_rate'})] - self.assertSetEqual(get_required_variables(self.varinfo, - harmony_variables, - False, self.logger), - {'/latitude', '/longitude', '/rainfall_rate', - '/time'}) + harmony_variables = [ + HarmonyVariable( + { + 'fullPath': '/rainfall_rate', + 'id': 'V1234-PROVIDER', + 'name': '/rainfall_rate', + } + ) + ] + self.assertSetEqual( + get_required_variables( + self.varinfo, harmony_variables, False, self.logger + ), + {'/latitude', '/longitude', '/rainfall_rate', '/time'}, + ) with self.subTest('Variable without leading slash can be handled'): - harmony_variables = [HarmonyVariable({'fullPath': 'rainfall_rate', - 'id': 'V1234-PROVIDER', - 'name': 'rainfall_rate'})] - self.assertSetEqual(get_required_variables(self.varinfo, - harmony_variables, - False, self.logger), - {'/latitude', '/longitude', '/rainfall_rate', - '/time'}) + harmony_variables = [ + HarmonyVariable( + { + 'fullPath': 'rainfall_rate', + 'id': 'V1234-PROVIDER', + 'name': 'rainfall_rate', + } + ) + ] + self.assertSetEqual( + get_required_variables( + self.varinfo, harmony_variables, False, self.logger + ), + {'/latitude', '/longitude', '/rainfall_rate', '/time'}, + ) with self.subTest('Variables specified for an index_range_subset'): - harmony_variables = [HarmonyVariable({'fullPath': '/rainfall_rate', - 'id': 'V1234-PROVIDER', - 'name': '/rainfall_rate'})] - self.assertSetEqual(get_required_variables(self.varinfo, - harmony_variables, - True, self.logger), - {'/latitude', '/longitude', '/rainfall_rate', - '/time'}) + harmony_variables = [ + HarmonyVariable( + { + 'fullPath': '/rainfall_rate', + 'id': 'V1234-PROVIDER', + 'name': '/rainfall_rate', + } + ) + ] + self.assertSetEqual( + get_required_variables( + self.varinfo, harmony_variables, True, self.logger + ), + {'/latitude', '/longitude', '/rainfall_rate', '/time'}, + ) with self.subTest('No variables, no index range subset returns none'): - self.assertSetEqual(get_required_variables(self.varinfo, [], False, - self.logger), - set()) + self.assertSetEqual( + get_required_variables(self.varinfo, [], False, self.logger), set() + ) with self.subTest('No variables, index-range subset, returns all'): - self.assertSetEqual(get_required_variables(self.varinfo, [], True, - self.logger), - all_variables) + self.assertSetEqual( + get_required_variables(self.varinfo, [], True, self.logger), + all_variables, + ) def test_fill_variables(self): - """ Ensure only the expected variables are filled (e.g., those with - a longitude crossing the grid edge). Longitude variables should not - themselves be filled. + """Ensure only the expected variables are filled (e.g., those with + a longitude crossing the grid edge). Longitude variables should not + themselves be filled. 
""" varinfo = VarInfoFromDmr( 'tests/data/rssmif16d_example.dmr', - config_file='tests/data/test_subsetter_config.json' + config_file='tests/data/test_subsetter_config.json', ) input_file = 'tests/data/f16_ssmis_20200102v7.nc' test_file = shutil.copy(input_file, self.output_dir) index_ranges = {'/latitude': [0, 719], '/longitude': [1400, 10]} - required_variables = {'/sst_dtime', '/wind_speed', - '/latitude', '/longitude', '/time'} + required_variables = { + '/sst_dtime', + '/wind_speed', + '/latitude', + '/longitude', + '/time', + } - fill_variables(test_file, varinfo, required_variables, - index_ranges) + fill_variables(test_file, varinfo, required_variables, index_ranges) - with Dataset(test_file, 'r') as test_output, \ - Dataset(input_file, 'r') as test_input: + with Dataset(test_file, 'r') as test_output, Dataset( + input_file, 'r' + ) as test_input: # Assert none of the dimension variables are filled at any pixel for variable_dimension in ['/time', '/latitude', '/longitude']: data = test_output[variable_dimension][:] self.assertFalse(np.any(data.mask)) - np.testing.assert_array_equal(test_input[variable_dimension], - test_output[variable_dimension]) + np.testing.assert_array_equal( + test_input[variable_dimension], test_output[variable_dimension] + ) # Assert the expected range of wind_speed and sst_dtime are filled # but that rest of the variable matches the input file. @@ -1049,10 +1356,12 @@ def test_fill_variables(self): input_data = test_input[variable][:] output_data = test_output[variable][:] self.assertTrue(np.all(output_data[:][:][11:1400].mask)) - np.testing.assert_array_equal(output_data[:][:][:11], - input_data[:][:][:11]) - np.testing.assert_array_equal(output_data[:][:][1400:], - input_data[:][:][1400:]) + np.testing.assert_array_equal( + output_data[:][:][:11], input_data[:][:][:11] + ) + np.testing.assert_array_equal( + output_data[:][:][1400:], input_data[:][:][1400:] + ) # Assert a variable that wasn't to be filled isn't rainfall_rate_in = test_input['/rainfall_rate'][:] @@ -1061,32 +1370,34 @@ def test_fill_variables(self): @patch('hoss.subset.Dataset') def test_fill_variables_no_fill(self, mock_dataset): - """ Ensure that the output file is not opened if there is no need to - fill any variables. This will arise if: + """Ensure that the output file is not opened if there is no need to + fill any variables. This will arise if: - * There are no index ranges (e.g., a purely variable subset). - * None of the variables cross a grid-discontinuity. + * There are no index ranges (e.g., a purely variable subset). + * None of the variables cross a grid-discontinuity. 
""" - non_fill_index_ranges = {'/latitude': (100, 200), - '/longitude': (150, 300)} + non_fill_index_ranges = {'/latitude': (100, 200), '/longitude': (150, 300)} - test_args = [['Variable subset only', {}], - ['No index ranges need filling', non_fill_index_ranges]] + test_args = [ + ['Variable subset only', {}], + ['No index ranges need filling', non_fill_index_ranges], + ] for description, index_ranges in test_args: with self.subTest(description): - fill_variables(self.output_dir, self.varinfo, - self.required_variables, index_ranges) + fill_variables( + self.output_dir, self.varinfo, self.required_variables, index_ranges + ) mock_dataset.assert_not_called() @patch('hoss.subset.get_fill_slice') def test_fill_variable(self, mock_get_fill_slice): - """ Ensure that values are only filled when the correct criteria are - met: + """Ensure that values are only filled when the correct criteria are + met: - * Variable is not a longitude. - * Variable has at least one dimension that requires filling. + * Variable is not a longitude. + * Variable has at least one dimension that requires filling. """ fill_ranges = {'/longitude': (1439, 0)} @@ -1096,41 +1407,49 @@ def test_fill_variable(self, mock_get_fill_slice): mock_get_fill_slice.return_value = slice(None) with self.subTest('Longitude variable should not be filled'): - dataset_path = shutil.copy('tests/data/f16_ssmis_20200102v7.nc', - self.output_dir) + dataset_path = shutil.copy( + 'tests/data/f16_ssmis_20200102v7.nc', self.output_dir + ) with Dataset(dataset_path, 'a') as dataset: - fill_variable(dataset, fill_ranges, self.varinfo, '/longitude', - dimensions_to_fill) + fill_variable( + dataset, fill_ranges, self.varinfo, '/longitude', dimensions_to_fill + ) self.assertFalse(dataset['/longitude'][:].any() is np.ma.masked) mock_get_fill_slice.assert_not_called() mock_get_fill_slice.reset_mock() with self.subTest('Variable has no dimensions needing filling'): - dataset_path = shutil.copy('tests/data/f16_ssmis_20200102v7.nc', - self.output_dir) + dataset_path = shutil.copy( + 'tests/data/f16_ssmis_20200102v7.nc', self.output_dir + ) with Dataset(dataset_path, 'a') as dataset: - fill_variable(dataset, fill_ranges, self.varinfo, '/latitude', - dimensions_to_fill) + fill_variable( + dataset, fill_ranges, self.varinfo, '/latitude', dimensions_to_fill + ) self.assertFalse(dataset['/latitude'][:].any() is np.ma.masked) mock_get_fill_slice.assert_not_called() mock_get_fill_slice.reset_mock() with self.subTest('Variable that should be filled'): - dataset_path = shutil.copy('tests/data/f16_ssmis_20200102v7.nc', - self.output_dir) + dataset_path = shutil.copy( + 'tests/data/f16_ssmis_20200102v7.nc', self.output_dir + ) with Dataset(dataset_path, 'a') as dataset: - fill_variable(dataset, fill_ranges, self.varinfo, '/sst_dtime', - dimensions_to_fill) + fill_variable( + dataset, fill_ranges, self.varinfo, '/sst_dtime', dimensions_to_fill + ) self.assertTrue(dataset['/sst_dtime'][:].all() is np.ma.masked) - mock_get_fill_slice.assert_has_calls([ - call('/time', fill_ranges), - call('/latitude', fill_ranges), - call('/longitude', fill_ranges), - ]) + mock_get_fill_slice.assert_has_calls( + [ + call('/time', fill_ranges), + call('/latitude', fill_ranges), + call('/longitude', fill_ranges), + ] + ) mock_get_fill_slice.reset_mock() diff --git a/tests/unit/test_temporal.py b/tests/unit/test_temporal.py index ba2e560..8e043ac 100644 --- a/tests/unit/test_temporal.py +++ b/tests/unit/test_temporal.py @@ -11,18 +11,21 @@ from varinfo import VarInfoFromDmr from 
hoss.exceptions import UnsupportedTemporalUnits -from hoss.temporal import (get_datetime_with_timezone, - get_temporal_index_ranges, - get_time_ref) +from hoss.temporal import ( + get_datetime_with_timezone, + get_temporal_index_ranges, + get_time_ref, +) class TestTemporal(TestCase): - """ A class for testing functions in the hoss.spatial module. """ + """A class for testing functions in the hoss.spatial module.""" + @classmethod def setUpClass(cls): cls.varinfo = VarInfoFromDmr( 'tests/data/M2T1NXSLV_example.dmr', - config_file='tests/data/test_subsetter_config.json' + config_file='tests/data/test_subsetter_config.json', ) cls.test_dir = 'tests/output' @@ -33,52 +36,50 @@ def tearDown(self): rmtree(self.test_dir) def test_get_temporal_index_ranges(self): - """ Ensure that correct temporal index ranges can be calculated. """ + """Ensure that correct temporal index ranges can be calculated.""" test_file_name = f'{self.test_dir}/test.nc' - harmony_message = Message({ - 'temporal': {'start': '2021-01-10T01:30:00', - 'end': '2021-01-10T05:30:00'} - }) + harmony_message = Message( + {'temporal': {'start': '2021-01-10T01:30:00', 'end': '2021-01-10T05:30:00'}} + ) with Dataset(test_file_name, 'w', format='NETCDF4') as test_file: test_file.createDimension('time', size=24) - test_file.createVariable('time', int, - dimensions=('time', )) + test_file.createVariable('time', int, dimensions=('time',)) test_file['time'][:] = np.linspace(0, 1380, 24) test_file['time'].setncatts({'units': 'minutes since 2021-01-10 00:30:00'}) with self.subTest('Time dimension, halfway between the whole hours'): self.assertDictEqual( - get_temporal_index_ranges({'/time'}, self.varinfo, - test_file_name, harmony_message), - {'/time': (1, 5)} + get_temporal_index_ranges( + {'/time'}, self.varinfo, test_file_name, harmony_message + ), + {'/time': (1, 5)}, ) @patch('hoss.temporal.get_dimension_index_range') - def test_get_temporal_index_ranges_bounds(self, - mock_get_dimension_index_range): - """ Ensure that bounds are correctly extracted and used as an argument - for the `get_dimension_index_range` utility function if they are - present in the prefetch file. + def test_get_temporal_index_ranges_bounds(self, mock_get_dimension_index_range): + """Ensure that bounds are correctly extracted and used as an argument + for the `get_dimension_index_range` utility function if they are + present in the prefetch file. - The GPM IMERG prefetch data are for a granule with a temporal range - of 2020-01-01T12:00:00 to 2020-01-01T12:30:00. + The GPM IMERG prefetch data are for a granule with a temporal range + of 2020-01-01T12:00:00 to 2020-01-01T12:30:00. 
""" mock_get_dimension_index_range.return_value = (1, 2) gpm_varinfo = VarInfoFromDmr('tests/data/GPM_3IMERGHH_example.dmr') gpm_prefetch_path = 'tests/data/GPM_3IMERGHH_prefetch.nc4' - harmony_message = Message({ - 'temporal': {'start': '2020-01-01T12:15:00', - 'end': '2020-01-01T12:45:00'} - }) + harmony_message = Message( + {'temporal': {'start': '2020-01-01T12:15:00', 'end': '2020-01-01T12:45:00'}} + ) self.assertDictEqual( - get_temporal_index_ranges({'/Grid/time'}, gpm_varinfo, - gpm_prefetch_path, harmony_message), - {'/Grid/time': (1, 2)} + get_temporal_index_ranges( + {'/Grid/time'}, gpm_varinfo, gpm_prefetch_path, harmony_message + ), + {'/Grid/time': (1, 2)}, ) mock_get_dimension_index_range.assert_called_once_with( ANY, 1577880900.0, 1577882700, bounds_values=ANY @@ -87,64 +88,68 @@ def test_get_temporal_index_ranges_bounds(self, with Dataset(gpm_prefetch_path) as prefetch: assert_array_equal( mock_get_dimension_index_range.call_args_list[0][0][0], - prefetch['/Grid/time'][:] + prefetch['/Grid/time'][:], ) assert_array_equal( mock_get_dimension_index_range.call_args_list[0][1]['bounds_values'], - prefetch['/Grid/time_bnds'][:] + prefetch['/Grid/time_bnds'][:], ) def test_get_time_ref(self): - """ Ensure the 'units' attribute tells the correct time_ref and - time_delta + """Ensure the 'units' attribute tells the correct time_ref and + time_delta """ expected_datetime = datetime(2021, 12, 8, 0, 30, tzinfo=timezone.utc) with self.subTest('units of minutes'): - self.assertEqual(get_time_ref('minutes since 2021-12-08 00:30:00'), - (expected_datetime, timedelta(minutes=1))) + self.assertEqual( + get_time_ref('minutes since 2021-12-08 00:30:00'), + (expected_datetime, timedelta(minutes=1)), + ) with self.subTest('Units of seconds'): - self.assertEqual(get_time_ref('seconds since 2021-12-08 00:30:00'), - (expected_datetime, timedelta(seconds=1))) + self.assertEqual( + get_time_ref('seconds since 2021-12-08 00:30:00'), + (expected_datetime, timedelta(seconds=1)), + ) with self.subTest('Units of hours'): - self.assertEqual(get_time_ref('hours since 2021-12-08 00:30:00'), - (expected_datetime, timedelta(hours=1))) + self.assertEqual( + get_time_ref('hours since 2021-12-08 00:30:00'), + (expected_datetime, timedelta(hours=1)), + ) with self.subTest('Units of days'): - self.assertEqual(get_time_ref('days since 2021-12-08 00:30:00'), - (expected_datetime, timedelta(days=1))) + self.assertEqual( + get_time_ref('days since 2021-12-08 00:30:00'), + (expected_datetime, timedelta(days=1)), + ) with self.subTest('Unrecognised unit'): with self.assertRaises(UnsupportedTemporalUnits): get_time_ref('fortnights since 2021-12-08 00:30:00') def test_get_datetime_with_timezone(self): - """ Ensure the string is parsed to datetime with timezone. 
""" + """Ensure the string is parsed to datetime with timezone.""" expected_datetime = datetime(2021, 12, 8, 0, 30, tzinfo=timezone.utc) with self.subTest('with space'): self.assertEqual( - get_datetime_with_timezone('2021-12-08 00:30:00'), - expected_datetime + get_datetime_with_timezone('2021-12-08 00:30:00'), expected_datetime ) with self.subTest('no space'): self.assertEqual( - get_datetime_with_timezone('2021-12-08T00:30:00'), - expected_datetime + get_datetime_with_timezone('2021-12-08T00:30:00'), expected_datetime ) with self.subTest('no space with trailing Z'): self.assertEqual( - get_datetime_with_timezone('2021-12-08T00:30:00Z'), - expected_datetime + get_datetime_with_timezone('2021-12-08T00:30:00Z'), expected_datetime ) with self.subTest('space with trailing Z'): self.assertEqual( - get_datetime_with_timezone('2021-12-08 00:30:00Z'), - expected_datetime + get_datetime_with_timezone('2021-12-08 00:30:00Z'), expected_datetime ) diff --git a/tests/unit/test_utilities.py b/tests/unit/test_utilities.py index 696805e..4e546a4 100644 --- a/tests/unit/test_utilities.py +++ b/tests/unit/test_utilities.py @@ -6,15 +6,20 @@ from harmony.util import config from hoss.exceptions import UrlAccessFailed -from hoss.utilities import (download_url, format_dictionary_string, - format_variable_set_string, - get_constraint_expression, get_file_mimetype, - get_opendap_nc4, get_value_or_default, - move_downloaded_nc4) +from hoss.utilities import ( + download_url, + format_dictionary_string, + format_variable_set_string, + get_constraint_expression, + get_file_mimetype, + get_opendap_nc4, + get_value_or_default, + move_downloaded_nc4, +) class TestUtilities(TestCase): - """ A class for testing functions in the hoss.utilities module. """ + """A class for testing functions in the hoss.utilities module.""" @classmethod def setUpClass(cls): @@ -24,9 +29,9 @@ def setUpClass(cls): cls.logger = getLogger('tests') def test_get_file_mimetype(self): - """ Ensure a mimetype can be retrieved for a valid file path or, if - the mimetype cannot be inferred, that the default output is - returned. This assumes the output is a NetCDF-4 file. + """Ensure a mimetype can be retrieved for a valid file path or, if + the mimetype cannot be inferred, that the default output is + returned. This assumes the output is a NetCDF-4 file. """ with self.subTest('File with MIME type'): @@ -41,9 +46,9 @@ def test_get_file_mimetype(self): @patch('hoss.utilities.util_download') def test_download_url(self, mock_util_download): - """ Ensure that the `harmony.util.download` function is called. If an - error occurs, the caught exception should be re-raised with a - custom exception with a human-readable error message. + """Ensure that the `harmony.util.download` function is called. If an + error occurs, the caught exception should be re-raised with a + custom exception with a human-readable error message. 
""" output_directory = 'output/dir' @@ -55,8 +60,9 @@ def test_download_url(self, mock_util_download): with self.subTest('Successful response, only make one request.'): mock_util_download.return_value = http_response - response = download_url(test_url, output_directory, self.logger, - access_token, self.config) + response = download_url( + test_url, output_directory, self.logger, access_token, self.config + ) self.assertEqual(response, http_response) mock_util_download.assert_called_once_with( @@ -65,14 +71,20 @@ def test_download_url(self, mock_util_download): self.logger, access_token=access_token, data=None, - cfg=self.config + cfg=self.config, ) mock_util_download.reset_mock() with self.subTest('A request with data passes the data to Harmony.'): mock_util_download.return_value = http_response - response = download_url(test_url, output_directory, self.logger, - access_token, self.config, data=test_data) + response = download_url( + test_url, + output_directory, + self.logger, + access_token, + self.config, + data=test_data, + ) self.assertEqual(response, http_response) mock_util_download.assert_called_once_with( @@ -81,17 +93,17 @@ def test_download_url(self, mock_util_download): self.logger, access_token=access_token, data=test_data, - cfg=self.config + cfg=self.config, ) mock_util_download.reset_mock() with self.subTest('500 error is caught and handled.'): - mock_util_download.side_effect = [self.harmony_500_error, - http_response] + mock_util_download.side_effect = [self.harmony_500_error, http_response] with self.assertRaises(UrlAccessFailed): - download_url(test_url, output_directory, self.logger, - access_token, self.config) + download_url( + test_url, output_directory, self.logger, access_token, self.config + ) mock_util_download.assert_called_once_with( test_url, @@ -99,17 +111,17 @@ def test_download_url(self, mock_util_download): self.logger, access_token=access_token, data=None, - cfg=self.config + cfg=self.config, ) mock_util_download.reset_mock() with self.subTest('Non-500 error does not retry, and is re-raised.'): - mock_util_download.side_effect = [self.harmony_auth_error, - http_response] + mock_util_download.side_effect = [self.harmony_auth_error, http_response] with self.assertRaises(UrlAccessFailed): - download_url(test_url, output_directory, self.logger, - access_token, self.config) + download_url( + test_url, output_directory, self.logger, access_token, self.config + ) mock_util_download.assert_called_once_with( test_url, @@ -117,18 +129,18 @@ def test_download_url(self, mock_util_download): self.logger, access_token=access_token, data=None, - cfg=self.config + cfg=self.config, ) mock_util_download.reset_mock() @patch('hoss.utilities.move_downloaded_nc4') @patch('hoss.utilities.util_download') def test_get_opendap_nc4(self, mock_download, mock_move_download): - """ Ensure a request is sent to OPeNDAP that combines the URL of the - granule with a constraint expression. + """Ensure a request is sent to OPeNDAP that combines the URL of the + granule with a constraint expression. - Once the request is completed, the output file should be moved to - ensure a second request to the same URL is still performed. + Once the request is completed, the output file should be moved to + ensure a second request to the same URL is still performed. 
""" downloaded_file_name = 'output_file.nc4' @@ -143,83 +155,99 @@ def test_get_opendap_nc4(self, mock_download, mock_move_download): expected_data = {'dap4.ce': 'variable'} with self.subTest('Request with variables includes dap4.ce'): - output_file = get_opendap_nc4(url, required_variables, output_dir, - self.logger, access_token, - self.config) + output_file = get_opendap_nc4( + url, + required_variables, + output_dir, + self.logger, + access_token, + self.config, + ) self.assertEqual(output_file, moved_file_name) mock_download.assert_called_once_with( - f'{url}.dap.nc4', output_dir, self.logger, - access_token=access_token, data=expected_data, cfg=self.config + f'{url}.dap.nc4', + output_dir, + self.logger, + access_token=access_token, + data=expected_data, + cfg=self.config, ) - mock_move_download.assert_called_once_with(output_dir, - downloaded_file_name) + mock_move_download.assert_called_once_with(output_dir, downloaded_file_name) mock_download.reset_mock() mock_move_download.reset_mock() with self.subTest('Request with no variables omits dap4.ce'): - output_file = get_opendap_nc4(url, {}, output_dir, self.logger, - access_token, self.config) + output_file = get_opendap_nc4( + url, {}, output_dir, self.logger, access_token, self.config + ) self.assertEqual(output_file, moved_file_name) mock_download.assert_called_once_with( - f'{url}.dap.nc4', output_dir, self.logger, - access_token=access_token, data=None, cfg=self.config + f'{url}.dap.nc4', + output_dir, + self.logger, + access_token=access_token, + data=None, + cfg=self.config, ) - mock_move_download.assert_called_once_with(output_dir, - downloaded_file_name) + mock_move_download.assert_called_once_with(output_dir, downloaded_file_name) def test_get_constraint_expression(self): - """ Ensure a correctly encoded DAP4 constraint expression is - constructed for the given input. + """Ensure a correctly encoded DAP4 constraint expression is + constructed for the given input. - URL encoding: + URL encoding: - - %2F = '/' - - %3A = ':' - - %3B = ';' - - %5B = '[' - - %5D = ']' + - %2F = '/' + - %3A = ':' + - %3B = ';' + - %5B = '[' + - %5D = ']' - Note - with sets, the order can't be guaranteed, so there are two - options for the combined constraint expression. + Note - with sets, the order can't be guaranteed, so there are two + options for the combined constraint expression. """ with self.subTest('No index ranges specified'): self.assertIn( get_constraint_expression({'/alpha_var', '/blue_var'}), - ['%2Falpha_var%3B%2Fblue_var', '%2Fblue_var%3B%2Falpha_var'] + ['%2Falpha_var%3B%2Fblue_var', '%2Fblue_var%3B%2Falpha_var'], ) with self.subTest('Variables with index ranges'): self.assertIn( get_constraint_expression({'/alpha_var[1:2]', '/blue_var[3:4]'}), - ['%2Falpha_var%5B1%3A2%5D%3B%2Fblue_var%5B3%3A4%5D', - '%2Fblue_var%5B3%3A4%5D%3B%2Falpha_var%5B1%3A2%5D'] + [ + '%2Falpha_var%5B1%3A2%5D%3B%2Fblue_var%5B3%3A4%5D', + '%2Fblue_var%5B3%3A4%5D%3B%2Falpha_var%5B1%3A2%5D', + ], ) @patch('hoss.utilities.move') @patch('hoss.utilities.uuid4') def test_move_downloaded_nc4(self, mock_uuid4, mock_move): - """ Ensure a specified file is moved to the specified location. 
""" + """Ensure a specified file is moved to the specified location.""" mock_uuid4.return_value = Mock(hex='uuid4') output_dir = '/tmp/path/to' old_path = '/tmp/path/to/file.nc4' - self.assertEqual(move_downloaded_nc4(output_dir, old_path), - '/tmp/path/to/uuid4.nc4') + self.assertEqual( + move_downloaded_nc4(output_dir, old_path), '/tmp/path/to/uuid4.nc4' + ) - mock_move.assert_called_once_with('/tmp/path/to/file.nc4', - '/tmp/path/to/uuid4.nc4') + mock_move.assert_called_once_with( + '/tmp/path/to/file.nc4', '/tmp/path/to/uuid4.nc4' + ) def test_format_variable_set(self): - """ Ensure a set of variable strings is printed out as expected, and - does not contain any curly braces. + """Ensure a set of variable strings is printed out as expected, and + does not contain any curly braces. - The formatted string is broken up for verification because sets are - unordered, so the exact ordering of the variables within the - formatted string may not be consistent between runs. + The formatted string is broken up for verification because sets are + unordered, so the exact ordering of the variables within the + formatted string may not be consistent between runs. """ variable_set = {'/var_one', '/var_two', '/var_three'} @@ -230,19 +258,21 @@ def test_format_variable_set(self): self.assertSetEqual(variable_set, set(formatted_string.split(', '))) def test_format_dictionary_string(self): - """ Ensure a dictionary is formatted to a string without curly braces. - This function assumes only a single level dictionary, without any - sets for values. + """Ensure a dictionary is formatted to a string without curly braces. + This function assumes only a single level dictionary, without any + sets for values. """ input_dictionary = {'key_one': 'value_one', 'key_two': 'value_two'} - self.assertEqual(format_dictionary_string(input_dictionary), - 'key_one: value_one\nkey_two: value_two') + self.assertEqual( + format_dictionary_string(input_dictionary), + 'key_one: value_one\nkey_two: value_two', + ) def test_get_value_or_default(self): - """ Ensure a value is retrieved if supplied, even if it is 0, or a - default value is returned if not. + """Ensure a value is retrieved if supplied, even if it is 0, or a + default value is returned if not. """ with self.subTest('Value is returned'): diff --git a/tests/utilities.py b/tests/utilities.py index e3fd653..564a1ba 100644 --- a/tests/utilities.py +++ b/tests/utilities.py @@ -1,4 +1,5 @@ """ Utility classes used to extend the unittest capabilities """ + from collections import namedtuple from datetime import datetime from typing import List @@ -12,9 +13,9 @@ def write_dmr(output_dir: str, content: str): - """ A helper function to write out the content of a `.dmr`, when the - `harmony.util.download` function is called. This will be called as - a side-effect to the mock for that function. + """A helper function to write out the content of a `.dmr`, when the + `harmony.util.download` function is called. This will be called as + a side-effect to the mock for that function. """ dmr_name = f'{output_dir}/downloaded.dmr' @@ -59,6 +60,7 @@ def wrapper(self, *args, **kwargs): raise return_values.append(result) return result + wrapper.mock = mock wrapper.return_values = return_values wrapper.errors = errors @@ -66,25 +68,29 @@ def wrapper(self, *args, **kwargs): def create_stac(granules: List[Granule]) -> Catalog: - """ Create a SpatioTemporal Asset Catalog (STAC). These are used as inputs - for Harmony requests, containing the URL and other information for - input granules. 
+ """Create a SpatioTemporal Asset Catalog (STAC). These are used as inputs + for Harmony requests, containing the URL and other information for + input granules. - For simplicity the geometry and temporal properties of each item are - set to default values, as only the URL, media type and role are used by - HOSS. + For simplicity the geometry and temporal properties of each item are + set to default values, as only the URL, media type and role are used by + HOSS. """ catalog = Catalog(id='input', description='test input') for granule_index, granule in enumerate(granules): - item = Item(id=f'granule_{granule_index}', - geometry=bbox_to_geometry([-180, -90, 180, 90]), - bbox=[-180, -90, 180, 90], - datetime=datetime(2020, 1, 1), properties=None) - item.add_asset('input_data', - Asset(granule.url, media_type=granule.media_type, - roles=granule.roles)) + item = Item( + id=f'granule_{granule_index}', + geometry=bbox_to_geometry([-180, -90, 180, 90]), + bbox=[-180, -90, 180, 90], + datetime=datetime(2020, 1, 1), + properties=None, + ) + item.add_asset( + 'input_data', + Asset(granule.url, media_type=granule.media_type, roles=granule.roles), + ) catalog.add_item(item) return catalog From 258458b8d7a716c0c06adfe9f33f4189a1cecdad Mon Sep 17 00:00:00 2001 From: Owen Littlejohns Date: Thu, 4 Apr 2024 14:07:05 -0400 Subject: [PATCH 3/4] IP-241 - Add .git-blame-ignore-revs. --- .git-blame-ignore-revs | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .git-blame-ignore-revs diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000..0514463 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,5 @@ +# For more information, see: +# https://docs.github.com/en/repositories/working-with-files/using-files/viewing-a-file#ignore-commits-in-the-blame-view + +# Black code formatting of entire repository +56dd43f69d901abbba6cfb765a98dee26ff71cfc From 8645d1d8209a856ed0fa2c66693c2bb2592c6eb7 Mon Sep 17 00:00:00 2001 From: Owen Littlejohns Date: Fri, 5 Apr 2024 12:32:07 -0400 Subject: [PATCH 4/4] IP-241 - Increment service version for black formatting. --- CHANGELOG.md | 18 ++++++++++++------ docker/service_version.txt | 2 +- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 830ac23..c1845e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,13 +1,19 @@ +## v1.0.4 +### 2024-04-05 + +This version of HOSS implements `black` code formatting across the repository. +There should be no functional changes in the service. + ## v1.0.3 -### 2024-3-29 +### 2024-03-29 -This version of HOSS handles the error in the crs_wkt attribute in ATL19 where the -north polar crs variable has a leading iquotation mark escaped by back slash in the -crs_wkt attribute. This causes errors when the projection is being interpreted from -the crs variable attributes. +This version of HOSS handles the error in the crs_wkt attribute in ATL19 where the +north polar crs variable has a leading iquotation mark escaped by back slash in the +crs_wkt attribute. This causes errors when the projection is being interpreted from +the crs variable attributes. ## v1.0.2 -### 2024-2-26 +### 2024-02-26 This version of HOSS correctly handles edge-aligned geographic collections by adding the attribute `cell_alignment` with the value `edge` to `hoss_config.json` diff --git a/docker/service_version.txt b/docker/service_version.txt index 21e8796..ee90284 100644 --- a/docker/service_version.txt +++ b/docker/service_version.txt @@ -1 +1 @@ -1.0.3 +1.0.4