From cf8559b594c0d29f32e4f5cef866b5190a7f26aa Mon Sep 17 00:00:00 2001 From: konstntokas Date: Thu, 25 Apr 2024 11:41:26 +0200 Subject: [PATCH] Setup of inital structure --- .github/workflows/unitest-workflow.yml | 29 ++ .gitignore | 114 +++++++ CHANGES.md | 3 + LICENSE | 21 ++ environment.yml | 8 + pyproject.toml | 43 +++ test/test_plugin.py | 53 ++++ test/test_stac.py | 35 +++ test/test_store.py | 135 ++++++++ xcube_stac/__init__.py | 24 ++ xcube_stac/constants.py | 23 ++ xcube_stac/plugin.py | 43 +++ xcube_stac/stac.py | 65 ++++ xcube_stac/store.py | 410 +++++++++++++++++++++++++ xcube_stac/version.py | 22 ++ 15 files changed, 1028 insertions(+) create mode 100644 .github/workflows/unitest-workflow.yml create mode 100644 .gitignore create mode 100644 CHANGES.md create mode 100644 LICENSE create mode 100644 environment.yml create mode 100644 pyproject.toml create mode 100644 test/test_plugin.py create mode 100644 test/test_stac.py create mode 100644 test/test_store.py create mode 100644 xcube_stac/__init__.py create mode 100644 xcube_stac/constants.py create mode 100644 xcube_stac/plugin.py create mode 100644 xcube_stac/stac.py create mode 100644 xcube_stac/store.py create mode 100644 xcube_stac/version.py diff --git a/.github/workflows/unitest-workflow.yml b/.github/workflows/unitest-workflow.yml new file mode 100644 index 0000000..2f78c54 --- /dev/null +++ b/.github/workflows/unitest-workflow.yml @@ -0,0 +1,29 @@ +name: Build + +on: + push: + release: + types: [published] + +jobs: + unittest: + runs-on: ubuntu-latest + env: + NUMBA_DISABLE_JIT: 1 + steps: + - name: checkout xcube-stac + uses: actions/checkout@v3 + + - name: Set up MicroMamba + uses: mamba-org/provision-with-micromamba@main + with: + cache-env: true + extra-specs: | + python=3.10 + + - name: Run unit tests + shell: bash -l {0} + run: | + cd /home/runner/work/xcube-stac/xcube-stac + ls + pytest diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ab82702 --- /dev/null +++ 
b/.gitignore @@ -0,0 +1,114 @@ +# Credentials +*-credentials.json + +# Test output data +test-outputs/ + +# IntelliJ / PyCharm +.idea + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.vscode/settings.json diff --git a/CHANGES.md b/CHANGES.md new file mode 100644 index 0000000..37e4516 --- /dev/null +++ b/CHANGES.md @@ -0,0 +1,3 @@ +## Changes in 0.1.0 (in development) + +Initial version of STAC Data Store. 
diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..a4370ac --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Brockmann Consult GmbH + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON INFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..32e3187 --- /dev/null +++ b/environment.yml @@ -0,0 +1,8 @@ +name: xcube-stac +channels: + - conda-forge + - defaults +dependencies: + # Required + - xarray + - pystac diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..442b355 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,43 @@ +[build-system] +requires = ["setuptools >= 61.2.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "xcube_stac" +dynamic = ["version"] +authors = [ + {name = "Konstantin Ntokas", email = "konstantin.ntokas@brockmann-consult.de"} +] +description = """\ + xcube data store for accessing stac catalogs\ + """ +keywords = [ + "analysis ready data", "data science", "datacube", "xarray", "zarr", "xcube", "stac" +] +readme = {file = "README.md", content-type = "text/markdown"} +license = {text = "MIT"} +requires-python = ">=3.10" +dependencies = [ + "xarray", + "pystac" +] + +[tool.setuptools.dynamic] +version = {attr = "xcube_stac.__version__"} + +[tool.setuptools.packages.find] +exclude = [ + "test*", + "doc*" +] + +[project.optional-dependencies] +dev = [ + "pytest", + "flake8" +] + +[project.urls] +Repository = 'https://github.com/xcube-dev/xcube-stac' +Issues = 'https://github.com/xcube-dev/xcube-stac/issues' +Changelog = 'https://github.com/xcube-dev/xcube-stac/blob/main/CHANGES.md' diff --git a/test/test_plugin.py b/test/test_plugin.py new file mode 100644 index 0000000..796135c --- /dev/null +++ b/test/test_plugin.py @@ -0,0 +1,53 @@ +# The MIT License (MIT) +# Copyright (c) 2024 by the xcube development team and contributors +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the 
Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON INFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import unittest + +from xcube.util.extension import ExtensionRegistry +from xcube_stac.plugin import init_plugin + + +class XcubePluginTest(unittest.TestCase): + def test_plugin(self): + """Assert xcube extensions registered by xcube-stac""" + registry = ExtensionRegistry() + init_plugin(registry) + self.assertEqual( + { + "xcube.core.store": { + "stac": { + "component": "<not loaded yet>", + "description": "STAC DataStore", + "name": "stac", + "point": "xcube.core.store", + } + }, + "xcube.core.store.opener": { + "dataset:zarr:stac": { + "component": "<not loaded yet>", + "description": "xarray.Dataset from STAC API", + "name": "dataset:zarr:stac", + "point": "xcube.core.store.opener", + } + }, + }, + registry.to_dict(), + ) diff --git a/test/test_stac.py b/test/test_stac.py new file mode 100644 index 0000000..bbaff85 --- /dev/null +++ b/test/test_stac.py @@ -0,0 +1,35 @@ +# The MIT License (MIT) +# Copyright (c) 2024 by the xcube development team and contributors +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON INFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import unittest +from xcube_stac.stac import Stac + + +class StacTest(unittest.TestCase): + + def test_open_data(self): + stac_instance = Stac("url") + with self.assertRaises(NotImplementedError) as cm: + stac_instance.open_data("data_id1") + self.assertEqual( + "open_data() operation is not supported yet", + f"{cm.exception}", + ) diff --git a/test/test_store.py b/test/test_store.py new file mode 100644 index 0000000..f233a28 --- /dev/null +++ b/test/test_store.py @@ -0,0 +1,135 @@ +# The MIT License (MIT) +# Copyright (c) 2024 by the xcube development team and contributors +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON INFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import unittest + +from xcube.util.jsonschema import JsonObjectSchema +from xcube_stac.store import StacDataOpener +from xcube_stac.store import StacDataStore +from xcube_stac.stac import Stac + + +class StacDataOpenerTest(unittest.TestCase): + + def setUp(self) -> None: + stac_instance = Stac("url") + self.opener = StacDataOpener(stac_instance) + + def test_get_open_data_params_schema(self): + schema = self.opener.get_open_data_params_schema() + self.assertIsInstance(schema, JsonObjectSchema) + + def test_open_data(self): + with self.assertRaises(NotImplementedError) as cm: + self.opener.open_data("data_id1") + self.assertEqual( + "open_data() operation is not supported yet", + f"{cm.exception}", + ) + + def test_describe_data(self): + with self.assertRaises(NotImplementedError) as cm: + self.opener.describe_data("data_id1") + self.assertEqual( + "describe_data() operation is not supported yet", + f"{cm.exception}", + ) + + +class StacDataStoreTest(unittest.TestCase): + + def setUp(self) -> None: + self.store = StacDataStore(url="url") + + def test_get_data_store_params_schema(self): + schema = self.store.get_data_store_params_schema() + self.assertIsInstance(schema, JsonObjectSchema) + self.assertIn("url", schema.properties) + self.assertIn("collection_prefix", schema.properties) + self.assertIn("data_id_delimiter", schema.properties) + self.assertIn("url", schema.required) + + def test_get_data_types(self): + self.assertEqual(("dataset",), self.store.get_data_types()) + + def 
test_get_data_types_for_data(self): + self.assertEqual( + ("dataset",), + self.store.get_data_types_for_data("data_id1") + ) + + def test_get_data_ids(self): + with self.assertRaises(NotImplementedError) as cm: + self.store.get_data_ids() + self.assertEqual( + "get_data_ids() operation is not supported yet", + f"{cm.exception}", + ) + + def test_has_data(self): + with self.assertRaises(NotImplementedError) as cm: + self.store.has_data("data_id1") + self.assertEqual( + "has_data() operation is not supported yet", + f"{cm.exception}", + ) + + def test_describe_data(self): + with self.assertRaises(NotImplementedError) as cm: + self.store.describe_data("data_id1") + self.assertEqual( + "describe_data() operation is not supported yet", + f"{cm.exception}", + ) + + def test_get_data_opener_ids(self): + self.assertEqual( + ("dataset:zarr:stac",), + self.store.get_data_opener_ids() + ) + + def test_get_open_data_params_schema(self): + schema = self.store.get_open_data_params_schema() + self.assertIsInstance(schema, JsonObjectSchema) + + def test_open_data(self): + with self.assertRaises(NotImplementedError) as cm: + self.store.open_data("data_id1") + self.assertEqual( + "open_data() operation is not supported yet", + f"{cm.exception}", + ) + + def test_search_data(self): + with self.assertRaises(NotImplementedError) as cm: + self.store.search_data() + self.assertEqual( + "search_data() operation is not supported yet", + f"{cm.exception}", + ) + + def test_get_search_params_schema(self): + with self.assertRaises(NotImplementedError) as cm: + self.store.get_search_params_schema() + self.assertEqual( + "get_search_params_schema() operation is not supported yet", + f"{cm.exception}", + ) diff --git a/xcube_stac/__init__.py b/xcube_stac/__init__.py new file mode 100644 index 0000000..ac5e73a --- /dev/null +++ b/xcube_stac/__init__.py @@ -0,0 +1,24 @@ +# The MIT License (MIT) +# Copyright (c) 2024 by the xcube development team and contributors +# +# Permission is hereby granted, 
free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON INFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +from .version import version + +__version__ = version diff --git a/xcube_stac/constants.py b/xcube_stac/constants.py new file mode 100644 index 0000000..98d81e4 --- /dev/null +++ b/xcube_stac/constants.py @@ -0,0 +1,23 @@ +# The MIT License (MIT) +# Copyright (c) 2024 by the xcube development team and contributors +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON INFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +DATA_STORE_ID = "stac" +DATASET_OPENER_ID = f"dataset:zarr:{DATA_STORE_ID}" diff --git a/xcube_stac/plugin.py b/xcube_stac/plugin.py new file mode 100644 index 0000000..5ef9c69 --- /dev/null +++ b/xcube_stac/plugin.py @@ -0,0 +1,43 @@ +# The MIT License (MIT) +# Copyright (c) 2024 by the xcube development team and contributors +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON INFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from xcube.util import extension +from xcube.constants import EXTENSION_POINT_DATA_OPENERS +from xcube.constants import EXTENSION_POINT_DATA_STORES +from xcube_stac.constants import DATASET_OPENER_ID +from xcube_stac.constants import DATA_STORE_ID + + +def init_plugin(ext_registry: extension.ExtensionRegistry): + # xcube DataStore extensions + ext_registry.add_extension( + loader=extension.import_component("xcube_stac.store:StacDataStore"), + point=EXTENSION_POINT_DATA_STORES, + name=DATA_STORE_ID, + description="STAC DataStore", + ) + # xcube DataOpener extensions + ext_registry.add_extension( + loader=extension.import_component("xcube_stac.store:StacDataOpener"), + point=EXTENSION_POINT_DATA_OPENERS, + name=DATASET_OPENER_ID, + description="xarray.Dataset from STAC API", + ) diff --git a/xcube_stac/stac.py b/xcube_stac/stac.py new file mode 100644 index 0000000..7a5817a --- /dev/null +++ b/xcube_stac/stac.py @@ -0,0 +1,65 @@ +# The MIT License (MIT) +# Copyright (c) 2024 by the xcube development team and contributors +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON INFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import xarray as xr + + +class Stac: + """ Class containing methods handling STAC catalogs + """ + + def __init__( + self, url: str, + collection_prefix: str = None, + data_id_delimiter: str = "/" + ): + """ + Args: + url (str): URL to STAC catalog + collection_prefix (str, optional): Path of collection used as + entry point. Defaults to None. + data_id_delimiter (str, optional): Delimiter used to separate + collections, items and assets from each other. + Defaults to "/". + """ + self._url = url + self._collection_prefix = collection_prefix + self._data_id_delimiter = data_id_delimiter + # ToDo: open Catalog and direct to entry point defined by *collection_prefix* + # ToDo: Add a data store "file", which will be used to open the hrefs + + def open_data(self, data_id: str, **open_params) -> xr.Dataset: + """ Open the data given by the data resource identifier *data_id* + using the supplied *open_params*. + + Args: + data_id (str): An identifier of data that is provided by this + store. + + Raises: + NotImplementedError: Not implemented yet. + + Returns: + xr.Dataset: An in-memory representation of the data resources + identified by *data_id* and *open_params*. 
+ """ + # ToDo: implement this method using data store "file", see __init__() + raise NotImplementedError("open_data() operation is not supported yet") diff --git a/xcube_stac/store.py b/xcube_stac/store.py new file mode 100644 index 0000000..6293496 --- /dev/null +++ b/xcube_stac/store.py @@ -0,0 +1,410 @@ +# The MIT License (MIT) +# Copyright (c) 2024 by the xcube development team and contributors +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON INFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from typing import Any, Tuple, Iterator, Dict, Container, Union + +import logging +import xarray as xr + +from xcube.util.jsonschema import ( + JsonObjectSchema, + JsonStringSchema +) +from xcube.core.store import ( + DATASET_TYPE, + DataDescriptor, + DataOpener, + DataStore, + DataStoreError, + DataTypeLike, + DatasetDescriptor +) +from .constants import DATASET_OPENER_ID +from .stac import Stac + +_LOG = logging.getLogger("xcube") + + +class StacDataOpener(DataOpener): + """ STAC implementation of the data opener. + """ + + def __init__(self, stac: Stac): + """ + Args: + stac (Stac): class containing methods handling STAC catalogs + """ + self.stac = stac + + def get_open_data_params_schema(self, data_id: str = None) -> JsonObjectSchema: + """ Get the schema for the parameters passed as *open_params* to + :meth:`open_data`. + + Args: + data_id (str, optional): An identifier of data that is provided by this + store. Defaults to None. + + Returns: + JsonObjectSchema: containing the parameters used by the data opener + to open data. + """ + # ToDo: to be adjusted + open_parms = {} + stac_schema = JsonObjectSchema( + properties=dict(**open_parms), + required=[], + additional_properties=False + ) + return stac_schema + + def open_data(self, data_id: str, **open_params) -> xr.Dataset: + """ Open the data given by the data resource identifier *data_id* + using the supplied *open_params*. + + Args: + data_id (str): An identifier of data that is provided by this + store. + + Returns: + xr.Dataset: An in-memory representation of the data resources + identified by *data_id* and *open_params*. + """ + stac_schema = self.get_open_data_params_schema() + stac_schema.validate_instance(open_params) + return self.stac.open_data(data_id, **open_params) + + def describe_data( + self, data_id: str, **open_params + ) -> DatasetDescriptor: + """ Get the descriptor for the data resource given by *data_id*. 
+ + Args: + data_id (str): An identifier of data that is provided by this + store. + + Raises: + NotImplementedError: Not implemented yet. + + Returns: + DatasetDescriptor: data descriptor containing meta data of + the data resources identified by *data_id* + """ + # ToDo: implement describe_data method. + raise NotImplementedError("describe_data() operation is not supported yet") + + +class StacDataStore(StacDataOpener, DataStore): + """ STAC implementation of the data store. + """ + + def __init__(self, **stac_kwargs): + """ + Args: + **stac_kwargs: Parameters required by the STAC data store. + * url (str): URL to STAC catalog (required) + * collection_prefix (str): Path of collection used as + entry point (optional) + * data_id_delimiter (str): Delimiter used to separate + collections, items and assets from each other (optional) + """ + super().__init__(stac=Stac(**stac_kwargs)) + + @classmethod + def get_data_store_params_schema(cls) -> JsonObjectSchema: + """ Get the JSON schema for instantiating a new data store. + + Returns: + JsonObjectSchema: The JSON schema for the data store's parameters. + """ + stac_params = dict( + url=JsonStringSchema( + title="URL to STAC catalog" + ), + collection_prefix=JsonStringSchema( + title="Collection prefix", + description="Path of collection used as entry point", + ), + data_id_delimiter=JsonStringSchema( + title="Data ID delimiter", + description=( + "Delimiter used to separate collections, " + "items and assets from each other" + ), + ) + ) + return JsonObjectSchema( + properties=stac_params, + required=["url"], + additional_properties=False + ) + + @classmethod + def get_data_types(cls) -> Tuple[str, ...]: + """ Get alias names for all data types supported by this store. + + Returns: + Tuple[str, ...]: The tuple of supported data types. 
+ """ + return (DATASET_TYPE.alias,) + + def get_data_types_for_data(self, data_id: str) -> Tuple[str, ...]: + """ Get alias names for of data types that are supported + by this store for the given *data_id*. + + Args: + data_id (str): An identifier of data that is provided by this + store. + + Returns: + Tuple[str, ...]: A tuple of data types that apply + to the given *data_id*. + """ + return self.get_data_types() + + def get_data_ids( + self, data_type: DataTypeLike = None, include_attrs: Container[str] = None + ) -> Union[Iterator[str], Iterator[Tuple[str, Dict[str, Any]]]]: + """ Get an iterator over the data resource identifiers for the + given type *data_type*. If *data_type* is omitted, all data + resource identifiers are returned. + + Args: + data_type (DataTypeLike, optional): If given, only data identifiers + that are available as this type are returned. If this is None, + all available data identifiers are returned. Defaults to None. + include_attrs (Container[str], optional): A sequence of names + of attributes to be returned for each dataset identifier. + If given, the store will attempt to provide the set of + requested dataset attributes in addition to the data ids. + Defaults to None. + + Raises: + NotImplementedError: Not implemented yet. + + Returns: + Union[Iterator[str], Iterator[Tuple[str, Dict[str, Any]]]]: An iterator + over the identifiers (and additional attributes defined by + *include_attrs* of data resources provided by this data store. + """ + # ToDo: implement get_data_ids method. + raise NotImplementedError("get_data_ids() operation is not supported yet") + + def has_data(self, data_id: str, data_type: DataTypeLike = None) -> bool: + """ Check if the data resource given by *data_id* is available + in this store. + + Args: + data_id (str): An identifier of data that is provided by this + store. + data_type (DataTypeLike, optional): An optional data type. 
If given, + it will also be checked whether the data is available as the + specified type. May be given as type alias name, as a type, or as + a :class:`DataType` instance. Defaults to None. + + Raises: + NotImplementedError: Not implemented yet. + + Returns: + bool: True, if the data resource is available in this store, + False otherwise. + """ + # ToDo: get_data_ids() is needed. + # Add this method after get_data_ids() is implemented. + raise NotImplementedError("has_data() operation is not supported yet") + + def describe_data(self, data_id: str, **open_params) -> DataDescriptor: + """ Get the descriptor for the data resource given by *data_id*. + + Args: + data_id (str): An identifier of data that is provided by this + store. + + Returns: + DataDescriptor: data descriptor containing meta data of + the data resources identified by *data_id* + """ + return super().describe_data(data_id, **open_params) + + def get_data_opener_ids( + self, data_id: str = None, data_type: DataTypeLike = None + ) -> Tuple[str, ...]: + """ Get identifiers of data openers that can be used to open data + resources from this store. + + Args: + data_id (str, optional): An identifier of data that is provided by this + store. Defaults to None. + data_type (DataTypeLike, optional): Data type that is known to be + supported by this data store. May be given as type alias name, + as a type, or as a :class:`DataType` instance. Defaults to None. + + Raises: + DataStoreError: If an error occurs. + + Returns: + Tuple[str, ...]: A tuple of identifiers of data openers that + can be used to open data resources. + """ + self._assert_valid_data_type(data_type) + if data_id is not None and not self.has_data(data_id, data_type=data_type): + raise DataStoreError( + f"Data resource {data_id!r}" f" is not available." + ) + if data_type is not None and not DATASET_TYPE.is_super_type_of(data_type): + raise DataStoreError( + f"Data resource {data_id!r}" f" is not " + f"available as type {data_type!r}." 
+ ) + return (DATASET_OPENER_ID,) + + def get_open_data_params_schema( + self, data_id: str = None, opener_id: str = None + ) -> JsonObjectSchema: + """ Get the schema for the parameters passed as *open_params* to + :meth:`open_data`. + + Args: + data_id (str, optional): An identifier of data that is provided by this + store. Defaults to None. + opener_id (str, optional): Data opener identifier. Defaults to None. + + Returns: + JsonObjectSchema: The schema for the parameters in *open_params*. + """ + self._assert_valid_opener_id(opener_id) + return super().get_open_data_params_schema(data_id) + + def open_data( + self, data_id: str, opener_id: str = None, **open_params + ) -> xr.Dataset: + """ Open the data given by the data resource identifier *data_id* + using the data opener identified by *opener_id* and + the supplied *open_params*. + + Args: + data_id (str): An identifier of data that is provided by this + store. + opener_id (str, optional): Data opener identifier. Defaults to None. + + Returns: + xr.Dataset: An in-memory representation of the data resources identified + by *data_id* and *open_params*. + """ + self._assert_valid_opener_id(opener_id) + return super().open_data(data_id, **open_params) + + def search_data( + self, data_type: DataTypeLike = None, **search_params + ) -> Iterator[DataDescriptor]: + """ Search this store for data resources. If *data_type* is given, + the search is restricted to data resources of that type. + + Args: + data_type (DataTypeLike, optional): Data type that is known to be + supported by this data store. Defaults to None. + + Raises: + NotImplementedError: Not implemented yet. + + Yields: + Iterator[DataDescriptor]: An iterator of data descriptors + for the found data resources. + """ + # ToDo: implement search_data method. 
+ raise NotImplementedError("search_data() operation is not supported yet") + + @classmethod + def get_search_params_schema( + cls, data_type: DataTypeLike = None + ) -> JsonObjectSchema: + """ Get the schema for the parameters that can be passed + as *search_params* to :meth:`search_data`. Parameters are + named and described by the properties of the returned JSON object schema. + + Args: + data_type (DataTypeLike, optional): Data type that is known to be + supported by this data store. Defaults to None. + + Raises: + NotImplementedError: Not implemented yet. + + Returns: + JsonObjectSchema: A JSON object schema whose properties + describe this store's search parameters. + """ + # ToDo: implement get_search_params_schema in + # combination with search_data method. + raise NotImplementedError( + "get_search_params_schema() operation is not supported yet" + ) + + ########################################################################## + # Implementation helpers + + @classmethod + def _is_valid_data_type(cls, data_type: DataTypeLike) -> bool: + """ Auxiliary function to check if data type is supported + by the store. + + Args: + data_type (DataTypeLike): Data type that is to be checked. + + Returns: + bool: True if *data_type* is supported by the store, otherwise False + """ + return data_type is None or DATASET_TYPE.is_super_type_of(data_type) + + @classmethod + def _assert_valid_data_type(cls, data_type: DataTypeLike): + """ Auxiliary function to assert if data type is supported + by the store. + + Args: + data_type (DataTypeLike): Data type that is to be checked. + + Raises: + DataStoreError: Error, if *data_type* is not + supported by the store. + """ + if not cls._is_valid_data_type(data_type): + raise DataStoreError( + f"Data type must be {DATASET_TYPE!r}, " + f"but got {data_type!r}" + ) + + @classmethod + def _assert_valid_opener_id(cls, opener_id): + """ Auxiliary function to assert if data opener identified by + *opener_id* is supported by the store. 
+ + Args: + opener_id (str): Data opener identifier + + Raises: + DataStoreError: Error, if *opener_id* is not + supported by the store. + """ + if opener_id is not None and opener_id != DATASET_OPENER_ID: + raise DataStoreError( + f"Data opener identifier must be" + f' "{DATASET_OPENER_ID}",' + f' but got "{opener_id}"' + ) diff --git a/xcube_stac/version.py b/xcube_stac/version.py new file mode 100644 index 0000000..711ec8b --- /dev/null +++ b/xcube_stac/version.py @@ -0,0 +1,22 @@ +# The MIT License (MIT) +# Copyright (c) 2024 by the xcube development team and contributors +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON INFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +version = "0.1.0.dev0"