From 68d51dd43d29666d7db52a2c7988d2de1207c720 Mon Sep 17 00:00:00 2001 From: Vini Salazar <17276653+vinisalazar@users.noreply.github.com> Date: Sat, 29 Oct 2022 09:51:46 +0200 Subject: [PATCH 01/15] Create module interfaces.py - Add interfaces.py to process responses into third-party library objects --- erddapy/core/interfaces.py | 50 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 erddapy/core/interfaces.py diff --git a/erddapy/core/interfaces.py b/erddapy/core/interfaces.py new file mode 100644 index 00000000..cd50ce43 --- /dev/null +++ b/erddapy/core/interfaces.py @@ -0,0 +1,50 @@ +""" +Interface between URL responses and third-party libraries. + +This module takes an URL or the bytes response of a request and converts it to Pandas, +XArray, Iris, etc. objects. +""" + +import iris +import pandas as pd +import xarray as xr +from netCDF4 import Dataset as ncDataset + +from erddapy.core.netcdf import _nc_dataset, _tempnc +from erddapy.core.url import urlopen + + +def to_pandas(url: str, **kw) -> pd.DataFrame: + """Convert a URL to Pandas DataFrame.""" + data = urlopen(url, **kw) + try: + return pd.read_csv(data, **kw) + except Exception: + print("Couldn't process response into Pandas DataFrame.") + raise + + +def to_ncCF(url: str, **kw) -> ncDataset: + """Convert a URL to a netCDF4 Dataset.""" + return _nc_dataset(url, **kw) + + +def to_xarray(url: str, response="opendap", **kw) -> xr.Dataset: + """Convert a URL to an xarray dataset.""" + if response == "opendap": + return xr.open_dataset(url, **kw) + else: + nc = _nc_dataset(url, **kw) + return xr.open_dataset(xr.backends.NetCDF4DataStore(nc), **kw) + + +def to_iris(url: str, **kw) -> iris.CubeList: + """Convert a URL to an iris CubeList.""" + data = urlopen(url, **kw) + with _tempnc(data) as tmp: + cubes = iris.load_raw(tmp, **kw) + try: + cubes.realise_data() + except ValueError: + _ = [cube.data for cube in cubes] + return cubes From e60e4e4f9d96f8d6004ca78ba896d4bba6c25868 Mon Sep 17 00:00:00 2001 From: Vini Salazar <17276653+vinisalazar@users.noreply.github.com> Date: Sat, 29 Oct 2022 09:52:05 +0200 Subject: [PATCH 02/15] Use methods from interfaces.py in erddapy.py module --- erddapy/erddapy.py | 33 ++++++++++----------------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/erddapy/erddapy.py b/erddapy/erddapy.py index 96506956..a9c6a7f4 100644 --- a/erddapy/erddapy.py +++ b/erddapy/erddapy.py @@ -10,7 +10,7 @@ _griddap_check_variables, _griddap_get_constraints, ) -from erddapy.core.netcdf import _nc_dataset, _tempnc +from erddapy.core.interfaces import to_iris, to_ncCF, to_pandas, to_xarray from erddapy.core.url import ( _check_substrings, _distinct, @@ -344,50 +344,37 @@ def to_pandas(self, **kw): """ response = kw.pop("response", "csvp") url = self.get_download_url(response=response, **kw) - data = urlopen(url, auth=self.auth, **self.requests_kwargs) - return pd.read_csv(data, **kw) + return to_pandas(url, **kw) def to_ncCF(self, **kw): """Load the data request into a Climate and Forecast compliant netCDF4-python object.""" if self.protocol == "griddap": return ValueError("Cannot use ncCF with griddap.") url = self.get_download_url(response="ncCF", **kw) - nc = _nc_dataset(url, auth=self.auth, **self.requests_kwargs) - return nc + return to_ncCF(url, **kw) def to_xarray(self, **kw): """Load the data request into a xarray.Dataset. Accepts any `xr.open_dataset` keyword arguments. """ - import xarray as xr - if self.response == "opendap": - url = self.get_download_url() - return xr.open_dataset(url, **kw) + response = "opendap" + elif self.protocol == "griddap": + response = "nc" else: - response = "nc" if self.protocol == "griddap" else "ncCF" - url = self.get_download_url(response=response) - nc = _nc_dataset(url, auth=self.auth, **self.requests_kwargs) - return xr.open_dataset(xr.backends.NetCDF4DataStore(nc), **kw) + response = "ncCF" + url = self.get_download_url(response=response) + return to_xarray(url, response=response, auth=self.auth, **kw) def to_iris(self, **kw): """Load the data request into an iris.CubeList. Accepts any `iris.load_raw` keyword arguments. """ - import iris - response = "nc" if self.protocol == "griddap" else "ncCF" url = self.get_download_url(response=response, **kw) - data = urlopen(url, auth=self.auth, **self.requests_kwargs) - with _tempnc(data) as tmp: - cubes = iris.load_raw(tmp, **kw) - try: - cubes.realise_data() - except ValueError: - _ = [cube.data for cube in cubes] - return cubes + return to_iris(url, **kw) @functools.lru_cache(maxsize=None) def _get_variables(self, dataset_id: OptionalStr = None) -> Dict: From b6a2086cc16346ca7029ce3cf91deb28db13b488 Mon Sep 17 00:00:00 2001 From: Vini Salazar <17276653+vinisalazar@users.noreply.github.com> Date: Sat, 29 Oct 2022 09:58:26 +0200 Subject: [PATCH 03/15] Remove iris typehint --- erddapy/core/interfaces.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/erddapy/core/interfaces.py b/erddapy/core/interfaces.py index cd50ce43..cb48fd26 100644 --- a/erddapy/core/interfaces.py +++ b/erddapy/core/interfaces.py @@ -38,7 +38,7 @@ def to_xarray(url: str, response="opendap", **kw) -> xr.Dataset: return xr.open_dataset(xr.backends.NetCDF4DataStore(nc), **kw) -def to_iris(url: str, **kw) -> iris.CubeList: +def to_iris(url: str, **kw): """Convert a URL to an iris CubeList.""" data = urlopen(url, **kw) with _tempnc(data) as tmp: From ff2961ef379b8d1bb7f0168fa1026918385ab795 Mon Sep 17 00:00:00 2001 From: Vini Salazar <17276653+vinisalazar@users.noreply.github.com> Date: Sat, 29 Oct 2022 10:17:34 +0200 Subject: [PATCH 04/15] Fixing kwargs propagation in interfaces module --- erddapy/core/interfaces.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/erddapy/core/interfaces.py b/erddapy/core/interfaces.py index cb48fd26..a4d3eb37 100644 --- a/erddapy/core/interfaces.py +++ b/erddapy/core/interfaces.py @@ -14,9 +14,9 @@ from erddapy.core.url import urlopen -def to_pandas(url: str, **kw) -> pd.DataFrame: +def to_pandas(url: str, requests_kwargs=dict(), **kw) -> pd.DataFrame: """Convert a URL to Pandas DataFrame.""" - data = urlopen(url, **kw) + data = urlopen(url, **requests_kwargs) try: return pd.read_csv(data, **kw) except Exception: @@ -26,15 +26,17 @@ def to_pandas(url: str, **kw) -> pd.DataFrame: def to_ncCF(url: str, **kw) -> ncDataset: """Convert a URL to a netCDF4 Dataset.""" - return _nc_dataset(url, **kw) + auth = kw.pop("auth", None) + return _nc_dataset(url, auth=auth, **kw) def to_xarray(url: str, response="opendap", **kw) -> xr.Dataset: """Convert a URL to an xarray dataset.""" + auth = kw.pop("auth", None) if response == "opendap": return xr.open_dataset(url, **kw) else: - nc = _nc_dataset(url, **kw) + nc = _nc_dataset(url, auth=auth, **kw) return xr.open_dataset(xr.backends.NetCDF4DataStore(nc), **kw) From 85f5233e33cfc60386cbbfcef0178e8493b2b0a8 Mon Sep 17 00:00:00 2001 From: Vini Salazar <17276653+vinisalazar@users.noreply.github.com> Date: Sat, 29 Oct 2022 10:47:37 +0200 Subject: [PATCH 05/15] Fix timeout test - Use existing dataset - Pass requests_kwargs to 'to_pandas' method --- tests/test_erddapy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_erddapy.py b/tests/test_erddapy.py index 3c10cc4c..560d88a0 100644 --- a/tests/test_erddapy.py +++ b/tests/test_erddapy.py @@ -103,13 +103,13 @@ def test_erddap_requests_kwargs(): slowwly_url = f"https://flash-the-slow-api.herokuapp.com/delay/{slowwly_milliseconds}/url/{base_url}" connection = ERDDAP(slowwly_url) - connection.dataset_id = "M01_sbe37_all" + connection.dataset_id = "raw_asset_inventory" connection.protocol = "tabledap" connection.requests_kwargs["timeout"] = timeout_seconds with pytest.raises(httpx.ReadTimeout): - connection.to_xarray() + connection.to_pandas(requests_kwargs=connection.requests_kwargs) @pytest.mark.web From e5aa1dc1e3b3b0db9b06700b96bb4e096b52f524 Mon Sep 17 00:00:00 2001 From: Vini Salazar <17276653+vinisalazar@users.noreply.github.com> Date: Mon, 12 Sep 2022 17:48:46 -0300 Subject: [PATCH 06/15] Add PyCharm .idea/ dir to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 0d5b6cdd..4883f9c6 100644 --- a/.gitignore +++ b/.gitignore @@ -8,5 +8,6 @@ build/ dist/ *-output.ipynb .vscode/ +.idea/ *.code-workspace **/__pycache__ From 19e38d850c8bce5d8bb01baae8b396be99167f7e Mon Sep 17 00:00:00 2001 From: Vini Salazar <17276653+vinisalazar@users.noreply.github.com> Date: Mon, 12 Sep 2022 18:29:12 -0300 Subject: [PATCH 07/15] Drafting new classes This commit introduces a rough draft of the classes to be introduced in the object/opinionated layer. For now, the subpackage containing them is named 'objects' but that may change in the future. For discussion, see #228. - Add 'objects' subpackage with init module - Add 'objects.py' module with 5 new classes - Methods will be implemented in the following commits. --- erddapy/objects/__init__.py | 23 +++++ erddapy/objects/objects.py | 187 ++++++++++++++++++++++++++++++++++++ 2 files changed, 210 insertions(+) create mode 100644 erddapy/objects/__init__.py create mode 100644 erddapy/objects/objects.py diff --git a/erddapy/objects/__init__.py b/erddapy/objects/__init__.py new file mode 100644 index 00000000..9101d851 --- /dev/null +++ b/erddapy/objects/__init__.py @@ -0,0 +1,23 @@ +""" +This module contains opinionated, higher-level objects for searching servers and accessing datasets. + +It is named 'objects' after object-relational mapping, which is the concept of having an object-oriented +layer between a database (in this case, ERDDAP), and the programming language. +""" + + +from objects import ( + ERDDAPConnection, + ERDDAPDataset, + ERDDAPServer, + GridDataset, + TableDataset, +) + +__all__ = [ + "ERDDAPDataset", + "ERDDAPConnection", + "ERDDAPServer", + "TableDataset", + "GridDataset", +] diff --git a/erddapy/objects/objects.py b/erddapy/objects/objects.py new file mode 100644 index 00000000..043e001b --- /dev/null +++ b/erddapy/objects/objects.py @@ -0,0 +1,187 @@ +"""Main module of the 'objects' subpackage containing most classes.""" + +from pathlib import Path +from typing import Dict, Union + +StrLike = Union[str, bytes] +FilePath = Union[str, Path] + + +class ERDDAPConnection: + """ + Manages connection that will be used in ERDDAPServer instances. + + While most ERDDAP servers allow connections via a bare url, some servers may require authentication + to access data. + """ + + def __init__(self, server: str): + """Initialize instance of ERDDAPConnection.""" + self._server = self.to_string(server) + + @classmethod + def to_string(cls, value): + """Convert an instance of ERDDAPConnection to a string.""" + if isinstance(value, str): + return value + elif isinstance(value, cls): + return value.server + else: + raise TypeError( + f"Server must be either a string or an instance of ERDDAPConnection. '{value}' was " + f"passed.", + ) + + def get(self, url_part: str) -> StrLike: + """ + Request data from the server. + + Uses requests by default similar to most of the current erddapy data fetching functionality. + + Can be overridden to use httpx, and potentially aiohttp or other async functionality, which could + hopefully make anything else async compatible. + """ + pass + + def open(self, url_part: str) -> FilePath: + """Yield file-like object for access for file types that don't enjoy getting passed a string.""" + pass + + @property + def server(self) -> str: + """Access the private ._server attribute.""" + return self._server + + @server.setter + def server(self, value: str): + """Set private ._server attribute.""" + self._server = self.to_string(value) + + +class ERDDAPDataset: + """Base class for more focused table or grid datasets.""" + + def __init__( + self, + dataset_id: str, + connection: str | ERDDAPConnection, + variables, + constraints, + ): + """Initialize instance of ERDDAPDataset.""" + self.dataset_id = dataset_id + self._connection = ERDDAPConnection(ERDDAPConnection.to_string(connection)) + self._variables = variables + self._constraints = constraints + self._meta = None + + @property + def connection(self) -> ERDDAPConnection: + """Access private ._connection variable.""" + return self._connection + + @connection.setter + def connection(self, value: str | ERDDAPConnection): + """Set private ._connection variable.""" + self._connection = ERDDAPConnection(ERDDAPConnection.to_string(value)) + + def get(self, file_type: str) -> StrLike: + """Request data using underlying connection.""" + return self.connection.get(file_type) + + def open(self, file_type: str) -> FilePath: + """Download and open dataset using underlying connection.""" + return self.connection.open(file_type) + + def get_meta(self): + """Request dataset metadata from the server.""" + self._meta = None + + @property + def meta(self): + """Access private ._meta attribute. Request metadata if ._meta is empty.""" + return self.get_meta() if (self._meta is None) else self._meta + + @property + def variables(self): + """Access private ._variables attribute.""" + return self._variables + + @property + def constraints(self): + """Access private ._constraints attribute.""" + return self._constraints + + def url_segment(self, file_type: str) -> str: + """Return URL segment without the base URL (the portion after 'https://server.com/erddap/').""" + pass + + def url(self, file_type: str) -> str: + """ + Return a URL constructed using the underlying ERDDAPConnection. + + The URL will contain information regarding the base class server info, the dataset ID, + access method (tabledap/griddap), file type, variables, and constraints. + + This allows ERDDAPDataset subclasses to be used as more opinionated URL constructors while still + not tying users to a specific IO method. + + Not guaranteed to capture all the specifics of formatting a request, such as if a server requires + specific auth or headers. + """ + pass + + def to_dataset(self): + """Open the dataset as xarray dataset by downloading a subset NetCDF.""" + pass + + def opendap_dataset(self): + """Open the full dataset in xarray via OpenDAP.""" + pass + + +class TableDataset(ERDDAPDataset): + """Subclass of ERDDAPDataset specific to TableDAP datasets.""" + + def to_dataframe(self): + """Open the dataset as a Pandas DataFrame.""" + + +class GridDataset(ERDDAPDataset): + """Subclass of ERDDAPDataset specific to GridDAP datasets.""" + + pass + + +class ERDDAPServer: + """Instance of an ERDDAP server, with support to ERDDAP's native functionalities.""" + + def __init__(self, connection: str | ERDDAPConnection): + """Initialize instance of ERDDAPServer.""" + self._connection = ERDDAPConnection(ERDDAPConnection.to_string(connection)) + + @property + def connection(self) -> ERDDAPConnection: + """Access private ._connection attribute.""" + return self._connection + + @connection.setter + def connection(self, value: str | ERDDAPConnection): + """Set private ._connection attribute.""" + self._connection = ERDDAPConnection(ERDDAPConnection.to_string(value)) + + def full_text_search(self, query: str) -> Dict[str, ERDDAPDataset]: + """Search the server with native ERDDAP full text search capabilities.""" + pass + + def search(self, query: str) -> Dict[str, ERDDAPDataset]: + """ + Search the server with native ERDDAP full text search capabilities. + + Also see ERDDAPServer.full_text_search. + """ + return self.full_text_search(query) + + def advanced_search(self, **kwargs) -> Dict[str, ERDDAPDataset]: + """Search server with ERDDAP advanced search capabilities (may return pre-filtered datasets).""" + pass From 101d1b05ab761ee47413985f3a58cf11e6fd860a Mon Sep 17 00:00:00 2001 From: Vini Salazar <17276653+vinisalazar@users.noreply.github.com> Date: Tue, 13 Sep 2022 13:19:20 -0300 Subject: [PATCH 08/15] Add imports to package top-level init --- erddapy/__init__.py | 10 +++++++++- erddapy/objects/__init__.py | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/erddapy/__init__.py b/erddapy/__init__.py index a970f37d..0090b7a3 100644 --- a/erddapy/__init__.py +++ b/erddapy/__init__.py @@ -1,9 +1,17 @@ """Easier access to scientific data.""" from erddapy.erddapy import ERDDAP +from erddapy.objects import ERDDAPConnection, ERDDAPServer, GridDataset, TableDataset from erddapy.servers.servers import servers -__all__ = ["ERDDAP", "servers"] +__all__ = [ + "ERDDAP", + "servers", + "ERDDAPConnection", + "ERDDAPServer", + "TableDataset", + "GridDataset", +] try: from ._version import __version__ diff --git a/erddapy/objects/__init__.py b/erddapy/objects/__init__.py index 9101d851..a1ef9c3c 100644 --- a/erddapy/objects/__init__.py +++ b/erddapy/objects/__init__.py @@ -6,7 +6,7 @@ """ -from objects import ( +from .objects import ( ERDDAPConnection, ERDDAPDataset, ERDDAPServer, From b33bd08dba9f5f48d738e7aa477d9892d8581787 Mon Sep 17 00:00:00 2001 From: Vini Salazar <17276653+vinisalazar@users.noreply.github.com> Date: Tue, 13 Sep 2022 13:39:36 -0300 Subject: [PATCH 09/15] Add __future__ import The '|' operator for typing was introduced in Python 3.10. This import allows previous Python versions to work with this operator. --- erddapy/objects/objects.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/erddapy/objects/objects.py b/erddapy/objects/objects.py index 043e001b..920e313b 100644 --- a/erddapy/objects/objects.py +++ b/erddapy/objects/objects.py @@ -1,7 +1,9 @@ """Main module of the 'objects' subpackage containing most classes.""" +from __future__ import annotations + from pathlib import Path -from typing import Dict, Union +from typing import Dict, Union # noqa StrLike = Union[str, bytes] FilePath = Union[str, Path] @@ -170,11 +172,11 @@ def connection(self, value: str | ERDDAPConnection): """Set private ._connection attribute.""" self._connection = ERDDAPConnection(ERDDAPConnection.to_string(value)) - def full_text_search(self, query: str) -> Dict[str, ERDDAPDataset]: + def full_text_search(self, query: str) -> dict[str, ERDDAPDataset]: """Search the server with native ERDDAP full text search capabilities.""" pass - def search(self, query: str) -> Dict[str, ERDDAPDataset]: + def search(self, query: str) -> dict[str, ERDDAPDataset]: """ Search the server with native ERDDAP full text search capabilities. @@ -182,6 +184,6 @@ def search(self, query: str) -> Dict[str, ERDDAPDataset]: """ return self.full_text_search(query) - def advanced_search(self, **kwargs) -> Dict[str, ERDDAPDataset]: + def advanced_search(self, **kwargs) -> dict[str, ERDDAPDataset]: """Search server with ERDDAP advanced search capabilities (may return pre-filtered datasets).""" pass From 9345c93658d32fc192100e1b6c11bb2bf645717e Mon Sep 17 00:00:00 2001 From: Vini Salazar <17276653+vinisalazar@users.noreply.github.com> Date: Tue, 4 Oct 2022 10:21:35 +1100 Subject: [PATCH 10/15] Simplify constructor method of ERDDAPServer - code review Co-authored-by: Alex Kerney --- erddapy/objects/objects.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/erddapy/objects/objects.py b/erddapy/objects/objects.py index 920e313b..95192cd8 100644 --- a/erddapy/objects/objects.py +++ b/erddapy/objects/objects.py @@ -158,9 +158,13 @@ class GridDataset(ERDDAPDataset): class ERDDAPServer: """Instance of an ERDDAP server, with support to ERDDAP's native functionalities.""" - def __init__(self, connection: str | ERDDAPConnection): + def __init__(self, url: str, connection: ERDDAPConnection | None): """Initialize instance of ERDDAPServer.""" - self._connection = ERDDAPConnection(ERDDAPConnection.to_string(connection)) + if "http" in url: + self.url = url + else: + # get URL from dict of ERDDAP servers + self._connection = connection or ERDDAPConnection() @property def connection(self) -> ERDDAPConnection: From f5ab397aaaa70c6ebd608675939fafb0d88d5486 Mon Sep 17 00:00:00 2001 From: Vini Salazar <17276653+vinisalazar@users.noreply.github.com> Date: Tue, 4 Oct 2022 10:29:56 +1100 Subject: [PATCH 11/15] Refactoring subpackage name - Rename 'objects' to 'array_like' to avoid clobbering Python built-ins - Refactor imports --- erddapy/__init__.py | 2 +- erddapy/{objects => array_like}/__init__.py | 2 +- erddapy/{objects/objects.py => array_like/array_like.py} | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) rename erddapy/{objects => array_like}/__init__.py (95%) rename erddapy/{objects/objects.py => array_like/array_like.py} (98%) diff --git a/erddapy/__init__.py b/erddapy/__init__.py index 0090b7a3..61656549 100644 --- a/erddapy/__init__.py +++ b/erddapy/__init__.py @@ -1,7 +1,7 @@ """Easier access to scientific data.""" +from erddapy.array_like import ERDDAPConnection, ERDDAPServer, GridDataset, TableDataset from erddapy.erddapy import ERDDAP -from erddapy.objects import ERDDAPConnection, ERDDAPServer, GridDataset, TableDataset from erddapy.servers.servers import servers __all__ = [ diff --git a/erddapy/objects/__init__.py b/erddapy/array_like/__init__.py similarity index 95% rename from erddapy/objects/__init__.py rename to erddapy/array_like/__init__.py index a1ef9c3c..45bef358 100644 --- a/erddapy/objects/__init__.py +++ b/erddapy/array_like/__init__.py @@ -6,7 +6,7 @@ """ -from .objects import ( +from .array_like import ( ERDDAPConnection, ERDDAPDataset, ERDDAPServer, diff --git a/erddapy/objects/objects.py b/erddapy/array_like/array_like.py similarity index 98% rename from erddapy/objects/objects.py rename to erddapy/array_like/array_like.py index 95192cd8..f74dff5e 100644 --- a/erddapy/objects/objects.py +++ b/erddapy/array_like/array_like.py @@ -164,7 +164,7 @@ def __init__(self, url: str, connection: ERDDAPConnection | None): self.url = url else: # get URL from dict of ERDDAP servers - self._connection = connection or ERDDAPConnection() + self._connection = connection or ERDDAPConnection() @property def connection(self) -> ERDDAPConnection: From 122c14defb31856413e9c002c7f9f58ccaac74cc Mon Sep 17 00:00:00 2001 From: Vini Salazar <17276653+vinisalazar@users.noreply.github.com> Date: Fri, 7 Oct 2022 15:05:09 +1100 Subject: [PATCH 12/15] Split classes into modules - Create connection.py, datasets.py, server.py --- erddapy/array_like/connection.py | 60 ++++++++++++++++++ erddapy/array_like/datasets.py | 104 +++++++++++++++++++++++++++++++ erddapy/array_like/server.py | 42 +++++++++++++ 3 files changed, 206 insertions(+) create mode 100644 erddapy/array_like/connection.py create mode 100644 erddapy/array_like/datasets.py create mode 100644 erddapy/array_like/server.py diff --git a/erddapy/array_like/connection.py b/erddapy/array_like/connection.py new file mode 100644 index 00000000..eb73227d --- /dev/null +++ b/erddapy/array_like/connection.py @@ -0,0 +1,60 @@ +"""Class ERDDAPConnection to represent connection to a particular URL.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Union + +StrLike = Union[str, bytes] +FilePath = Union[str, Path] + + +class ERDDAPConnection: + """ + Manages connection that will be used in ERDDAPServer instances. + + While most ERDDAP servers allow connections via a bare url, some servers may require authentication + to access data. + """ + + def __init__(self, server: str): + """Initialize instance of ERDDAPConnection.""" + self._server = self.to_string(server) + + @classmethod + def to_string(cls, value): + """Convert an instance of ERDDAPConnection to a string.""" + if isinstance(value, str): + return value + elif isinstance(value, cls): + return value.server + else: + raise TypeError( + f"Server must be either a string or an instance of ERDDAPConnection. '{value}' was " + f"passed.", + ) + + def get(self, url_part: str) -> StrLike: + """ + Request data from the server. + + Uses requests by default similar to most of the current erddapy data fetching functionality. + + Can be overridden to use httpx, and potentially aiohttp or other async functionality, which could + hopefully make anything else async compatible. + """ + pass + + def open(self, url_part: str) -> FilePath: + """Yield file-like object for access for file types that don't enjoy getting passed a string.""" + pass + + @property + def server(self) -> str: + """Access the private ._server attribute.""" + return self._server + + @server.setter + def server(self, value: str): + """Set private ._server attribute.""" + self._server = self.to_string(value) diff --git a/erddapy/array_like/datasets.py b/erddapy/array_like/datasets.py new file mode 100644 index 00000000..9b546bc3 --- /dev/null +++ b/erddapy/array_like/datasets.py @@ -0,0 +1,104 @@ +"""Classes to represent ERDDAP datasets.""" + +from pathlib import Path +from typing import Union + +from erddapy.array_like.connection import ERDDAPConnection + +StrLike = Union[str, bytes] +FilePath = Union[str, Path] + + +class ERDDAPDataset: + """Base class for more focused table or grid datasets.""" + + def __init__( + self, + dataset_id: str, + connection: str | ERDDAPConnection, + variables, + constraints, + ): + """Initialize instance of ERDDAPDataset.""" + self.dataset_id = dataset_id + self._connection = ERDDAPConnection(ERDDAPConnection.to_string(connection)) + self._variables = variables + self._constraints = constraints + self._meta = None + + @property + def connection(self) -> ERDDAPConnection: + """Access private ._connection variable.""" + return self._connection + + @connection.setter + def connection(self, value: str | ERDDAPConnection): + """Set private ._connection variable.""" + self._connection = ERDDAPConnection(ERDDAPConnection.to_string(value)) + + def get(self, file_type: str) -> StrLike: + """Request data using underlying connection.""" + return self.connection.get(file_type) + + def open(self, file_type: str) -> FilePath: + """Download and open dataset using underlying connection.""" + return self.connection.open(file_type) + + def get_meta(self): + """Request dataset metadata from the server.""" + self._meta = None + + @property + def meta(self): + """Access private ._meta attribute. Request metadata if ._meta is empty.""" + return self.get_meta() if (self._meta is None) else self._meta + + @property + def variables(self): + """Access private ._variables attribute.""" + return self._variables + + @property + def constraints(self): + """Access private ._constraints attribute.""" + return self._constraints + + def url_segment(self, file_type: str) -> str: + """Return URL segment without the base URL (the portion after 'https://server.com/erddap/').""" + pass + + def url(self, file_type: str) -> str: + """ + Return a URL constructed using the underlying ERDDAPConnection. + + The URL will contain information regarding the base class server info, the dataset ID, + access method (tabledap/griddap), file type, variables, and constraints. + + This allows ERDDAPDataset subclasses to be used as more opinionated URL constructors while still + not tying users to a specific IO method. + + Not guaranteed to capture all the specifics of formatting a request, such as if a server requires + specific auth or headers. + """ + pass + + def to_dataset(self): + """Open the dataset as xarray dataset by downloading a subset NetCDF.""" + pass + + def opendap_dataset(self): + """Open the full dataset in xarray via OpenDAP.""" + pass + + +class TableDataset(ERDDAPDataset): + """Subclass of ERDDAPDataset specific to TableDAP datasets.""" + + def to_dataframe(self): + """Open the dataset as a Pandas DataFrame.""" + + +class GridDataset(ERDDAPDataset): + """Subclass of ERDDAPDataset specific to GridDAP datasets.""" + + pass diff --git a/erddapy/array_like/server.py b/erddapy/array_like/server.py new file mode 100644 index 00000000..a8e6b522 --- /dev/null +++ b/erddapy/array_like/server.py @@ -0,0 +1,42 @@ +"""Class ERDDAPServer to represent an ERDDAP server connection.""" + +from erddapy.array_like.connection import ERDDAPConnection +from erddapy.array_like.datasets import ERDDAPDataset + + +class ERDDAPServer: + """Instance of an ERDDAP server, with support to ERDDAP's native functionalities.""" + + def __init__(self, url: str, connection: ERDDAPConnection | None): + """Initialize instance of ERDDAPServer.""" + if "http" in url: + self.url = url + else: + # get URL from dict of ERDDAP servers + self._connection = connection or ERDDAPConnection() + + @property + def connection(self) -> ERDDAPConnection: + """Access private ._connection attribute.""" + return self._connection + + @connection.setter + def connection(self, value: str | ERDDAPConnection): + """Set private ._connection attribute.""" + self._connection = value or ERDDAPConnection() + + def full_text_search(self, query: str) -> dict[str, ERDDAPDataset]: + """Search the server with native ERDDAP full text search capabilities.""" + pass + + def search(self, query: str) -> dict[str, ERDDAPDataset]: + """ + Search the server with native ERDDAP full text search capabilities. + + Also see ERDDAPServer.full_text_search. + """ + return self.full_text_search(query) + + def advanced_search(self, **kwargs) -> dict[str, ERDDAPDataset]: + """Search server with ERDDAP advanced search capabilities (may return pre-filtered datasets).""" + pass From 0782350642360af358fb963b4b4bc38a038b5421 Mon Sep 17 00:00:00 2001 From: Vini Salazar <17276653+vinisalazar@users.noreply.github.com> Date: Fri, 7 Oct 2022 15:07:18 +1100 Subject: [PATCH 13/15] Remove array_like module Replaced by connections, datasets, and server modules. --- erddapy/array_like/array_like.py | 193 ------------------------------- 1 file changed, 193 deletions(-) delete mode 100644 erddapy/array_like/array_like.py diff --git a/erddapy/array_like/array_like.py b/erddapy/array_like/array_like.py deleted file mode 100644 index f74dff5e..00000000 --- a/erddapy/array_like/array_like.py +++ /dev/null @@ -1,193 +0,0 @@ -"""Main module of the 'objects' subpackage containing most classes.""" - -from __future__ import annotations - -from pathlib import Path -from typing import Dict, Union # noqa - -StrLike = Union[str, bytes] -FilePath = Union[str, Path] - - -class ERDDAPConnection: - """ - Manages connection that will be used in ERDDAPServer instances. - - While most ERDDAP servers allow connections via a bare url, some servers may require authentication - to access data. - """ - - def __init__(self, server: str): - """Initialize instance of ERDDAPConnection.""" - self._server = self.to_string(server) - - @classmethod - def to_string(cls, value): - """Convert an instance of ERDDAPConnection to a string.""" - if isinstance(value, str): - return value - elif isinstance(value, cls): - return value.server - else: - raise TypeError( - f"Server must be either a string or an instance of ERDDAPConnection. '{value}' was " - f"passed.", - ) - - def get(self, url_part: str) -> StrLike: - """ - Request data from the server. - - Uses requests by default similar to most of the current erddapy data fetching functionality. - - Can be overridden to use httpx, and potentially aiohttp or other async functionality, which could - hopefully make anything else async compatible. - """ - pass - - def open(self, url_part: str) -> FilePath: - """Yield file-like object for access for file types that don't enjoy getting passed a string.""" - pass - - @property - def server(self) -> str: - """Access the private ._server attribute.""" - return self._server - - @server.setter - def server(self, value: str): - """Set private ._server attribute.""" - self._server = self.to_string(value) - - -class ERDDAPDataset: - """Base class for more focused table or grid datasets.""" - - def __init__( - self, - dataset_id: str, - connection: str | ERDDAPConnection, - variables, - constraints, - ): - """Initialize instance of ERDDAPDataset.""" - self.dataset_id = dataset_id - self._connection = ERDDAPConnection(ERDDAPConnection.to_string(connection)) - self._variables = variables - self._constraints = constraints - self._meta = None - - @property - def connection(self) -> ERDDAPConnection: - """Access private ._connection variable.""" - return self._connection - - @connection.setter - def connection(self, value: str | ERDDAPConnection): - """Set private ._connection variable.""" - self._connection = ERDDAPConnection(ERDDAPConnection.to_string(value)) - - def get(self, file_type: str) -> StrLike: - """Request data using underlying connection.""" - return self.connection.get(file_type) - - def open(self, file_type: str) -> FilePath: - """Download and open dataset using underlying connection.""" - return self.connection.open(file_type) - - def get_meta(self): - """Request dataset metadata from the server.""" - self._meta = None - - @property - def meta(self): - """Access private ._meta attribute. Request metadata if ._meta is empty.""" - return self.get_meta() if (self._meta is None) else self._meta - - @property - def variables(self): - """Access private ._variables attribute.""" - return self._variables - - @property - def constraints(self): - """Access private ._constraints attribute.""" - return self._constraints - - def url_segment(self, file_type: str) -> str: - """Return URL segment without the base URL (the portion after 'https://server.com/erddap/').""" - pass - - def url(self, file_type: str) -> str: - """ - Return a URL constructed using the underlying ERDDAPConnection. - - The URL will contain information regarding the base class server info, the dataset ID, - access method (tabledap/griddap), file type, variables, and constraints. - - This allows ERDDAPDataset subclasses to be used as more opinionated URL constructors while still - not tying users to a specific IO method. - - Not guaranteed to capture all the specifics of formatting a request, such as if a server requires - specific auth or headers. - """ - pass - - def to_dataset(self): - """Open the dataset as xarray dataset by downloading a subset NetCDF.""" - pass - - def opendap_dataset(self): - """Open the full dataset in xarray via OpenDAP.""" - pass - - -class TableDataset(ERDDAPDataset): - """Subclass of ERDDAPDataset specific to TableDAP datasets.""" - - def to_dataframe(self): - """Open the dataset as a Pandas DataFrame.""" - - -class GridDataset(ERDDAPDataset): - """Subclass of ERDDAPDataset specific to GridDAP datasets.""" - - pass - - -class ERDDAPServer: - """Instance of an ERDDAP server, with support to ERDDAP's native functionalities.""" - - def __init__(self, url: str, connection: ERDDAPConnection | None): - """Initialize instance of ERDDAPServer.""" - if "http" in url: - self.url = url - else: - # get URL from dict of ERDDAP servers - self._connection = connection or ERDDAPConnection() - - @property - def connection(self) -> ERDDAPConnection: - """Access private ._connection attribute.""" - return self._connection - - @connection.setter - def connection(self, value: str | ERDDAPConnection): - """Set private ._connection attribute.""" - self._connection = ERDDAPConnection(ERDDAPConnection.to_string(value)) - - def full_text_search(self, query: str) -> dict[str, ERDDAPDataset]: - """Search the server with native ERDDAP full text search capabilities.""" - pass - - def search(self, query: str) -> dict[str, ERDDAPDataset]: - """ - Search the server with native ERDDAP full text search capabilities. - - Also see ERDDAPServer.full_text_search. - """ - return self.full_text_search(query) - - def advanced_search(self, **kwargs) -> dict[str, ERDDAPDataset]: - """Search server with ERDDAP advanced search capabilities (may return pre-filtered datasets).""" - pass From 3a918c432d216586f9b407b9028b790ac6e37f11 Mon Sep 17 00:00:00 2001 From: Vini Salazar <17276653+vinisalazar@users.noreply.github.com> Date: Fri, 7 Oct 2022 15:29:19 +1100 Subject: [PATCH 14/15] Fix imports in array_like module --- erddapy/array_like/__init__.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/erddapy/array_like/__init__.py b/erddapy/array_like/__init__.py index 45bef358..f05c836e 100644 --- a/erddapy/array_like/__init__.py +++ b/erddapy/array_like/__init__.py @@ -6,13 +6,9 @@ """ -from .array_like import ( - ERDDAPConnection, - ERDDAPDataset, - ERDDAPServer, - GridDataset, - TableDataset, -) +from .connection import ERDDAPConnection +from .datasets import ERDDAPDataset, GridDataset, TableDataset +from .server import ERDDAPServer __all__ = [ "ERDDAPDataset", From e8f9c1195c89d01e7447b5c267f8dcdf7a40a2be Mon Sep 17 00:00:00 2001 From: Vini Salazar <17276653+vinisalazar@users.noreply.github.com> Date: Mon, 10 Oct 2022 16:08:29 +1100 Subject: [PATCH 15/15] Add annotations import Add __future__.annotations import to modules 'datasets' and 'server' --- erddapy/array_like/datasets.py | 2 ++ erddapy/array_like/server.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/erddapy/array_like/datasets.py b/erddapy/array_like/datasets.py index 9b546bc3..6089da3c 100644 --- a/erddapy/array_like/datasets.py +++ b/erddapy/array_like/datasets.py @@ -1,5 +1,7 @@ """Classes to represent ERDDAP datasets.""" +from __future__ import annotations + from pathlib import Path from typing import Union diff --git a/erddapy/array_like/server.py b/erddapy/array_like/server.py index a8e6b522..bb18807d 100644 --- a/erddapy/array_like/server.py +++ b/erddapy/array_like/server.py @@ -1,5 +1,7 @@ """Class ERDDAPServer to represent an ERDDAP server connection.""" +from __future__ import annotations + from erddapy.array_like.connection import ERDDAPConnection from erddapy.array_like.datasets import ERDDAPDataset