diff --git a/argopy/static/assets/gdac_servers.json b/argopy/static/assets/gdac_servers.json new file mode 100644 index 00000000..8864d38a --- /dev/null +++ b/argopy/static/assets/gdac_servers.json @@ -0,0 +1,14 @@ +{ + "name": "gdac", + "long_name": "List of official Argo GDAC servers", + "last_update": "2024-12-17T14:34:57.182480+00:00", + "data": { + "path": [ + "https://data-argo.ifremer.fr", + "https://usgodae.org/pub/outgoing/argo", + "https://argo-gdac-sandbox.s3-eu-west-3.amazonaws.com/pub", + "ftp://ftp.ifremer.fr/ifremer/argo", + "s3://argo-gdac-sandbox/pub" + ] + } +} \ No newline at end of file diff --git a/argopy/stores/filesystems.py b/argopy/stores/filesystems.py index 1ea22498..b5acd63e 100644 --- a/argopy/stores/filesystems.py +++ b/argopy/stores/filesystems.py @@ -65,6 +65,7 @@ from ..utils.monitored_threadpool import MyThreadPoolExecutor as MyExecutor from ..utils.accessories import Registry from ..utils.format import UriCName +from ..utils.lists import list_gdac_servers from .. import __version__ @@ -2216,12 +2217,12 @@ class s3store(httpstore): class gdacfs: """ - Create a file system for any Argo GDAC compliant path + Create a file system for any possible Argo GDAC path Parameters ---------- path: str, optional - GDAC path to create a file system for. Support any GDAC compliant path. + GDAC path to create a file system for. Supports any possible GDAC path. If not specified, option ``gdac`` will be used. 
Returns @@ -2240,29 +2241,41 @@ class gdacfs: >>> with argopy.set_options(gdac="s3://argo-gdac-sandbox/pub"): >>> fs = gdacfs() + Warnings + -------- + This class does not check if the path is Argo GDAC compliant + + See Also + -------- + :meth:`argopy.utils.check_gdac_path`, :meth:`argopy.utils.list_gdac_servers` + """ protocol2fs = {"file": filestore, "http": httpstore, "ftp": ftpstore, "s3": s3store} @staticmethod - def path2protocol(path: str) -> str: + def path2protocol(path: Union[str, Path]) -> str: """Narrow down any path to a supported protocols""" - split = split_protocol(path)[0] - if split is None: + if isinstance(path, Path): return "file" - elif "http" in split: # will also catch "https" - return "http" - elif "ftp" in split: - return "ftp" - elif "s3" in split: - return "s3" else: - raise GdacPathError("Unknown protocol for an Argo GDAC host: %s" % split) + split = split_protocol(path)[0] + if split is None: + return "file" + if "http" in split: # will also catch "https" + return "http" + elif "ftp" in split: + return "ftp" + elif "s3" in split: + return "s3" + else: + raise GdacPathError("Unknown protocol for an Argo GDAC host: %s" % split) def __new__(cls, path: Union[str, Path, None] = None): """Create a file system for any Argo GDAC compliant path""" if path is None: path = OPTIONS["gdac"] + protocol = cls.path2protocol(path) fs = cls.protocol2fs[protocol] diff --git a/argopy/utils/__init__.py b/argopy/utils/__init__.py index 7df7294f..4c7e1fa9 100644 --- a/argopy/utils/__init__.py +++ b/argopy/utils/__init__.py @@ -30,6 +30,7 @@ list_bgc_s_parameters, list_radiometry_variables, list_radiometry_parameters, + list_gdac_servers, ) from .caching import clear_cache, lscache from .monitored_threadpool import MyThreadPoolExecutor as MonitoredThreadPoolExecutor @@ -101,6 +102,7 @@ "list_bgc_s_parameters", "list_radiometry_variables", "list_radiometry_parameters", + "list_gdac_servers", # Cache management: "clear_cache", "lscache", diff --git 
a/argopy/utils/checkers.py b/argopy/utils/checkers.py index d2087e6e..e5ce2a20 100644 --- a/argopy/utils/checkers.py +++ b/argopy/utils/checkers.py @@ -14,7 +14,7 @@ from ..options import OPTIONS from ..errors import InvalidDatasetStructure, GdacPathError, InvalidFetcher -from .lists import list_available_data_src, list_available_index_src +from .lists import list_available_data_src, list_available_index_src, list_gdac_servers from .casting import to_list @@ -475,36 +475,45 @@ def check_gdac_path(path, errors="ignore"): # noqa: C901 Returns ------- checked: boolean - """ - from ..stores import gdacfs # Otherwise raises circular import - try: - fs = gdacfs(path) - except GdacPathError: - if errors == "raise": - raise - elif errors == "warn": - warnings.warn("Can't get address info (GAIerror) on '%s'" % path) - return False - else: - return False + See also + -------- + :class:`argopy.stores.gdacfs`, :meth:`argopy.utils.list_gdac_servers` - check1 = fs.exists(fs.sep.join([path, "dac"])) - if check1: + """ + if path in list_gdac_servers(): return True + else: - elif errors == "raise": - raise GdacPathError( - "This path is not GDAC compliant (no legitimate sub-folder `dac`):\n%s" - % path - ) + from ..stores import gdacfs # import here, otherwise raises circular import - elif errors == "warn": - warnings.warn("This path is not GDAC compliant (no legitimate sub-folder `dac`):\n%s" % path) - return False + try: + fs = gdacfs(path) + except GdacPathError: + if errors == "raise": + raise + elif errors == "warn": + warnings.warn("Can't get address info (GAIerror) on '%s'" % path) + return False + else: + return False + + check1 = fs.exists(fs.sep.join([path, "dac"])) + if check1: + return True + + elif errors == "raise": + raise GdacPathError( + "This path is not GDAC compliant (no legitimate sub-folder `dac`):\n%s" + % path + ) - else: - return False + elif errors == "warn": + warnings.warn("This path is not GDAC compliant (no legitimate sub-folder `dac`):\n%s" % path) + 
return False + + else: + return False def isconnected(host: str = "https://www.ifremer.fr", maxtry: int = 10): diff --git a/argopy/utils/lists.py b/argopy/utils/lists.py index 0a020888..d40099ff 100644 --- a/argopy/utils/lists.py +++ b/argopy/utils/lists.py @@ -390,3 +390,20 @@ def list_radiometry_parameters() -> List[str]: for v in params if "DATA_MODE" not in v and "QC" not in v and "ADJUSTED" not in v ] + + +def list_gdac_servers() -> List[str]: + """List of official Argo GDAC servers + + Returns + ------- + List[str] + + See also + -------- + :class:`argopy.stores.gdacfs`, :meth:`argopy.utils.check_gdac_path` + + """ + with open(os.path.join(path2assets, "gdac_servers.json"), "r") as f: + vlist = json.load(f) + return vlist["data"]["path"] \ No newline at end of file diff --git a/docs/api-hidden.rst b/docs/api-hidden.rst index 408360ae..3f3c33cf 100644 --- a/docs/api-hidden.rst +++ b/docs/api-hidden.rst @@ -73,6 +73,7 @@ argopy.utils.list_bgc_s_parameters argopy.utils.list_radiometry_variables argopy.utils.list_radiometry_parameters + argopy.utils.list_gdac_servers argopy.utils.Chunker diff --git a/docs/api.rst b/docs/api.rst index a2e7d3ad..c84dd645 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -215,6 +215,9 @@ Function under the ``argopy.utils`` submodule. .. currentmodule:: argopy.utils +Lists +----- + .. autosummary:: :toctree: generated/ @@ -227,20 +230,35 @@ Function under the ``argopy.utils`` submodule. list_bgc_s_parameters list_radiometry_variables list_radiometry_parameters + list_gdac_servers - check_wmo - check_cyc +Checkers +-------- - float_wmo - Registry +.. autosummary:: + :toctree: generated/ - Chunker + check_wmo + check_cyc + check_gdac_path isconnected urlhaskeyword isalive isAPIconnected + +Misc +-------- + +.. 
autosummary:: + :toctree: generated/ + + float_wmo + Registry + + Chunker + drop_variables_not_in_all_datasets fill_variables_not_in_all_datasets @@ -275,6 +293,7 @@ File systems stores.httpstore_erddap_auth stores.s3store stores.ArgoKerchunker + stores.gdacfs Argo index store ---------------- diff --git a/docs/whats-new.rst b/docs/whats-new.rst index e1a79547..f6f64659 100644 --- a/docs/whats-new.rst +++ b/docs/whats-new.rst @@ -21,7 +21,7 @@ Features and front-end API with argopy.set_options(gdac='s3://argo-gdac-sandbox/pub'): ds = DataFetcher(src='gdac').float(6903091).to_xarray() -- **Expert new feature: lazy remote netcdf opening**. We now provide support for opening a remote netcdf Argo dataset lazily with `kerchunk <https://fsspec.github.io/kerchunk/>`_. Simply use the new option ``lazy=True`` with a :class:`stores.httpstore.open_dataset` or :class:`stores.s3store.open_dataset`. For expert users we expose the :class:`stores.ArgoKerchunker` to finely tune how to handle json zarr data. (:pr:`385`) by |gmaze|. +- **Expert new feature: lazy remote netcdf opening**. We now provide low-level support for opening a remote netcdf Argo dataset lazily with `kerchunk <https://fsspec.github.io/kerchunk/>`_. Simply use the new option ``lazy=True`` with a :class:`stores.httpstore.open_dataset` or :class:`stores.s3store.open_dataset`. For expert users we expose the :class:`stores.ArgoKerchunker` to finely tune how to handle json zarr data. (:pr:`385`) by |gmaze|. .. code-block:: python