Skip to content

Commit

Permalink
New utility to list official GDAC servers
Browse files Browse the repository at this point in the history
  • Loading branch information
gmaze committed Dec 17, 2024
1 parent 9056665 commit d8f48a4
Show file tree
Hide file tree
Showing 8 changed files with 118 additions and 43 deletions.
14 changes: 14 additions & 0 deletions argopy/static/assets/gdac_servers.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"name": "gdac",
"long_name": "List of official Argo GDAC servers",
"last_update": "2024-12-17T14:34:57.182480+00:00",
"data": {
"path": [
"https://data-argo.ifremer.fr",
"https://usgodae.org/pub/outgoing/argo",
"https://argo-gdac-sandbox.s3-eu-west-3.amazonaws.com/pub",
"ftp://ftp.ifremer.fr/ifremer/argo",
"s3://argo-gdac-sandbox/pub"
]
}
}
37 changes: 25 additions & 12 deletions argopy/stores/filesystems.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
from ..utils.monitored_threadpool import MyThreadPoolExecutor as MyExecutor
from ..utils.accessories import Registry
from ..utils.format import UriCName
from ..utils.lists import list_gdac_servers
from .. import __version__


Expand Down Expand Up @@ -2216,12 +2217,12 @@ class s3store(httpstore):
class gdacfs:
"""
Create a file system for any Argo GDAC compliant path
Create a file system for any possible Argo GDAC path
Parameters
----------
path: str, optional
GDAC path to create a file system for. Support any GDAC compliant path.
GDAC path to create a file system for. Supports any possible GDAC path.
If not specified, option ``gdac`` will be used.
Returns
Expand All @@ -2240,29 +2241,41 @@ class gdacfs:
>>> with argopy.set_options(gdac="s3://argo-gdac-sandbox/pub"):
>>> fs = gdacfs()
Warnings
--------
This class does not check if the path is Argo GDAC compliant
See Also
--------
:meth:`argopy.utils.check_gdac_path`, :meth:`argopy.utils.list_gdac_servers`
"""

protocol2fs = {"file": filestore, "http": httpstore, "ftp": ftpstore, "s3": s3store}

@staticmethod
def path2protocol(path: str) -> str:
def path2protocol(path: Union[str, Path]) -> str:
"""Narrow down any path to a supported protocol"""
split = split_protocol(path)[0]
if split is None:
if isinstance(path, Path):
return "file"
elif "http" in split: # will also catch "https"
return "http"
elif "ftp" in split:
return "ftp"
elif "s3" in split:
return "s3"
else:
raise GdacPathError("Unknown protocol for an Argo GDAC host: %s" % split)
split = split_protocol(path)[0]
if split is None:
return "file"
if "http" in split: # will also catch "https"
return "http"
elif "ftp" in split:
return "ftp"
elif "s3" in split:
return "s3"
else:
raise GdacPathError("Unknown protocol for an Argo GDAC host: %s" % split)

def __new__(cls, path: Union[str, Path, None] = None):
"""Create a file system for any Argo GDAC compliant path"""
if path is None:
path = OPTIONS["gdac"]

protocol = cls.path2protocol(path)
fs = cls.protocol2fs[protocol]

Expand Down
2 changes: 2 additions & 0 deletions argopy/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
list_bgc_s_parameters,
list_radiometry_variables,
list_radiometry_parameters,
list_gdac_servers,
)
from .caching import clear_cache, lscache
from .monitored_threadpool import MyThreadPoolExecutor as MonitoredThreadPoolExecutor
Expand Down Expand Up @@ -101,6 +102,7 @@
"list_bgc_s_parameters",
"list_radiometry_variables",
"list_radiometry_parameters",
"list_gdac_servers",
# Cache management:
"clear_cache",
"lscache",
Expand Down
59 changes: 34 additions & 25 deletions argopy/utils/checkers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from ..options import OPTIONS
from ..errors import InvalidDatasetStructure, GdacPathError, InvalidFetcher
from .lists import list_available_data_src, list_available_index_src
from .lists import list_available_data_src, list_available_index_src, list_gdac_servers
from .casting import to_list


Expand Down Expand Up @@ -475,36 +475,45 @@ def check_gdac_path(path, errors="ignore"): # noqa: C901
Returns
-------
checked: boolean
"""
from ..stores import gdacfs # Otherwise raises circular import
try:
fs = gdacfs(path)
except GdacPathError:
if errors == "raise":
raise
elif errors == "warn":
warnings.warn("Can't get address info (GAIerror) on '%s'" % path)
return False
else:
return False
See also
--------
:class:`argopy.stores.gdacfs`, :meth:`argopy.utils.list_gdac_servers`
check1 = fs.exists(fs.sep.join([path, "dac"]))
if check1:
"""
if path in list_gdac_servers():
return True
else:

elif errors == "raise":
raise GdacPathError(
"This path is not GDAC compliant (no legitimate sub-folder `dac`):\n%s"
% path
)
from ..stores import gdacfs # import here, otherwise raises circular import

elif errors == "warn":
warnings.warn("This path is not GDAC compliant (no legitimate sub-folder `dac`):\n%s" % path)
return False
try:
fs = gdacfs(path)
except GdacPathError:
if errors == "raise":
raise
elif errors == "warn":
warnings.warn("Can't get address info (GAIerror) on '%s'" % path)
return False
else:
return False

check1 = fs.exists(fs.sep.join([path, "dac"]))
if check1:
return True

elif errors == "raise":
raise GdacPathError(
"This path is not GDAC compliant (no legitimate sub-folder `dac`):\n%s"
% path
)

else:
return False
elif errors == "warn":
warnings.warn("This path is not GDAC compliant (no legitimate sub-folder `dac`):\n%s" % path)
return False

else:
return False


def isconnected(host: str = "https://www.ifremer.fr", maxtry: int = 10):
Expand Down
17 changes: 17 additions & 0 deletions argopy/utils/lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,3 +390,20 @@ def list_radiometry_parameters() -> List[str]:
for v in params
if "DATA_MODE" not in v and "QC" not in v and "ADJUSTED" not in v
]


def list_gdac_servers() -> List[str]:
    """List of official Argo GDAC servers

    The list is read from the ``gdac_servers.json`` file located in the
    static assets folder.

    Returns
    -------
    List[str]
        Content of the ``path`` entry found under the ``data`` key of the
        asset file.

    See also
    --------
    :class:`argopy.stores.gdacfs`, :meth:`argopy.utils.check_gdac_path`
    """
    asset_file = os.path.join(path2assets, "gdac_servers.json")
    # Force UTF-8 so that decoding does not depend on the platform default locale
    with open(asset_file, "r", encoding="utf-8") as f:
        vlist = json.load(f)
    return vlist["data"]["path"]
1 change: 1 addition & 0 deletions docs/api-hidden.rst
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
argopy.utils.list_bgc_s_parameters
argopy.utils.list_radiometry_variables
argopy.utils.list_radiometry_parameters
argopy.utils.list_gdac_servers

argopy.utils.Chunker

Expand Down
29 changes: 24 additions & 5 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,9 @@ Function under the ``argopy.utils`` submodule.

.. currentmodule:: argopy.utils

Lists
-----

.. autosummary::
:toctree: generated/

Expand All @@ -227,20 +230,35 @@ Function under the ``argopy.utils`` submodule.
list_bgc_s_parameters
list_radiometry_variables
list_radiometry_parameters
list_gdac_servers

check_wmo
check_cyc
Checkers
--------

float_wmo
Registry
.. autosummary::
:toctree: generated/

Chunker
check_wmo
check_cyc
check_gdac_path

isconnected
urlhaskeyword
isalive
isAPIconnected


Misc
--------

.. autosummary::
:toctree: generated/

float_wmo
Registry

Chunker

drop_variables_not_in_all_datasets
fill_variables_not_in_all_datasets

Expand Down Expand Up @@ -275,6 +293,7 @@ File systems
stores.httpstore_erddap_auth
stores.s3store
stores.ArgoKerchunker
stores.gdacfs

Argo index store
----------------
Expand Down
2 changes: 1 addition & 1 deletion docs/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Features and front-end API
with argopy.set_options(gdac='s3://argo-gdac-sandbox/pub'):
ds = DataFetcher(src='gdac').float(6903091).to_xarray()
- **Expert new feature: lazy remote netcdf opening**. We now provide support for opening a remote netcdf Argo dataset lazily with `kerchunk <https://fsspec.github.io/kerchunk/>`_. Simply use the new option ``lazy=True`` with a :class:`stores.httpstore.open_dataset` or :class:`stores.s3store.open_dataset`. For expert users we expose the :class:`stores.ArgoKerchunker` to finely tune how to handle json zarr data. (:pr:`385`) by |gmaze|.
- **Expert new feature: lazy remote netcdf opening**. We now provide low-level support for opening a remote netcdf Argo dataset lazily with `kerchunk <https://fsspec.github.io/kerchunk/>`_. Simply use the new option ``lazy=True`` with a :class:`stores.httpstore.open_dataset` or :class:`stores.s3store.open_dataset`. For expert users we expose the :class:`stores.ArgoKerchunker` to finely tune how to handle json zarr data. (:pr:`385`) by |gmaze|.

.. code-block:: python
Expand Down

0 comments on commit d8f48a4

Please sign in to comment.