-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
github-actions
committed
Dec 3, 2024
1 parent
0fd1016
commit b704817
Showing
15 changed files
with
1,523 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
94 changes: 94 additions & 0 deletions
94
python/nwm_client/build/lib/hydrotools/nwm_client/AzureFileCatalog.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
""" | ||
============================================= | ||
NWM Azure Blob Storage Container File Catalog | ||
============================================= | ||
Concrete implementation of a National Water Model file client for discovering | ||
files on Microsoft Azure. | ||
https://planetarycomputer.microsoft.com/dataset/storage/noaa-nwm | ||
Classes | ||
------- | ||
AzureFileCatalog | ||
""" | ||
from .NWMFileCatalog import NWMFileCatalog | ||
from hydrotools._restclient.urllib import Url | ||
import azure.storage.blob | ||
import planetary_computer | ||
import adlfs | ||
from typing import List | ||
|
||
class AzureFileCatalog(NWMFileCatalog): | ||
"""An Azure Cloud client class for NWM data. | ||
This AzureFileCatalog class provides various methods for discovering NWM | ||
files on Azure Blob Storage. | ||
""" | ||
|
||
def __init__( | ||
self, | ||
server: str = 'https://noaanwm.blob.core.windows.net/' | ||
) -> None: | ||
"""Initialize catalog of NWM data source on Azure Blob Storage. | ||
Parameters | ||
---------- | ||
server : str, required | ||
Fully qualified path to Azure Cloud endpoint. | ||
Returns | ||
------- | ||
None | ||
""" | ||
super().__init__() | ||
self.server = server | ||
|
||
def list_blobs( | ||
self, | ||
configuration: str, | ||
reference_time: str, | ||
must_contain: str = 'channel_rt' | ||
) -> List[str]: | ||
"""List available blobs with provided parameters. | ||
Parameters | ||
---------- | ||
configuration : str, required | ||
Particular model simulation or forecast configuration. For a list | ||
of available configurations see NWMDataService.configurations | ||
reference_time : str, required | ||
Model simulation or forecast issuance/reference time in | ||
YYYYmmddTHHZ format. | ||
must_contain : str, optional, default 'channel_rt' | ||
Optional substring found in each blob name. | ||
Returns | ||
------- | ||
A list of blob names that satisfy the criteria set by the parameters. | ||
""" | ||
# Validate configuration | ||
self.raise_invalid_configuration(configuration) | ||
|
||
# Break-up reference time | ||
issue_date, issue_time = self.separate_datetime(reference_time) | ||
|
||
# Get list of blobs | ||
fs = adlfs.AzureBlobFileSystem( | ||
"noaanwm", credential=planetary_computer.sas.get_token("noaanwm", "nwm").token | ||
) | ||
blobs = fs.glob(f"nwm/nwm.{issue_date}/{configuration}/nwm.t{issue_time}*") | ||
|
||
# Return blob URLs | ||
return [ | ||
str(self.server / suffix) | ||
for suffix in list(blobs) | ||
if must_contain in suffix | ||
] | ||
|
||
@property | ||
def server(self) -> str: | ||
return self._server | ||
|
||
@server.setter | ||
def server(self, server: str) -> None: | ||
self._server = Url(server) | ||
|
209 changes: 209 additions & 0 deletions
209
python/nwm_client/build/lib/hydrotools/nwm_client/FileDownloader.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,209 @@ | ||
""" | ||
=============== | ||
File Downloader | ||
=============== | ||
Tool for downloading files asynchronously. | ||
Classes | ||
------- | ||
FileDownloader | ||
""" | ||
import asyncio | ||
import ssl | ||
import aiohttp | ||
import aiofiles | ||
from pathlib import Path | ||
from typing import List, Tuple, Union | ||
import warnings | ||
from http import HTTPStatus | ||
|
||
class FileDownloader: | ||
"""Provides a convenient interface to download a list of files | ||
asynchronously using HTTP. | ||
""" | ||
|
||
def __init__( | ||
self, | ||
output_directory: Union[str, Path] = Path("."), | ||
create_directory: bool = False, | ||
ssl_context: ssl.SSLContext = ssl.create_default_context(), | ||
limit: int = 10 | ||
) -> None: | ||
"""Initialize File Downloader object with specified output directory. | ||
Parameters | ||
---------- | ||
output_directory: str, pathlib.Path, optional, default "." | ||
Output directory where files are written. | ||
create_directory: bool, options, default False | ||
Indicates whether to create the output directory if it does not | ||
exist. | ||
ssl_context : ssl.SSLContext, optional, default context | ||
SSL configuration context. | ||
limit: int, optional, default 10 | ||
Number of simultaneous connections. | ||
Returns | ||
------- | ||
None | ||
""" | ||
# Set output directory | ||
self.output_directory = output_directory | ||
|
||
# Set directory creation | ||
self.create_directory = create_directory | ||
|
||
# Setup SSL context | ||
self.ssl_context = ssl_context | ||
|
||
# Set limit | ||
self.limit = limit | ||
|
||
async def get_file( | ||
self, | ||
url: str, | ||
filename: str, | ||
session: aiohttp.ClientSession | ||
) -> None: | ||
"""Download a single file. | ||
Parameters | ||
---------- | ||
url: str, required | ||
URL path to file. | ||
filename: str, required | ||
Local filename used to write downloaded file. This will save the file | ||
to self.output_directory/filename | ||
session: aiohttp.ClientSession, required | ||
Session object used for retrieval. | ||
Returns | ||
------- | ||
None | ||
""" | ||
# Retrieve a single file | ||
async with session.get(url, ssl=self.ssl_context, timeout=900) as response: | ||
# Warn if unable to locate file | ||
if response.status != HTTPStatus.OK: | ||
status = HTTPStatus(response.status_code) | ||
message = ( | ||
f"HTTP Status: {status.value}" + | ||
f" - {status.phrase}" + | ||
f" - {status.description}\n" + | ||
f"{response.url}" | ||
) | ||
warnings.warn(message, RuntimeWarning) | ||
return | ||
|
||
# Construct output file path | ||
output_file = self.output_directory / filename | ||
|
||
# Stream download | ||
async with aiofiles.open(output_file, 'wb') as fo: | ||
while True: | ||
chunk = await response.content.read(1024) | ||
if not chunk: | ||
break | ||
await fo.write(chunk) | ||
|
||
async def get_files(self, src_dst_list: List[Tuple[str,str]]) -> None: | ||
"""Asynchronously download multiple files. | ||
Parameters | ||
---------- | ||
src_dst_list: List[Tuple[str,str]], required | ||
List of tuples containing two strings. The first string is the | ||
source URL from which to retrieve a file, the second string is the | ||
local filename where the file will be saved. | ||
Returns | ||
------- | ||
None | ||
""" | ||
# Retrieve each file | ||
connector = aiohttp.TCPConnector(limit=self.limit) | ||
async with aiohttp.ClientSession(connector=connector) as session: | ||
await asyncio.gather(*[self.get_file(url, filename, session) for url, filename in src_dst_list]) | ||
|
||
def get(self, src_dst_list: List[Tuple[str,str]], overwrite: bool = False) -> None: | ||
"""Setup event loop and asynchronously download multiple files. If | ||
self.create_directory is True, an output directory will be | ||
created if needed. | ||
Parameters | ||
---------- | ||
src_dst_list: List[Tuple[str,str]], required | ||
List of tuples containing two strings. The first string is the | ||
source URL from which to retrieve a file, the second string is the | ||
local filename where the file will be saved. | ||
overwrite: bool, optional, default False | ||
If True will overwrite destination file, if it exists. If False, | ||
download of this file is skipped. | ||
Returns | ||
------- | ||
None | ||
Examples | ||
-------- | ||
>>> from nwm_client.FileDownloader import FileDownloader | ||
>>> downloader = FileDownloader(output_directory="my_output") | ||
>>> # This will download the pandas homepage and save it to "my_output/index.html" | ||
>>> downloader.get( | ||
>>> [("https://pandas.pydata.org/docs/user_guide/index.html","index.html")] | ||
>>> ) | ||
""" | ||
# Shorten list to files that do not exist | ||
if not overwrite: | ||
short = [] | ||
for src, dst in src_dst_list: | ||
if (self.output_directory / dst).exists(): | ||
message = f"File exists, skipping download of {self.output_directory / dst}" | ||
warnings.warn(message, UserWarning) | ||
continue | ||
short.append((src, dst)) | ||
src_dst_list = short | ||
|
||
# Check output directory, optionally create | ||
if not self.output_directory.exists(): | ||
if self.create_directory: | ||
self.output_directory.mkdir(parents=True) | ||
else: | ||
message = f"{self.output_directory} does not exist." | ||
raise FileNotFoundError(message) | ||
|
||
# Start event loop to retrieve files | ||
asyncio.run(self.get_files(src_dst_list)) | ||
|
||
@property | ||
def output_directory(self) -> Path: | ||
return self._output_directory | ||
|
||
@output_directory.setter | ||
def output_directory(self, output_directory: Union[str, Path]): | ||
self._output_directory = Path(output_directory).expanduser().resolve() | ||
|
||
@property | ||
def create_directory(self) -> bool: | ||
return self._create_directory | ||
|
||
@create_directory.setter | ||
def create_directory(self, create_directory: bool): | ||
self._create_directory = bool(create_directory) | ||
|
||
@property | ||
def ssl_context(self) -> ssl.SSLContext: | ||
return self._ssl_context | ||
|
||
@ssl_context.setter | ||
def ssl_context(self, ssl_context: ssl.SSLContext) -> None: | ||
self._ssl_context = ssl_context | ||
|
||
@property | ||
def limit(self) -> int: | ||
return self._limit | ||
|
||
@limit.setter | ||
def limit(self, limit: int) -> None: | ||
self._limit = limit |
Oops, something went wrong.