Skip to content

Commit

Permalink
ENH: Add client for NOAA/CIRA MLWP archive
Browse files Browse the repository at this point in the history
  • Loading branch information
dopplershift committed Feb 14, 2025
1 parent 69e7e8a commit 57abab6
Show file tree
Hide file tree
Showing 4 changed files with 341 additions and 1 deletion.
43 changes: 43 additions & 0 deletions examples/remote/ml_forecast.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Copyright (c) 2025 MetPy Developers.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
"""
=========================================
ML Weather Prediction Access and Plotting
=========================================
Use MetPy to access machine learning weather prediction (MLWP) data in AWS S3 and plot using
the simplified plotting interface.
"""
from datetime import datetime

from metpy.remote import MLWPArchive
from metpy.plots import MapPanel, PanelContainer, RasterPlot

###################
# Access the GraphCast forecast closest to the desired date/time
dt = datetime(2025, 2, 15, 18)
# get_product() selects the archived run closest to ``dt``; calling parse() on the returned
# product gives the dataset that is plotted below.
ds = MLWPArchive().get_product('graphcast', dt).parse()

###################
# Plot the data using MetPy's simplified plotting interface.
raster = RasterPlot()
raster.data = ds
# Name of the dataset variable to draw (presumably 2-m temperature -- confirm against the
# variables present in ``ds``)
raster.field = 't2'
# Reuse the requested date/time to pick which time in the dataset is drawn
raster.time = dt
raster.colorbar = 'horizontal'
raster.colormap = 'RdBu_r'

panel = MapPanel()
panel.area = 'co'
panel.projection = 'lcc'
panel.layers = ['coastline', 'borders', 'states']
panel.plots = [raster]
# Title combines the plotted variable's ``long_name`` attribute with the requested time
panel.title = f"{ds[raster.field].attrs['long_name']} @ {dt}"

pc = PanelContainer()
pc.size = (8, 8)
pc.panels = [panel]
pc.draw()

pc.show()

Check failure on line 43 in examples/remote/ml_forecast.py

View workflow job for this annotation

GitHub Actions / Run Lint Tools

Ruff (W292)

examples/remote/ml_forecast.py:43:10: W292 No newline at end of file
117 changes: 117 additions & 0 deletions src/metpy/remote/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,3 +592,120 @@ def _build_result(self, obj):
"""Build a product that opens the data using `xarray.open_dataset`."""
return AWSProduct(obj,
lambda s: xr.open_dataset(s.url + '#mode=bytes', engine='netcdf4'))


@exporter.export
class MLWPArchive(S3DataStore):
    """Access data from the NOAA/CIRA Machine-Learning Weather Prediction archive in AWS.

    This consists of individual model runs stored in netCDF format, across a collection of
    models (Aurora, FourCastNet, GraphCast, Pangu) and initial conditions (GFS or IFS).

    """

    # Maps the known (lower-cased) model names to the four-letter codes used in archive keys
    _model_map = {'aurora': 'AURO', 'fourcastnet': 'FOUR',
                  'graphcast': 'GRAP', 'pangu': 'PANG'}

    def __init__(self):
        super().__init__('noaa-oar-mlwp-data')

    def _model_id(self, model, version, init):
        """Build a model id from the model name, version, and initial conditions.

        The result has the form ``<MODEL>_v<version>_<init>``, e.g. ``GRAP_v100_GFS``.
        """
        init = init or 'GFS'
        # Known names are matched case-insensitively; anything else is used as given
        model = self._model_map.get(model.lower(), model)
        if version is None:
            # No version requested: use the last of the sorted prefixes for this model
            model_id = sorted(self.common_prefixes(model + '_', '_'))[-1]
        else:
            # Versions in the keys are three characters (e.g. v100, v200), so right-pad short
            # versions with zeros: 1 -> '100'. Padding to a fixed width (rather than always
            # appending '00') keeps two-character versions at three characters as well.
            version = str(version).ljust(3, '0')
            model_id = f'{model}_v{version}_'
        return f'{model_id}{init}'

    def _build_key(self, model_id, dt, depth=None):
        """Build a key for the bucket up to the desired point.

        The full key is ``<model_id>/<YYYY>/<mmdd>/<file name>``; ``depth`` limits how many
        of those leading parts are joined (``None`` keeps all of them).
        """
        first_hour = 0
        last_hour = 240
        step_hours = 6
        parts = [model_id, f'{dt:%Y}', f'{dt:%m%d}',
                 f'{model_id}_{dt:%Y%m%d%H}_'
                 f'f{first_hour:03d}_f{last_hour:03d}_{step_hours:02d}.nc']
        return self.delimiter.join(parts[slice(0, depth)])

    def dt_from_key(self, key):  # noqa: D102
        # Docstring inherited
        # GRAP_v100_GFS_2025021212_f000_f240_06.nc
        dt = key.split('/')[-1].split('_')[3]
        return datetime.strptime(dt, '%Y%m%d%H').replace(tzinfo=timezone.utc)

    def get_product(self, model, dt=None, version=None, init=None):
        """Get a product from the archive.

        Parameters
        ----------
        model : str
            The selected model to get data for. Can be any of the four-letter codes supported
            by the archive (currently FOUR, PANG, GRAP, AURO), or the known names (
            case-insensitive): ``'Aurora'``, ``'FourCastNet'``, ``'graphcast'``, or
            ``'pangu'``.
        dt : `datetime.datetime`, optional
            The desired date/time for the model run; the one closest matching in time will
            be returned. If not given, defaults to the current UTC date/time.
        version : str or int, optional
            The particular version of the model to select. If not given, the query will try
            to select the most recent version of the model.
        init : str, optional
            Selects the model run initialized with a particular set of initial conditions.
            Should be one of ``'GFS'`` or ``'IFS'``, defaults to ``'GFS'``.

        See Also
        --------
        get_range

        """
        dt = datetime.now(timezone.utc) if dt is None else ensure_timezone(dt)
        model_id = self._model_id(model, version, init)
        search_key = self._build_key(model_id, dt)
        # Drop the trailing ``_<init time>_f000_f240_06.nc`` fields so the prefix matches
        # every run stored under that day's directory for this model id
        prefix = search_key.rsplit('_', maxsplit=4)[0]
        return self._closest_result(self.objects(prefix), dt)

    def get_range(self, model, start, end, version=None, init=None):
        """Yield products within a particular date/time range.

        Parameters
        ----------
        model : str
            The selected model to get data for. Can be any of the four-letter codes supported
            by the archive (currently FOUR, PANG, GRAP, AURO), or the known names (
            case-insensitive): ``'Aurora'``, ``'FourCastNet'``, ``'graphcast'``, or
            ``'pangu'``.
        start : `datetime.datetime`
            The start of the date/time range
        end : `datetime.datetime`
            The end of the date/time range
        version : str or int, optional
            The particular version of the model to select. If not given, the query will try
            to select the most recent version of the model.
        init : str, optional
            Selects the model run initialized with a particular set of initial conditions.
            Should be one of ``'GFS'`` or ``'IFS'``, defaults to ``'GFS'``.

        See Also
        --------
        get_product

        """
        start = ensure_timezone(start)
        end = ensure_timezone(end)
        model_id = self._model_id(model, version, init)
        # Keys are grouped into one directory per day, so walk whole days beginning at
        # midnight of the start day. Stepping from the raw start time could stop before the
        # final day's directory when end's time-of-day is earlier than start's (this assumes
        # date_iterator steps from its first argument). The exact start/end bounds are still
        # enforced by the comparison below.
        first_day = start.replace(hour=0, minute=0, second=0, microsecond=0)
        for dt in date_iterator(first_day, end, days=1):
            prefix = self._build_key(model_id, dt, depth=3)
            for obj in self.objects(prefix):
                if start <= self.dt_from_key(obj.key) < end:
                    yield self._build_result(obj)

    def _build_result(self, obj):
        """Build a product that opens the data using `xarray.open_dataset`."""
        return AWSProduct(obj,
                          lambda s: xr.open_dataset(s.url + '#mode=bytes', engine='netcdf4'))
158 changes: 158 additions & 0 deletions tests/remote/fixtures/test_mlwp_single.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 23 additions & 1 deletion tests/remote/test_aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pathlib import Path
import tempfile

from metpy.remote import GOESArchive, NEXRADLevel2Archive, NEXRADLevel3Archive
from metpy.remote import GOESArchive, MLWPArchive, NEXRADLevel2Archive, NEXRADLevel3Archive
from metpy.testing import needs_aws


Expand Down Expand Up @@ -92,3 +92,25 @@ def test_goes_range():
'OR_ABI-L1b-RadC-M6C01_G16_s20243450206170_e20243450208543_c20243450208597.nc',
'OR_ABI-L1b-RadC-M6C01_G16_s20243450211170_e20243450213543_c20243450214031.nc']
assert names == truth


@needs_aws
def test_mlwp_single():
    """Test fetching one product from the MLWP archive for a requested date/time."""
    archive = MLWPArchive()
    product = archive.get_product('graphcast', datetime(2025, 1, 30, 10))
    expected_url = ('https://noaa-oar-mlwp-data.s3.amazonaws.com/GRAP_v100_GFS/'
                    '2025/0130/GRAP_v100_GFS_2025013012_f000_f240_06.nc')
    assert product.url == expected_url


@needs_aws
def test_mlwp_range():
    """Test getting a range of products from the MLWP archive."""
    # Named distinctly from test_mlwp_single: a second function with that name would replace
    # the first at import time, so the single-product test would never be collected.
    prods = MLWPArchive().get_range('fourcastnet', datetime(2025, 2, 3), datetime(2025, 2, 6))
    names = [p.name for p in prods]
    truth = ['FOUR_v200_GFS_2025020300_f000_f240_06.nc',
             'FOUR_v200_GFS_2025020312_f000_f240_06.nc',
             'FOUR_v200_GFS_2025020400_f000_f240_06.nc',
             'FOUR_v200_GFS_2025020412_f000_f240_06.nc',
             'FOUR_v200_GFS_2025020500_f000_f240_06.nc',
             'FOUR_v200_GFS_2025020512_f000_f240_06.nc']
    assert names == truth

0 comments on commit 57abab6

Please sign in to comment.