@exporter.export
class MLWPArchive(S3DataStore):
    """Access data from the NOAA/CIRA Machine-Learning Weather Prediction archive in AWS.

    This consists of individual model runs stored in netCDF format, across a collection
    of models (Aurora, FourCastNet, GraphCast, Pangu) and initial conditions (GFS or IFS).

    """

    # Friendly (lower-cased) model names mapped to the four-letter codes used in bucket keys
    _model_map = {'aurora': 'AURO', 'fourcastnet': 'FOUR',
                  'graphcast': 'GRAP', 'pangu': 'PANG'}

    def __init__(self):
        """Initialize the data store pointing at the ``noaa-oar-mlwp-data`` bucket."""
        super().__init__('noaa-oar-mlwp-data')

    def _model_id(self, model, version, init):
        """Build a model id from the model name, version, and initial conditions.

        Parameters
        ----------
        model : str
            One of the known model names (matched case-insensitively against
            ``_model_map``); any other value is used unchanged as the model code.
        version : str or int or None
            Model version. Values shorter than three characters are right-padded with
            zeros (``1`` -> ``'100'``). If `None`, the bucket is queried and the prefix
            that sorts last is used.
        init : str or None
            Initial conditions identifier; any falsy value selects ``'GFS'``.

        Returns
        -------
        str
            The model id, e.g. ``'GRAP_v100_GFS'``.

        Raises
        ------
        ValueError
            If ``version`` is `None` and the archive has no prefixes for ``model``.

        """
        init = init or 'GFS'
        model = self._model_map.get(model.lower(), model)
        if version is None:
            # Prefixes come back like 'GRAP_v100_'; the one sorting last as a string is
            # taken as the newest.
            # NOTE(review): this ordering is lexicographic, so it presumably relies on
            # all version strings having the same width -- confirm against the archive.
            prefixes = list(self.common_prefixes(model + '_', '_'))
            if not prefixes:
                raise ValueError(f'No versions found in the archive for model {model!r}.')
            model_id = max(prefixes)
        else:
            # Pad to three characters; plain concatenation of '00' would turn a
            # two-character version like '10' into '1000'.
            version = str(version).ljust(3, '0')
            model_id = f'{model}_v{version}_'
        return f'{model_id}{init}'

    def _build_key(self, model_id, dt, depth=None):
        """Build a key for the bucket up to the desired point.

        Parameters
        ----------
        model_id : str
            Model id as produced by `_model_id`.
        dt : `datetime.datetime`
            Date/time used to fill in the year, month/day, and file name.
        depth : int, optional
            Number of leading key components (model id, year, month/day, file name) to
            keep. If not given, the full key including the file name is returned.

        Returns
        -------
        str
            The key components joined by the store's delimiter.

        """
        first_hour = 0
        last_hour = 240
        step_hours = 6
        parts = [model_id, f'{dt:%Y}', f'{dt:%m%d}',
                 f'{model_id}_{dt:%Y%m%d%H}_'
                 f'f{first_hour:03d}_f{last_hour:03d}_{step_hours:02d}.nc']
        return self.delimiter.join(parts[:depth])

    def dt_from_key(self, key):  # noqa: D102
        # Docstring inherited
        # GRAP_v100_GFS_2025021212_f000_f240_06.nc
        # Split from the right so the timestamp is found regardless of how many
        # underscore-separated fields make up the model id.
        name = key.split(self.delimiter)[-1]
        dt = name.rsplit('_', maxsplit=4)[1]
        return datetime.strptime(dt, '%Y%m%d%H').replace(tzinfo=timezone.utc)

    def get_product(self, model, dt=None, version=None, init=None):
        """Get a product from the archive.

        Parameters
        ----------
        model : str
            The selected model to get data for. Can be any of the four-letter codes supported
            by the archive (currently FOUR, PANG, GRAP, AURO), or the known names (
            case-insensitive): ``'Aurora'``, ``'FourCastNet'``, ``'graphcast'``, or
            ``'pangu'``.
        dt : `datetime.datetime`, optional
            The desired date/time for the model run; the one closest matching in time will
            be returned. If not given, defaults to the current UTC date/time.
        version : str or int, optional
            The particular version of the model to select. If not given, the query will try
            to select the most recent version of the model.
        init : str, optional
            Selects the model run initialized with a particular set of initial conditions.
            Should be one of ``'GFS'`` or ``'IFS'``, defaults to ``'GFS'``.

        See Also
        --------
        get_range

        """
        dt = datetime.now(timezone.utc) if dt is None else ensure_timezone(dt)
        model_id = self._model_id(model, version, init)
        search_key = self._build_key(model_id, dt)
        # Drop the trailing '<YYYYmmddHH>_fXXX_fXXX_XX.nc' fields so the prefix matches
        # every run stored under the requested day, not just the exact hour.
        prefix = search_key.rsplit('_', maxsplit=4)[0]
        return self._closest_result(self.objects(prefix), dt)

    def get_range(self, model, start, end, version=None, init=None):
        """Yield products within a particular date/time range.

        Products are yielded when their run time satisfies ``start <= time < end``.

        Parameters
        ----------
        model : str
            The selected model to get data for. Can be any of the four-letter codes supported
            by the archive (currently FOUR, PANG, GRAP, AURO), or the known names (
            case-insensitive): ``'Aurora'``, ``'FourCastNet'``, ``'graphcast'``, or
            ``'pangu'``.
        start : `datetime.datetime`
            The start of the date/time range (inclusive).
        end : `datetime.datetime`
            The end of the date/time range (exclusive).
        version : str or int, optional
            The particular version of the model to select. If not given, the query will try
            to select the most recent version of the model.
        init : str, optional
            Selects the model run initialized with a particular set of initial conditions.
            Should be one of ``'GFS'`` or ``'IFS'``, defaults to ``'GFS'``.

        See Also
        --------
        get_product

        """
        start = ensure_timezone(start)
        end = ensure_timezone(end)
        model_id = self._model_id(model, version, init)
        for dt in date_iterator(start, end, days=1):
            # depth=3 stops at '<model_id>/<YYYY>/<mmdd>', i.e. one listing per day
            prefix = self._build_key(model_id, dt, depth=3)
            for obj in self.objects(prefix):
                if start <= self.dt_from_key(obj.key) < end:
                    yield self._build_result(obj)

    def _build_result(self, obj):
        """Build a product that opens the data using `xarray.open_dataset`."""
        # '#mode=bytes' is appended to the URL handed to the netcdf4 engine
        return AWSProduct(obj,
                          lambda s: xr.open_dataset(s.url + '#mode=bytes', engine='netcdf4'))
L2FyY2gjYXJtNjQgbGFuZy9weXRob24jMy4xMy4xIG1kL3B5aW1wbCNDUHl0aG9uIGNmZy9yZXRy + eS1tb2RlI2xlZ2FjeSBCb3RvY29yZS8xLjM2LjMgUmVzb3VyY2U= + amz-sdk-invocation-id: + - !!binary | + ODgwYTQzOGMtZmM2ZS00OGMwLTgwNTktY2MwNmQ5NDYxMjIx + amz-sdk-request: + - !!binary | + YXR0ZW1wdD0x + method: GET + uri: https://noaa-oar-mlwp-data.s3.amazonaws.com/?prefix=FOUR_v200_GFS%2F2025%2F0203&encoding-type=url + response: + body: + string: ' + + noaa-oar-mlwp-dataFOUR_v200_GFS/2025/02031000urlfalseFOUR_v200_GFS/2025/0203/FOUR_v200_GFS_2025020300_f000_f240_06.nc2025-02-03T04:11:52.000Z"0114c9602beb9f67a8a1316a9f979847-875"73368043792804704e53c01e2a6281a0e905de37df634ec18b3c5816683548dda374531cdenoaasandboxSTANDARDFOUR_v200_GFS/2025/0203/FOUR_v200_GFS_2025020312_f000_f240_06.nc2025-02-03T16:09:37.000Z"80903a779991991242fdb68af76e8f77-874"73292596272804704e53c01e2a6281a0e905de37df634ec18b3c5816683548dda374531cdenoaasandboxSTANDARD' + headers: + Content-Type: + - application/xml + Date: + - Fri, 14 Feb 2025 18:36:59 GMT + Server: + - AmazonS3 + Transfer-Encoding: + - chunked + x-amz-bucket-region: + - us-east-1 + x-amz-id-2: + - ycngl9N68OCPRpkwPmhQoL5wGv3h3r9axIqPvyGpyci3uUFdfUKHm6E2dF5DNDsAeZ8NsjWPRBk= + x-amz-request-id: + - BY6ACEGW6NDS6ERT + status: + code: 200 + message: OK +- request: + body: null + headers: + User-Agent: + - !!binary | + Qm90bzMvMS4zNi4zIG1kL0JvdG9jb3JlIzEuMzYuMyB1YS8yLjAgb3MvbWFjb3MjMjQuMy4wIG1k + L2FyY2gjYXJtNjQgbGFuZy9weXRob24jMy4xMy4xIG1kL3B5aW1wbCNDUHl0aG9uIGNmZy9yZXRy + eS1tb2RlI2xlZ2FjeSBCb3RvY29yZS8xLjM2LjMgUmVzb3VyY2U= + amz-sdk-invocation-id: + - !!binary | + ODdiYWVjMTItM2I1YS00ODBiLWIxYzEtOTE3NzEyODA3NmFi + amz-sdk-request: + - !!binary | + YXR0ZW1wdD0x + method: GET + uri: https://noaa-oar-mlwp-data.s3.amazonaws.com/?prefix=FOUR_v200_GFS%2F2025%2F0204&encoding-type=url + response: + body: + string: ' + + 
noaa-oar-mlwp-dataFOUR_v200_GFS/2025/02041000urlfalseFOUR_v200_GFS/2025/0204/FOUR_v200_GFS_2025020400_f000_f240_06.nc2025-02-04T04:54:58.000Z"aef738d3b726b3b3925cae06e6579878-874"73311746362804704e53c01e2a6281a0e905de37df634ec18b3c5816683548dda374531cdenoaasandboxSTANDARDFOUR_v200_GFS/2025/0204/FOUR_v200_GFS_2025020412_f000_f240_06.nc2025-02-04T16:08:23.000Z"ba5e28ee280b5732685f0f8162fd6c64-874"73282585842804704e53c01e2a6281a0e905de37df634ec18b3c5816683548dda374531cdenoaasandboxSTANDARD' + headers: + Content-Type: + - application/xml + Date: + - Fri, 14 Feb 2025 18:36:59 GMT + Server: + - AmazonS3 + Transfer-Encoding: + - chunked + x-amz-bucket-region: + - us-east-1 + x-amz-id-2: + - k5Uz7cw0ssJpvwR8Sz/142qKo8ogGeVr34Baqlel3XVBDesUcJHvn/LS9zaSbF9Uki366IoKpj4= + x-amz-request-id: + - BY6BCG4WQCKHM47N + status: + code: 200 + message: OK +- request: + body: null + headers: + User-Agent: + - !!binary | + Qm90bzMvMS4zNi4zIG1kL0JvdG9jb3JlIzEuMzYuMyB1YS8yLjAgb3MvbWFjb3MjMjQuMy4wIG1k + L2FyY2gjYXJtNjQgbGFuZy9weXRob24jMy4xMy4xIG1kL3B5aW1wbCNDUHl0aG9uIGNmZy9yZXRy + eS1tb2RlI2xlZ2FjeSBCb3RvY29yZS8xLjM2LjMgUmVzb3VyY2U= + amz-sdk-invocation-id: + - !!binary | + ZTJmZWQwN2MtMTdmNS00ZGRiLWE2NDMtMTg1MGU5ODFkOGRi + amz-sdk-request: + - !!binary | + YXR0ZW1wdD0x + method: GET + uri: https://noaa-oar-mlwp-data.s3.amazonaws.com/?prefix=FOUR_v200_GFS%2F2025%2F0205&encoding-type=url + response: + body: + string: ' + + noaa-oar-mlwp-dataFOUR_v200_GFS/2025/02051000urlfalseFOUR_v200_GFS/2025/0205/FOUR_v200_GFS_2025020500_f000_f240_06.nc2025-02-05T04:11:34.000Z"e8e8789b5a638b99f687340db896b5ff-874"73305817212804704e53c01e2a6281a0e905de37df634ec18b3c5816683548dda374531cdenoaasandboxSTANDARDFOUR_v200_GFS/2025/0205/FOUR_v200_GFS_2025020512_f000_f240_06.nc2025-02-05T16:09:36.000Z"d9856506b257bb08c767f98ea357b06e-874"73276424222804704e53c01e2a6281a0e905de37df634ec18b3c5816683548dda374531cdenoaasandboxSTANDARD' + headers: + Content-Type: + - application/xml + Date: + - Fri, 14 Feb 2025 18:37:00 
@needs_aws
def test_mlwp_single():
    """Test getting a single product from the MLWP archive."""
    prod = MLWPArchive().get_product('graphcast', datetime(2025, 1, 30, 10))
    assert prod.url == ('https://noaa-oar-mlwp-data.s3.amazonaws.com/GRAP_v100_GFS/'
                        '2025/0130/GRAP_v100_GFS_2025013012_f000_f240_06.nc')


# This test previously reused the name ``test_mlwp_single``, which rebound the function
# above so the single-product test was never collected.
# NOTE(review): the recorded fixture ``test_mlwp_single.yaml`` holds the FourCastNet range
# listings; if fixtures are keyed by test name it presumably needs renaming to match this
# test, with a fresh recording made for ``test_mlwp_single`` -- confirm.
@needs_aws
def test_mlwp_range():
    """Test getting a range of products from the MLWP archive."""
    prods = MLWPArchive().get_range('fourcastnet', datetime(2025, 2, 3), datetime(2025, 2, 6))
    names = [p.name for p in prods]
    truth = ['FOUR_v200_GFS_2025020300_f000_f240_06.nc',
             'FOUR_v200_GFS_2025020312_f000_f240_06.nc',
             'FOUR_v200_GFS_2025020400_f000_f240_06.nc',
             'FOUR_v200_GFS_2025020412_f000_f240_06.nc',
             'FOUR_v200_GFS_2025020500_f000_f240_06.nc',
             'FOUR_v200_GFS_2025020512_f000_f240_06.nc']
    assert names == truth