Skip to content

Commit

Permalink
Merge pull request #165 from blaylockbk/151-bug-in-herbie-xarray-acce…
Browse files Browse the repository at this point in the history
…ssor-for-nearest_point

Improvements to `nearest_points` required input.
  • Loading branch information
blaylockbk authored Feb 20, 2023
2 parents e92f723 + 607f07a commit a115712
Show file tree
Hide file tree
Showing 10 changed files with 1,005 additions and 461 deletions.
487 changes: 255 additions & 232 deletions docs/user_guide/_bonus_notebooks/nearest_points.ipynb

Large diffs are not rendered by default.

861 changes: 673 additions & 188 deletions docs/user_guide/_tutorial_notebooks/accessors.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion herbie/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
__version__ = "unknown"
__version_tuple__ = (999, 999, 999)


########################################################################
# Append Path object with my custom expand method so user can use
# environment variables in the config file (e.g., ${HOME}).
Expand Down Expand Up @@ -124,7 +125,6 @@ def template(self):
########################################################################
# If a config file isn't found, make one
if not _config_path.exists():

print(
f" ╭─────────────────────────────────────────────────╮\n"
f" │ I'm building Herbie's default config file. │\n"
Expand Down
85 changes: 58 additions & 27 deletions herbie/accessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@
import numpy as np
import pandas as pd
import xarray as xr
from shapely.geometry import Polygon
import shapely
from shapely.geometry import Polygon, MultiPoint, Point


_level_units = dict(
Expand Down Expand Up @@ -146,33 +147,73 @@ def nearest_points(self, points, names=None, verbose=True):
"""
Get the nearest latitude/longitude points from a xarray Dataset.
Info
----
- Stack Overflow: https://stackoverflow.com/questions/58758480/xarray-select-nearest-lat-lon-with-multi-dimension-coordinates
- MetPy Details: https://unidata.github.io/MetPy/latest/tutorials/xarray_tutorial.html?highlight=assign_y_x
Parameters
----------
ds : xr.Dataset
A Herbie-friendly xarray Dataset
points : tuple (lon, lat) or list of tuples
The longitude and latitude (lon, lat) coordinate pair (as a tuple)
for the points you want to pluck from the gridded Dataset.
A list of tuples may be given to return the values from multiple points.
points : tuple, list of tuples, pd.DataFrame, or shapely Point/MultiPoint
Points to be plucked from the gridded Dataset.
There are multiple objects accepted.
1. Tuple of longitude and latitude (lon, lat) coordinate pair.
1. List of multiple (lon, lat) coordinate pair tuples.
1. Pandas DataFrame with ``longitude`` and ``latitude`` columns. Index will be used as point names, unless ``names`` is specified.
1. Shapely Point or MultiPoint
names : list
A list of names for each point location (i.e., station name).
None will not append any names. names should be the same
length as points.
Benchmark
---------
Notes
-----
This is **much** faster than my old "pluck_points" method.
For matchign 1,948 points:
For matching 1,948 points:
- `nearest_points` completed in 7.5 seconds.
- `pluck_points` completed in 2 minutes.
TODO: Explore alternatives
- Could Shapely nearest_points be used
https://shapely.readthedocs.io/en/latest/manual.html#nearest-points
- Or possibly scipy BallTree method.
"""
ds = self._obj

# Longitude and Latitude point DataFrame
if isinstance(points, pd.DataFrame):
point_df = points[["longitude", "latitude"]]
if names is not None:
point_df.index = names
elif np.shape(points) == (2,):
# points is a tuple (lon, lat) or list [lon, lat]
# and name is given as None or str
point_df = pd.DataFrame(
[points],
columns=["longitude", "latitude"],
index=[names],
)
elif isinstance(points, list):
# points given as a list of coordinate-pair tuples
# and name is given as a list of str
point_df = pd.DataFrame(
points,
columns=["longitude", "latitude"],
index=names,
)
elif isinstance(points, (MultiPoint, Point)):
# points is given as a Shapely object
point_df = pd.DataFrame(
shapely.get_coordinates(points),
columns=["longitude", "latitude"],
index=names,
)
else:
raise ValueError("The points supplied was not understood.")

# Check if MetPy has already parsed the CF metadata grid projection.
# Do that if it hasn't been done yet.
if "metpy_crs" not in ds:
Expand All @@ -187,16 +228,10 @@ def nearest_points(self, points, names=None, verbose=True):
# We want to index the dataset at a single point.
# We can do this by transforming a lat/lon point to the grid location
crs = ds.metpy_crs.item().to_cartopy()
# lat/lon input must be a numpy array, not a list or polygon
if isinstance(points, tuple):
# If a tuple is give, turn into a one-item list.
points = np.array([points])
if not isinstance(points, np.ndarray):
# Points must be a 2D numpy array
points = np.array(points)
lons = points[:, 0]
lats = points[:, 1]
transformed_data = crs.transform_points(ccrs.PlateCarree(), lons, lats)

transformed_data = crs.transform_points(
ccrs.PlateCarree(), point_df.longitude, point_df.latitude
)
xs = transformed_data[:, 0]
ys = transformed_data[:, 1]

Expand All @@ -211,13 +246,9 @@ def nearest_points(self, points, names=None, verbose=True):
dim="point",
)

# Add list of names as a coordinate
if names is not None:
# Assign the point dimension as the names.
assert len(points) == len(
names
), "`points` and `names` must be same length."
new_ds["point"] = names
new_ds.coords["point"] = ("point", point_df.index.to_list())
new_ds.coords["point_latitude"] = ("point", point_df.latitude)
new_ds.coords["point_longitude"] = ("point", point_df.longitude)

return new_ds

Expand Down
1 change: 0 additions & 1 deletion herbie/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,6 @@ def _check_idx(self, url, verbose=False):

# Loop through IDX_SUFFIX options until we find one that exists
for i in self.IDX_SUFFIX:

if Path(url).suffix in {".grb", ".grib", ".grb2", ".grib2"}:
idx_url = url.rsplit(".", maxsplit=1)[0] + i
else:
Expand Down
1 change: 0 additions & 1 deletion herbie/hrrr_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ def load_combined_dataset(start_date, num_hours, level, param_short_name):


def demo_tmp2m():

# Let's grab an analysis file.
ds = load_combined_dataset(
start_date=pd.to_datetime("2021-07-03 12:00"),
Expand Down
1 change: 0 additions & 1 deletion herbie/models/ecmwf.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

class ecmwf:
def template(self):

# TODO: This will need to be updated someday
version = "0p4-beta"
# version = '0p4'
Expand Down
1 change: 0 additions & 1 deletion herbie/models/gefs.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ class gefs_reforecast:
"""

def template(self):

self.DESCRIPTION = "Global Ensemble Forecast System (GEFS)"
self.DETAILS = {
"aws": "https://registry.opendata.aws/noaa-gefs-reforecast/",
Expand Down
26 changes: 18 additions & 8 deletions tests/test_accessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
"""

from herbie import Herbie
from shapely.geometry import MultiPoint
import pandas as pd


def test_crs():
Expand All @@ -17,14 +19,22 @@ def test_crs():


def test_nearest_points():
H = Herbie(
"2022-12-13 12:00",
model="hrrr",
product="sfc",
)
ds = H.xarray("TMP:2 m")
ds1 = ds.herbie.nearest_points([(-100, 40), (-105, 35)])
assert len(ds1.t2m)
ds = Herbie("2022-12-13 12:00", model="hrrr", product="sfc").xarray("TMP:2 m")

p3 = [(-110, 50), (-100, 40), (-105, 35)]
n3 = ["AAA", "BBB", "CCC"]

test_points = [
(-100, 40),
[-100, 40],
p3,
MultiPoint(p3),
pd.DataFrame(p3, columns=["longitude", "latitude"], index=n3),
]

for p in test_points:
ds1 = ds.herbie.nearest_points(p)
assert len(ds1.t2m)


def test_polygon():
Expand Down
1 change: 0 additions & 1 deletion tests/test_hrrr.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ def test_hrrr_to_netcdf():


def test_hrrr_aws2():

# Test HRRR with string date
H = Herbie(
yesterday_str,
Expand Down

0 comments on commit a115712

Please sign in to comment.