Skip to content

Commit

Permalink
Optimized code: benchmark from 27.133s->0.067s
Browse files Browse the repository at this point in the history
  • Loading branch information
thorbjoernl committed Sep 6, 2024
1 parent 600865a commit 82df240
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 21 deletions.
5 changes: 3 additions & 2 deletions scripts/benchmark_relalt_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,14 @@
end_time = "1997-01-02 00:00:00"


writer.writerow((variable, name, lat, lon, value, unit, start_time, end_time, altitude))
writer.writerow((variable, name, lon, lat, value, unit, start_time, end_time, altitude))

# Benchmark
engines = pyaro.list_timeseries_engines()
with engines["csv_timeseries"].open(
filename="tmp_data.csv",
filters=[pyaro.timeseries.filters.get("relaltitude", topo_file = "../tests/testdata/datadir_elevation/topography.nc", rdiff=90)],
#filters=[pyaro.timeseries.filters.get("altitude", min_altitude=200)], # 0.023s
filters=[pyaro.timeseries.filters.get("relaltitude", topo_file = "../tests/testdata/datadir_elevation/topography.nc", rdiff=90)], # 27.133s
columns={
"variable": 0,
"station": 1,
Expand Down
47 changes: 28 additions & 19 deletions src/pyaro/timeseries/Filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -960,16 +960,12 @@ def _extract_bounding_box(self):
self._boundary_north = float(self._topography[self._lat].max())
logger.info("Bounding box (NESW): %.2f, %.2f, %.2f, %.2f", self._boundary_north, self._boundary_east, self._boundary_south, self._boundary_west)

def _gridded_altitude_from_lat_lon(self, lat: np.ndarray, lon: np.ndarray) -> np.ndarray:
    """
    Look up the gridded topography altitude nearest to each (lat, lon) pair.

    :param lat: Latitudes of the points to look up.
    :param lon: Longitudes of the points to look up; paired element-wise
        with ``lat``.
    :return: Values of ``self._topo_var`` at the grid cell nearest to each
        (lat, lon) pair, taken from the first entry along the leading axis.
    """
    # Use the configured coordinate names (self._lat / self._lon) instead of
    # hard-coded "lat" / "lon", so topography files with other coordinate
    # names keep working (the bounding-box code reads self._lat as well).
    # Both indexers share the "latlon" dimension so that the selection is
    # done pair-wise rather than as an outer product of lat x lon.
    # TODO: Include a tolerance?
    points = self._topography.sel(
        {
            self._lat: xr.DataArray(lat, dims="latlon"),
            self._lon: xr.DataArray(lon, dims="latlon"),
        },
        method="nearest",
    )

    # Should not vary in time too much so picking the first one here.
    # NOTE(review): assumes the leading axis of the variable is time - confirm.
    return points[self._topo_var].values[0]

def _is_close(self, alt_gridded: float, alt_station: float) -> bool:
def _is_close(self, alt_gridded: np.ndarray, alt_station: np.ndarray) -> np.ndarray[bool]:
"""
Function to check if two altitudes are within an absolute difference
(self._rdiff) of each other.
Expand All @@ -981,7 +977,7 @@ def _is_close(self, alt_gridded: float, alt_station: float) -> bool:
True if the absolute difference between alt_gridded and alt_station is
<= self._rdiff
"""
return abs(alt_gridded-alt_station) <= self._rdiff
return np.abs(alt_gridded-alt_station) <= self._rdiff

def init_kwargs(self):
return {
Expand All @@ -999,18 +995,31 @@ def filter_stations(self, stations: dict[str, Station]) -> dict[str, Station]:

filtered_stations = dict()

names: list[str] = []
lats: list[float] = []
lons: list[float] = []
alts: list[float] = []
for name, station in stations.items():
lat = station["latitude"]
lon = station["longitude"]
names.append(name)
lats.append(station["latitude"])
lons.append(station["longitude"])
alts.append(station["altitude"])

if lon < self._boundary_west or lon > self._boundary_east or lat < self._boundary_south or lat > self._boundary_north:
logger.warning("Station '%s' (lat=%.2f, lon=%.2f) lies outside topography bounding box. It has been removed.", name, lat, lon)
continue
names = np.array(names)
lats = np.array(lats)
lons = np.array(lons)
alts = np.array(alts)

out_of_bounds_mask = np.logical_or(np.logical_or(lons < self._boundary_west, lons > self._boundary_east), np.logical_or(lats < self._boundary_south, lats > self._boundary_north))
if np.sum(out_of_bounds_mask) > 0:
logger.warning("Some stations were removed due to being out of bounds of the gridded topography")

topo = self._gridded_altitude_from_lat_lon(lats, lons)

within_rdiff_mask = self._is_close(topo, alts)

altobs = station["altitude"]
topo = self._gridded_altitude_from_lat_lon(lat, lon)
mask = np.logical_and(~out_of_bounds_mask, within_rdiff_mask)

if not math.isnan(altobs) and self._is_close(topo, altobs):
filtered_stations[name] = station
selected_names = names[mask]

return filtered_stations
return {name: stations[name] for name in selected_names}

0 comments on commit 82df240

Please sign in to comment.