Skip to content

Commit e7c081c

Browse files
committed
make all nowcast methods xarray compatible (#414)
* make test steps skill run * undo accidental change * make steps nowcast xarray compatible * wrap all nowcasts in xarray * fix dimension.py tests * update dimension.py to work with new dataarrays * fix test_nowcast_utils tests * update docs and make xarray usage more explicit in nowcasts * update docs and make xarray usage in motion methods more explicit
1 parent 016d28f commit e7c081c

27 files changed

+865
-441
lines changed

pysteps/decorators.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323
import numpy as np
2424

25-
from pysteps.converters import convert_to_xarray_dataset
25+
from pysteps.xarray_helpers import convert_input_to_xarray_dataset
2626

2727

2828
def _add_extra_kwrds_to_docstrings(target_func, extra_kwargs_doc_text):
@@ -90,7 +90,9 @@ def _import_with_postprocessing(*args, **kwargs):
9090
mask = ~np.isfinite(precip)
9191
precip[mask] = _fillna
9292

93-
return convert_to_xarray_dataset(precip.astype(_dtype), quality, metadata)
93+
return convert_input_to_xarray_dataset(
94+
precip.astype(_dtype), quality, metadata
95+
)
9496

9597
extra_kwargs_doc = """
9698
Other Parameters
@@ -126,7 +128,9 @@ def new_function(*args, **kwargs):
126128
target motion_method_func function.
127129
"""
128130

129-
input_images = args[0]
131+
dataset = args[0]
132+
precip_var = dataset.attrs["precip_var"]
133+
input_images = dataset[precip_var].values
130134
if input_images.ndim != 3:
131135
raise ValueError(
132136
"input_images dimension mismatch.\n"

pysteps/io/importers.py

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -74,22 +74,27 @@
7474
7575
.. tabularcolumns:: |p{2cm}|L|
7676
77-
+---------------+-------------------------------------------------------------------------------------------+
78-
| Coordinate | Description |
79-
+===============+===========================================================================================+
80-
| y | y-coordinate in Cartesian system, with units determined by ``metadata["cartesian_unit"]`` |
81-
+---------------+-------------------------------------------------------------------------------------------+
82-
| x | x-coordinate in Cartesian system, with units determined by ``metadata["cartesian_unit"]`` |
83-
+---------------+-------------------------------------------------------------------------------------------+
84-
| lat | latitude coordinate in degrees |
85-
+---------------+-------------------------------------------------------------------------------------------+
86-
| lon | longitude coordinate in degrees |
87-
+---------------+-------------------------------------------------------------------------------------------+
88-
| time | forecast time in seconds since forecast start time |
89-
+---------------+-------------------------------------------------------------------------------------------+
90-
| member | ensemble member number (integer) |
91-
+---------------+-------------------------------------------------------------------------------------------+
92-
77+
+--------------------+-------------------------------------------------------------------------------------------+
78+
| Coordinate | Description |
79+
+====================+===========================================================================================+
80+
| y | y-coordinate in Cartesian system, with units determined by ``metadata["cartesian_unit"]`` |
81+
+--------------------+-------------------------------------------------------------------------------------------+
82+
| x | x-coordinate in Cartesian system, with units determined by ``metadata["cartesian_unit"]`` |
83+
+--------------------+-------------------------------------------------------------------------------------------+
84+
| lat | latitude coordinate in degrees |
85+
+--------------------+-------------------------------------------------------------------------------------------+
86+
| lon | longitude coordinate in degrees |
87+
+--------------------+-------------------------------------------------------------------------------------------+
88+
| time | forecast time in seconds since forecast start time |
89+
+--------------------+-------------------------------------------------------------------------------------------+
90+
| ens_number | ensemble member number (integer) |
91+
+--------------------+-------------------------------------------------------------------------------------------+
92+
| direction | used by proesmans to return the forward and backward advection and consistency fields |
93+
+--------------------+-------------------------------------------------------------------------------------------+
94+
95+
The time, x and y dimensions all MUST be regularly spaced, with the stepsize included
96+
in a ``stepsize`` attribute. The stepsize is given in the unit of the dimension (this
97+
is always seconds for the time dimension).
9398
9499
The dataset can contain the following data variables:
95100
@@ -102,8 +107,14 @@
102107
| precip_accum | precip_intensity if unit is ``mm/h``, precip_accum if unit is ``mm`` and reflectivity if unit is ``dBZ``, |
103108
| or reflectivity | the attributes of this variable contain metadata relevant to this attribute (see below) |
104109
+-------------------+-----------------------------------------------------------------------------------------------------------+
110+
| velocity_x | x-component of the advection field in cartesian_unit per timestep |
111+
+-------------------+-----------------------------------------------------------------------------------------------------------+
112+
| velocity_y | y-component of the advection field in cartesian_unit per timestep |
113+
+-------------------+-----------------------------------------------------------------------------------------------------------+
105114
| quality | value between 0 and 1 denoting the quality of the precipitation data, currently not used for anything |
106115
+-------------------+-----------------------------------------------------------------------------------------------------------+
116+
| velocity_quality | value between 0 and 1 denoting the quality of the velocity data, currently only returned by proesmans |
117+
+-------------------+-----------------------------------------------------------------------------------------------------------+
107118
108119
Some of the metadata in the metadata dictionary is not explicitly stored in the dataset,
109120
but is still implicitly present. For example ``x1`` can easily be found by taking the first

pysteps/io/readers.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import xarray as xr
1616

1717

18-
def read_timeseries(inputfns, importer, **kwargs) -> xr.Dataset | None:
18+
def read_timeseries(inputfns, importer, timestep=None, **kwargs) -> xr.Dataset | None:
1919
"""
2020
Read a time series of input files using the methods implemented in the
2121
:py:mod:`pysteps.io.importers` module and stack them into a 3d xarray
@@ -28,6 +28,9 @@ def read_timeseries(inputfns, importer, **kwargs) -> xr.Dataset | None:
2828
:py:mod:`pysteps.io.archive` module.
2929
importer: function
3030
A function implemented in the :py:mod:`pysteps.io.importers` module.
31+
timestep: int, optional
32+
The timestep in seconds. This value is optional if more than 1 inputfns
33+
are given.
3134
kwargs: dict
3235
Optional keyword arguments for the importer.
3336
@@ -58,6 +61,16 @@ def read_timeseries(inputfns, importer, **kwargs) -> xr.Dataset | None:
5861
return None
5962

6063
startdate = min(inputfns[1])
64+
sorted_dates = sorted(inputfns[1])
65+
timestep_dates = int((sorted_dates[1] - sorted_dates[0]).total_seconds())
66+
67+
if timestep is None:
68+
timestep = timestep_dates
69+
if timestep != timestep_dates:
70+
raise ValueError("given timestep does not match inputfns")
71+
for i in range(len(sorted_dates) - 1):
72+
if int((sorted_dates[i + 1] - sorted_dates[i]).total_seconds()) != timestep:
73+
raise ValueError("supplied dates are not evenly spaced")
6174

6275
datasets = []
6376
for i, ifn in enumerate(inputfns[0]):
@@ -73,6 +86,7 @@ def read_timeseries(inputfns, importer, **kwargs) -> xr.Dataset | None:
7386
{
7487
"long_name": "forecast time",
7588
"units": f"seconds since {startdate:%Y-%m-%d %H:%M:%S}",
89+
"stepsize": timestep,
7690
},
7791
)
7892
)

pysteps/motion/constant.py

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,27 +14,32 @@
1414

1515
import numpy as np
1616
import scipy.optimize as op
17+
import xarray as xr
1718
from scipy.ndimage import map_coordinates
1819

1920

20-
def constant(R, **kwargs):
21+
def constant(dataset: xr.Dataset, **kwargs):
2122
"""
2223
Compute a constant advection field by finding a translation vector that
2324
maximizes the correlation between two successive images.
2425
2526
Parameters
2627
----------
27-
R: array_like
28-
Array of shape (T,m,n) containing a sequence of T two-dimensional input
29-
images of shape (m,n). If T > 2, two last elements along axis 0 are used.
28+
dataset: xarray.Dataset
29+
Input dataset as described in the documentation of
30+
:py:mod:`pysteps.io.importers`. It has to contain a precipitation data variable.
31+
The dataset has to have a time dimension. If the size of this dimension
32+
is larger than 2, the last 2 entries of this dimension are used.
3033
3134
Returns
3235
-------
33-
out: array_like
34-
The constant advection field having shape (2, m, n), where out[0, :, :]
35-
contains the x-components of the motion vectors and out[1, :, :]
36-
contains the y-components.
36+
out: xarray.Dataset
37+
The input dataset with the constant advection field added in the ``velocity_x``
38+
and ``velocity_y`` data variables.
3739
"""
40+
dataset = dataset.copy(deep=True)
41+
precip_var = dataset.attrs["precip_var"]
42+
R = dataset[precip_var].values
3843
m, n = R.shape[1:]
3944
X, Y = np.meshgrid(np.arange(n), np.arange(m))
4045

@@ -51,4 +56,7 @@ def f(v):
5156
options = {"initial_simplex": (np.array([(0, 1), (1, 0), (1, 1)]))}
5257
result = op.minimize(f, (1, 1), method="Nelder-Mead", options=options)
5358

54-
return np.stack([-result.x[0] * np.ones((m, n)), -result.x[1] * np.ones((m, n))])
59+
output = np.stack([-result.x[0] * np.ones((m, n)), -result.x[1] * np.ones((m, n))])
60+
dataset["velocity_x"] = (["y", "x"], output[0])
61+
dataset["velocity_y"] = (["y", "x"], output[1])
62+
return dataset

pysteps/motion/darts.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,25 +11,28 @@
1111
DARTS
1212
"""
1313

14-
import numpy as np
1514
import time
15+
16+
import numpy as np
17+
import xarray as xr
1618
from numpy.linalg import lstsq, svd
1719

1820
from pysteps import utils
1921
from pysteps.decorators import check_input_frames
2022

2123

2224
@check_input_frames(just_ndim=True)
23-
def DARTS(input_images, **kwargs):
25+
def DARTS(dataset: xr.Dataset, **kwargs):
2426
"""
2527
Compute the advection field from a sequence of input images by using the
2628
DARTS method. :cite:`RCW2011`
2729
2830
Parameters
2931
----------
30-
input_images: array-like
31-
Array of shape (T,m,n) containing a sequence of T two-dimensional input
32-
images of shape (m,n).
32+
dataset: xarray.Dataset
33+
Input dataset as described in the documentation of
34+
:py:mod:`pysteps.io.importers`. It has to contain a precipitation data variable.
35+
The dataset has to have a time dimension.
3336
3437
Other Parameters
3538
----------------
@@ -67,13 +70,15 @@ def DARTS(input_images, **kwargs):
6770
6871
Returns
6972
-------
70-
out: ndarray
71-
Three-dimensional array (2,m,n) containing the dense x- and y-components
72-
of the motion field in units of pixels / timestep as given by the input
73-
array R.
73+
out: xarray.Dataset
74+
The input dataset with the advection field added in the ``velocity_x``
75+
and ``velocity_y`` data variables.
7476
7577
"""
7678

79+
dataset = dataset.copy(deep=True)
80+
precip_var = dataset.attrs["precip_var"]
81+
input_images = dataset[precip_var].values
7782
N_x = kwargs.get("N_x", 50)
7883
N_y = kwargs.get("N_y", 50)
7984
N_t = kwargs.get("N_t", 4)
@@ -214,10 +219,14 @@ def DARTS(input_images, **kwargs):
214219
fft.ifft2(_fill(V, input_images.shape[0], input_images.shape[1], k_x, k_y))
215220
)
216221

222+
output = np.stack([U, V])
223+
dataset["velocity_x"] = (["y", "x"], output[0])
224+
dataset["velocity_y"] = (["y", "x"], output[1])
225+
217226
if verbose:
218227
print("--- %s seconds ---" % (time.time() - t0))
219228

220-
return np.stack([U, V])
229+
return dataset
221230

222231

223232
def _leastsq(A, B, y):

pysteps/motion/lucaskanade.py

Lines changed: 32 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -22,22 +22,22 @@
2222
dense_lucaskanade
2323
"""
2424

25+
import time
26+
2527
import numpy as np
28+
import xarray as xr
2629
from numpy.ma.core import MaskedArray
2730

31+
from pysteps import feature, utils
2832
from pysteps.decorators import check_input_frames
29-
30-
from pysteps import utils, feature
3133
from pysteps.tracking.lucaskanade import track_features
3234
from pysteps.utils.cleansing import decluster, detect_outliers
3335
from pysteps.utils.images import morph_opening
3436

35-
import time
36-
3737

3838
@check_input_frames(2)
3939
def dense_lucaskanade(
40-
input_images,
40+
dataset: xr.Dataset,
4141
lk_kwargs=None,
4242
fd_method="shitomasi",
4343
fd_kwargs=None,
@@ -73,18 +73,14 @@ def dense_lucaskanade(
7373
7474
Parameters
7575
----------
76-
input_images: ndarray_ or MaskedArray_
77-
Array of shape (T, m, n) containing a sequence of *T* two-dimensional
78-
input images of shape (m, n). The indexing order in **input_images** is
79-
assumed to be (time, latitude, longitude).
80-
81-
*T* = 2 is the minimum required number of images.
82-
With *T* > 2, all the resulting sparse vectors are pooled together for
83-
the final interpolation on a regular grid.
84-
85-
In case of ndarray_, invalid values (Nans or infs) are masked,
86-
otherwise the mask of the MaskedArray_ is used. Such mask defines a
87-
region where features are not detected for the tracking algorithm.
76+
dataset: xarray.Dataset
77+
Input dataset as described in the documentation of
78+
:py:mod:`pysteps.io.importers`. It has to contain a precipitation data variable.
79+
The dataset has to have a time dimension. The size of the time dimension needs to
80+
be at least 2. If it is larger than 2, all the resulting sparse vectors are pooled
81+
together for the final interpolation on a regular grid. Invalid values (Nans or infs)
82+
are masked. This mask defines a region where features are not detected for the tracking
83+
algorithm.
8884
8985
lk_kwargs: dict, optional
9086
Optional dictionary containing keyword arguments for the `Lucas-Kanade`_
@@ -151,14 +147,10 @@ def dense_lucaskanade(
151147
152148
Returns
153149
-------
154-
out: ndarray_ or tuple
155-
If **dense=True** (the default), return the advection field having shape
156-
(2, m, n), where out[0, :, :] contains the x-components of the motion
157-
vectors and out[1, :, :] contains the y-components.
158-
The velocities are in units of pixels / timestep, where timestep is the
159-
time difference between the two input images.
160-
Return a zero motion field of shape (2, m, n) when no motion is
161-
detected.
150+
out: xarray.Dataset or tuple
151+
If **dense=True** (the default), return the input dataset with the advection
152+
field added in the ``velocity_x`` and ``velocity_y`` data variables.
153+
Return a zero motion field when no motion is detected.
162154
163155
If **dense=False**, it returns a tuple containing the 2-dimensional
164156
arrays **xy** and **uv**, where x, y define the vector locations,
@@ -179,7 +171,9 @@ def dense_lucaskanade(
179171
Understanding Workshop, pp. 121–130, 1981.
180172
"""
181173

182-
input_images = input_images.copy()
174+
dataset = dataset.copy(deep=True)
175+
precip_var = dataset.attrs["precip_var"]
176+
input_images = dataset[precip_var].values
183177

184178
if verbose:
185179
print("Computing the motion field with the Lucas-Kanade method.")
@@ -244,7 +238,10 @@ def dense_lucaskanade(
244238
# return zero motion field if no sparse vectors are found
245239
if xy.shape[0] == 0:
246240
if dense:
247-
return np.zeros((2, domain_size[0], domain_size[1]))
241+
uvgrid = np.zeros((2, domain_size[0], domain_size[1]))
242+
dataset["velocity_x"] = (["y", "x"], uvgrid[0])
243+
dataset["velocity_y"] = (["y", "x"], uvgrid[1])
244+
return dataset
248245
else:
249246
return xy, uv
250247

@@ -266,14 +263,20 @@ def dense_lucaskanade(
266263

267264
# return zero motion field if no sparse vectors are left for interpolation
268265
if xy.shape[0] == 0:
269-
return np.zeros((2, domain_size[0], domain_size[1]))
266+
uvgrid = np.zeros((2, domain_size[0], domain_size[1]))
267+
dataset["velocity_x"] = (["y", "x"], uvgrid[0])
268+
dataset["velocity_y"] = (["y", "x"], uvgrid[1])
269+
return dataset
270270

271271
# interpolation
272272
xgrid = np.arange(domain_size[1])
273273
ygrid = np.arange(domain_size[0])
274274
uvgrid = interpolation_method(xy, uv, xgrid, ygrid, **interp_kwargs)
275275

276+
dataset["velocity_x"] = (["y", "x"], uvgrid[0])
277+
dataset["velocity_y"] = (["y", "x"], uvgrid[1])
278+
276279
if verbose:
277280
print("--- total time: %.2f seconds ---" % (time.time() - t0))
278281

279-
return uvgrid
282+
return dataset

0 commit comments

Comments
 (0)