Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deprecate mf_flag call signature in xclim.ensembles, expose calendar conversion utilities, fix _gen_returns_section bug #1317

Merged
merged 14 commits into from
Mar 10, 2023
Merged
6 changes: 6 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ Changelog
-------------------
Contributors to this version: Trevor James Smith (:user:`Zeitsperre`).

Breaking changes
^^^^^^^^^^^^^^^^
* The call signatures for ``xclim.ensembles.create_ensemble`` and ``xclim.ensembles._base._ens_align_datasets`` have been deprecated (:issue:`1305`, :pull:`1317`).
* `mf_flag` (bool) -> `multifile` (bool)
Calls to these functions made with the original signature will emit warnings. Changes will become breaking in `xclim>=0.43.0`.

Internal changes
^^^^^^^^^^^^^^^^
* Added `xclim` to the `ouranos Zenodo community <https://zenodo.org/communities/ouranos/>`_ . (:pull:`1313`).
Expand Down
1 change: 1 addition & 0 deletions xclim/core/calendar.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
"adjust_doy_calendar",
"climatological_mean_doy",
"compare_offsets",
"common_calendar",
"convert_calendar",
"date_range",
"date_range_like",
Expand Down
55 changes: 38 additions & 17 deletions xclim/ensembles/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"""
from __future__ import annotations

import warnings
from glob import glob
from pathlib import Path
from typing import Any, Sequence
Expand All @@ -19,11 +20,12 @@

def create_ensemble(
datasets: Any,
mf_flag: bool = False,
multifile: bool = False,
resample_freq: str | None = None,
calendar: str | None = None,
realizations: Sequence[Any] | None = None,
cal_kwargs: dict | None = None,
mf_flag: bool | str = "UNSET", # noqa
**xr_kwargs,
) -> xr.Dataset:
"""Create an xarray dataset of an ensemble of climate simulation from a list of netcdf files.
Expand All @@ -38,32 +40,32 @@ def create_ensemble(
Parameters
----------
datasets : list or dict or string
List of netcdf file paths or xarray Dataset/DataArray objects . If mf_flag is True, ncfiles should be a list of
lists where each sublist contains input .nc files of an xarray multifile Dataset.
List of netcdf file paths or xarray Dataset/DataArray objects. If `multifile` is True, `datasets` should be a
list of lists where each sublist contains input .nc files of an xarray multifile Dataset.
If DataArray objects are passed, they should have a name in order to be transformed into Datasets.
A dictionary can be passed instead of a list, in which case the keys are used as coordinates along the new
`realization` axis.
If a string is passed, it is assumed to be a glob pattern for finding datasets.
mf_flag : bool
multifile : bool
If True, climate simulations are treated as xarray multifile Datasets before concatenation.
Only applicable when "datasets" is sequence of list of file paths.
Only applicable when "datasets" is a sequence of lists of file paths. Default: False.
resample_freq : Optional[str]
If the members of the ensemble have the same frequency but not the same offset, they cannot be properly aligned.
If resample_freq is set, the time coordinate of each member will be modified to fit this frequency.
calendar : str, optional
The calendar of the time coordinate of the ensemble.
By default, the smallest common calendar is chosen. For example, a mixed input of "noleap" and "360_day" will default to "noleap".
'default' is the standard calendar using np.datetime64 objects (xarray's "standard" with `use_cftime=False`).
realizations: sequence, optional
realizations : sequence, optional
The coordinate values for the new `realization` axis.
If None (default), the new axis has a simple integer coordinate.
This argument shouldn't be used if `datasets` is a glob pattern as the dataset order is random.
cal_kwargs : dict, optional
Additionnal arguments to pass to py:func:`xclim.core.calendar.convert_calendar`.
Additional arguments to pass to :py:func:`xclim.core.calendar.convert_calendar`.
For conversions involving '360_day', the align_on='date' option is used by default.
**xr_kwargs
Any keyword arguments to be given to `xr.open_dataset` when opening the files
(or to `xr.open_mfdataset` if mf_flag is True)
(or to `xr.open_mfdataset` if `multifile` is True)

Returns
-------
Expand All @@ -90,7 +92,7 @@ def create_ensemble(

# Simulation 2 is also a list of .nc files:
datasets.extend(Path("/dir2").glob("*.nc"))
ens = create_ensemble(datasets, mf_flag=True)
ens = create_ensemble(datasets, multifile=True)
"""
if isinstance(datasets, dict):
if realizations is None:
Expand All @@ -103,9 +105,18 @@ def create_ensemble(
"is a glob pattern, as the final order is random."
)

if mf_flag != "UNSET":
warnings.warn(
"The `mf_flag` argument is being deprecated in favour of `multifile` in `create.ensemble()`. "
"This change will be made effective from `xclim>=0.43.0`. Please update your scripts accordingly",
DeprecationWarning,
Zeitsperre marked this conversation as resolved.
Show resolved Hide resolved
stacklevel=3,
)
multifile = mf_flag

ds = _ens_align_datasets(
datasets,
mf_flag,
multifile,
resample_freq,
calendar=calendar,
cal_kwargs=cal_kwargs or {},
Expand All @@ -118,8 +129,8 @@ def create_ensemble(
dim = xr.IndexVariable("realization", list(realizations), attrs={"axis": "E"})

ens = xr.concat(ds, dim)
for vname, var in ds[0].variables.items():
ens[vname].attrs.update(**var.attrs)
for var_name, var in ds[0].variables.items():
ens[var_name].attrs.update(**var.attrs)
ens.attrs.update(**ds[0].attrs)

return ens
Expand Down Expand Up @@ -336,22 +347,23 @@ def ensemble_percentiles(

def _ens_align_datasets(
datasets: list[xr.Dataset | Path | str | list[Path | str]] | str,
mf_flag: bool = False,
multifile: bool = False,
resample_freq: str | None = None,
calendar: str = "default",
cal_kwargs: dict | None = None,
mf_flag: bool | str = "UNSET", # noqa
**xr_kwargs,
) -> list[xr.Dataset]:
"""Create a list of aligned xarray Datasets for ensemble Dataset creation.

Parameters
----------
datasets : list[xr.Dataset | xr.DataArray | Path | str | list[Path | str]] or str
List of netcdf file paths or xarray Dataset/DataArray objects . If mf_flag is True, 'datasets' should be a list
of lists where each sublist contains input NetCDF files of a xarray multi-file Dataset.
List of netcdf file paths or xarray Dataset/DataArray objects. If `multifile` is True, 'datasets' should be a
list of lists where each sublist contains input NetCDF files of an xarray multi-file Dataset.
DataArrays should have a name, so they can be converted to datasets.
If a string, it is assumed to be a glob pattern for finding datasets.
mf_flag : bool
multifile : bool
If True climate simulations are treated as xarray multi-file datasets before concatenation.
Only applicable when 'datasets' is a sequence of file paths.
resample_freq : str, optional
Expand All @@ -375,10 +387,19 @@ def _ens_align_datasets(
if isinstance(datasets, str):
datasets = glob(datasets)

if mf_flag != "UNSET":
warnings.warn(
"The `mf_flag` argument is being deprecated in favour of `multifile` in `_ens_align_datasets()`. "
"This change will be made effective from `xclim>=0.43.0`. Please update your scripts accordingly",
DeprecationWarning,
Zeitsperre marked this conversation as resolved.
Show resolved Hide resolved
stacklevel=3,
)
multifile = mf_flag

ds_all = []
calendars = []
for i, n in enumerate(datasets):
if mf_flag:
if multifile:
ds = xr.open_mfdataset(n, combine="by_coords", **xr_kwargs)
else:
if isinstance(n, xr.Dataset):
Expand Down
2 changes: 1 addition & 1 deletion xclim/ensembles/_robustness.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
def change_significance(
fut: xr.DataArray | xr.Dataset,
ref: xr.DataArray | xr.Dataset = None,
test: str = "ttest",
test: str | None = "ttest",
weights: xr.DataArray = None,
p_vals: bool = False,
**kwargs,
Expand Down
6 changes: 3 additions & 3 deletions xclim/testing/tests/test_ensembles.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ def test_create_ensemble(
ds = open_dataset(n, decode_times=False)
ds["time"] = xr.decode_cf(ds).time
ds_all.append(ds)

ens = ensembles.create_ensemble(ds_all)

assert len(ens.realization) == len(ensemble_dataset_objects["nc_files_simple"])
Expand Down Expand Up @@ -135,8 +134,8 @@ def test_create_unaligned_times(self, timegen, calkw):
ens = ensembles.create_ensemble((d1, d2))
assert ens.time.size == 48
np.testing.assert_equal(ens.isel(time=0), [np.nan, 0])

ens = ensembles.create_ensemble((d1, d2), resample_freq="MS")

assert ens.time.size == 24
np.testing.assert_equal(ens.isel(time=0), [0, 0])

Expand Down Expand Up @@ -244,7 +243,8 @@ def test_calc_mean_std_min_max(self, ensemble_dataset_objects, open_dataset):
for n in ensemble_dataset_objects["nc_files_simple"]:
ds = open_dataset(n)
ds_all.append(ds)
ens = ensembles.create_ensemble(ds_all)
with pytest.warns(DeprecationWarning):
Zeitsperre marked this conversation as resolved.
Show resolved Hide resolved
ens = ensembles.create_ensemble(ds_all, mf_flag=False)

out1 = ensembles.ensemble_mean_std_max_min(ens)
np.testing.assert_array_equal(
Expand Down