diff --git a/CHANGES.rst b/CHANGES.rst index 81fe0855c..89e8b30f1 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -6,6 +6,17 @@ Changelog ------------------- Contributors to this version: Trevor James Smith (:user:`Zeitsperre`). +New features and enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +* Two previously private functions for selecting a day of year in a time series when performing calendar conversions are now exposed. (:issue:`1305`, :pull:`1317`). New functions are: + * ``xclim.core.calendar.yearly_interpolated_doy`` + * ``xclim.core.calendar.yearly_random_doy`` + +Breaking changes +^^^^^^^^^^^^^^^^ +* The call signatures for ``xclim.ensembles.create_ensemble`` and ``xclim.ensembles._base._ens_align_datasets`` have been deprecated. Calls to these functions made with the original signature will emit warnings. Changes will become breaking in `xclim>=0.43.0`. (:issue:`1305`, :pull:`1317`). Affected variable: + * `mf_flag` (bool) -> `multifile` (bool) + Internal changes ^^^^^^^^^^^^^^^^ * Added `xclim` to the `ouranos Zenodo community `_ . (:pull:`1313`). @@ -21,6 +32,7 @@ Internal changes * Markdown explanations in some Jupyter Notebooks have been edited for clarity * Removed `Mapping` abstract base class types in call signatures (`dict` variables were always expected). (:pull:`1308`). * Changes in testing setup now prevent ``test_mean_radiant_temperature`` from sometimes causing a segmentation fault. (:issue:`1303`, :pull:`1315`). +* Addressed a formatting bug that caused `Indicators` with multiple variables returned to not be properly formatted in the documentation. (:issue:`1305`, :pull:`1317`). 0.41.0 (2023-02-28) ------------------- diff --git a/docs/api.rst b/docs/api.rst index 83d731b70..55b30dc2e 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -83,22 +83,22 @@ Ensembles Module .. Use of autofunction is so that paths do not include private modules. .. autofunction:: xclim.ensembles.kkz_reduce_ensemble - :noindex: + :noindex: .. 
autofunction:: xclim.ensembles.kmeans_reduce_ensemble - :noindex: + :noindex: .. autofunction:: xclim.ensembles.plot_rsqprofile - :noindex: + :noindex: .. automodule:: xclim.ensembles._robustness - :noindex: + :noindex: .. autofunction:: xclim.ensembles.change_significance - :noindex: + :noindex: .. autofunction:: xclim.ensembles.robustness_coefficient - :noindex: + :noindex: Units Handling Submodule ======================== diff --git a/docs/indices.rst b/docs/indices.rst index e19a06969..5ec6fc0c7 100644 --- a/docs/indices.rst +++ b/docs/indices.rst @@ -4,10 +4,32 @@ Climate Indices .. note:: - Climate `Indices` serve as the driving mechanisms behind `Indicators` and should be used in cases where default settings for an Indicator may need to be tweaked, metadata completeness is not required, or a user wishes to design a virtual module from existing indices (e.g. see :ref:`notebooks/extendxclim:Defining new indicators`). + Climate `Indices` serve as the driving mechanisms behind `Indicators` and should be used in cases where + default settings for an Indicator may need to be tweaked, metadata completeness is not required, or a user + wishes to design a virtual module from existing indices (see: :ref:`notebooks/extendxclim:Defining new indicators`). For higher-level and general purpose use, the xclim developers suggest using the :ref:`indicators:Climate Indicators`. +Indices Library +--------------- + +Climate indices functions are designed to operate on :py:class:`xarray.DataArray` objects. +Most of these functions operate on daily time series, but in some cases might accept other sampling +frequencies as well. All functions perform units checks to make sure that inputs have the expected dimensions +(e.g. handling for units of temperature, whether they are Celsius, kelvin or Fahrenheit), and set the `units` +attribute of the output `DataArray`. 
+ +The :py:mod:`xclim.indices.generic`, :py:mod:`xclim.indices.helpers`, :py:mod:`xclim.indices.run_length`, and +:py:mod:`xclim.indices.stats` submodules provide helper functions to simplify the implementation of indices +while functions under :py:mod:`xclim.core.calendar` can aid with challenges arising from variable calendar +types. + +.. warning:: + + Indices functions do not perform missing value checks, and usually do not set CF-Convention attributes + (long_name, standard_name, description, cell_methods, etc.). These functionalities are provided by + :py:class:`xclim.core.indicator.Indicator` instances found in the :py:mod:`xclim.indicators.atmos`, + :py:mod:`xclim.indicators.land` and :mod:`xclim.indicators.seaIce` modules. .. automodule:: xclim.indices :members: diff --git a/xclim/core/calendar.py b/xclim/core/calendar.py index a853d687e..fe56397db 100644 --- a/xclim/core/calendar.py +++ b/xclim/core/calendar.py @@ -34,7 +34,9 @@ __all__ = [ "DayOfYearStr", "adjust_doy_calendar", + "build_climatology_bounds", "climatological_mean_doy", + "common_calendar", "compare_offsets", "convert_calendar", "date_range", @@ -52,9 +54,10 @@ "resample_doy", "select_time", "time_bnds", - "within_bnds_doy", "uniform_calendars", - "build_climatology_bounds", + "within_bnds_doy", + "yearly_interpolated_doy", + "yearly_random_doy", ] # Maximum day of year in each calendar. 
@@ -96,6 +99,43 @@ def date_range( return xr.cftime_range(*args, calendar=calendar, **kwargs) +def yearly_interpolated_doy( + time: pd.DatetimeIndex | CFTimeIndex, source_calendar: str, target_calendar: str +): + """Return the nearest day in the target calendar of the corresponding "decimal year" in the source calendar.""" + yr = int(time.dt.year[0]) + return np.round( + days_in_year(yr, target_calendar) + * time.dt.dayofyear + / days_in_year(yr, source_calendar) + ).astype(int) + + +def yearly_random_doy( + time: pd.DatetimeIndex | CFTimeIndex, + rng: np.random.Generator, + source_calendar: str, + target_calendar: str, +): + """Return a day of year in the new calendar. + + Removes Feb 29th and five other days chosen randomly within five sections of 72 days. + """ + yr = int(time.dt.year[0]) + new_doy = np.arange(360) + 1 + rm_idx = rng.integers(0, 72, 5) + (np.arange(5) * 72) + if source_calendar == "360_day": + for idx in rm_idx: + new_doy[idx + 1 :] = new_doy[idx + 1 :] + 1 + if days_in_year(yr, target_calendar) == 366: + new_doy[new_doy >= 60] = new_doy[new_doy >= 60] + 1 + elif target_calendar == "360_day": + new_doy = np.insert(new_doy, rm_idx - np.arange(5), -1) + if days_in_year(yr, source_calendar) == 366: + new_doy = np.insert(new_doy, 60, -1) + return new_doy[time.dt.dayofyear - 1] + + def get_calendar(obj: Any, dim: str = "time") -> str: """Return the calendar of an object. @@ -321,38 +361,18 @@ def convert_calendar( # TODO Maybe the 5-6 days to remove could be given by the user? 
if align_on in ["year", "random"]: if align_on == "year": + new_doy = source.time.groupby(f"{dim}.year").map( + yearly_interpolated_doy, + source_calendar=cal_src, + target_calendar=cal_tgt, + ) - def _yearly_interp_doy(time): - # Returns the nearest day in the target calendar of the corresponding "decimal year" in the source calendar - yr = int(time.dt.year[0]) - return np.round( - days_in_year(yr, cal_tgt) - * time.dt.dayofyear - / days_in_year(yr, cal_src) - ).astype(int) - - new_doy = source.time.groupby(f"{dim}.year").map(_yearly_interp_doy) elif align_on == "random": - - def _yearly_random_doy(time, rng): - # Return a doy in the new calendar, removing the Feb 29th and 5 other - # days chosen randomly within 5 sections of 72 days. - yr = int(time.dt.year[0]) - new_doy = np.arange(360) + 1 - rm_idx = rng.integers(0, 72, 5) + (np.arange(5) * 72) - if cal_src == "360_day": - for idx in rm_idx: - new_doy[idx + 1 :] = new_doy[idx + 1 :] + 1 - if days_in_year(yr, cal_tgt) == 366: - new_doy[new_doy >= 60] = new_doy[new_doy >= 60] + 1 - elif cal_tgt == "360_day": - new_doy = np.insert(new_doy, rm_idx - np.arange(5), -1) - if days_in_year(yr, cal_src) == 366: - new_doy = np.insert(new_doy, 60, -1) - return new_doy[time.dt.dayofyear - 1] - new_doy = source.time.groupby(f"{dim}.year").map( - _yearly_random_doy, rng=np.random.default_rng() + yearly_random_doy, + rng=np.random.default_rng(), + source_calendar=cal_src, + target_calendar=cal_tgt, ) # Convert the source datetimes, but override the doy with our new doys diff --git a/xclim/core/formatting.py b/xclim/core/formatting.py index 08a642925..63fa509f3 100644 --- a/xclim/core/formatting.py +++ b/xclim/core/formatting.py @@ -600,7 +600,7 @@ def _gen_returns_section(cf_attrs: Sequence[dict[str, Any]]): attr = "" added_section += f" **{key}**: {attr};" if added_section: - section = f"{section}, with additional attributes:{added_section[:-1]}" + section = f"{section}, with additional attributes:{added_section[:-1]}\n" 
return section diff --git a/xclim/core/units.py b/xclim/core/units.py index b91444bba..44b3f5b5e 100644 --- a/xclim/core/units.py +++ b/xclim/core/units.py @@ -932,7 +932,7 @@ def check_units(val: str | int | float | None, dim: str | None) -> None: def declare_units( - **units_by_name: dict[str, str], + **units_by_name: str, ) -> Callable: """Create a decorator to check units of function arguments. diff --git a/xclim/ensembles/_base.py b/xclim/ensembles/_base.py index c9ecf1af3..08874e441 100644 --- a/xclim/ensembles/_base.py +++ b/xclim/ensembles/_base.py @@ -5,6 +5,7 @@ """ from __future__ import annotations +import warnings from glob import glob from pathlib import Path from typing import Any, Sequence @@ -19,11 +20,12 @@ def create_ensemble( datasets: Any, - mf_flag: bool = False, + multifile: bool = False, resample_freq: str | None = None, calendar: str | None = None, realizations: Sequence[Any] | None = None, cal_kwargs: dict | None = None, + mf_flag: bool | str = "UNSET", # noqa **xr_kwargs, ) -> xr.Dataset: """Create an xarray dataset of an ensemble of climate simulation from a list of netcdf files. @@ -38,15 +40,15 @@ def create_ensemble( Parameters ---------- datasets : list or dict or string - List of netcdf file paths or xarray Dataset/DataArray objects . If mf_flag is True, ncfiles should be a list of - lists where each sublist contains input .nc files of an xarray multifile Dataset. + List of netcdf file paths or xarray Dataset/DataArray objects. If `multifile` is True, `datasets` should be a + list of lists where each sublist contains input .nc files of an xarray multifile Dataset. If DataArray objects are passed, they should have a name in order to be transformed into Datasets. A dictionary can be passed instead of a list, in which case the keys are used as coordinates along the new `realization` axis. If a string is passed, it is assumed to be a glob pattern for finding datasets. 
- mf_flag : bool + multifile : bool If True, climate simulations are treated as xarray multifile Datasets before concatenation. - Only applicable when "datasets" is sequence of list of file paths. + Only applicable when "datasets" is a sequence of lists of file paths. Default: False. resample_freq : Optional[str] If the members of the ensemble have the same frequency but not the same offset, they cannot be properly aligned. If resample_freq is set, the time coordinate of each member will be modified to fit this frequency. @@ -54,16 +56,16 @@ def create_ensemble( The calendar of the time coordinate of the ensemble. By default, the smallest common calendar is chosen. For example, a mixed input of "noleap" and "360_day" will default to "noleap". 'default' is the standard calendar using np.datetime64 objects (xarray's "standard" with `use_cftime=False`). - realizations: sequence, optional + realizations : sequence, optional The coordinate values for the new `realization` axis. If None (default), the new axis has a simple integer coordinate. This argument shouldn't be used if `datasets` is a glob pattern as the dataset order is random. cal_kwargs : dict, optional - Additionnal arguments to pass to py:func:`xclim.core.calendar.convert_calendar`. + Additional arguments to pass to :py:func:`xclim.core.calendar.convert_calendar`. For conversions involving '360_day', the align_on='date' option is used by default. 
**xr_kwargs Any keyword arguments to be given to `xr.open_dataset` when opening the files - (or to `xr.open_mfdataset` if mf_flag is True) + (or to `xr.open_mfdataset` if `multifile` is True) Returns ------- @@ -90,7 +92,7 @@ def create_ensemble( # Simulation 2 is also a list of .nc files: datasets.extend(Path("/dir2").glob("*.nc")) - ens = create_ensemble(datasets, mf_flag=True) + ens = create_ensemble(datasets, multifile=True) """ if isinstance(datasets, dict): if realizations is None: @@ -103,9 +105,18 @@ def create_ensemble( "is a glob pattern, as the final order is random." ) + if mf_flag != "UNSET": + warnings.warn( + "The `mf_flag` argument is being deprecated in favour of `multifile` in `create.ensemble()`. " + "This change will be made effective from `xclim>=0.43.0`. Please update your scripts accordingly", + FutureWarning, + stacklevel=3, + ) + multifile = mf_flag + ds = _ens_align_datasets( datasets, - mf_flag, + multifile, resample_freq, calendar=calendar, cal_kwargs=cal_kwargs or {}, @@ -118,8 +129,8 @@ def create_ensemble( dim = xr.IndexVariable("realization", list(realizations), attrs={"axis": "E"}) ens = xr.concat(ds, dim) - for vname, var in ds[0].variables.items(): - ens[vname].attrs.update(**var.attrs) + for var_name, var in ds[0].variables.items(): + ens[var_name].attrs.update(**var.attrs) ens.attrs.update(**ds[0].attrs) return ens @@ -336,10 +347,11 @@ def ensemble_percentiles( def _ens_align_datasets( datasets: list[xr.Dataset | Path | str | list[Path | str]] | str, - mf_flag: bool = False, + multifile: bool = False, resample_freq: str | None = None, calendar: str = "default", cal_kwargs: dict | None = None, + mf_flag: bool | str = "UNSET", # noqa **xr_kwargs, ) -> list[xr.Dataset]: """Create a list of aligned xarray Datasets for ensemble Dataset creation. 
@@ -347,11 +359,11 @@ def _ens_align_datasets( Parameters ---------- datasets : list[xr.Dataset | xr.DataArray | Path | str | list[Path | str]] or str - List of netcdf file paths or xarray Dataset/DataArray objects . If mf_flag is True, 'datasets' should be a list - of lists where each sublist contains input NetCDF files of a xarray multi-file Dataset. + List of netcdf file paths or xarray Dataset/DataArray objects. If `multifile` is True, 'datasets' should be a + list of lists where each sublist contains input NetCDF files of an xarray multi-file Dataset. DataArrays should have a name, so they can be converted to datasets. If a string, it is assumed to be a glob pattern for finding datasets. - mf_flag : bool + multifile : bool If True climate simulations are treated as xarray multi-file datasets before concatenation. Only applicable when 'datasets' is a sequence of file paths. resample_freq : str, optional @@ -375,10 +387,19 @@ def _ens_align_datasets( if isinstance(datasets, str): datasets = glob(datasets) + if mf_flag != "UNSET": + warnings.warn( + "The `mf_flag` argument is being deprecated in favour of `multifile` in `_ens_align_datasets()`. " + "This change will be made effective from `xclim>=0.43.0`. 
Please update your scripts accordingly", + FutureWarning, + stacklevel=3, + ) + multifile = mf_flag + ds_all = [] calendars = [] for i, n in enumerate(datasets): - if mf_flag: + if multifile: ds = xr.open_mfdataset(n, combine="by_coords", **xr_kwargs) else: if isinstance(n, xr.Dataset): diff --git a/xclim/ensembles/_robustness.py b/xclim/ensembles/_robustness.py index aa6891dd9..799a6de10 100644 --- a/xclim/ensembles/_robustness.py +++ b/xclim/ensembles/_robustness.py @@ -23,7 +23,7 @@ def change_significance( fut: xr.DataArray | xr.Dataset, ref: xr.DataArray | xr.Dataset = None, - test: str = "ttest", + test: str | None = "ttest", weights: xr.DataArray = None, p_vals: bool = False, **kwargs, diff --git a/xclim/indices/__init__.py b/xclim/indices/__init__.py index c170b9f96..7d587f2f8 100644 --- a/xclim/indices/__init__.py +++ b/xclim/indices/__init__.py @@ -1,27 +1,4 @@ -# noqa: D205,D400 -""" -=============== -Indices Library -=============== - -This module contains climate indices functions operating on `xarray.DataArray`. Most of these -functions operate on daily time series, but might accept other sampling frequencies as well. All -functions perform units checks to make sure that inputs have the expected dimensions (for example -have units of temperature, whether it is Celsius, kelvin or Fahrenheit), and set the `units` -attribute of the output DataArray. - -The `calendar`, `fire`, `generic`, `helpers`, `run_length` and `stats` submodules provide -helpers to simplify the implementation of the indices. - -.. note:: - - Indices functions do not perform missing value checks, and usually do not set CF-Convention attributes - (long_name, standard_name, description, cell_methods, etc.). These functionalities are provided by - :py:class:`xclim.indicators.Indicator` instances found in the :py:mod:`xclim.indicators.atmos`, - :py:mod:`xclim.indicators.land` and :mod:`xclim.indicators.seaIce` modules, - documented in :ref:`indicators:Climate Indicators`. 
- -""" +"""Indices module.""" from __future__ import annotations from ._agro import * @@ -42,8 +19,8 @@ ) """ -Notes for docstring -------------------- +Notes for docstrings +-------------------- The docstrings adhere to the `NumPy`_ style convention and is meant as a way to store CF-Convention metadata as well as information relevant to third party libraries (such as a WPS server). diff --git a/xclim/indices/fire/_cffwis.py b/xclim/indices/fire/_cffwis.py index 595acc6ab..326e88be7 100644 --- a/xclim/indices/fire/_cffwis.py +++ b/xclim/indices/fire/_cffwis.py @@ -256,7 +256,7 @@ def _fine_fuel_moisture_code(t, p, w, h, ffmc0): # pragma: no cover Parameters ---------- - t: array_like + t : array_like Noon temperature [C]. p : array_like Rain fall in open over previous 24 hours, at noon [mm]. @@ -342,7 +342,7 @@ def _duff_moisture_code( Parameters ---------- - t: array_like + t : array_like Noon temperature [C]. p : array_like Rain fall in open over previous 24 hours, at noon [mm]. @@ -404,7 +404,7 @@ def _drought_code( Parameters ---------- - t: array-like + t : array-like Noon temperature [C]. p : array_like Rain fall in open over previous 24 hours, at noon [mm]. @@ -945,9 +945,9 @@ def fire_weather_ufunc( How to compute the start-up and shutdown of the fire season. If "None", no start-ups or shutdowns are computed, similar to the R fire function. Ignored if `season_mask` is given. - overwintering: bool + overwintering : bool Whether to activate DC overwintering or not. If True, either season_method or season_mask must be given. - dry_start: {None, 'CFS', 'GFWED'} + dry_start : {None, 'CFS', 'GFWED'} Whether to activate the DC and DMC "dry start" mechanism and which method to use. See Notes. If overwintering is activated, it overrides this parameter : only DMC is handled through the dry start mechanism. initial_start_up : bool @@ -1316,9 +1316,9 @@ def cffwis_indices( How to compute the start-up and shutdown of the fire season. 
If "None", no start-ups or shutdowns are computed, similar to the R fire function. Ignored if `season_mask` is given. - overwintering: bool + overwintering : bool Whether to activate DC overwintering or not. If True, either season_method or season_mask must be given. - dry_start: {None, 'CFS', 'GFWED'} + dry_start : {None, 'CFS', 'GFWED'} Whether to activate the DC and DMC "dry start" mechanism or not, see :py:func:`fire_weather_ufunc`. initial_start_up : bool If True (default), gridpoints where the fire season is active on the first timestep go through a start_up phase @@ -1420,9 +1420,9 @@ def drought_code( How to compute the start-up and shutdown of the fire season. If "None", no start-ups or shutdowns are computed, similar to the R fire function. Ignored if `season_mask` is given. - overwintering: bool + overwintering : bool Whether to activate DC overwintering or not. If True, either season_method or season_mask must be given. - dry_start: {None, "CFS", 'GFWED'} + dry_start : {None, "CFS", 'GFWED'} Whether to activate the DC and DMC "dry start" mechanism and which method to use. See :py:func:`fire_weather_ufunc`. initial_start_up : bool @@ -1503,16 +1503,16 @@ def fire_season( freq : str, optional If given only the longest fire season for each period defined by this frequency, Every "seasons" are returned if None, including the short shoulder seasons. - temp_start_thresh: Quantified + temp_start_thresh : Quantified Minimal temperature needed to start the season. Must be scalar. temp_end_thresh : Quantified Maximal temperature needed to end the season. Must be scalar. - temp_condition_days: int + temp_condition_days : int Number of days with temperature above or below the thresholds to trigger a start or an end of the fire season. - snow_condition_days: int + snow_condition_days : int Parameters for the fire season determination. See :py:func:`fire_season`. Temperature is in degC, snow in m. 
The `snow_thresh` parameters is also used when `dry_start` is set to "GFWED". - snow_thresh: Quantified + snow_thresh : Quantified Minimal snow depth level to end a fire season, only used with method "LA08". Must be scalar. diff --git a/xclim/testing/tests/test_ensembles.py b/xclim/testing/tests/test_ensembles.py index 59b41da1c..068c3d8be 100644 --- a/xclim/testing/tests/test_ensembles.py +++ b/xclim/testing/tests/test_ensembles.py @@ -39,7 +39,6 @@ def test_create_ensemble( ds = open_dataset(n, decode_times=False) ds["time"] = xr.decode_cf(ds).time ds_all.append(ds) - ens = ensembles.create_ensemble(ds_all) assert len(ens.realization) == len(ensemble_dataset_objects["nc_files_simple"]) @@ -135,8 +134,8 @@ def test_create_unaligned_times(self, timegen, calkw): ens = ensembles.create_ensemble((d1, d2)) assert ens.time.size == 48 np.testing.assert_equal(ens.isel(time=0), [np.nan, 0]) - ens = ensembles.create_ensemble((d1, d2), resample_freq="MS") + assert ens.time.size == 24 np.testing.assert_equal(ens.isel(time=0), [0, 0]) @@ -244,7 +243,8 @@ def test_calc_mean_std_min_max(self, ensemble_dataset_objects, open_dataset): for n in ensemble_dataset_objects["nc_files_simple"]: ds = open_dataset(n) ds_all.append(ds) - ens = ensembles.create_ensemble(ds_all) + with pytest.warns(FutureWarning): + ens = ensembles.create_ensemble(ds_all, mf_flag=False) out1 = ensembles.ensemble_mean_std_max_min(ens) np.testing.assert_array_equal(