Ouranosinc · Zeitsperre · Mar 10, 2023 · Mar 9, 2023 · Mar 9, 2023 · Mar 9, 2023
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -6,6 +6,17 @@ Changelog
 -------------------
 Contributors to this version: Trevor James Smith (:user:`Zeitsperre`).
 
+New features and enhancements
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+* Two previously private functions for selecting a day of year in a time series when performing calendar conversions are now exposed. (:issue:`1305`, :pull:`1317`). New functions are:
+    * ``xclim.core.calendar.yearly_interpolated_doy``
+    * ``xclim.core.calendar.yearly_random_doy``
+
+Breaking changes
+^^^^^^^^^^^^^^^^
+* The call signatures for ``xclim.ensembles.create_ensemble`` and ``xclim.ensembles._base._ens_align_datasets`` have been deprecated. Calls to these functions made with the original signature will emit warnings. Changes will become breaking in `xclim>=0.43.0`. (:issue:`1305`, :pull:`1317`). Affected variable:
+    * `mf_flag` (bool) -> `multifile` (bool)
+
 Internal changes
 ^^^^^^^^^^^^^^^^
 * Added `xclim` to the `ouranos Zenodo community <https://zenodo.org/communities/ouranos/>`_ . (:pull:`1313`).
@@ -21,6 +32,7 @@ Internal changes
     * Markdown explanations in some Jupyter Notebooks have been edited for clarity
 * Removed `Mapping` abstract base class types in call signatures (`dict` variables were always expected). (:pull:`1308`).
 * Changes in testing setup now prevent ``test_mean_radiant_temperature`` from sometimes causing a segmentation fault. (:issue:`1303`, :pull:`1315`).
+* Addressed a formatting bug that caused `Indicators` with multiple variables returned to not be properly formatted in the documentation. (:issue:`1305`, :pull:`1317`).
 
 0.41.0 (2023-02-28)
 -------------------

diff --git a/docs/api.rst b/docs/api.rst
@@ -83,22 +83,22 @@ Ensembles Module
 
 .. Use of autofunction is so that paths do not include private modules.
 .. autofunction:: xclim.ensembles.kkz_reduce_ensemble
-    :noindex:
+   :noindex:
 
 .. autofunction:: xclim.ensembles.kmeans_reduce_ensemble
-    :noindex:
+   :noindex:
 
 .. autofunction:: xclim.ensembles.plot_rsqprofile
-    :noindex:
+   :noindex:
 
 .. automodule:: xclim.ensembles._robustness
-    :noindex:
+   :noindex:
 
 .. autofunction:: xclim.ensembles.change_significance
-    :noindex:
+   :noindex:
 
 .. autofunction:: xclim.ensembles.robustness_coefficient
-    :noindex:
+   :noindex:
 
 Units Handling Submodule
 ========================

diff --git a/docs/indices.rst b/docs/indices.rst
@@ -4,10 +4,32 @@ Climate Indices
 
 .. note::
 
-    Climate `Indices` serve as the driving mechanisms behind `Indicators` and should be used in cases where default settings for an Indicator may need to be tweaked, metadata completeness is not required, or a user wishes to design a virtual module from existing indices (e.g. see :ref:`notebooks/extendxclim:Defining new indicators`).
+    Climate `Indices` serve as the driving mechanisms behind `Indicators` and should be used in cases where
+    default settings for an Indicator may need to be tweaked, metadata completeness is not required, or a user
+    wishes to design a virtual module from existing indices (see: :ref:`notebooks/extendxclim:Defining new indicators`).
 
     For higher-level and general purpose use, the xclim developers suggest using the :ref:`indicators:Climate Indicators`.
 
+Indices Library
+---------------
+
+Climate indices functions are designed to operate on :py:class:`xarray.DataArray` objects.
+Most of these functions operate on daily time series, but in some cases might accept other sampling
+frequencies as well. All functions perform units checks to make sure that inputs have the expected dimensions
+(e.g. handling for units of temperature, whether they are Celsius, kelvin or Fahrenheit), and set the `units`
+attribute of the output `DataArray`.
+
+The :py:mod:`xclim.indices.generic`, :py:mod:`xclim.indices.helpers`, :py:mod:`xclim.indices.run_length`, and
+:py:mod:`xclim.indices.stats` submodules provide helper functions to simplify the implementation of indices
+while functions under :py:mod:`xclim.core.calendar` can aid with challenges arising from variable calendar
+types.
+
+.. warning::
+
+    Indices functions do not perform missing value checks, and usually do not set CF-Convention attributes
+    (long_name, standard_name, description, cell_methods, etc.). These functionalities are provided by
+    :py:class:`xclim.core.indicator.Indicator` instances found in the :py:mod:`xclim.indicators.atmos`,
+    :py:mod:`xclim.indicators.land` and :py:mod:`xclim.indicators.seaIce` modules.
 
 .. automodule:: xclim.indices
    :members:

diff --git a/xclim/core/calendar.py b/xclim/core/calendar.py
@@ -34,7 +34,9 @@
 __all__ = [
     "DayOfYearStr",
     "adjust_doy_calendar",
+    "build_climatology_bounds",
     "climatological_mean_doy",
+    "common_calendar",
     "compare_offsets",
     "convert_calendar",
     "date_range",
@@ -52,9 +54,10 @@
     "resample_doy",
     "select_time",
     "time_bnds",
-    "within_bnds_doy",
     "uniform_calendars",
-    "build_climatology_bounds",
+    "within_bnds_doy",
+    "yearly_interpolated_doy",
+    "yearly_random_doy",
 ]
 
 # Maximum day of year in each calendar.
@@ -96,6 +99,43 @@ def date_range(
     return xr.cftime_range(*args, calendar=calendar, **kwargs)
 
 
+def yearly_interpolated_doy(
+    time: pd.DatetimeIndex | CFTimeIndex, source_calendar: str, target_calendar: str
+):
+    """Return the nearest day in the target calendar of the corresponding "decimal year" in the source calendar."""
+    yr = int(time.dt.year[0])
+    return np.round(
+        days_in_year(yr, target_calendar)
+        * time.dt.dayofyear
+        / days_in_year(yr, source_calendar)
+    ).astype(int)
+
+
+def yearly_random_doy(
+    time: pd.DatetimeIndex | CFTimeIndex,
+    rng: np.random.Generator,
+    source_calendar: str,
+    target_calendar: str,
+):
+    """Return a day of year in the new calendar.
+
+    Removes Feb 29th and five other days chosen randomly within five sections of 72 days.
+    """
+    yr = int(time.dt.year[0])
+    new_doy = np.arange(360) + 1
+    rm_idx = rng.integers(0, 72, 5) + (np.arange(5) * 72)
+    if source_calendar == "360_day":
+        for idx in rm_idx:
+            new_doy[idx + 1 :] = new_doy[idx + 1 :] + 1
+        if days_in_year(yr, target_calendar) == 366:
+            new_doy[new_doy >= 60] = new_doy[new_doy >= 60] + 1
+    elif target_calendar == "360_day":
+        new_doy = np.insert(new_doy, rm_idx - np.arange(5), -1)
+        if days_in_year(yr, source_calendar) == 366:
+            new_doy = np.insert(new_doy, 60, -1)
+    return new_doy[time.dt.dayofyear - 1]
+
+
 def get_calendar(obj: Any, dim: str = "time") -> str:
     """Return the calendar of an object.
 
@@ -321,38 +361,18 @@ def convert_calendar(
     # TODO Maybe the 5-6 days to remove could be given by the user?
     if align_on in ["year", "random"]:
         if align_on == "year":
+            new_doy = source.time.groupby(f"{dim}.year").map(
+                yearly_interpolated_doy,
+                source_calendar=cal_src,
+                target_calendar=cal_tgt,
+            )
 
-            def _yearly_interp_doy(time):
-                # Returns the nearest day in the target calendar of the corresponding "decimal year" in the source calendar
-                yr = int(time.dt.year[0])
-                return np.round(
-                    days_in_year(yr, cal_tgt)
-                    * time.dt.dayofyear
-                    / days_in_year(yr, cal_src)
-                ).astype(int)
-
-            new_doy = source.time.groupby(f"{dim}.year").map(_yearly_interp_doy)
         elif align_on == "random":
-
-            def _yearly_random_doy(time, rng):
-                # Return a doy in the new calendar, removing the Feb 29th and 5 other
-                # days chosen randomly within 5 sections of 72 days.
-                yr = int(time.dt.year[0])
-                new_doy = np.arange(360) + 1
-                rm_idx = rng.integers(0, 72, 5) + (np.arange(5) * 72)
-                if cal_src == "360_day":
-                    for idx in rm_idx:
-                        new_doy[idx + 1 :] = new_doy[idx + 1 :] + 1
-                    if days_in_year(yr, cal_tgt) == 366:
-                        new_doy[new_doy >= 60] = new_doy[new_doy >= 60] + 1
-                elif cal_tgt == "360_day":
-                    new_doy = np.insert(new_doy, rm_idx - np.arange(5), -1)
-                    if days_in_year(yr, cal_src) == 366:
-                        new_doy = np.insert(new_doy, 60, -1)
-                return new_doy[time.dt.dayofyear - 1]
-
             new_doy = source.time.groupby(f"{dim}.year").map(
-                _yearly_random_doy, rng=np.random.default_rng()
+                yearly_random_doy,
+                rng=np.random.default_rng(),
+                source_calendar=cal_src,
+                target_calendar=cal_tgt,
             )
 
         # Convert the source datetimes, but override the doy with our new doys

diff --git a/xclim/core/formatting.py b/xclim/core/formatting.py
@@ -600,7 +600,7 @@ def _gen_returns_section(cf_attrs: Sequence[dict[str, Any]]):
                     attr = "<Dynamically generated string>"
                 added_section += f" **{key}**: {attr};"
         if added_section:
-            section = f"{section}, with additional attributes:{added_section[:-1]}"
+            section = f"{section}, with additional attributes:{added_section[:-1]}\n"
     return section
 
 

diff --git a/xclim/core/units.py b/xclim/core/units.py
@@ -932,7 +932,7 @@ def check_units(val: str | int | float | None, dim: str | None) -> None:
 
 
 def declare_units(
-    **units_by_name: dict[str, str],
+    **units_by_name: str,
 ) -> Callable:
     """Create a decorator to check units of function arguments.
 

diff --git a/xclim/ensembles/_base.py b/xclim/ensembles/_base.py
@@ -5,6 +5,7 @@
 """
 from __future__ import annotations
 
+import warnings
 from glob import glob
 from pathlib import Path
 from typing import Any, Sequence
@@ -19,11 +20,12 @@
 
 def create_ensemble(
     datasets: Any,
-    mf_flag: bool = False,
+    multifile: bool = False,
     resample_freq: str | None = None,
     calendar: str | None = None,
     realizations: Sequence[Any] | None = None,
     cal_kwargs: dict | None = None,
+    mf_flag: bool | str = "UNSET",  # noqa
     **xr_kwargs,
 ) -> xr.Dataset:
     """Create an xarray dataset of an ensemble of climate simulation from a list of netcdf files.
@@ -38,32 +40,32 @@ def create_ensemble(
     Parameters
     ----------
     datasets : list or dict or string
-      List of netcdf file paths or xarray Dataset/DataArray objects . If mf_flag is True, ncfiles should be a list of
-      lists where each sublist contains input .nc files of an xarray multifile Dataset.
+      List of netcdf file paths or xarray Dataset/DataArray objects. If `multifile` is True, `datasets` should be a
+      list of lists where each sublist contains input .nc files of an xarray multifile Dataset.
       If DataArray objects are passed, they should have a name in order to be transformed into Datasets.
       A dictionary can be passed instead of a list, in which case the keys are used as coordinates along the new
       `realization` axis.
       If a string is passed, it is assumed to be a glob pattern for finding datasets.
-    mf_flag : bool
+    multifile : bool
       If True, climate simulations are treated as xarray multifile Datasets before concatenation.
-      Only applicable when "datasets" is sequence of list of file paths.
+      Only applicable when "datasets" is a sequence of lists of file paths. Default: False.
     resample_freq : Optional[str]
       If the members of the ensemble have the same frequency but not the same offset, they cannot be properly aligned.
       If resample_freq is set, the time coordinate of each member will be modified to fit this frequency.
     calendar : str, optional
       The calendar of the time coordinate of the ensemble.
       By default, the smallest common calendar is chosen. For example, a mixed input of "noleap" and "360_day" will default to "noleap".
       'default' is the standard calendar using np.datetime64 objects (xarray's "standard" with `use_cftime=False`).
-    realizations: sequence, optional
+    realizations : sequence, optional
       The coordinate values for the new `realization` axis.
       If None (default), the new axis has a simple integer coordinate.
       This argument shouldn't be used if `datasets` is a glob pattern as the dataset order is random.
     cal_kwargs : dict, optional
-      Additionnal arguments to pass to py:func:`xclim.core.calendar.convert_calendar`.
+      Additional arguments to pass to :py:func:`xclim.core.calendar.convert_calendar`.
       For conversions involving '360_day', the align_on='date' option is used by default.
     **xr_kwargs
       Any keyword arguments to be given to `xr.open_dataset` when opening the files
-      (or to `xr.open_mfdataset` if mf_flag is True)
+      (or to `xr.open_mfdataset` if `multifile` is True)
 
     Returns
     -------
@@ -90,7 +92,7 @@ def create_ensemble(
 
         # Simulation 2 is also a list of .nc files:
         datasets.extend(Path("/dir2").glob("*.nc"))
-        ens = create_ensemble(datasets, mf_flag=True)
+        ens = create_ensemble(datasets, multifile=True)
     """
     if isinstance(datasets, dict):
         if realizations is None:
@@ -103,9 +105,18 @@ def create_ensemble(
             "is a glob pattern, as the final order is random."
         )
 
+    if mf_flag != "UNSET":
+        warnings.warn(
+            "The `mf_flag` argument is being deprecated in favour of `multifile` in `create.ensemble()`. "
+            "This change will be made effective from `xclim>=0.43.0`. Please update your scripts accordingly",
+            FutureWarning,
+            stacklevel=3,
+        )
+        multifile = mf_flag
+
     ds = _ens_align_datasets(
         datasets,
-        mf_flag,
+        multifile,
         resample_freq,
         calendar=calendar,
         cal_kwargs=cal_kwargs or {},
@@ -118,8 +129,8 @@ def create_ensemble(
     dim = xr.IndexVariable("realization", list(realizations), attrs={"axis": "E"})
 
     ens = xr.concat(ds, dim)
-    for vname, var in ds[0].variables.items():
-        ens[vname].attrs.update(**var.attrs)
+    for var_name, var in ds[0].variables.items():
+        ens[var_name].attrs.update(**var.attrs)
     ens.attrs.update(**ds[0].attrs)
 
     return ens
@@ -336,22 +347,23 @@ def ensemble_percentiles(
 
 def _ens_align_datasets(
     datasets: list[xr.Dataset | Path | str | list[Path | str]] | str,
-    mf_flag: bool = False,
+    multifile: bool = False,
     resample_freq: str | None = None,
     calendar: str = "default",
     cal_kwargs: dict | None = None,
+    mf_flag: bool | str = "UNSET",  # noqa
     **xr_kwargs,
 ) -> list[xr.Dataset]:
     """Create a list of aligned xarray Datasets for ensemble Dataset creation.
 
     Parameters
     ----------
     datasets : list[xr.Dataset | xr.DataArray | Path | str | list[Path | str]] or str
-        List of netcdf file paths or xarray Dataset/DataArray objects . If mf_flag is True, 'datasets' should be a list
-        of lists where each sublist contains input NetCDF files of a xarray multi-file Dataset.
+        List of netcdf file paths or xarray Dataset/DataArray objects. If `multifile` is True, 'datasets' should be a
+        list of lists where each sublist contains input NetCDF files of an xarray multi-file Dataset.
         DataArrays should have a name, so they can be converted to datasets.
         If a string, it is assumed to be a glob pattern for finding datasets.
-    mf_flag : bool
+    multifile : bool
         If True climate simulations are treated as xarray multi-file datasets before concatenation.
         Only applicable when 'datasets' is a sequence of file paths.
     resample_freq : str, optional
@@ -375,10 +387,19 @@ def _ens_align_datasets(
     if isinstance(datasets, str):
         datasets = glob(datasets)
 
+    if mf_flag != "UNSET":
+        warnings.warn(
+            "The `mf_flag` argument is being deprecated in favour of `multifile` in `_ens_align_datasets()`. "
+            "This change will be made effective from `xclim>=0.43.0`. Please update your scripts accordingly",
+            FutureWarning,
+            stacklevel=3,
+        )
+        multifile = mf_flag
+
     ds_all = []
     calendars = []
     for i, n in enumerate(datasets):
-        if mf_flag:
+        if multifile:
             ds = xr.open_mfdataset(n, combine="by_coords", **xr_kwargs)
         else:
             if isinstance(n, xr.Dataset):

diff --git a/xclim/ensembles/_robustness.py b/xclim/ensembles/_robustness.py
@@ -23,7 +23,7 @@
 def change_significance(
     fut: xr.DataArray | xr.Dataset,
     ref: xr.DataArray | xr.Dataset = None,
-    test: str = "ttest",
+    test: str | None = "ttest",
     weights: xr.DataArray = None,
     p_vals: bool = False,
     **kwargs,