From 81537f679cc582009cae3779edb66bf3d3d12d02 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 18 Jul 2024 14:57:53 -0400 Subject: [PATCH 01/13] Generic season index - fix frost_free_season --- CHANGELOG.rst | 6 ++ xclim/indices/_threshold.py | 187 ++++++++++++++++++++---------------- xclim/indices/generic.py | 75 +++++++++++++++ xclim/indices/run_length.py | 17 +++- 4 files changed, 201 insertions(+), 84 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 2d548c408..a79b77242 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,6 +6,10 @@ v0.52.0 (unreleased) -------------------- Contributors to this version: David Huard (:user:`huard`), Pascal Bourgault (:user:`aulemahal`). +Breaking changes +^^^^^^^^^^^^^^^^ +* Definition of the ``frost_free_season_start`` and ``frost_free_season_end`` have been slightly changed to be coherent with the ``frost_free_season_length`` and xclim's notion of `season` in general. Indicator and indices signature have changed. + Bug fixes ^^^^^^^^^ * Addressed a bug found in ``wet_spell_*`` indicators that was contributing to erroneous results. A new generic spell length statistic function ``xclim.indices.generic.spell_length_statistics`` is now used in wet and dry spells indicators. (:issue:`1834`, :pull:`1838`). @@ -13,6 +17,8 @@ Bug fixes Internal changes ^^^^^^^^^^^^^^^^ * Changed french translation of "wet days" from "jours mouillés" to "jours pluvieux". (:issue:`1825`, :pull:`1826`). +* Added ``xclim.indices.generic.season`` to make season start, end and length indices. Added a `stat` argument to ``xclim.indices.run_length.season`` to avoid returning a dataset. 
+ v0.51.0 (2024-07-04) diff --git a/xclim/indices/_threshold.py b/xclim/indices/_threshold.py index 04133959a..883d1ea98 100644 --- a/xclim/indices/_threshold.py +++ b/xclim/indices/_threshold.py @@ -26,6 +26,7 @@ cumulative_difference, domain_count, first_day_threshold_reached, + season, spell_length_statistics, threshold_count, ) @@ -1248,16 +1249,16 @@ def frost_season_length( >>> fsl_sh = frost_season_length(tasmin, freq="YS") """ - thresh = convert_units_to(thresh, tasmin) - cond = compare(tasmin, op, thresh, constrain=("<=", "<")) - - out = cond.resample(time=freq).map( - rl.season_length, + return season( + tasmin, + thresh=thresh, window=window, - date=mid_date, - dim="time", + op=op, + stat="length", + freq=freq, + mid_date=mid_date, + constrain=("<", "<="), ) - return to_agg_units(out, tasmin, "count") @declare_units(tasmin="[temperature]", thresh="[temperature]") @@ -1265,13 +1266,16 @@ def frost_free_season_start( tasmin: xarray.DataArray, thresh: Quantified = "0.0 degC", window: int = 5, + mid_date: DayOfYearStr = "07-01", + op: str = ">=", freq: str = "YS", ) -> xarray.DataArray: r"""Start of the frost free season. - Day of the year of the start of a sequence of days with minimum temperatures consistently above or equal to a - threshold (default: 0℃), after a period of `N` days (default: 5) with minimum temperatures consistently - above the same threshold. + The frost free season starts when a sequence of `window` consecutive days are above the threshold + and ends when a sequence of consecutive days of the same length are under the threshold. Sequences + of consecutive days under the threshold shorter then `window` are allowed within the season. + A middle date can be given, the start must occur before and the end after for the season to be valid. Parameters ---------- @@ -1280,53 +1284,58 @@ def frost_free_season_start( thresh : Quantified Threshold temperature on which to base evaluation. 
window : int - Minimum number of days with temperature above threshold needed for evaluation. + Minimum number of days with temperature above/under threshold to start/end the season. + mid_date : DayOfYearStr, optional + A date what must be included in the season. + op : {'>', '>=', 'ge', 'gt'} + How to compare tasmin and the threshold. freq : str Resampling frequency. Returns ------- xarray.DataArray, [dimensionless] - Day of the year when minimum temperature is superior to a threshold - over a given number of days for the first time. - If there is no such day or if a frost free season is not detected, returns np.nan. + Day of the year when the frost free season starts. Notes ----- Let :math:`x_i` be the daily mean temperature at day of the year :math:`i` for values of :math:`i` going from 1 - to 365 or 366. The start date of the start of growing season is given by the smallest index :math:`i`: + to 365 or 366. The start date of the season is given by the smallest index :math:`i`: .. math:: \prod_{j=i}^{i+w} [x_j >= thresh] where :math:`w` is the number of days the temperature threshold should be met or exceeded, - and :math:`[P]` is 1 if :math:`P` is true, and 0 if false. + and `i` must be earlier than `mid_date`. """ - thresh = convert_units_to(thresh, tasmin) - over = tasmin >= thresh - out = over.resample(time=freq).map(rl.first_run, window=window, coord="dayofyear") - out.attrs.update(units="", is_dayofyear=np.int32(1), calendar=get_calendar(tasmin)) - return out + return season( + tasmin, + thresh=thresh, + window=window, + op=op, + stat="start", + freq=freq, + mid_date=mid_date, + constrain=(">", ">="), + ) @declare_units(tasmin="[temperature]", thresh="[temperature]") def frost_free_season_end( tasmin: xarray.DataArray, thresh: Quantified = "0.0 degC", - mid_date: DayOfYearStr = "07-01", window: int = 5, + mid_date: DayOfYearStr = "07-01", + op: str = ">=", freq: str = "YS", ) -> xarray.DataArray: r"""End of the frost free season. 
- Day of the year of the start of a sequence of days with minimum temperatures consistently below a threshold - (default: 0℃), after a period of `N` days (default: 5) with minimum temperatures consistently above the same - threshold. - - Warnings - -------- - The default `freq` and `mid_date` parameters are valid for the northern hemisphere. + The frost free season starts when a sequence of `window` consecutive days are above the threshold + and ends when a sequence of consecutive days of the same length are under the threshold. Sequences + of consecutive days under the threshold shorter than `window` are allowed within the season. + A middle date can be given, the start must occur before and the end after for the season to be valid. Parameters ---------- @@ -1334,89 +1343,105 @@ def frost_free_season_end( Minimum daily temperature. thresh : Quantified Threshold temperature on which to base evaluation. - mid_date : str - Date of the year after which to look for the end of the season. Should have the format '%m-%d'. window : int - Minimum number of days with temperature below threshold needed for evaluation. + Minimum number of days with temperature above/under threshold to start/end the season. + mid_date : DayOfYearStr, optional + A date that must be included in the season. + op : {'>', '>=', 'ge', 'gt'} + How to compare tasmin and the threshold. freq : str Resampling frequency. Returns ------- xarray.DataArray, [dimensionless] - Day of the year when minimum temperature is inferior to a threshold over a given number of days for the first time. - If there is no such day or if a frost free season is not detected, returns np.nan. - If the frost free season does not end within the time period, returns the last day of the period. - """ - thresh = convert_units_to(thresh, tasmin) - cond = tasmin >= thresh + Day of the year when the frost free season ends.
- out = cond.resample(time=freq).map( - rl.run_end_after_date, + Notes + ----- + Let :math:`x_i` be the daily minimum temperature at day of the year :math:`i` for values of :math:`i` going from 1 + to 365 or 366. The start date is given by the smallest index :math:`i`: + + .. math:: + + \prod_{k=i}^{i+w} [x_k >= thresh] + + while the end date is given by the smallest subsequent index :math:`j`: + + .. math:: + + \prod_{k=j}^{j+w} [x_k < thresh] + + where :math:`w` is the number of days the temperature threshold should be exceeded/subceeded. + An end is only valid if a start is also found and the end must happen later than `mid_date` + while the start must happen earlier. + """ + return season( + tasmin, + thresh=thresh, window=window, - date=mid_date, - dim="time", - coord="dayofyear", + op=op, + stat="end", + freq=freq, + mid_date=mid_date, + constrain=(">", ">="), ) - out.attrs.update(units="", is_dayofyear=np.int32(1), calendar=get_calendar(tasmin)) - return out @declare_units(tasmin="[temperature]", thresh="[temperature]") def frost_free_season_length( tasmin: xarray.DataArray, + thresh: Quantified = "0.0 degC", window: int = 5, mid_date: DayOfYearStr | None = "07-01", - thresh: Quantified = "0.0 degC", - freq: str = "YS", op: str = ">=", + freq: str = "YS", ) -> xarray.DataArray: - r"""Frost free season length. - - The number of days between the first occurrence of at least `N` (default: 5) consecutive days with minimum daily - temperature above a threshold (default: 0℃) and the first occurrence of at least `N` consecutive days with - minimum daily temperature below the same threshold. - A mid-date can be given to limit the earliest day the end of season can take. + r"""Length of the frost free season. - Warnings - -------- - The default `freq` and `mid_date` parameters are valid for the northern hemisphere.
+ The frost free season starts when a sequence of `window` consecutive days are above the threshold + and ends when a sequence of consecutive days of the same length are under the threshold. Sequences + of consecutive days under the threshold shorter than `window` are allowed within the season. + A middle date can be given, the start must occur before and the end after for the season to be valid. Parameters ---------- tasmin : xarray.DataArray Minimum daily temperature. - window : int - Minimum number of days with temperature above threshold to mark the beginning and end of frost free season. - mid_date : str, optional - Date the must be included in the season. It is the earliest the end of the season can be. - If None, there is no limit. thresh : Quantified Threshold temperature on which to base evaluation. + window : int + Minimum number of days with temperature above/under threshold to start/end the season. + mid_date : DayOfYearStr, optional + A date that must be included in the season. + op : {'>', '>=', 'ge', 'gt'} + How to compare tasmin and the threshold. freq : str Resampling frequency. - op : {">", ">=", "gt", "ge"} - Comparison operation. Default: ">=". Returns ------- xarray.DataArray, [time] - Frost free season length. + Length of the frost free season. Notes ----- - Let :math:`TN_{ij}` be the minimum temperature at day :math:`i` of period :math:`j`. Then counted is - the number of days between the first occurrence of at least N consecutive days with: + Let :math:`x_i` be the daily minimum temperature at day of the year :math:`i` for values of :math:`i` going from 1 + to 365 or 366. The start date is given by the smallest index :math:`i`: .. math:: - TN_{ij} >= 0 ℃ + \prod_{k=i}^{i+w} [x_k >= thresh] - and the first subsequent occurrence of at least N consecutive days with: + while the end date is given by the smallest subsequent index :math:`j`: ..
math:: - TN_{ij} < 0 ℃ + \prod_{k=j}^{j+w} [x_k < thresh] + + where :math:`w` is the number of days the temperature threshold should be exceeded/subceeded. + An end is only valid if a start is also found and the end must happen later than `mid_date` + while the start must happen earlier. Examples -------- @@ -1431,16 +1456,16 @@ def frost_free_season_length( >>> ffsl_sh = frost_free_season_length(tasmin, freq="YS-JUL") """ - thresh = convert_units_to(thresh, tasmin) - cond = compare(tasmin, op, thresh, constrain=(">=", ">")) - - out = cond.resample(time=freq).map( - rl.season_length, + return season( + tasmin, + thresh=thresh, window=window, - date=mid_date, - dim="time", + op=op, + stat="length", + freq=freq, + mid_date=mid_date, + constrain=(">", ">="), ) - return to_agg_units(out, tasmin, "count") @declare_units(tasmin="[temperature]", thresh="[temperature]") @@ -1454,15 +1479,15 @@ def frost_free_spell_max_length( ) -> xarray.DataArray: r"""Longest cold spell. - Longest spell of low temperatures over a given period. - Longest series of at least {window} consecutive days with temperature at or below {thresh}. + Longest spell of warm temperatures over a given period. + Longest series of at least {window} consecutive days with temperature at or above the threshold. Parameters ---------- tasmin : xarray.DataArray Minimum daily temperature. thresh : Quantified - The temperature threshold needed to trigger a cold spell. + The temperature threshold needed to trigger a frost free spell. window : int Minimum number of days with temperatures above thresholds to qualify as a frost free day. 
freq : str diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index d06b63c8d..b174cc6db 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -475,6 +475,81 @@ def spell_length_statistics( return to_agg_units(out, data, "count") +@declare_relative_units(thresh="") +def season( + data: xarray.DataArray, + thresh: Quantified, + window: int, + op: str, + stat: str, + freq: str, + mid_date: DayOfYearStr | None = None, + constrain: Sequence[str] | None = None, +) -> xarray.DataArray: + r"""Season + + A season starts when a variable respects some condition for a consecutive run of `N` days. It stops + when the condition is inverted for `N` days. Runs where the condition is not met for fewer than `N` days + are thus allowed. Additionally a middle date can serve as a maximal start date and minimum end date. + + Parameters + ---------- + data : xarray.DataArray + Variable. + thresh : Quantified + Threshold on which to base evaluation. + window : int + Minimum number of days that the condition must be met / not met for the start / end of the season. + op : str + Comparison operation. + stat : {'start', 'end', 'length'} + Which season facet to return. + freq : str + Resampling frequency. + mid_date : DayOfYearStr, optional + An optional middle date to restrict the possible start and end of the season. + constrain : Sequence of strings, optional + A list of acceptable comparison operators. Optional, but indicators wrapping this function should inject it. + + Returns + ------- + xarray.DataArray, [dimensionless] or [time] + Depends on 'stat'. If 'start' or 'end', this is the day of year of the season's start or end. + If 'length', this is the length of the season. + + Examples + -------- + >>> season(tas, thresh="0 °C", window=5, op=">", stat="start", freq="YS") + + Returns the start of the "frost-free" season. The season starts with 5 consecutive days with mean temperature + above 0°C and ends with as many days under or equal to 0°C. 
An end does not need to be found for a start to be valid. + + >>> season( + ... pr, + ... thresh="2 mm/d", + ... window=7, + ... op="<=", + ... mid_date="08-01", + ... stat="length", + ... freq="YS", + ... ) + + Returns the length of the "dry" season. The season starts with 7 consecutive days with precipitation under or equal to + 2 mm/d and ends with as many days above 2 mm/d. If no start is found before the first of August, the season is invalid. + If a start is found but no end, the end is set to the last day of the period (December 31st if the dataset is complete). + """ + thresh = convert_units_to(thresh, data) + cond = compare(data, op, thresh, constrain=constrain) + out = cond.resample(time=freq).map( + rl.season, window=window, date=mid_date, stat=stat, coord="dayofyear" + ) + if stat == "length": + return to_agg_units(out, data, "count") + # else, a date + out.attrs.update(units="", is_dayofyear=np.int32(1), calendar=get_calendar(data)) + return out + + # CF-INDEX-META Indices diff --git a/xclim/indices/run_length.py b/xclim/indices/run_length.py index 0aad6f947..22e2b1a8b 100644 --- a/xclim/indices/run_length.py +++ b/xclim/indices/run_length.py @@ -760,8 +760,9 @@ def season( window: int, date: DayOfYearStr | None = None, dim: str = "time", + stat: str | None = None, coord: str | bool | None = False, -) -> xr.Dataset: +) -> xr.Dataset | xr.DataArray: """Calculate the bounds of a season along a dimension. A "season" is a run of True values that may include breaks under a given length (`window`). @@ -778,6 +779,9 @@ def season( The date (in MM-DD format) that a run must include to be considered valid. dim : str Dimension along which to calculate consecutive run (default: 'time'). + stat : {'start', 'end', 'length'}, optional + Which facet to return. If absent (default), a Dataset with all three is returned. + If present, only the specified variable is returned. coord : Optional[str] If not False, the function returns values along `dim` instead of indexes.
If `dim` has a datetime dtype, `coord` can also be a str of the name of the @@ -785,8 +789,9 @@ def season( Returns ------- - xr.Dataset - "dim" is reduced to "season_bnds" with 2 elements : season start and season end, both indices of da[dim]. + xr.Dataset or xr.DataArray + If `stat` is absent, a Dataset with three variables : start, end and length of the season. + If `stat` is given, only the specified variable is returned, as a DataArray. Notes ----- @@ -797,6 +802,10 @@ def season( Example : Length of the "warm season", where T > 25°C, with date = 1st August. Let's say the temperature is over 25 for all June, but July and august have very cold temperatures. Instead of returning 30 days (June), the function will return 61 days (July + June). + + The season's length is always the difference between the end and the start, except if no + end was found before the end of the data, in which case the end is the last day and the length + goes up to the boundary (i.e. :math:`end - start = length - 1`). 
""" beg = first_run(da, window=window, dim=dim) # Invert the condition and mask all values after beginning @@ -871,6 +880,8 @@ def season( long_name="Length of the season.", description="Number of steps of the original series in the season, between 'start' and 'end'.", ) + if stat is not None: + return out[stat] return out From 21bcde9e008613617393e3ffd0e6116ae1cb5c71 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 18 Jul 2024 18:35:19 -0400 Subject: [PATCH 02/13] Split rl.season in its components to avoid unneeded calc - fix test - season_length is 0 even if no start was found --- CHANGELOG.rst | 1 + tests/test_indices.py | 2 +- tests/test_run_length.py | 2 +- tests/test_temperature.py | 4 +- xclim/indices/generic.py | 8 +- xclim/indices/run_length.py | 269 ++++++++++++++++++++++++++---------- 6 files changed, 203 insertions(+), 83 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a79b77242..ff8da0baa 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -9,6 +9,7 @@ Contributors to this version: David Huard (:user:`huard`), Pascal Bourgault (:us Breaking changes ^^^^^^^^^^^^^^^^ * Definition of the ``frost_free_season_start`` and ``frost_free_season_end`` have been slightly changed to be coherent with the ``frost_free_season_length`` and xclim's notion of `season` in general. Indicator and indices signature have changed. +* Season length indicators have been modified to return ``0`` for all cases where a proper season was not found, but the data is valid. Previously, a ``nan`` was given if neither a start or an end were found, even if the data was valid, and a ``0`` was given if an end was found but no start. 
Bug fixes ^^^^^^^^^ diff --git a/tests/test_indices.py b/tests/test_indices.py index 330b3c6e5..0832b89ca 100644 --- a/tests/test_indices.py +++ b/tests/test_indices.py @@ -1092,7 +1092,7 @@ class TestGrowingSeasonLength: [ ("1950-01-01", "1951-01-01", 0), # No growing season ("2000-01-01", "2000-12-31", 365), # All year growing season - ("2000-07-10", "2001-01-01", np.nan), # End happens before start + ("2000-07-10", "2001-01-01", 0), # End happens before start ("2000-06-15", "2001-01-01", 199), # No end ("2000-06-15", "2000-07-15", 31), # Normal case ], diff --git a/tests/test_run_length.py b/tests/test_run_length.py index 7617c6f87..c4a886614 100644 --- a/tests/test_run_length.py +++ b/tests/test_run_length.py @@ -470,7 +470,7 @@ class TestRunsWithDates: [ ("07-01", 210, 70), ("07-01", 190, 50), - ("04-01", 150, np.NaN), # date falls early + ("04-01", 150, 0), # date falls early ("11-01", 150, 165), # date ends late (None, 150, 10), # no date, real length ], diff --git a/tests/test_temperature.py b/tests/test_temperature.py index 5b069fd34..895da05b7 100644 --- a/tests/test_temperature.py +++ b/tests/test_temperature.py @@ -367,7 +367,7 @@ def test_real_data(self, atmosds): class TestFrostSeasonLength: def test_simple(self, tasmin_series): - a = np.zeros(730) + K2C + 15 + a = np.zeros(731) + K2C + 15 a[300:400] = K2C - 5 a[404:407] = K2C - 5 tasmin = tasmin_series(a, start="2000-01-01") @@ -379,7 +379,7 @@ def test_simple(self, tasmin_series): np.testing.assert_array_equal(out, [np.nan, 100, np.nan]) out = atmos.frost_season_length(tasmin=tasmin, mid_date="07-01", freq="YS") - np.testing.assert_array_equal(out, [np.nan, np.nan]) + np.testing.assert_array_equal(out, [0, 181]) class TestColdSpellDays: diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index b174cc6db..970e24297 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -540,9 +540,11 @@ def season( """ thresh = convert_units_to(thresh, data) cond = compare(data, op, 
thresh, constrain=constrain) - out = cond.resample(time=freq).map( - rl.season, window=window, date=mid_date, stat=stat, coord="dayofyear" - ) + FUNC = {"start": rl.season_start, "end": rl.season_end, "length": rl.season_length} + map_kwargs = dict(window=window, date=mid_date) + if stat in ["start", "end"]: + map_kwargs["coord"] = "dayofyear" + out = cond.resample(time=freq).map(FUNC[stat], **map_kwargs) if stat == "length": return to_agg_units(out, data, "count") # else, a date diff --git a/xclim/indices/run_length.py b/xclim/indices/run_length.py index 22e2b1a8b..da13cc411 100644 --- a/xclim/indices/run_length.py +++ b/xclim/indices/run_length.py @@ -755,19 +755,121 @@ def extract_events( return runs +def season_start( + da: xr.DataArray, + window: int, + date: DayOfYearStr | None = None, + dim: str = "time", + coord: str | bool | None = False, +) -> xr.DataArray: + """Start of a season. + + See :py:func:`season`. + + Parameters + ---------- + da : xr.DataArray + Input N-dimensional DataArray (boolean). + window : int + Minimum duration of consecutive values to start and end the season. + date : DayOfYearStr, optional + The date (in MM-DD format) that a season must include to be considered valid. + dim : str + Dimension along which to calculate the season (default: 'time'). + coord : Optional[str] + If not False, the function returns values along `dim` instead of indexes. + If `dim` has a datetime dtype, `coord` can also be a str of the name of the + DateTimeAccessor object to use (ex: 'dayofyear'). + + Returns + ------- + xr.DataArray + Start of the season, units depend on `coord`. + + See Also + -------- + season + season_end + season_length + """ + return first_run_before_date(da, window=window, date=date, dim=dim, coord=coord) + + +def season_end( + da: xr.DataArray, + window: int, + date: DayOfYearStr | None = None, + dim: str = "time", + coord: str | bool | None = False, + _beg: xr.DataArray | None = None, +) -> xr.DataArray: + """End of a season. 
+ + See :py:func:`season`. Unlike :py:func:`first_run_after_date`, the end is only searched after the season start, and it defaults to the last element if a start is found but no end. + + + Parameters + ---------- + da : xr.DataArray + Input N-dimensional DataArray (boolean). + window : int + Minimum duration of consecutive values to start and end the season. + date : DayOfYearStr, optional + The date (in MM-DD format) that a run must include to be considered valid. + dim : str + Dimension along which to calculate consecutive run (default: 'time'). + coord : Optional[str] + If not False, the function returns values along `dim` instead of indexes. + If `dim` has a datetime dtype, `coord` can also be a str of the name of the + DateTimeAccessor object to use (ex: 'dayofyear'). + + Returns + ------- + xr.DataArray + End of the season, units depend on `coord`. + If a start is found but no end, the end is set to the last element. + + See Also + -------- + season + season_start + season_length + """ + # Fast path for `season` and `season_length` + if _beg is not None: + beg = _beg + else: + beg = season_start(da, window=window, dim=dim, date=date, coord=False) + # Invert the condition and mask all values before the beginning + # we fillna(0) so as to differentiate series with no runs and all-nan series + not_da = (~da).where(da[dim].copy(data=np.arange(da[dim].size)) >= beg.fillna(0)) + end = first_run_after_date(not_da, window=window, dim=dim, date=date, coord=False) + if _beg is None: + # Where end is null and beg is not null (valid data, no end detected), put the last index + # Don't do this in the fast path, so that the length can use this information + end = xr.where(end.isnull() & beg.notnull(), da[dim].size - 1, end) + end = end.where(beg.notnull()) + if coord: + crd = da[dim] + if isinstance(coord, str): + crd = getattr(crd.dt, coord) + end = lazy_indexing(crd, end) + return end + + def season( da: xr.DataArray, window: int, date: DayOfYearStr | None = None, dim: str = "time", - stat: str | None = None, coord: str | bool | None = False, -)
-> xr.Dataset | xr.DataArray: +) -> xr.Dataset: """Calculate the bounds of a season along a dimension. A "season" is a run of True values that may include breaks under a given length (`window`). - The start is computed as the first run of `window` True values, then end as the first subsequent run - of `window` False values. If a date is passed, it must be included in the season. + The start is computed as the first run of `window` True values, and the end as the first subsequent run + of `window` False values. The end element is the first element after the season. + If a date is given, it must be included in the season. Parameters ---------- @@ -779,9 +881,6 @@ def season( The date (in MM-DD format) that a run must include to be considered valid. dim : str Dimension along which to calculate consecutive run (default: 'time'). - stat : {'start', 'end', 'length'}, optional - Which facet to return. If absent (default), a Dataset with all three is returned. - If present, only the specified variable is returned. coord : Optional[str] If not False, the function returns values along `dim` instead of indexes. If `dim` has a datetime dtype, `coord` can also be a str of the name of the @@ -789,9 +888,8 @@ def season( Returns ------- - xr.Dataset or xr.DataArray - If `stat` is absent, a Dataset with three variables : start, end and length of the season. - If `stat` is given, only the specified variable is returned, as a DataArray. + xr.Dataset + A Dataset with three variables : start, end and length of the season. Notes ----- @@ -803,55 +901,36 @@ def season( 25 for all June, but July and august have very cold temperatures. Instead of returning 30 days (June), the function will return 61 days (July + June). - The season's length is always the difference between the end and the start, except if no - end was found before the end of the data, in which case the end is the last day and the length - goes up to the boundary (i.e. :math:`end - start = length - 1`). 
- """ - beg = first_run(da, window=window, dim=dim) - # Invert the condition and mask all values after beginning - # we fillna(0) as so to differentiate series with no runs and all-nan series - not_da = (~da).where(da[dim].copy(data=np.arange(da[dim].size)) >= beg.fillna(0)) + The season's length is always the difference between the end and the start. Except if no + season end was found before the end of the data. In that case the end is set to last element and + the length is set to the data size minus the start index. Thus, for the specific case, :math:`length = end - start + 1`, + because the end falls on the last element of the season instead of the subsequent one. - # Mask also values after "date" - mid_idx = index_of_date(da[dim], date, max_idxs=1, default=0) - if mid_idx.size == 0: - # The date is not within the group. Happens at boundaries. - base = da.isel({dim: 0}) # To have the proper shape - beg = xr.full_like(base, np.nan, float).drop_vars(dim) - end = xr.full_like(base, np.nan, float).drop_vars(dim) - length = xr.full_like(base, np.nan, float).drop_vars(dim) - else: - if date is not None: - # If the beginning was after the mid date, both bounds are NaT. - valid_start = beg < mid_idx.squeeze() - else: - valid_start = True - - not_da = not_da.where(da[dim] >= da[dim][mid_idx][0]) - end = first_run( - not_da, - window=window, - dim=dim, - ) - # If there was a beginning but no end, season goes to the end of the array - no_end = beg.notnull() & end.isnull() - - # Length - length = end - beg - - # No end: length is actually until the end of the array, so it is missing 1 - length = xr.where(no_end, da[dim].size - beg, length) - # Where the beginning was before the mid-date, invalid. 
- length = length.where(valid_start) - # Where there were data points, but no season : put 0 length - length = xr.where(beg.isnull() & end.notnull(), 0, length) - - # No end: end defaults to the last element (this differs from length, but heh) - end = xr.where(no_end, da[dim].size - 1, end) - - # Where the beginning was before the mid-date - beg = beg.where(valid_start) - end = end.where(valid_start) + See Also + -------- + season_start + season_end + season_length + """ + beg = season_start(da, window=window, dim=dim, date=date, coord=False) + # Use fast path in season_end : no recomputing of start, no masking of end where beg.isnull() and don't set end if none found + end = season_end(da, window=window, dim=dim, date=date, _beg=beg, coord=False) + # Three cases : + # start no start + # end e - s 0 + # no end size - s 0 + # da is boolean, so we have no way of knowing if the absence of season is due to missing values or to an actual absence. + length = xr.where( + beg.isnull(), + 0, + # Where there is no end, from the start to the boundary + xr.where(end.isnull(), da[dim].size - beg, end - beg), + ) + # Now masks ends + # Still give an end if we didn't find any : put the last element + # This breaks the length = end - beg, but yields a truer length + end = xr.where(end.isnull() & beg.notnull(), da[dim].size - 1, end) + end = end.where(beg.notnull()) if coord: crd = da[dim] @@ -866,7 +945,6 @@ def season( coordstr = "index" out = xr.Dataset({"start": beg, "end": end, "length": length}) - out.start.attrs.update( long_name="Start of the season.", description=f"First {coordstr} of a run of at least {window} steps respecting the condition.", @@ -880,8 +958,6 @@ def season( long_name="Length of the season.", description="Number of steps of the original series in the season, between 'start' and 'end'.", ) - if stat is not None: - return out[stat] return out @@ -891,11 +967,9 @@ def season_length( date: DayOfYearStr | None = None, dim: str = "time", ) -> xr.DataArray: - 
"""Return the length of the longest semi-consecutive run of True values (optionally including a given date). + """Length of a season. - A "season" is a run of True values that may include breaks under a given length (`window`). - The start is computed as the first run of `window` True values, then end as the first subsequent run - of `window` False values. If a date is passed, it must be included in the season. + See :py:func:`season`. Parameters ---------- @@ -911,18 +985,13 @@ def season_length( Returns ------- xr.DataArray, [int] - Length of the longest run of True values along a given dimension (inclusive of a given date) - without breaks longer than a given length. + Length of the season, in number of elements along dimension `time`. - Notes - ----- - The run can include holes of False or NaN values, so long as they do not exceed the window size. - - If a date is given, the season start and end are forced to be on each side of this date. This means that - even if the "real" season has been over for a long time, this is the date used in the length calculation. - Example : Length of the "warm season", where T > 25°C, with date = 1st August. Let's say the temperature is over - 25 for all June, but July and august have very cold temperatures. Instead of returning 30 days (June), the function - will return 61 days (July + June). + See Also + -------- + season + season_start + season_end """ seas = season(da, window, date, dim, coord=False) return seas.length @@ -1064,6 +1133,54 @@ def last_run_before_date( return last_run(run, window=window, dim=dim, coord=coord) +def first_run_before_date( + da: xr.DataArray, + window: int, + date: DayOfYearStr | None = "07-01", + dim: str = "time", + coord: bool | str | None = "dayofyear", +) -> xr.DataArray: + """Return the index of the first item of the first run before a given date. + + Parameters + ---------- + da : xr.DataArray + Input N-dimensional DataArray (boolean). 
+ window : int + Minimum duration of consecutive run to accumulate values. + date : DayOfYearStr + The date before which to look for the run. + dim : str + Dimension along which to calculate consecutive run (default: 'time'). + coord : Optional[Union[bool, str]] + If not False, the function returns values along `dim` instead of indexes. + If `dim` has a datetime dtype, `coord` can also be a str of the name of the + DateTimeAccessor object to use (ex: 'dayofyear'). + + Returns + ------- + xr.DataArray + Index (or coordinate if `coord` is not False) of first item in the first valid run. + Returns np.nan if there are no valid runs. + """ + if date is not None: + mid_idx = index_of_date(da[dim], date, max_idxs=1, default=0) + if ( + mid_idx.size == 0 + ): # The date is not within the group. Happens at boundaries. + return xr.full_like(da.isel({dim: 0}), np.nan, float).drop_vars(dim) + # Mask anything after the mid_date + window - 1 + # Thus, the latest run possible can begin on the day just before mid_idx + da = da.where(da[dim] < da[dim][mid_idx + window - 1][0]) + + return first_run( + da, + window=window, + dim=dim, + coord=coord, + ) + + @njit def _rle_1d(ia): y = ia[1:] != ia[:-1] # pairwise unequal (string safe) From 18e26049aa1f496652b997956503e4a48fb8f0a3 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 18 Jul 2024 18:42:47 -0400 Subject: [PATCH 03/13] upd changes --- CHANGELOG.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ff8da0baa..6cf67bfad 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -8,8 +8,8 @@ Contributors to this version: David Huard (:user:`huard`), Pascal Bourgault (:us Breaking changes ^^^^^^^^^^^^^^^^ -* Definition of the ``frost_free_season_start`` and ``frost_free_season_end`` have been slightly changed to be coherent with the ``frost_free_season_length`` and xclim's notion of `season` in general. Indicator and indices signature have changed. 
-* Season length indicators have been modified to return ``0`` for all cases where a proper season was not found, but the data is valid. Previously, a ``nan`` was given if neither a start or an end were found, even if the data was valid, and a ``0`` was given if an end was found but no start. +* Definition of the ``frost_free_season_start`` and ``frost_free_season_end`` have been slightly changed to be coherent with the ``frost_free_season_length`` and xclim's notion of `season` in general. Indicator and indices signature have changed. (:pull:`1845`). +* Season length indicators have been modified to return ``0`` for all cases where a proper season was not found, but the data is valid. Previously, a ``nan`` was given if neither a start or an end were found, even if the data was valid, and a ``0`` was given if an end was found but no start. (:pull:`1845`). Bug fixes ^^^^^^^^^ @@ -18,8 +18,7 @@ Bug fixes Internal changes ^^^^^^^^^^^^^^^^ * Changed french translation of "wet days" from "jours mouillés" to "jours pluvieux". (:issue:`1825`, :pull:`1826`). -* Added ``xclim.indices.generic.season`` to make season start, end and length indices. Added a `stat` argument to ``xclim.indices.run_length.season`` to avoid returning a dataset. - +* Added ``xclim.indices.generic.season`` to make season start, end and length indices. Added a `stat` argument to ``xclim.indices.run_length.season`` to avoid returning a dataset. (:pull:`1845`). 
v0.51.0 (2024-07-04) From d9ffe771c3bad49a4357286ef437a7942b555150 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 18 Jul 2024 19:04:11 -0400 Subject: [PATCH 04/13] Change growing season length to use the same logic --- xclim/indices/_threshold.py | 115 ++++++++++++++++++------------------ xclim/indices/run_length.py | 2 +- 2 files changed, 58 insertions(+), 59 deletions(-) diff --git a/xclim/indices/_threshold.py b/xclim/indices/_threshold.py index 883d1ea98..1c25ae63e 100644 --- a/xclim/indices/_threshold.py +++ b/xclim/indices/_threshold.py @@ -979,14 +979,21 @@ def growing_degree_days( def growing_season_start( tas: xarray.DataArray, thresh: Quantified = "5.0 degC", + mid_date: DayOfYearStr | None = "07-01", window: int = 5, freq: str = "YS", op: Literal[">", ">=", "gt", "ge"] = ">=", ) -> xarray.DataArray: r"""Start of the growing season. - Day of the year of the start of a sequence of days with mean daily temperatures consistently above or equal to a - given threshold (default: 5℃). + The growing season starts with the first sequence of a minimum length of consecutive days above the threshold + and ends with the first sequence of the same minimum length of consecutive days under the threshold. Sequences + of consecutive days under the threshold shorter then `window` are allowed within the season. + A middle date can be given, a start can't happen later and an end can't happen earlier. + + Warnings + -------- + The default `freq` and `mid_date` parameters are valid for the northern hemisphere. Parameters ---------- @@ -994,6 +1001,8 @@ def growing_season_start( Mean daily temperature. thresh : Quantified Threshold temperature on which to base evaluation. + mid_date : str, optional + Date of the year after before which the season must start. Should have the format '%m-%d'. window : int Minimum number of days with temperature above threshold needed for evaluation. 
freq : str @@ -1004,42 +1013,35 @@ def growing_season_start( Returns ------- xarray.DataArray, [dimensionless] - Day of the year when temperature is superior to a threshold over a given number of days for the first time. - If there is no such day or if a growing season is not detected, returns np.nan. - - Notes - ----- - Let :math:`x_i` be the daily mean temperature at day of the year :math:`i` for values of :math:`i` going from 1 - to 365 or 366. The start date of the start of growing season is given by the smallest index :math:`i`: - - .. math:: - - \prod_{j=i}^{i+w} [x_j >= thresh] - - where :math:`w` is the number of days the temperature threshold should be met or exceeded, - and :math:`[P]` is 1 if :math:`P` is true, and 0 if false. + Start of the growing season. """ - thresh = convert_units_to(thresh, tas) - cond = compare(tas, op, thresh, constrain=(">=", ">")) - - out = cond.resample(time=freq).map(rl.first_run, window=window, coord="dayofyear") - out.attrs.update(units="", is_dayofyear=np.int32(1), calendar=get_calendar(tas)) - return out + return season( + tas, + thresh=thresh, + mid_date=mid_date, + window=window, + freq=freq, + op=op, + constrain=(">", ">="), + stat="start", + ) @declare_units(tas="[temperature]", thresh="[temperature]") def growing_season_end( tas: xarray.DataArray, thresh: Quantified = "5.0 degC", - mid_date: DayOfYearStr = "07-01", + mid_date: DayOfYearStr | None = "07-01", window: int = 5, freq: str = "YS", - op: Literal["<", "<=", "lt", "le"] = "<", + op: Literal[">", ">=", "lt", "le"] = ">", ) -> xarray.DataArray: r"""End of the growing season. - Day of the year of the start of a sequence of `N` (default: 5) days with mean temperatures consistently below a - given threshold (default: 5℃), occurring after a given calendar date (default: July 1). 
+ The growing season starts with the first sequence of a minimum length of consecutive days above the threshold + and ends with the first sequence of the same minimum length of consecutive days under the threshold. Sequences + of consecutive days under the threshold shorter then `window` are allowed within the season. + A middle date can be given, a start can't happen later and an end can't happen earlier. Warnings -------- @@ -1051,21 +1053,20 @@ def growing_season_end( Mean daily temperature. thresh : Quantified Threshold temperature on which to base evaluation. - mid_date : str + mid_date : str, optional Date of the year after which to look for the end of the season. Should have the format '%m-%d'. window : int Minimum number of days with temperature below threshold needed for evaluation. freq : str Resampling frequency. - op : {"<", "<=", "lt", "le"} - Comparison operation. Default: "<". + op : {">", ">=", "gt", "ge"} + Comparison operation. Default: ">". Note that this comparison is what defines the season. + The end of the season happens when the condition is NOT met for `window` consecutive days. Returns ------- xarray.DataArray, [dimensionless] - Day of the year when temperature is inferior to a threshold over a given number of days for the first time. - If there is no such day or if a growing season is not detected, returns np.nan. - If the growing season does not end within the time period, returns the last day of the period. + End of the growing season. Notes ----- @@ -1079,20 +1080,16 @@ def growing_season_end( where :math:`w` is the number of days where temperature should be inferior to a given threshold after a given date, and :math:`[P]` is 1 if :math:`P` is true, and 0 if false. """ - thresh = convert_units_to(thresh, tas) - - # Note: The following operation is inverted here so that there is less confusion for users. 
- cond = ~compare(tas, op, thresh, constrain=("<=", "<")) - - out = cond.resample(time=freq).map( - rl.run_end_after_date, + return season( + tas, + thresh=thresh, + mid_date=mid_date, window=window, - date=mid_date, - dim="time", - coord="dayofyear", + freq=freq, + op=op, + constrain=(">", ">="), + stat="end", ) - out.attrs.update(units="", is_dayofyear=np.int32(1), calendar=get_calendar(tas)) - return out @declare_units(tas="[temperature]", thresh="[temperature]") @@ -1100,16 +1097,18 @@ def growing_season_length( tas: xarray.DataArray, thresh: Quantified = "5.0 degC", window: int = 6, - mid_date: DayOfYearStr = "07-01", + mid_date: DayOfYearStr | None = "07-01", freq: str = "YS", op: str = ">=", ) -> xarray.DataArray: r"""Growing season length. - The number of days between the first occurrence of at least `N` (default: 6) consecutive days with mean daily - temperature over a threshold (default: 5℃) and the first occurrence of at least `N` consecutive days with mean - daily temperature below the same threshold after a certain date, usually July 1st (06-01) in the northern emispher - and January 1st (01-01) in the southern hemisphere. + The growing season starts with the first sequence of a minimum length of consecutive days above the threshold + and ends with the first sequence of the same minimum length of consecutive days under the threshold. Sequences + of consecutive days under the threshold shorter then `window` are allowed within the season. + A middle date can be given, a start can't happen later and an end can't happen earlier. + If the season starts but never ends, the length is computed up to the end of the resampling period. + If no season start is found, but the data is valid, a length of 0 is returned. Warnings -------- @@ -1123,7 +1122,7 @@ def growing_season_length( Threshold temperature on which to base evaluation. window : int Minimum number of days with temperature above threshold to mark the beginning and end of growing season. 
- mid_date : str + mid_date : str, optional Date of the year after which to look for the end of the season. Should have the format '%m-%d'. freq : str Resampling frequency. @@ -1168,16 +1167,16 @@ def growing_season_length( :cite:cts:`project_team_eca&d_algorithm_2013` """ - thresh = convert_units_to(thresh, tas) - cond = compare(tas, op, thresh, constrain=(">=", ">")) - - out = cond.resample(time=freq).map( - rl.season_length, + return season( + tas, + thresh=thresh, + mid_date=mid_date, window=window, - date=mid_date, - dim="time", + freq=freq, + op=op, + constrain=(">", ">="), + stat="length", ) - return to_agg_units(out, tas, "count") @declare_units(tasmin="[temperature]", thresh="[temperature]") diff --git a/xclim/indices/run_length.py b/xclim/indices/run_length.py index da13cc411..ba972695a 100644 --- a/xclim/indices/run_length.py +++ b/xclim/indices/run_length.py @@ -869,7 +869,7 @@ def season( A "season" is a run of True values that may include breaks under a given length (`window`). The start is computed as the first run of `window` True values, and the end as the first subsequent run of `window` False values. The end element is the first element after the season. - If a date is given, it must be included in the season. + If a date is given, it must be included in the season, i.e. the start can't happen laterand the end can't happen earlier. 
Parameters ---------- From 3e4c9299698c6407fb3a021dcf4f26746ecd1508 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Fri, 19 Jul 2024 13:35:22 -0400 Subject: [PATCH 05/13] apply generic season to snow season --- xclim/indices/_threshold.py | 94 +++++-------------------------------- 1 file changed, 12 insertions(+), 82 deletions(-) diff --git a/xclim/indices/_threshold.py b/xclim/indices/_threshold.py index 1c25ae63e..bbede9ed5 100644 --- a/xclim/indices/_threshold.py +++ b/xclim/indices/_threshold.py @@ -382,19 +382,8 @@ def snd_season_end( :cite:cts:`chaumont_elaboration_2017` """ valid = at_least_n_valid(snd.where(snd > 0), n=1, freq=freq) - - thresh = convert_units_to(thresh, snd) - cond = snd >= thresh - - resampled = ( - cond.resample(time=freq) - .map(rl.season, window=window, dim="time", coord="dayofyear") - .end - ) - resampled = resampled.assign_attrs( - units="", is_dayofyear=np.int32(1), calendar=get_calendar(snd) - ) - snd_se = resampled.where(~valid) + out = season(snd, thresh, window=window, op=">=", stat="end", freq=freq) + snd_se = out.where(~valid) return snd_se @@ -431,19 +420,8 @@ def snw_season_end( :cite:cts:`chaumont_elaboration_2017` """ valid = at_least_n_valid(snw.where(snw > 0), n=1, freq=freq) - - thresh = convert_units_to(thresh, snw) - cond = snw >= thresh - - resampled = ( - cond.resample(time=freq) - .map(rl.season, window=window, dim="time", coord="dayofyear") - .end - ) - resampled.attrs.update( - units="", is_dayofyear=np.int32(1), calendar=get_calendar(snw) - ) - snw_se = resampled.where(~valid) + out = season(snw, thresh, window=window, op=">=", stat="end", freq=freq) + snw_se = out.where(~valid) return snw_se @@ -480,24 +458,8 @@ def snd_season_start( :cite:cts:`chaumont_elaboration_2017` """ valid = at_least_n_valid(snd.where(snd > 0), n=1, freq=freq) - - thresh = convert_units_to(thresh, snd) - cond = snd >= thresh - - resampled = ( - cond.resample(time=freq) - .map( - rl.season, - window=window, - dim="time", - 
coord="dayofyear", - ) - .start - ) - resampled.attrs.update( - units="", is_dayofyear=np.int32(1), calendar=get_calendar(snd) - ) - snd_ss = resampled.where(~valid) + out = season(snd, thresh, window=window, op=">=", stat="start", freq=freq) + snd_ss = out.where(~valid) return snd_ss @@ -535,24 +497,8 @@ def snw_season_start( """ valid = at_least_n_valid(snw.where(snw > 0), n=1, freq=freq) - - thresh = convert_units_to(thresh, snw) - cond = snw >= thresh - - resampled = ( - cond.resample(time=freq) - .map( - rl.season, - window=window, - dim="time", - coord="dayofyear", - ) - .start - ) - resampled.attrs.update( - units="", is_dayofyear=np.int32(1), calendar=get_calendar(snw) - ) - snw_ss = resampled.where(~valid) + out = season(snw, thresh, window=window, op=">=", stat="start", freq=freq) + snw_ss = out.where(~valid) return snw_ss @@ -589,16 +535,8 @@ def snd_season_length( :cite:cts:`chaumont_elaboration_2017` """ valid = at_least_n_valid(snd.where(snd > 0), n=1, freq=freq) - - thresh = convert_units_to(thresh, snd) - cond = snd >= thresh - - snd_sl = ( - cond.resample(time=freq) - .map(rl.season, window=window, dim="time", coord="dayofyear") - .length - ) - snd_sl = to_agg_units(snd_sl.where(~valid), snd, "count") + out = season(snd, thresh, window=window, op=">=", stat="length", freq=freq) + snd_sl = out.where(~valid) return snd_sl @@ -635,16 +573,8 @@ def snw_season_length( :cite:cts:`chaumont_elaboration_2017` """ valid = at_least_n_valid(snw.where(snw > 0), n=1, freq=freq) - - thresh = convert_units_to(thresh, snw) - cond = snw >= thresh - - snw_sl = ( - cond.resample(time=freq) - .map(rl.season, window=window, dim="time", coord="dayofyear") - .length - ) - snw_sl = to_agg_units(snw_sl.where(~valid), snw, "count") + out = season(snw, thresh, window=window, op=">=", stat="length", freq=freq) + snw_sl = out.where(~valid) return snw_sl From 975793fe5edf6a74551cf7655d744aa04a8941bc Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 1 Aug 2024 13:25:43 
-0400 Subject: [PATCH 06/13] Add missing func to __all__ --- xclim/indices/generic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index 54f9a4e6d..ea6933cfe 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -51,8 +51,10 @@ "get_zones", "interday_diurnal_temperature_range", "last_occurrence", + "season", "select_resample_op", "spell_length", + "spell_length_statistics", "statistics", "temperature_sum", "threshold_count", From e51081fa6fb19d5441ca92807ed650d18b1a305d Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 1 Aug 2024 13:41:22 -0400 Subject: [PATCH 07/13] Add Notes to season docstring, rewrite mid_date doc --- xclim/indices/generic.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index ea6933cfe..fc374ffdd 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -509,7 +509,7 @@ def season( freq : str Resampling frequency. mid_date : DayOfYearStr, optional - An optional middle date to restrict the possible start and end of the season. + An optional middle date. The start must happen before and the end after for the season to be valid. constrain : Sequence of strings, optional A list of acceptable comparison operators. Optional, but indicators wrapping this function should inject it. @@ -539,6 +539,12 @@ def season( Returns the length of the "dry" season. The season starts with 7 consecutive days with precipitation under or equal to 2 mm/d and ends with as many days above 2 mm/d. If no start is found before the first of august, the season is invalid. If a start is found but no end, the end is set to the last day of the period (December 31st if the dataset is complete). 
+ + See Also + -------- + xclim.indices.run_length.season_start + xclim.indices.run_length.season_length + xclim.indices.run_length.season_end """ thresh = convert_units_to(thresh, data) cond = compare(data, op, thresh, constrain=constrain) From 91dd27ff733f833d812778ca53518762c72ff0f3 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 1 Aug 2024 13:49:13 -0400 Subject: [PATCH 08/13] Apply suggestions from code review Co-authored-by: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> --- CHANGELOG.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index af22f20c2..4e119f8cf 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -18,7 +18,7 @@ Breaking changes Breaking changes ^^^^^^^^^^^^^^^^ -* Definition of the ``frost_free_season_start`` and ``frost_free_season_end`` have been slightly changed to be coherent with the ``frost_free_season_length`` and xclim's notion of `season` in general. Indicator and indices signature have changed. (:pull:`1845`). +* The definitions of the ``frost_free_season_start`` and ``frost_free_season_end`` have been slightly changed to be coherent with the ``frost_free_season_length`` and `xclim`'s notion of `season` in general. Indicator and indices signature have changed. (:pull:`1845`). * Season length indicators have been modified to return ``0`` for all cases where a proper season was not found, but the data is valid. Previously, a ``nan`` was given if neither a start or an end were found, even if the data was valid, and a ``0`` was given if an end was found but no start. (:pull:`1845`). Bug fixes @@ -31,7 +31,7 @@ Internal changes ^^^^^^^^^^^^^^^^ * Changed the French translation of "wet days" from "jours mouillés" to "jours pluvieux". (:issue:`1825`, :pull:`1826`). 
* In order to adapt to changes in `pytest`, the doctest fixtures have been split from the main testing suite and doctests are now run using ``$ python -c 'from xclim.testing.utils import run_doctests; run_doctests()'``. (:pull:`1632`). -* Added ``xclim.indices.generic.season`` to make season start, end and length indices. Added a `stat` argument to ``xclim.indices.run_length.season`` to avoid returning a dataset. (:pull:`1845`). +* Added ``xclim.indices.generic.season`` to make season start, end, and length indices. Added a ``stat`` argument to ``xclim.indices.run_length.season`` to avoid returning a dataset. (:pull:`1845`). CI changes ^^^^^^^^^^ From bed832b5bcb15da989f3d698eb16ff8d57c311b3 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 1 Aug 2024 13:55:21 -0400 Subject: [PATCH 09/13] Rename date to mid_date for all rl.season funcs - complete docstring --- xclim/indices/generic.py | 2 +- xclim/indices/run_length.py | 36 +++++++++++++++++++++--------------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index fc374ffdd..5c3aad656 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -549,7 +549,7 @@ def season( thresh = convert_units_to(thresh, data) cond = compare(data, op, thresh, constrain=constrain) FUNC = {"start": rl.season_start, "end": rl.season_end, "length": rl.season_length} - map_kwargs = dict(window=window, date=mid_date) + map_kwargs = dict(window=window, mid_date=mid_date) if stat in ["start", "end"]: map_kwargs["coord"] = "dayofyear" out = cond.resample(time=freq).map(FUNC[stat], **map_kwargs) diff --git a/xclim/indices/run_length.py b/xclim/indices/run_length.py index fb59941bf..eeb1d8e80 100644 --- a/xclim/indices/run_length.py +++ b/xclim/indices/run_length.py @@ -758,7 +758,7 @@ def extract_events( def season_start( da: xr.DataArray, window: int, - date: DayOfYearStr | None = None, + mid_date: DayOfYearStr | None = None, dim: str = "time", coord: 
str | bool | None = False, ) -> xr.DataArray: @@ -772,7 +772,7 @@ def season_start( Input N-dimensional DataArray (boolean). window : int Minimum duration of consecutive values to start and end the season. - date : DayOfYearStr, optional + mid_date : DayOfYearStr, optional The date (in MM-DD format) that a season must include to be considered valid. dim : str Dimension along which to calculate the season (default: 'time'). @@ -792,20 +792,22 @@ def season_start( season_end season_length """ - return first_run_before_date(da, window=window, date=date, dim=dim, coord=coord) + return first_run_before_date(da, window=window, date=mid_date, dim=dim, coord=coord) def season_end( da: xr.DataArray, window: int, - date: DayOfYearStr | None = None, + mid_date: DayOfYearStr | None = None, dim: str = "time", coord: str | bool | None = False, _beg: xr.DataArray | None = None, ) -> xr.DataArray: """End of a season. - See :py:func:`season`. There are two differences between this and :py:func:`first_run_after_date`: + See :py:func:`season`. Similar to :py:func:`first_run_after_date` but here a season + must have a start for an end to be valid. Also, if no end is found but a start was found + the end is set to the last element of the series. Parameters @@ -814,7 +816,7 @@ def season_end( Input N-dimensional DataArray (boolean). window : int Minimum duration of consecutive values to start and end the season. - date : DayOfYearStr, optional + mid_date : DayOfYearStr, optional The date (in MM-DD format) that a run must include to be considered valid. dim : str Dimension along which to calculate consecutive run (default: 'time'). 
@@ -839,11 +841,13 @@ def season_end( if _beg is not None: beg = _beg else: - beg = season_start(da, window=window, dim=dim, date=date, coord=False) + beg = season_start(da, window=window, dim=dim, mid_date=mid_date, coord=False) # Invert the condition and mask all values after beginning # we fillna(0) as so to differentiate series with no runs and all-nan series not_da = (~da).where(da[dim].copy(data=np.arange(da[dim].size)) >= beg.fillna(0)) - end = first_run_after_date(not_da, window=window, dim=dim, date=date, coord=False) + end = first_run_after_date( + not_da, window=window, dim=dim, date=mid_date, coord=False + ) if _beg is None: # Where end is null and beg is not null (valid data, no end detected), put the last index # Don't do this in the fast path, so that the length can use this information @@ -860,7 +864,7 @@ def season_end( def season( da: xr.DataArray, window: int, - date: DayOfYearStr | None = None, + mid_date: DayOfYearStr | None = None, dim: str = "time", coord: str | bool | None = False, ) -> xr.Dataset: @@ -877,7 +881,7 @@ def season( Input N-dimensional DataArray (boolean). window : int Minimum duration of consecutive values to start and end the season. - date : DayOfYearStr, optional + mid_date : DayOfYearStr, optional The date (in MM-DD format) that a run must include to be considered valid. dim : str Dimension along which to calculate consecutive run (default: 'time'). 
@@ -912,9 +916,11 @@ def season( season_end season_length """ - beg = season_start(da, window=window, dim=dim, date=date, coord=False) + beg = season_start(da, window=window, dim=dim, mid_date=mid_date, coord=False) # Use fast path in season_end : no recomputing of start, no masking of end where beg.isnull() and don't set end if none found - end = season_end(da, window=window, dim=dim, date=date, _beg=beg, coord=False) + end = season_end( + da, window=window, dim=dim, mid_date=mid_date, _beg=beg, coord=False + ) # Three cases : # start no start # end e - s 0 @@ -964,7 +970,7 @@ def season( def season_length( da: xr.DataArray, window: int, - date: DayOfYearStr | None = None, + mid_date: DayOfYearStr | None = None, dim: str = "time", ) -> xr.DataArray: """Length of a season. @@ -977,7 +983,7 @@ def season_length( Input N-dimensional DataArray (boolean). window : int Minimum duration of consecutive values to start and end the season. - date : DayOfYearStr, optional + mid_date : DayOfYearStr, optional The date (in MM-DD format) that a run must include to be considered valid. dim : str Dimension along which to calculate consecutive run (default: 'time'). 
@@ -993,7 +999,7 @@ def season_length( season_start season_end """ - seas = season(da, window, date, dim, coord=False) + seas = season(da, window, mid_date, dim, coord=False) return seas.length From 3835ced78bdffc13bf2f6399a17c99d46eed93a4 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 1 Aug 2024 14:36:43 -0400 Subject: [PATCH 10/13] Apply suggestions from code review Co-authored-by: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> --- xclim/indices/_threshold.py | 2 +- xclim/indices/generic.py | 2 +- xclim/indices/run_length.py | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/xclim/indices/_threshold.py b/xclim/indices/_threshold.py index 9cb251728..d6be3ab9a 100644 --- a/xclim/indices/_threshold.py +++ b/xclim/indices/_threshold.py @@ -1406,7 +1406,7 @@ def frost_free_spell_max_length( op: str = ">=", resample_before_rl: bool = True, ) -> xarray.DataArray: - r"""Longest cold spell. + r"""Longest frost free spell. Longest spell of warm temperatures over a given period. Longest series of at least {window} consecutive days with temperature at or above the threshold. diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index 5c3aad656..53747e8e0 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -488,7 +488,7 @@ def season( mid_date: DayOfYearStr | None = None, constrain: Sequence[str] | None = None, ) -> xarray.DataArray: - r"""Season + r"""Season. A season starts when a variable respects some condition for a consecutive run of `N` days. It stops when the condition is inverted for `N` days. Runs where the condition is not met for fewer than `N` days diff --git a/xclim/indices/run_length.py b/xclim/indices/run_length.py index eeb1d8e80..47ec810cf 100644 --- a/xclim/indices/run_length.py +++ b/xclim/indices/run_length.py @@ -820,7 +820,7 @@ def season_end( The date (in MM-DD format) that a run must include to be considered valid. 
dim : str Dimension along which to calculate consecutive run (default: 'time'). - coord : Optional[str] + coord : str, optional If not False, the function returns values along `dim` instead of indexes. If `dim` has a datetime dtype, `coord` can also be a str of the name of the DateTimeAccessor object to use (ex: 'dayofyear'). @@ -873,7 +873,7 @@ def season( A "season" is a run of True values that may include breaks under a given length (`window`). The start is computed as the first run of `window` True values, and the end as the first subsequent run of `window` False values. The end element is the first element after the season. - If a date is given, it must be included in the season, i.e. the start can't happen laterand the end can't happen earlier. + If a date is given, it must be included in the season, i.e. the start cannot occur later and the end cannot occur earlier. Parameters ---------- @@ -893,7 +893,7 @@ def season( Returns ------- xr.Dataset - A Dataset with three variables : start, end and length of the season. + A Dataset with three variables : start, end, and length of the season. Notes ----- @@ -1158,10 +1158,10 @@ def first_run_before_date( The date before which to look for the run. dim : str Dimension along which to calculate consecutive run (default: 'time'). - coord : Optional[Union[bool, str]] + coord : bool or str, optional If not False, the function returns values along `dim` instead of indexes. If `dim` has a datetime dtype, `coord` can also be a str of the name of the - DateTimeAccessor object to use (ex: 'dayofyear'). + DateTimeAccessor object to use (e.g. 'dayofyear'). 
Returns ------- From 7acab2b5a182ccf5eb288e10957e210704661220 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 1 Aug 2024 14:40:05 -0400 Subject: [PATCH 11/13] Suggestions from review for signatures and docstrings --- xclim/indices/_threshold.py | 20 ++++++++++++-------- xclim/indices/run_length.py | 7 ++++--- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/xclim/indices/_threshold.py b/xclim/indices/_threshold.py index d6be3ab9a..dc33bb60f 100644 --- a/xclim/indices/_threshold.py +++ b/xclim/indices/_threshold.py @@ -932,7 +932,8 @@ def growing_season_start( thresh : Quantified Threshold temperature on which to base evaluation. mid_date : str, optional - Date of the year after before which the season must start. Should have the format '%m-%d'. + Date of the year before which the season must start. Should have the format '%m-%d'. + ``None`` removes that constraint. window : int Minimum number of days with temperature above threshold needed for evaluation. freq : str @@ -985,6 +986,7 @@ def growing_season_end( Threshold temperature on which to base evaluation. mid_date : str, optional Date of the year after which to look for the end of the season. Should have the format '%m-%d'. + ``None`` removes that constraint. window : int Minimum number of days with temperature below threshold needed for evaluation. freq : str @@ -1053,7 +1055,8 @@ def growing_season_length( window : int Minimum number of days with temperature above threshold to mark the beginning and end of growing season. mid_date : str, optional - Date of the year after which to look for the end of the season. Should have the format '%m-%d'. + Date of the year before which the season must start and after which it can end. Should have the format '%m-%d'. + ``None`` removes that constraint. freq : str Resampling frequency. 
op : {">", ">=", "gt", "ge"} @@ -1137,7 +1140,7 @@ def frost_season_length( Minimum number of days with temperature below threshold to mark the beginning and end of frost season. mid_date : str, optional Date the must be included in the season. It is the earliest the end of the season can be. - If None, there is no limit. + ``None`` removes that constraint. thresh : Quantified Threshold temperature on which to base evaluation. freq : str @@ -1195,7 +1198,7 @@ def frost_free_season_start( tasmin: xarray.DataArray, thresh: Quantified = "0.0 degC", window: int = 5, - mid_date: DayOfYearStr = "07-01", + mid_date: DayOfYearStr | None = "07-01", op: str = ">=", freq: str = "YS", ) -> xarray.DataArray: @@ -1215,7 +1218,8 @@ def frost_free_season_start( window : int Minimum number of days with temperature above/under threshold to start/end the season. mid_date : DayOfYearStr, optional - A date what must be included in the season. + A date that must be included in the season. + ``None`` removes that constraint. op : {'>', '>=', 'ge', 'gt'} How to compare tasmin and the threshold. freq : str @@ -1255,7 +1259,7 @@ def frost_free_season_end( tasmin: xarray.DataArray, thresh: Quantified = "0.0 degC", window: int = 5, - mid_date: DayOfYearStr = "07-01", + mid_date: DayOfYearStr | None = "07-01", op: str = ">=", freq: str = "YS", ) -> xarray.DataArray: @@ -1275,7 +1279,7 @@ def frost_free_season_end( window : int Minimum number of days with temperature above/under threshold to start/end the season. mid_date : DayOfYearStr, optional - A date what must be included in the season. + A date what must be included in the season. ``None`` removes that constraint. op : {'>', '>=', 'ge', 'gt'} How to compare tasmin and the threshold. freq : str @@ -1342,7 +1346,7 @@ def frost_free_season_length( window : int Minimum number of days with temperature above/under threshold to start/end the season. mid_date : DayOfYearStr, optional - A date what must be included in the season. 
+ A date that must be included in the season. ``None`` removes that constraint. op : {'>', '>=', 'ge', 'gt'} How to compare tasmin and the threshold. freq : str diff --git a/xclim/indices/run_length.py b/xclim/indices/run_length.py index 47ec810cf..f1bf6b0cb 100644 --- a/xclim/indices/run_length.py +++ b/xclim/indices/run_length.py @@ -893,7 +893,10 @@ def season( Returns ------- xr.Dataset - A Dataset with three variables : start, end, and length of the season. + Dataset variables: + start : start of the season (index or units depending on ``coord``) + end : end of the season (index or units depending on ``coord``) + length : length of the season (in number of elements along ``dim``) Notes ----- @@ -975,8 +978,6 @@ def season_length( ) -> xr.DataArray: """Length of a season. - See :py:func:`season`. - Parameters ---------- da : xr.DataArray From 074fd54419e09e1f0a0838a19130e16cd57b50a0 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 1 Aug 2024 15:17:08 -0400 Subject: [PATCH 12/13] Change arg in test too --- tests/test_run_length.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/test_run_length.py b/tests/test_run_length.py index da0db9efa..b0dc1501a 100644 --- a/tests/test_run_length.py +++ b/tests/test_run_length.py @@ -492,7 +492,7 @@ def test_season_length(self, tas_series, date, end, expected, use_dask, ufunc): runs, window=1, dim="time", - date=date, + mid_date=date, ) np.testing.assert_array_equal(np.mean(out.load()), expected) @@ -625,7 +625,7 @@ def test_run_with_dates_different_calendars(self, calendar, expected): out = ( (tas > 0) .resample(time="YS-MAR") - .map(rl.season_length, date="03-02", window=2) + .map(rl.season_length, mid_date="03-02", window=2) ) np.testing.assert_array_equal(out.values[1:], [250, 250]) @@ -643,9 +643,7 @@ def test_run_with_dates_different_calendars(self, calendar, expected): ) np.testing.assert_array_equal(out.values[1:], np.array(expected) + 1) - @pytest.mark.parametrize( - "func", 
[rl.first_run_after_date, rl.season_length, rl.run_end_after_date] - ) + @pytest.mark.parametrize("func", [rl.first_run_after_date, rl.run_end_after_date]) def test_too_many_dates(self, func, tas_series): tas = tas_series(np.zeros(730), start="2000-01-01") with pytest.raises(ValueError, match="More than 1 instance of date"): From 986a6d8e352b15b6bd529229099ffd808833f07d Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 1 Aug 2024 16:08:14 -0400 Subject: [PATCH 13/13] generic indice infers context --- xclim/indices/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index 53747e8e0..282a4f40c 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -546,7 +546,7 @@ def season( xclim.indices.run_length.season_length xclim.indices.run_length.season_end """ - thresh = convert_units_to(thresh, data) + thresh = convert_units_to(thresh, data, context="infer") cond = compare(data, op, thresh, constrain=constrain) FUNC = {"start": rl.season_start, "end": rl.season_end, "length": rl.season_length} map_kwargs = dict(window=window, mid_date=mid_date)