From 48efef8b506f95894f5ccbfbfccb86bd5346b799 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric=20Dupuis?= Date: Wed, 12 Jun 2024 21:39:52 -0400 Subject: [PATCH 01/14] black ice events --- CHANGES.rst | 11 ++++- tests/test_indices.py | 28 +++++++++++- xclim/core/units.py | 1 + xclim/indices/_threshold.py | 89 ++++++++++++++++++++++++++++++++++++- xclim/indices/run_length.py | 34 +++++++++++--- 5 files changed, 154 insertions(+), 9 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 60120c214..722a7767a 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,7 +4,16 @@ Changelog v0.50.0 (unreleased) -------------------- -Contributors to this version: Trevor James Smith (:user:`Zeitsperre`). +Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Éric Dupuis (:user:`coxipi`). + +New indicators +^^^^^^^^^^^^^^ +* New indicator ``black_ice_events`` gives statistics about black ice sequences. + + +New features and enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +* ``xclim.indices.run_length.runs_with_holes`` allows to input a condition that must be met for a run to start and a second condition that must be met for the run to stop. 
Internal changes ^^^^^^^^^^^^^^^^ diff --git a/tests/test_indices.py b/tests/test_indices.py index e2a8ccc47..4e802ac0b 100644 --- a/tests/test_indices.py +++ b/tests/test_indices.py @@ -23,7 +23,7 @@ from xclim import indices as xci from xclim.core.calendar import convert_calendar, date_range, percentile_doy from xclim.core.options import set_options -from xclim.core.units import ValidationError, convert_units_to, units +from xclim.core.units import ValidationError, convert_units_to, rate2amount, units K2C = 273.15 @@ -338,6 +338,32 @@ def test_bedd(self, method, end_date, deg_days, max_deg_days): if method == "icclim": np.testing.assert_array_equal(bedd, bedd_high_lat) + def test_black_ice_events(self, open_dataset): + times = pd.date_range("1950-01-01", "1950-01-31", freq="D") + da = xr.DataArray( + np.zeros(len(times)), + dims={"time"}, + coords={"time": times}, + attrs={"units": "kg m-2 s-1"}, + ) + + # Two sequences separated by 3 days, which will be split with window_stop = 3 + da[0:10] = 1 + da[13:20] = 1 + out = xci.black_ice_events( + da, thresh="0.5 kg m-2 s-1", window_start=3, window_stop=3 + ) + assert (out.run_lengths == [10, 7]).all() + + # Two sequences separated by 2 days, which will form a single large event + da[10] = 1 + out = xci.black_ice_events( + da, thresh="0.5 kg m-2 s-1", window_start=3, window_stop=3 + ) + pram = rate2amount(da) + assert out.run_lengths == 20 + assert out.cumulative_precipitation == pram.sum() + def test_cool_night_index(self, open_dataset): ds = open_dataset("cmip5/tas_Amon_CanESM2_rcp85_r1i1p1_200701-200712.nc") ds = ds.rename(dict(tas="tasmin")) diff --git a/xclim/core/units.py b/xclim/core/units.py index 7f432eab7..12de3ac7a 100644 --- a/xclim/core/units.py +++ b/xclim/core/units.py @@ -409,6 +409,7 @@ def cf_conversion( FREQ_UNITS = { "D": "d", "W": "week", + "h": "h", } """ Resampling frequency units for :py:func:`xclim.core.units.infer_sampling_units`. 
diff --git a/xclim/indices/_threshold.py b/xclim/indices/_threshold.py index 150c55b04..16b3104fb 100644 --- a/xclim/indices/_threshold.py +++ b/xclim/indices/_threshold.py @@ -6,11 +6,12 @@ import numpy as np import xarray -from xclim.core.calendar import doy_from_string, get_calendar, select_time +from xclim.core.calendar import doy_from_string, get_calendar, parse_offset, select_time from xclim.core.missing import at_least_n_valid from xclim.core.units import ( convert_units_to, declare_units, + ensure_cf_units, pint2cfunits, rate2amount, str2pint, @@ -36,6 +37,7 @@ # -------------------------------------------------- # __all__ = [ + "black_ice_events", "calm_days", "cold_spell_days", "cold_spell_frequency", @@ -105,6 +107,91 @@ ] +@declare_units(pr="[precipitation]", thresh="[precipitation]") +def black_ice_events( + pr: xarray.DataArray, + thresh: Quantified = "1 kg m-2 d-1", + window_start: int = 3, + window_stop: int = 3, + freq: str | None = None, +) -> xarray.Dataset: + r"""Black ice events. + + Parameters + ---------- + pr : xarray.DataArray + Black ice precipitation (`prfr`) + thresh : Quantified + Threshold that must be exceeded to be considered an event + window_start: int + Number of time steps above the threshold required to start an event + window_stop : int + Number of time steps below the threshold required to stop an event + freq : str + Resampling frequency. + + Returns + ------- + xarray.DataArray, [time] + Number of days with average near-surface wind speed below threshold. 
+ """ + freq = xarray.infer_freq(pr.time) + mag, units, _, _ = parse_offset(freq) + # condition to respect for `window_start` time steps to start a run + thresh = convert_units_to(thresh, pr) + da_start = pr >= thresh + da_stop = not da_start + + # Get basic blocks to work with, our runs with holes and the lengths of those runs + # Series of ones indicating where we have continuous runs of black ice with pauses + # not exceeding `window_stop` + runs = rl.runs_with_holes(da_start, window_start, da_stop, window_stop) + + # Compute the length of black ice events + # I think int16 is safe enough + ds = rl.rle(runs).to_dataset(name="run_lengths") + ds["run_lengths"] = ds.run_lengths.astype(np.int16) + ds.run_lengths.attrs["units"] = "" + + # Time duration where the precipitation threshold is exceeded during an event + # (duration of complete run - duration of holes in the run ) + ds["precipitation_duration"] = ( + rl._cumsum_reset( + da_start.where(runs == 1), index="first", reset_on_zero=False + ).astype(np.int16) + * mag + ) + ds["precipitation_duration"].attrs["units"] = units + + # Cumulated precipitation in a given black ice event + pram = rate2amount(pr) + ds["cumulative_precipitation"] = rl._cumsum_reset( + pram.where(runs == 1), index="first", reset_on_zero=False + ) + ds["cumulative_precipitation"].attrs["units"] = pram.units + + # Reduce time dim to event dimension + mask = (ds.run_lengths > 0).any(dim=[d for d in ds.dims if d != "time"]) + ds = ds.where(mask).dropna(dim="time").rename({"time": "event"}) + + # start time : with current implementation of time reduction above, + # this is not necessary, but it could be if we choose another way. 
+ # ds["start"] = ds["time"].broadcast_like(ds) + + # Other indices that could be completely done outside of the function, no input needed anymore + ds["number_of_events"] = (ds["run_lengths"] > 0).sum(dim="event").astype(np.int16) + ds.number_of_events.attrs["units"] = "" + + ds["rate"] = ds["cumulative_precipitation"] / ds["precipitation_duration"] + units = ( + f"{ds['cumulative_precipitation'].units}/{ds['precipitation_duration'].units}" + ) + ds["rate"].attrs["units"] = ensure_cf_units(units) + + ds.attrs["units"] = "" + return ds + + @declare_units(sfcWind="[speed]", thresh="[speed]") def calm_days( sfcWind: xarray.DataArray, thresh: Quantified = "2 m s-1", freq: str = "MS" diff --git a/xclim/indices/run_length.py b/xclim/indices/run_length.py index b3015230a..2917d5150 100644 --- a/xclim/indices/run_length.py +++ b/xclim/indices/run_length.py @@ -118,10 +118,11 @@ def resample_and_rl( return out -def _cumsum_reset_on_zero( +def _cumsum_reset( da: xr.DataArray, dim: str = "time", index: str = "last", + reset_on_zero: bool = True, ) -> xr.DataArray: """Compute the cumulative sum for each series of numbers separated by zero. @@ -134,6 +135,9 @@ def _cumsum_reset_on_zero( index : {'first', 'last'} If 'first', the largest value of the cumulative sum is indexed with the first element in the run. If 'last'(default), with the last element in the run. + reset_on_zero : bool + If True, the cumulative sum is reset on each zero value of `da`. Otherwise, the cumulative sum resets + on NaNs. Default is True. Returns ------- @@ -145,7 +149,10 @@ def _cumsum_reset_on_zero( # Example: da == 100110111 -> cs_s == 100120123 cs = da.cumsum(dim=dim) # cumulative sum e.g. 111233456 - cs2 = cs.where(da == 0) # keep only numbers at positions of zeroes e.g. N11NN3NNN + cond = da == 0 if reset_on_zero else da.isnull() # reset condition + cs2 = cs.where( + cond + ) # keep only numbers at positions of zeroes e.g. N11NN3NNN (default) cs2[{dim: 0}] = 0 # put a zero in front e.g. 
011NN3NNN cs2 = cs2.ffill(dim=dim) # e.g. 011113333 out = cs - cs2 @@ -186,7 +193,7 @@ def rle( da = da[{dim: slice(None, None, -1)}] # Get cumulative sum for each series of 1, e.g. da == 100110111 -> cs_s == 100120123 - cs_s = _cumsum_reset_on_zero(da, dim) + cs_s = _cumsum_reset(da, dim) # Keep total length of each series (and also keep 0's), e.g. 100120123 -> 100N20NN3 # Keep numbers with a 0 to the right and also the last number @@ -495,7 +502,7 @@ def find_boundary_run(runs, position): else: # _cusum_reset_on_zero() is an intermediate step in rle, which is sufficient here - d = _cumsum_reset_on_zero(da, dim=dim, index=position) + d = _cumsum_reset(da, dim=dim, index=position) d = xr.where(d >= window, 1, 0) # for "first" run, return "first" element in the run (and conversely for "last" run) if freq is not None: @@ -744,8 +751,8 @@ def extract_events( da_start = da_start.astype(int).fillna(0) da_stop = da_stop.astype(int).fillna(0) - start_runs = _cumsum_reset_on_zero(da_start, dim=dim, index="first") - stop_runs = _cumsum_reset_on_zero(da_stop, dim=dim, index="first") + start_runs = _cumsum_reset(da_start, dim=dim, index="first") + stop_runs = _cumsum_reset(da_stop, dim=dim, index="first") start_positions = xr.where(start_runs >= window_start, 1, np.NaN) stop_positions = xr.where(stop_runs >= window_stop, 0, np.NaN) @@ -755,6 +762,21 @@ def extract_events( return runs +def runs_with_holes(da_start, window_start, da_stop, window_stop, dim="time"): + """Runs with holes""" + da_start = da_start.astype(int).fillna(0) + da_stop = da_stop.astype(int).fillna(0) + + start_runs = _cumsum_reset(da_start, dim=dim, index="first") + stop_runs = _cumsum_reset(da_stop, dim=dim, index="first") + start_positions = xr.where(start_runs >= window_start, 1, np.NaN) + stop_positions = xr.where(stop_runs >= window_stop, 0, np.NaN) + + # start positions (1) are f-filled until a stop position (0) is met + runs = stop_positions.combine_first(start_positions).ffill(dim=dim).fillna(0) + 
return runs + + def season( da: xr.DataArray, window: int, From 35b0f75754dddb9f693d75c596fcce515a430bae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric=20Dupuis?= Date: Wed, 12 Jun 2024 21:47:10 -0400 Subject: [PATCH 02/14] black ice -> freezing rain --- CHANGES.rst | 2 +- tests/test_indices.py | 6 +- xclim/indices/_threshold.py | 172 ++++++++++++++++++------------------ 3 files changed, 90 insertions(+), 90 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 722a7767a..e791b36c4 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -8,7 +8,7 @@ Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Éric Dup New indicators ^^^^^^^^^^^^^^ -* New indicator ``black_ice_events`` gives statistics about black ice sequences. +* New indicator ``freezing_rain_events`` gives statistics about freezing rain sequences. New features and enhancements diff --git a/tests/test_indices.py b/tests/test_indices.py index 4e802ac0b..48e43929e 100644 --- a/tests/test_indices.py +++ b/tests/test_indices.py @@ -338,7 +338,7 @@ def test_bedd(self, method, end_date, deg_days, max_deg_days): if method == "icclim": np.testing.assert_array_equal(bedd, bedd_high_lat) - def test_black_ice_events(self, open_dataset): + def test_freezing_rain_events(self, open_dataset): times = pd.date_range("1950-01-01", "1950-01-31", freq="D") da = xr.DataArray( np.zeros(len(times)), @@ -350,14 +350,14 @@ def test_black_ice_events(self, open_dataset): # Two sequences separated by 3 days, which will be split with window_stop = 3 da[0:10] = 1 da[13:20] = 1 - out = xci.black_ice_events( + out = xci.freezing_rain_events( da, thresh="0.5 kg m-2 s-1", window_start=3, window_stop=3 ) assert (out.run_lengths == [10, 7]).all() # Two sequences separated by 2 days, which will form a single large event da[10] = 1 - out = xci.black_ice_events( + out = xci.freezing_rain_events( da, thresh="0.5 kg m-2 s-1", window_start=3, window_stop=3 ) pram = rate2amount(da) diff --git a/xclim/indices/_threshold.py 
b/xclim/indices/_threshold.py index 16b3104fb..3a7bd3cf1 100644 --- a/xclim/indices/_threshold.py +++ b/xclim/indices/_threshold.py @@ -37,7 +37,6 @@ # -------------------------------------------------- # __all__ = [ - "black_ice_events", "calm_days", "cold_spell_days", "cold_spell_frequency", @@ -54,6 +53,7 @@ "first_day_temperature_above", "first_day_temperature_below", "first_snowfall", + "freezing_rain_events", "frost_free_season_end", "frost_free_season_length", "frost_free_season_start", @@ -107,91 +107,6 @@ ] -@declare_units(pr="[precipitation]", thresh="[precipitation]") -def black_ice_events( - pr: xarray.DataArray, - thresh: Quantified = "1 kg m-2 d-1", - window_start: int = 3, - window_stop: int = 3, - freq: str | None = None, -) -> xarray.Dataset: - r"""Black ice events. - - Parameters - ---------- - pr : xarray.DataArray - Black ice precipitation (`prfr`) - thresh : Quantified - Threshold that must be exceeded to be considered an event - window_start: int - Number of time steps above the threshold required to start an event - window_stop : int - Number of time steps below the threshold required to stop an event - freq : str - Resampling frequency. - - Returns - ------- - xarray.DataArray, [time] - Number of days with average near-surface wind speed below threshold. 
- """ - freq = xarray.infer_freq(pr.time) - mag, units, _, _ = parse_offset(freq) - # condition to respect for `window_start` time steps to start a run - thresh = convert_units_to(thresh, pr) - da_start = pr >= thresh - da_stop = not da_start - - # Get basic blocks to work with, our runs with holes and the lengths of those runs - # Series of ones indicating where we have continuous runs of black ice with pauses - # not exceeding `window_stop` - runs = rl.runs_with_holes(da_start, window_start, da_stop, window_stop) - - # Compute the length of black ice events - # I think int16 is safe enough - ds = rl.rle(runs).to_dataset(name="run_lengths") - ds["run_lengths"] = ds.run_lengths.astype(np.int16) - ds.run_lengths.attrs["units"] = "" - - # Time duration where the precipitation threshold is exceeded during an event - # (duration of complete run - duration of holes in the run ) - ds["precipitation_duration"] = ( - rl._cumsum_reset( - da_start.where(runs == 1), index="first", reset_on_zero=False - ).astype(np.int16) - * mag - ) - ds["precipitation_duration"].attrs["units"] = units - - # Cumulated precipitation in a given black ice event - pram = rate2amount(pr) - ds["cumulative_precipitation"] = rl._cumsum_reset( - pram.where(runs == 1), index="first", reset_on_zero=False - ) - ds["cumulative_precipitation"].attrs["units"] = pram.units - - # Reduce time dim to event dimension - mask = (ds.run_lengths > 0).any(dim=[d for d in ds.dims if d != "time"]) - ds = ds.where(mask).dropna(dim="time").rename({"time": "event"}) - - # start time : with current implementation of time reduction above, - # this is not necessary, but it could be if we choose another way. 
- # ds["start"] = ds["time"].broadcast_like(ds) - - # Other indices that could be completely done outside of the function, no input needed anymore - ds["number_of_events"] = (ds["run_lengths"] > 0).sum(dim="event").astype(np.int16) - ds.number_of_events.attrs["units"] = "" - - ds["rate"] = ds["cumulative_precipitation"] / ds["precipitation_duration"] - units = ( - f"{ds['cumulative_precipitation'].units}/{ds['precipitation_duration'].units}" - ) - ds["rate"].attrs["units"] = ensure_cf_units(units) - - ds.attrs["units"] = "" - return ds - - @declare_units(sfcWind="[speed]", thresh="[speed]") def calm_days( sfcWind: xarray.DataArray, thresh: Quantified = "2 m s-1", freq: str = "MS" @@ -2743,6 +2658,91 @@ def wetdays_prop( return fwd +@declare_units(pr="[precipitation]", thresh="[precipitation]") +def freezing_rain_events( + pr: xarray.DataArray, + thresh: Quantified = "1 kg m-2 d-1", + window_start: int = 3, + window_stop: int = 3, + freq: str | None = None, +) -> xarray.Dataset: + r"""Black ice events. + + Parameters + ---------- + pr : xarray.DataArray + Black ice precipitation (`prfr`) + thresh : Quantified + Threshold that must be exceeded to be considered an event + window_start: int + Number of time steps above the threshold required to start an event + window_stop : int + Number of time steps below the threshold required to stop an event + freq : str + Resampling frequency. + + Returns + ------- + xarray.DataArray, [time] + Number of days with average near-surface wind speed below threshold. 
+ """ + freq = xarray.infer_freq(pr.time) + mag, units, _, _ = parse_offset(freq) + # condition to respect for `window_start` time steps to start a run + thresh = convert_units_to(thresh, pr) + da_start = pr >= thresh + da_stop = (1 - da_start).astype(bool) + + # Get basic blocks to work with, our runs with holes and the lengths of those runs + # Series of ones indicating where we have continuous runs of freezing rain with pauses + # not exceeding `window_stop` + runs = rl.runs_with_holes(da_start, window_start, da_stop, window_stop) + + # Compute the length of freezing rain events + # I think int16 is safe enough + ds = rl.rle(runs).to_dataset(name="run_lengths") + ds["run_lengths"] = ds.run_lengths.astype(np.int16) + ds.run_lengths.attrs["units"] = "" + + # Time duration where the precipitation threshold is exceeded during an event + # (duration of complete run - duration of holes in the run ) + ds["precipitation_duration"] = ( + rl._cumsum_reset( + da_start.where(runs == 1), index="first", reset_on_zero=False + ).astype(np.int16) + * mag + ) + ds["precipitation_duration"].attrs["units"] = units + + # Cumulated precipitation in a given freezing rain event + pram = rate2amount(pr) + ds["cumulative_precipitation"] = rl._cumsum_reset( + pram.where(runs == 1), index="first", reset_on_zero=False + ) + ds["cumulative_precipitation"].attrs["units"] = pram.units + + # Reduce time dim to event dimension + mask = (ds.run_lengths > 0).any(dim=[d for d in ds.dims if d != "time"]) + ds = ds.where(mask).dropna(dim="time").rename({"time": "event"}) + + # start time : with current implementation of time reduction above, + # this is not necessary, but it could be if we choose another way. 
+ # ds["start"] = ds["time"].broadcast_like(ds) + + # Other indices that could be completely done outside of the function, no input needed anymore + ds["number_of_events"] = (ds["run_lengths"] > 0).sum(dim="event").astype(np.int16) + ds.number_of_events.attrs["units"] = "" + + ds["rate"] = ds["cumulative_precipitation"] / ds["precipitation_duration"] + units = ( + f"{ds['cumulative_precipitation'].units}/{ds['precipitation_duration'].units}" + ) + ds["rate"].attrs["units"] = ensure_cf_units(units) + + ds.attrs["units"] = "" + return ds + + @declare_units(tasmin="[temperature]", thresh="[temperature]") def maximum_consecutive_frost_days( tasmin: xarray.DataArray, From 44d868112529ef8c6fd38de578eac4a1a1205a1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric=20Dupuis?= Date: Wed, 12 Jun 2024 21:50:12 -0400 Subject: [PATCH 03/14] update CHANGES --- CHANGES.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index bdedc28b1..d4cac4682 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -8,14 +8,14 @@ Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Éric Dup New indicators ^^^^^^^^^^^^^^ -* New indicator ``freezing_rain_events`` gives statistics about freezing rain sequences. +* New indicator ``freezing_rain_events`` gives statistics about freezing rain sequences. (:pull:`1778`). New features and enhancements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ * New properties: Bivariate Spell Length (``xclim.sdba.properties.bivariate_spell_length``), generalized spell lengths with an argument for `window`, and specific spell lengths with `window` fixed to 1 (``xclim.sdba.propertiies.threshold_count``, ``xclim.sdba.propertiies.bivariate_threshold_count``). (:pull:`1758`). * New option `normalize` in ``sdba.measures.taylordiagram`` to obtain normalized Taylor diagrams (divide standard deviations by standard deviation of the reference). (:pull:`1764`). 
-* ``xclim.indices.run_length.runs_with_holes`` allows to input a condition that must be met for a run to start and a second condition that must be met for the run to stop. +* ``xclim.indices.run_length.runs_with_holes`` allows to input a condition that must be met for a run to start and a second condition that must be met for the run to stop. (:pull:`1778`). Breaking changes ^^^^^^^^^^^^^^^^ From d036823472e0aa6057346480f5d62c2d562fe289 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric=20Dupuis?= Date: Fri, 14 Jun 2024 19:57:51 -0400 Subject: [PATCH 04/14] event dimension with padding --- tests/test_indices.py | 8 +++--- xclim/indices/_threshold.py | 56 +++++++++++++++++++++++++++++-------- 2 files changed, 48 insertions(+), 16 deletions(-) diff --git a/tests/test_indices.py b/tests/test_indices.py index 8305ba727..297175f51 100644 --- a/tests/test_indices.py +++ b/tests/test_indices.py @@ -353,16 +353,16 @@ def test_freezing_rain_events(self, open_dataset): out = xci.freezing_rain_events( da, thresh="0.5 kg m-2 s-1", window_start=3, window_stop=3 ) - assert (out.run_lengths == [10, 7]).all() + assert (out.run_lengths.values[0:2] == [10, 7]).all() # Two sequences separated by 2 days, which will form a single large event - da[10] = 1 + da[12] = 1 out = xci.freezing_rain_events( da, thresh="0.5 kg m-2 s-1", window_start=3, window_stop=3 ) pram = rate2amount(da) - assert out.run_lengths == 20 - assert out.cumulative_precipitation == pram.sum() + assert out.run_lengths.values[0] == 20 + assert (out.cumulative_precipitation - pram.sum()).sum() == 0 def test_cool_night_index(self, open_dataset): ds = open_dataset("cmip5/tas_Amon_CanESM2_rcp85_r1i1p1_200701-200712.nc") diff --git a/xclim/indices/_threshold.py b/xclim/indices/_threshold.py index 6b7a29561..6ed7601f9 100644 --- a/xclim/indices/_threshold.py +++ b/xclim/indices/_threshold.py @@ -2664,12 +2664,12 @@ def freezing_rain_events( window_stop: int = 3, freq: str | None = None, ) -> xarray.Dataset: - r"""Black ice 
events. + r"""Freezing rain events. Parameters ---------- pr : xarray.DataArray - Black ice precipitation (`prfr`) + Freezing precipitation (`prfr`) thresh : Quantified Threshold that must be exceeded to be considered an event window_start: int @@ -2681,8 +2681,7 @@ def freezing_rain_events( Returns ------- - xarray.DataArray, [time] - Number of days with average near-surface wind speed below threshold. + xarray.Dataset """ freq = xarray.infer_freq(pr.time) mag, units, _, _ = parse_offset(freq) @@ -2712,25 +2711,58 @@ def freezing_rain_events( ) ds["precipitation_duration"].attrs["units"] = units - # Cumulated precipitation in a given freezing rain event + # # Cumulated precipitation in a given freezing rain event pram = rate2amount(pr) ds["cumulative_precipitation"] = rl._cumsum_reset( pram.where(runs == 1), index="first", reset_on_zero=False ) ds["cumulative_precipitation"].attrs["units"] = pram.units - # Reduce time dim to event dimension - mask = (ds.run_lengths > 0).any(dim=[d for d in ds.dims if d != "time"]) - ds = ds.where(mask).dropna(dim="time").rename({"time": "event"}) - - # start time : with current implementation of time reduction above, - # this is not necessary, but it could be if we choose another way. - # ds["start"] = ds["time"].broadcast_like(ds) + # Keep time as a variable, it will be used to keep start of events + ds["start"] = ds["time"].broadcast_like(ds) # .astype(int) + # I have to convert it to an integer for the filtering, time object won't do + # Since there are conversion needing a time object earlier, I think it's ok + # to assume this here? 
+ time_min = ds.start.min() + ds["start"] = (ds.start - time_min).astype("timedelta64[s]").astype(int) + + # Filter events: Reduce time dimension + def _filter_events(da, rl, max_event_number): + out = np.full(max_event_number, np.NaN) + events_start = da[rl > 0] + out[: len(events_start)] = events_start + return out + + max_event_number = int(np.ceil(pr.time.size / (window_start + window_stop))) + v_attrs = {v: ds[v].attrs for v in ds.data_vars} + ds = xarray.apply_ufunc( + _filter_events, + ds, + ds.run_lengths, + input_core_dims=[["time"], ["time"]], + output_core_dims=[["event"]], + kwargs=dict(max_event_number=max_event_number), + output_sizes={"event": max_event_number}, + dask="parallelized", + vectorize=True, + ).assign_attrs(ds.attrs) + + ds["event"] = np.arange(1, ds.event.size + 1) + for v in ds.data_vars: + ds[v].attrs = v_attrs[v] + + # convert back start to a time + ds["start"] = time_min.astype("datetime64[ns]") + ds["start"].astype( + "timedelta64[ns]" + ) # Other indices that could be completely done outside of the function, no input needed anymore + + # number of events ds["number_of_events"] = (ds["run_lengths"] > 0).sum(dim="event").astype(np.int16) ds.number_of_events.attrs["units"] = "" + # mean rate of precipitation during event ds["rate"] = ds["cumulative_precipitation"] / ds["precipitation_duration"] units = ( f"{ds['cumulative_precipitation'].units}/{ds['precipitation_duration'].units}" From d556d95a560a156bc8c705a9ecb57accf8676df5 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Fri, 6 Sep 2024 17:44:39 -0400 Subject: [PATCH 05/14] first pass to generalize --- xclim/indices/_threshold.py | 121 +-------------------------------- xclim/indices/generic.py | 132 +++++++++++++++++++++++++++++++++++- 2 files changed, 132 insertions(+), 121 deletions(-) diff --git a/xclim/indices/_threshold.py b/xclim/indices/_threshold.py index a6989aaca..2e7d9ea8d 100644 --- a/xclim/indices/_threshold.py +++ b/xclim/indices/_threshold.py @@ -8,12 +8,11 @@ 
import xarray from xclim.core import DayOfYearStr, Quantified -from xclim.core.calendar import doy_from_string, get_calendar, parse_offset +from xclim.core.calendar import doy_from_string, get_calendar from xclim.core.missing import at_least_n_valid from xclim.core.units import ( convert_units_to, declare_units, - ensure_cf_units, pint2cfunits, rate2amount, str2pint, @@ -55,7 +54,6 @@ "first_day_temperature_above", "first_day_temperature_below", "first_snowfall", - "freezing_rain_events", "frost_free_season_end", "frost_free_season_length", "frost_free_season_start", @@ -2624,123 +2622,6 @@ def wetdays_prop( return fwd -@declare_units(pr="[precipitation]", thresh="[precipitation]") -def freezing_rain_events( - pr: xarray.DataArray, - thresh: Quantified = "1 kg m-2 d-1", - window_start: int = 3, - window_stop: int = 3, - freq: str | None = None, -) -> xarray.Dataset: - r"""Freezing rain events. - - Parameters - ---------- - pr : xarray.DataArray - Freezing precipitation (`prfr`) - thresh : Quantified - Threshold that must be exceeded to be considered an event - window_start: int - Number of time steps above the threshold required to start an event - window_stop : int - Number of time steps below the threshold required to stop an event - freq : str - Resampling frequency. 
- - Returns - ------- - xarray.Dataset - """ - freq = xarray.infer_freq(pr.time) - mag, units, _, _ = parse_offset(freq) - # condition to respect for `window_start` time steps to start a run - thresh = convert_units_to(thresh, pr) - da_start = pr >= thresh - da_stop = (1 - da_start).astype(bool) - - # Get basic blocks to work with, our runs with holes and the lengths of those runs - # Series of ones indicating where we have continuous runs of freezing rain with pauses - # not exceeding `window_stop` - runs = rl.runs_with_holes(da_start, window_start, da_stop, window_stop) - - # Compute the length of freezing rain events - # I think int16 is safe enough - ds = rl.rle(runs).to_dataset(name="run_lengths") - ds["run_lengths"] = ds.run_lengths.astype(np.int16) - ds.run_lengths.attrs["units"] = "" - - # Time duration where the precipitation threshold is exceeded during an event - # (duration of complete run - duration of holes in the run ) - ds["precipitation_duration"] = ( - rl._cumsum_reset( - da_start.where(runs == 1), index="first", reset_on_zero=False - ).astype(np.int16) - * mag - ) - ds["precipitation_duration"].attrs["units"] = units - - # # Cumulated precipitation in a given freezing rain event - pram = rate2amount(pr) - ds["cumulative_precipitation"] = rl._cumsum_reset( - pram.where(runs == 1), index="first", reset_on_zero=False - ) - ds["cumulative_precipitation"].attrs["units"] = pram.units - - # Keep time as a variable, it will be used to keep start of events - ds["start"] = ds["time"].broadcast_like(ds) # .astype(int) - # I have to convert it to an integer for the filtering, time object won't do - # Since there are conversion needing a time object earlier, I think it's ok - # to assume this here? 
- time_min = ds.start.min() - ds["start"] = (ds.start - time_min).astype("timedelta64[s]").astype(int) - - # Filter events: Reduce time dimension - def _filter_events(da, rl, max_event_number): - out = np.full(max_event_number, np.NaN) - events_start = da[rl > 0] - out[: len(events_start)] = events_start - return out - - max_event_number = int(np.ceil(pr.time.size / (window_start + window_stop))) - v_attrs = {v: ds[v].attrs for v in ds.data_vars} - ds = xarray.apply_ufunc( - _filter_events, - ds, - ds.run_lengths, - input_core_dims=[["time"], ["time"]], - output_core_dims=[["event"]], - kwargs=dict(max_event_number=max_event_number), - output_sizes={"event": max_event_number}, - dask="parallelized", - vectorize=True, - ).assign_attrs(ds.attrs) - - ds["event"] = np.arange(1, ds.event.size + 1) - for v in ds.data_vars: - ds[v].attrs = v_attrs[v] - - # convert back start to a time - ds["start"] = time_min.astype("datetime64[ns]") + ds["start"].astype( - "timedelta64[ns]" - ) - - # Other indices that could be completely done outside of the function, no input needed anymore - - # number of events - ds["number_of_events"] = (ds["run_lengths"] > 0).sum(dim="event").astype(np.int16) - ds.number_of_events.attrs["units"] = "" - - # mean rate of precipitation during event - ds["rate"] = ds["cumulative_precipitation"] / ds["precipitation_duration"] - units = ( - f"{ds['cumulative_precipitation'].units}/{ds['precipitation_duration'].units}" - ) - ds["rate"].attrs["units"] = ensure_cf_units(units) - - ds.attrs["units"] = "" - return ds - - @declare_units(tasmin="[temperature]", thresh="[temperature]") def maximum_consecutive_frost_days( tasmin: xarray.DataArray, diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index 43bc85720..42080a21d 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -18,12 +18,18 @@ from xarray.coding.cftime_offsets import _MONTH_ABBREVIATIONS # noqa from xclim.core import DayOfYearStr, Quantified -from 
xclim.core.calendar import doy_to_days_since, get_calendar, select_time +from xclim.core.calendar import ( + doy_to_days_since, + get_calendar, + parse_offset, + select_time, +) from xclim.core.units import ( convert_units_to, declare_relative_units, infer_context, pint2cfunits, + rate2amount, str2pint, to_agg_units, ) @@ -1474,3 +1480,127 @@ def detrend( trend = xr.polyval(ds[dim], coeff.polyfit_coefficients) with xr.set_options(keep_attrs=True): return ds - trend + + +@declare_relative_units(thresh="") +def thresholded_events( + data: xr.DataArray, + thresh: Quantified = "1 kg m-2 d-1", + window_start: int = 3, + window_stop: int = 3, + freq: str | None = None, + data_is_rate: bool = False, +) -> xr.Dataset: + r"""Thresholded events. + + Parameters + ---------- + data : xr.DataArray + Variable. + thresh : Quantified + Threshold that must be exceeded to be considered an event + window_start: int + Number of time steps above the threshold required to start an event + window_stop : int + Number of time steps below the threshold required to stop an event + freq : str + Resampling frequency. + data_is_rate : bool + True if the data is a rate that needs to be converted to an amount + when computing an accumulation. 
+ + Returns + ------- + xr.Dataset + """ + freq = xr.infer_freq(data.time) + mag, units, _, _ = parse_offset(freq) + # condition to respect for `window_start` time steps to start a run + thresh = convert_units_to(thresh, data) + da_start = data >= thresh + da_stop = (1 - da_start).astype(bool) + + # Get basic blocks to work with, our runs with holes and the lengths of those runs + # Series of ones indicating where we have continuous runs of freezing rain with pauses + # not exceeding `window_stop` + runs = rl.runs_with_holes(da_start, window_start, da_stop, window_stop) + + # Compute the length of freezing rain events + # I think int16 is safe enough + ds = rl.rle(runs).astype(np.int16).to_dataset(name="run_lengths") + ds.run_lengths.attrs["units"] = "" + + # Time duration where the precipitation threshold is exceeded during an event + # (duration of complete run - duration of holes in the run ) + ds["effective_duration"] = ( + rl._cumsum_reset( + da_start.where(runs == 1), index="first", reset_on_zero=False + ).astype(np.int16) + * mag + ) + ds["effective_duration"].attrs["units"] = units + + # # Cumulated precipitation in a given freezing rain event + if data_is_rate: + dataam = rate2amount(data) + else: + dataam = data + ds["event_accumulation"] = rl._cumsum_reset( + dataam.where(runs == 1), index="first", reset_on_zero=False + ) + ds["event_accumulation"].attrs["units"] = dataam.units + + # Keep time as a variable, it will be used to keep start of events + ds["start"] = ds["time"].broadcast_like(ds) # .astype(int) + # I have to convert it to an integer for the filtering, time object won't do + # Since there are conversion needing a time object earlier, I think it's ok + # to assume this here? 
+ time_min = ds.start.min() + ds["start"] = (ds.start - time_min).astype("timedelta64[s]").astype(int) + + # Filter events: Reduce time dimension + def _filter_events(da, rl, max_event_number): + out = np.full(max_event_number, np.NaN) + events_start = da[rl > 0] + out[: len(events_start)] = events_start + return out + + max_event_number = int(np.ceil(data.time.size / (window_start + window_stop))) + v_attrs = {v: ds[v].attrs for v in ds.data_vars} + ds = xr.apply_ufunc( + _filter_events, + ds, + ds.run_lengths, + input_core_dims=[["time"], ["time"]], + output_core_dims=[["event"]], + kwargs=dict(max_event_number=max_event_number), + output_sizes={"event": max_event_number}, + dask="parallelized", + vectorize=True, + ).assign_attrs(ds.attrs) + + ds["event"] = np.arange(1, ds.event.size + 1) + for v in ds.data_vars: + ds[v].attrs = v_attrs[v] + + # convert back start to a time + # TODO fix for calendars + ds["start"] = time_min.astype("datetime64[ns]") + ds["start"].astype( + "timedelta64[ns]" + ) + return ds + + # # Other indices that could be completely done outside of the function, no input needed anymore + # # number of events + # ds["number_of_events"] = (ds["run_lengths"] > 0).sum(dim="event").astype(np.int16) + # ds.number_of_events.attrs["units"] = "" + + # # mean rate of precipitation during event + # ds["rate"] = ds["cumulative_precipitation"] / ds["precipitation_duration"] + # units = ( + # f"{ds['cumulative_precipitation'].units}/{ds['precipitation_duration'].units}" + # ) + # ds["rate"].attrs["units"] = ensure_cf_units(units) + + # ds.attrs["units"] = "" + # return ds From d27ba5d500e578f870ba42deaa0dfeada246b20b Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 2 Oct 2024 17:36:03 -0400 Subject: [PATCH 06/14] cleaner? 
--- xclim/indices/generic.py | 53 +++++++++++++++++-------------------- xclim/indices/run_length.py | 24 +++-------------- 2 files changed, 29 insertions(+), 48 deletions(-) diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index f2455eef5..3cc514c65 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -20,7 +20,6 @@ from xclim.core.calendar import ( doy_to_days_since, get_calendar, - parse_offset, select_time, ) from xclim.core.units import ( @@ -1481,13 +1480,14 @@ def detrend( return ds - trend -@declare_relative_units(thresh="") +# @declare_relative_units(thresh="") + + def thresholded_events( data: xr.DataArray, - thresh: Quantified = "1 kg m-2 d-1", - window_start: int = 3, - window_stop: int = 3, - freq: str | None = None, + thresh: Quantified, + window_start: int, + window_stop: int, data_is_rate: bool = False, ) -> xr.Dataset: r"""Thresholded events. @@ -1502,8 +1502,6 @@ def thresholded_events( Number of time steps above the threshold required to start an event window_stop : int Number of time steps below the threshold required to stop an event - freq : str - Resampling frequency. data_is_rate : bool True if the data is a rate that needs to be converted to an amount when computing an accumulation. 
@@ -1512,12 +1510,10 @@ def thresholded_events( ------- xr.Dataset """ - freq = xr.infer_freq(data.time) - mag, units, _, _ = parse_offset(freq) # condition to respect for `window_start` time steps to start a run thresh = convert_units_to(thresh, data) da_start = data >= thresh - da_stop = (1 - da_start).astype(bool) + da_stop = ~da_start # Get basic blocks to work with, our runs with holes and the lengths of those runs # Series of ones indicating where we have continuous runs of freezing rain with pauses @@ -1526,28 +1522,25 @@ def thresholded_events( # Compute the length of freezing rain events # I think int16 is safe enough - ds = rl.rle(runs).astype(np.int16).to_dataset(name="run_lengths") - ds.run_lengths.attrs["units"] = "" + ds = rl.rle(runs).astype(np.int16).to_dataset(name="length") + ds.length.attrs["units"] = "1" # Time duration where the precipitation threshold is exceeded during an event # (duration of complete run - duration of holes in the run ) - ds["effective_duration"] = ( - rl._cumsum_reset( - da_start.where(runs == 1), index="first", reset_on_zero=False - ).astype(np.int16) - * mag - ) - ds["effective_duration"].attrs["units"] = units + ds["effective_length"] = rl._cumsum_reset( + da_start.where(runs == 1), index="first", reset_on_zero=False + ).astype(np.int16) + ds["effective_length"].attrs["units"] = "1" # # Cumulated precipitation in a given freezing rain event if data_is_rate: dataam = rate2amount(data) else: dataam = data - ds["event_accumulation"] = rl._cumsum_reset( + ds["accumulation"] = rl._cumsum_reset( dataam.where(runs == 1), index="first", reset_on_zero=False ) - ds["event_accumulation"].attrs["units"] = dataam.units + ds["accumulation"].attrs["units"] = dataam.units # Keep time as a variable, it will be used to keep start of events ds["start"] = ds["time"].broadcast_like(ds) # .astype(int) @@ -1555,11 +1548,13 @@ def thresholded_events( # Since there are conversion needing a time object earlier, I think it's ok # to assume this 
here? time_min = ds.start.min() - ds["start"] = (ds.start - time_min).astype("timedelta64[s]").astype(int) + ds["start"] = ds.start.copy( + data=(ds.start - time_min).values.astype("timedelta64[s]").astype(int) + ) # Filter events: Reduce time dimension def _filter_events(da, rl, max_event_number): - out = np.full(max_event_number, np.NaN) + out = np.full(max_event_number, np.nan) events_start = da[rl > 0] out[: len(events_start)] = events_start return out @@ -1569,24 +1564,26 @@ def _filter_events(da, rl, max_event_number): ds = xr.apply_ufunc( _filter_events, ds, - ds.run_lengths, + ds.length, input_core_dims=[["time"], ["time"]], output_core_dims=[["event"]], kwargs=dict(max_event_number=max_event_number), - output_sizes={"event": max_event_number}, + dask_gufunc_kwargs=dict(output_sizes={"event": max_event_number}), dask="parallelized", vectorize=True, ).assign_attrs(ds.attrs) ds["event"] = np.arange(1, ds.event.size + 1) + ds.event.assign_attrs(units="") for v in ds.data_vars: ds[v].attrs = v_attrs[v] # convert back start to a time # TODO fix for calendars - ds["start"] = time_min.astype("datetime64[ns]") + ds["start"].astype( - "timedelta64[ns]" + ds["start"] = time_min + ds.start.copy( + data=ds.start.values.astype("timedelta64[s]").astype("timedelta64[ns]") ) + ds["start"].assign_attrs(units="") return ds # # Other indices that could be completely done outside of the function, no input needed anymore diff --git a/xclim/indices/run_length.py b/xclim/indices/run_length.py index 04aeadba0..5ee85c254 100644 --- a/xclim/indices/run_length.py +++ b/xclim/indices/run_length.py @@ -717,7 +717,7 @@ def get_out(rls): return da.copy(data=out.transpose(*da.dims).data) -def extract_events( +def runs_with_holes( da_start: xr.DataArray, window_start: int, da_stop: xr.DataArray, @@ -747,31 +747,15 @@ def extract_events( Notes ----- A season (as defined in ``season``) could be considered as an event with `window_stop == window_start` and `da_stop == 1 - da_start`, - 
although it has more constraints on when to start and stop a run through the `date` argument. + although it has more constraints on when to start and stop a run through the `date` argument and only one season can be found. """ da_start = da_start.astype(int).fillna(0) da_stop = da_stop.astype(int).fillna(0) start_runs = _cumsum_reset(da_start, dim=dim, index="first") stop_runs = _cumsum_reset(da_stop, dim=dim, index="first") - start_positions = xr.where(start_runs >= window_start, 1, np.NaN) - stop_positions = xr.where(stop_runs >= window_stop, 0, np.NaN) - - # start positions (1) are f-filled until a stop position (0) is met - runs = stop_positions.combine_first(start_positions).ffill(dim=dim).fillna(0) - - return runs - - -def runs_with_holes(da_start, window_start, da_stop, window_stop, dim="time"): - """Runs with holes""" - da_start = da_start.astype(int).fillna(0) - da_stop = da_stop.astype(int).fillna(0) - - start_runs = _cumsum_reset(da_start, dim=dim, index="first") - stop_runs = _cumsum_reset(da_stop, dim=dim, index="first") - start_positions = xr.where(start_runs >= window_start, 1, np.NaN) - stop_positions = xr.where(stop_runs >= window_stop, 0, np.NaN) + start_positions = xr.where(start_runs >= window_start, 1, np.nan) + stop_positions = xr.where(stop_runs >= window_stop, 0, np.nan) # start positions (1) are f-filled until a stop position (0) is met runs = stop_positions.combine_first(start_positions).ffill(dim=dim).fillna(0) From 3a4ebbba15df8b83f4cbc913ba83466580e1c69d Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 3 Oct 2024 18:14:57 -0400 Subject: [PATCH 07/14] Working find events --- tests/test_generic.py | 304 ++++++++++++++++++++++++++++++++++++ xclim/indices/generic.py | 197 ++++++++--------------- xclim/indices/run_length.py | 142 +++++++++++++++++ 3 files changed, 514 insertions(+), 129 deletions(-) diff --git a/tests/test_generic.py b/tests/test_generic.py index 0054dd935..9caaf0de9 100644 --- a/tests/test_generic.py +++ 
b/tests/test_generic.py @@ -8,6 +8,7 @@ from xclim.core.calendar import doy_to_days_since, select_time from xclim.indices import generic +from xclim.testing.helpers import assert_lazy K2C = 273.15 @@ -768,3 +769,306 @@ def test_spell_length_statistics_multi(tasmin_series, tasmax_series): ) xr.testing.assert_equal(outs, outm) np.testing.assert_allclose(outc, 1) + + +class TestThresholdedEvents: + + @pytest.mark.parametrize("use_dask", [True, False]) + def test_simple(self, pr_series, use_dask): + arr = np.array( + [ + 0, + 0, + 0, + 1, + 2, + 3, + 0, + 3, + 3, + 10, + 0, + 0, + 0, + 0, + 0, + 1, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 3, + 3, + 2, + 0, + 0, + 0, + 2, + 0, + 0, + 0, + 0, + ] + ) + pr = pr_series(arr, start="2000-01-01", units="mm") + if use_dask: + pr = pr.chunk(-1) + + with assert_lazy: + out = generic.thresholded_events( + pr, + thresh="1 mm", + op=">=", + window=3, + ) + + assert out.event.size == np.ceil(arr.size / 6) + out = out.load().dropna("event", how="all") + + np.testing.assert_array_equal(out.event_length, [7, 4, 4]) + np.testing.assert_array_equal(out.event_effective_length, [6, 4, 4]) + np.testing.assert_array_equal(out.event_sum, [22, 7, 9]) + np.testing.assert_array_equal( + out.event_start, + np.array( + ["2000-01-04", "2000-01-16", "2000-01-26"], dtype="datetime64[ns]" + ), + ) + + @pytest.mark.parametrize("use_dask", [True, False]) + def test_diff_windows(self, pr_series, use_dask): + arr = np.array( + [ + 0, + 0, + 0, + 1, + 2, + 3, + 0, + 3, + 3, + 10, + 0, + 0, + 0, + 0, + 0, + 1, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 3, + 3, + 2, + 0, + 0, + 0, + 2, + 0, + 0, + 0, + 0, + ] + ) + pr = pr_series(arr, start="2000-01-01", units="mm") + if use_dask: + pr = pr.chunk(-1) + + # different window stop + out = ( + generic.thresholded_events( + pr, thresh="2 mm", op=">=", window=3, window_stop=4 + ) + .load() + .dropna("event", how="all") + ) + + np.testing.assert_array_equal(out.event_length, [3, 3, 7]) + 
np.testing.assert_array_equal(out.event_effective_length, [3, 3, 4]) + np.testing.assert_array_equal(out.event_sum, [16, 6, 10]) + np.testing.assert_array_equal( + out.event_start, + np.array( + ["2000-01-08", "2000-01-17", "2000-01-27"], dtype="datetime64[ns]" + ), + ) + + @pytest.mark.parametrize("use_dask", [True, False]) + def test_cftime(self, pr_series, use_dask): + arr = np.array( + [ + 0, + 0, + 0, + 1, + 2, + 3, + 0, + 3, + 3, + 10, + 0, + 0, + 0, + 0, + 0, + 1, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 3, + 3, + 2, + 0, + 0, + 0, + 2, + 0, + 0, + 0, + 0, + ] + ) + pr = pr_series(arr, start="2000-01-01", units="mm").convert_calendar("noleap") + if use_dask: + pr = pr.chunk(-1) + + # cftime + with assert_lazy: + out = generic.thresholded_events( + pr, + thresh="1 mm", + op=">=", + window=3, + ) + out = out.load().dropna("event", how="all") + + np.testing.assert_array_equal(out.event_length, [7, 4, 4]) + np.testing.assert_array_equal(out.event_effective_length, [6, 4, 4]) + np.testing.assert_array_equal(out.event_sum, [22, 7, 9]) + exp = xr.DataArray( + [1, 2, 3], + dims=("time",), + coords={ + "time": np.array( + ["2000-01-04", "2000-01-16", "2000-01-26"], dtype="datetime64[ns]" + ) + }, + ) + np.testing.assert_array_equal( + out.event_start, exp.convert_calendar("noleap").time + ) + + @pytest.mark.parametrize("use_dask", [True, False]) + def test_freq(self, pr_series, use_dask): + jan = [ + 0, + 0, + 0, + 1, + 2, + 3, + 0, + 3, + 3, + 10, + 0, + 0, + 0, + 0, + 0, + 0, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 3, + 2, + 3, + 2, + ] + fev = [ + 2, + 2, + 1, + 0, + 0, + 0, + 3, + 3, + 4, + 5, + 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ] + pr = pr_series(np.array(jan + fev), start="2000-01-01", units="mm") + if use_dask: + pr = pr.chunk(-1) + + with assert_lazy: + out = generic.thresholded_events( + pr, thresh="1 mm", op=">=", window=3, freq="MS" + ) + assert out.event_length.shape 
== (2, 6) + out = out.load().dropna("event", how="all") + + np.testing.assert_array_equal(out.event_length, [[7, 6, 4], [3, 5, np.nan]]) + np.testing.assert_array_equal( + out.event_effective_length, [[6, 6, 4], [3, 5, np.nan]] + ) + np.testing.assert_array_equal(out.event_sum, [[22, 12, 10], [5, 17, np.nan]]) + np.testing.assert_array_equal( + out.event_start, + np.array( + [ + ["2000-01-04", "2000-01-17", "2000-01-28"], + ["2000-02-01", "2000-02-07", "NaT"], + ], + dtype="datetime64[ns]", + ), + ) diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index 997a60e79..c54232581 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -27,12 +27,10 @@ declare_relative_units, infer_context, pint2cfunits, - rate2amount, str2pint, to_agg_units, ) from xclim.indices import run_length as rl -from xclim.indices.helpers import resample_map __all__ = [ "aggregate_between_dates", @@ -96,15 +94,14 @@ def select_resample_op( The maximum value for each period. """ da = select_time(da, **indexer) + r = da.resample(time=freq) if isinstance(op, str): op = _xclim_ops.get(op, op) if isinstance(op, str): - out = getattr(da.resample(time=freq), op.replace("integral", "sum"))( - dim="time", keep_attrs=True - ) + out = getattr(r, op.replace("integral", "sum"))(dim="time", keep_attrs=True) else: with xr.set_options(keep_attrs=True): - out = resample_map(da, "time", freq, op) + out = r.map(op) op = op.__name__ if out_units is not None: return out.assign_attrs(units=out_units) @@ -439,7 +436,7 @@ def spell_mask( mask = getattr(mask, var_reducer)("variable") # We need to filter out the spells shorter than "window" # find sequences of consecutive respected constraints - cs_s = rl._cumsum_reset_on_zero(mask) + cs_s = rl._cumsum_reset(mask) # end of these sequences cs_s = cs_s.where(mask.shift({"time": -1}, fill_value=0) == 0) # propagate these end of sequences @@ -741,7 +738,7 @@ def season( map_kwargs = {"window": window, "mid_date": mid_date} if stat in ["start", 
"end"]: map_kwargs["coord"] = "dayofyear" - out = resample_map(cond, "time", freq, FUNC[stat], map_kwargs=map_kwargs) + out = cond.resample(time=freq).map(FUNC[stat], **map_kwargs) if stat == "length": return to_agg_units(out, data, "count") # else, a date @@ -902,12 +899,11 @@ def first_occurrence( cond = compare(data, op, threshold, constrain) - out = resample_map( - cond, - "time", - freq, + out = cond.resample(time=freq).map( rl.first_run, - map_kwargs=dict(window=1, dim="time", coord="dayofyear"), + window=1, + dim="time", + coord="dayofyear", ) out.attrs["units"] = "" return out @@ -948,12 +944,11 @@ def last_occurrence( cond = compare(data, op, threshold, constrain) - out = resample_map( - cond, - "time", - freq, + out = cond.resample(time=freq).map( rl.last_run, - map_kwargs=dict(window=1, dim="time", coord="dayofyear"), + window=1, + dim="time", + coord="dayofyear", ) out.attrs["units"] = "" return out @@ -994,12 +989,11 @@ def spell_length( cond = compare(data, op, threshold) - out = resample_map( - cond, - "time", - freq, + out = cond.resample(time=freq).map( rl.rle_statistics, - map_kwargs=dict(reducer=reducer, window=1, dim="time"), + reducer=reducer, + window=1, + dim="time", ) return to_agg_units(out, data, "count") @@ -1339,12 +1333,12 @@ def first_day_threshold_reached( cond = compare(data, op, threshold, constrain=constrain) - out: xr.DataArray = resample_map( - cond, - "time", - freq, + out: xr.DataArray = cond.resample(time=freq).map( rl.first_run_after_date, - map_kwargs=dict(window=window, date=after_date, dim="time", coord="dayofyear"), + window=window, + date=after_date, + dim="time", + coord="dayofyear", ) out.attrs.update(units="", is_dayofyear=np.int32(1), calendar=get_calendar(data)) return out @@ -1485,123 +1479,68 @@ def detrend( return ds - trend -# @declare_relative_units(thresh="") - - +@declare_relative_units(thresh="") def thresholded_events( data: xr.DataArray, thresh: Quantified, - window_start: int, - window_stop: int, - 
data_is_rate: bool = False, + op: str, + window: int, + thresh_stop: Quantified | None = None, + op_stop: str | None = None, + window_stop: int | None = None, + freq: str | None = None, ) -> xr.Dataset: r"""Thresholded events. + Finds all events along the time dimension. An event starts if the start condition is fulfilled for a given number of consecutive time steps. + It ends when the end condition is fulfilled for a given number of consecutive time steps. + + Conditions are simple comparisons of the data with a threshold: ``cond = data op thresh``. + The end condition defaults to the negation of the start condition. + + The resulting ``event`` dimension always has its maximal possible size: ``data.size / (window + window_stop)``. + Parameters ---------- data : xr.DataArray Variable. thresh : Quantified - Threshold that must be exceeded to be considered an event - window_start: int - Number of time steps above the threshold required to start an event - window_stop : int - Number of time steps below the threshold required to stop an event - data_is_rate : bool - True if the data is a rate that needs to be converted to an amount - when computing an accumulation. + Threshold defining the event. + op : {">", "gt", "<", "lt", ">=", "ge", "<=", "le", "==", "eq", "!=", "ne"} + Logical operator defining the event, e.g. arr > thresh. + window: int + Number of time steps where the event condition must be true to start an event. + thresh_stop : Quantified, optional + Threshold defining the end of an event. Defaults to `thresh`. + op_stop : {">", "gt", "<", "lt", ">=", "ge", "<=", "le", "==", "eq", "!=", "ne"}, optional + Logical operator for the end of an event. Defaults to the opposite of `op`. + window_stop: int, optional + Number of time steps where the end condition must be true to end an event. Defaults to `window`. + freq: str, optional + A frequency to divide the data into periods. If absent, the output has no time dimension.
+ If given, the events are searched within in each resample period independently. Returns ------- - xr.Dataset + xr.Dataset, same shape as the data except the time dimension is replaced by an "event" dimension. + event_length: The number of time steps in each event + event_effective_length: The number of time steps of even event where the start condition is true. + event_sum: The sum within each event, only considering the steps where start condition is true. + event_start: The datetime of the start of the run. """ # condition to respect for `window_start` time steps to start a run + window_stop = window_stop or window thresh = convert_units_to(thresh, data) - da_start = data >= thresh - da_stop = ~da_start - - # Get basic blocks to work with, our runs with holes and the lengths of those runs - # Series of ones indicating where we have continuous runs of freezing rain with pauses - # not exceeding `window_stop` - runs = rl.runs_with_holes(da_start, window_start, da_stop, window_stop) - - # Compute the length of freezing rain events - # I think int16 is safe enough - ds = rl.rle(runs).astype(np.int16).to_dataset(name="length") - ds.length.attrs["units"] = "1" - - # Time duration where the precipitation threshold is exceeded during an event - # (duration of complete run - duration of holes in the run ) - ds["effective_length"] = rl._cumsum_reset( - da_start.where(runs == 1), index="first", reset_on_zero=False - ).astype(np.int16) - ds["effective_length"].attrs["units"] = "1" - - # # Cumulated precipitation in a given freezing rain event - if data_is_rate: - dataam = rate2amount(data) + + # Start and end conditions + da_start = compare(data, op, thresh) + if thresh_stop is None and op_stop is None: + da_stop = ~da_start else: - dataam = data - ds["accumulation"] = rl._cumsum_reset( - dataam.where(runs == 1), index="first", reset_on_zero=False - ) - ds["accumulation"].attrs["units"] = dataam.units - - # Keep time as a variable, it will be used to keep start of events - 
ds["start"] = ds["time"].broadcast_like(ds) # .astype(int) - # I have to convert it to an integer for the filtering, time object won't do - # Since there are conversion needing a time object earlier, I think it's ok - # to assume this here? - time_min = ds.start.min() - ds["start"] = ds.start.copy( - data=(ds.start - time_min).values.astype("timedelta64[s]").astype(int) - ) + thresh_stop = convert_units_to(thresh_stop or thresh, data) + if op_stop is not None: + da_stop = compare(data, op_stop, thresh_stop) + else: + da_stop = ~compare(data, op, thresh_stop) - # Filter events: Reduce time dimension - def _filter_events(da, rl, max_event_number): - out = np.full(max_event_number, np.nan) - events_start = da[rl > 0] - out[: len(events_start)] = events_start - return out - - max_event_number = int(np.ceil(data.time.size / (window_start + window_stop))) - v_attrs = {v: ds[v].attrs for v in ds.data_vars} - ds = xr.apply_ufunc( - _filter_events, - ds, - ds.length, - input_core_dims=[["time"], ["time"]], - output_core_dims=[["event"]], - kwargs=dict(max_event_number=max_event_number), - dask_gufunc_kwargs=dict(output_sizes={"event": max_event_number}), - dask="parallelized", - vectorize=True, - ).assign_attrs(ds.attrs) - - ds["event"] = np.arange(1, ds.event.size + 1) - ds.event.assign_attrs(units="") - for v in ds.data_vars: - ds[v].attrs = v_attrs[v] - - # convert back start to a time - # TODO fix for calendars - ds["start"] = time_min + ds.start.copy( - data=ds.start.values.astype("timedelta64[s]").astype("timedelta64[ns]") - ) - ds["start"].assign_attrs(units="") - return ds - - # # Other indices that could be completely done outside of the function, no input needed anymore - # # number of events - # ds["number_of_events"] = (ds["run_lengths"] > 0).sum(dim="event").astype(np.int16) - # ds.number_of_events.attrs["units"] = "" - - # # mean rate of precipitation during event - # ds["rate"] = ds["cumulative_precipitation"] / ds["precipitation_duration"] - # units = ( - # 
f"{ds['cumulative_precipitation'].units}/{ds['precipitation_duration'].units}" - # ) - # ds["rate"].attrs["units"] = ensure_cf_units(units) - - # ds.attrs["units"] = "" - # return ds + return rl.find_events(da_start, window, da_stop, window_stop, data, freq) diff --git a/xclim/indices/run_length.py b/xclim/indices/run_length.py index 60dacf261..38e6a0d2f 100644 --- a/xclim/indices/run_length.py +++ b/xclim/indices/run_length.py @@ -12,6 +12,7 @@ from warnings import warn import numpy as np +import pandas as pd import xarray as xr from numba import njit from xarray.core.utils import get_temp_dimname @@ -1689,3 +1690,144 @@ def suspicious_run( keep_attrs=True, kwargs={"window": window, "op": op, "thresh": thresh}, ) + + +def _find_events(da_start, da_stop, data, window_start, window_stop): + # Get basic blocks to work with, our runs with holes and the lengths of those runs + # Series of ones indicating where we have continuous runs of freezing rain with pauses + # not exceeding `window_stop` + runs = runs_with_holes(da_start, window_start, da_stop, window_stop) + + # Compute the length of freezing rain events + # I think int16 is safe enough, fillna first to suppress warning + ds = rle(runs).fillna(0).astype(np.int16).to_dataset(name="event_length") + # Time duration where the precipitation threshold is exceeded during an event + # (duration of complete run - duration of holes in the run ) + ds["event_effective_length"] = _cumsum_reset( + da_start.where(runs == 1), index="first", reset_on_zero=False + ).astype(np.int16) + + if data is not None: + # Ex: Cumulated precipitation in a given freezing rain event + ds["event_sum"] = _cumsum_reset( + data.where(runs == 1), index="first", reset_on_zero=False + ) + + # Keep time as a variable, it will be used to keep start of events + ds["event_start"] = ds["time"].broadcast_like(ds) # .astype(int) + # We convert to an integer for the filtering, time object won't do well in the apply_ufunc/vectorize + time_min = 
ds.event_start.min() + ds["event_start"] = ds.event_start.copy( + data=(ds.event_start - time_min).values.astype("timedelta64[s]").astype(int) + ) + + # Filter events: Reduce time dimension + def _filter_events(da, rl, max_event_number): + out = np.full(max_event_number, np.nan) + events_start = da[rl > 0] + out[: len(events_start)] = events_start + return out + + # Dask inputs need to be told their length before computing anything. + max_event_number = int(np.ceil(da_start.time.size / (window_start + window_stop))) + ds = xr.apply_ufunc( + _filter_events, + ds, + ds.event_length, + input_core_dims=[["time"], ["time"]], + output_core_dims=[["event"]], + kwargs=dict(max_event_number=max_event_number), + dask_gufunc_kwargs=dict(output_sizes={"event": max_event_number}), + dask="parallelized", + vectorize=True, + ) + + # convert back start to a time + if time_min.dtype == "O": + # Can't add a numpy array of timedeltas to an array of cftime (because they have non-compatible dtypes) + # Also, we can't add cftime to NaTType. So we fill with negative timedeltas and mask them after the addition + + def _get_start_cftime(deltas, time_min=None): + starts = time_min + pd.to_timedelta(deltas, "s").to_pytimedelta() + starts[starts < time_min] = np.nan + return starts + + ds["event_start"] = xr.apply_ufunc( + _get_start_cftime, + ds.event_start.fillna(-1), + dask="parallelized", + kwargs={"time_min": time_min.item()}, + output_dtypes=[time_min.dtype], + ) + else: + ds["event_start"] = ds.event_start.copy( + data=time_min.values + ds.event_start.data.astype("timedelta64[s]") + ) + + ds["event"] = np.arange(1, ds.event.size + 1) + ds["event_length"].attrs["units"] = "1" + ds["event_effective_length"].attrs["units"] = "1" + ds["event_start"].attrs["units"] = "" + if data is not None: + ds["event_sum"].attrs["units"] = data.units + return ds + + +# TODO: Implement more event stats ? 
+ (max, effective sum, etc) +def find_events( + condition: xr.DataArray, + window: int, + condition_stop: xr.DataArray | None = None, + window_stop: int | None = None, + data: xr.DataArray | None = None, + freq: str | None = None, +): + """Find events (runs). + + An event starts with a run of ``window`` consecutive True values in the condition + and stops with ``window_stop`` consecutive True values in the stop condition. + + This returns a Dataset with each event along an `event` dimension. It does not + perform statistics over the events like other functions in this module do. + + Parameters + ---------- + condition : DataArray of boolean values + The boolean mask, true where the start condition of the event is fulfilled. + window: int + The number of consecutive True values for an event to start. + condition_stop : DataArray of boolean values, optional + The stopping boolean mask, true where the end condition of the event is fulfilled. + Defaults to the opposite of ``condition``. + window_stop : int, optional + The number of consecutive True values in ``condition_stop`` for an event to end. + Defaults to ``window``. + data: DataArray, optional + The actual data. If present, its sum within each event is added to the output. + freq: str, optional + A frequency to divide the data into periods. If absent, the output has no time dimension. + If given, the events are searched within each resample period independently. + + Returns + ------- + xr.Dataset, same shape as the data but it has a new "event" dimension (and the time dimension is resampled or removed, according to ``freq``). + event_length: The number of time steps in each event + event_effective_length: The number of time steps of each event where the start condition is true. + event_start: The datetime of the start of the run. + event_sum: The sum within each event, only considering the steps where start condition is true. Only present if ``data`` is given.
+ """ + window_stop = window_stop or window + if condition_stop is None: + condition_stop = ~condition + + if freq is None: + return _find_events(condition, condition_stop, data, window, window_stop) + + ds = xr.Dataset({"da_start": condition, "da_stop": condition_stop}) + if data is not None: + ds = ds.assign(data=data) + return ds.resample(time=freq).map( + lambda grp: _find_events( + grp.da_start, grp.da_stop, grp.get("data", None), window, window_stop + ) + ) From 74b8cc95d9fd861c3d1f7f332d7c16bb7c6e873e Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 3 Oct 2024 18:16:10 -0400 Subject: [PATCH 08/14] Add quantified support to rate2amount --- CHANGELOG.rst | 3 ++- xclim/core/units.py | 65 ++++++++++++++++++++++++++++++--------------- 2 files changed, 46 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 4ed1497bd..a5c4b7c92 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -14,7 +14,6 @@ Announcements New indicators ^^^^^^^^^^^^^^ * New ``heat_spell_frequency``, ``heat_spell_max_length`` and ``heat_spell_total_length`` : spell length statistics on a bivariate condition that uses the average over a window by default. (:pull:`1885`). -* New indicator ``freezing_rain_events`` gives statistics about freezing rain sequences. (:pull:`1778`). New features and enhancements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -22,6 +21,8 @@ New features and enhancements * Indicator parameters can now be assigned a new name, different from the argument name in the compute function. (:pull:`1885`). * New global option ``resample_map_blocks`` to wrap all ``resample().map()`` code inside a ``xr.map_blocks`` to lower the number of dask tasks. Uses utility ``xclim.indices.helpers.resample_map`` and requires ``flox`` to ensure the chunking allows such block-mapping. Defaults to False. (:pull:`1848`). 
* ``xclim.indices.run_length.runs_with_holes`` allows to input a condition that must be met for a run to start and a second condition that must be met for the run to stop. (:pull:`1778`). +* New generic compute function ``xclim.indices.generic.thresholded_events`` that finds events based on a threshold condition and returns basic stats for each. See also ``xclim.indices.run_length.find_events``. (:pull:`1778`). +* ``xclim.core.units.rate2amount`` and ``xclim.core.units.amount2rate`` can now also accept quantities (pint objects or strings), in which case the ``dim`` argument must be the ``time`` coordinate through which we can find the sampling rate. (:pull:`1778`). Bug fixes ^^^^^^^^^ diff --git a/xclim/core/units.py b/xclim/core/units.py index 13f7a5e79..0dcdfbb38 100644 --- a/xclim/core/units.py +++ b/xclim/core/units.py @@ -623,8 +623,8 @@ def to_agg_units( def _rate_and_amount_converter( - da: xr.DataArray, - dim: str = "time", + da: Quantified, + dim: str | xr.DataArray = "time", to: str = "amount", sampling_rate_from_coord: bool = False, out_units: str | None = None, @@ -633,10 +633,27 @@ def _rate_and_amount_converter( m = 1 u = None # Default to assume a non-uniform axis label: Literal["lower", "upper"] = "lower" # Default to "lower" label for diff - time = da[dim] + if isinstance(dim, str): + if not isinstance(da, xr.DataArray): + raise ValueError( + "If `dim` is a string, the data to convert must be a DataArray." 
+ ) + time = da[dim] + else: + time = dim + dim = time.name + + # We accept str, Quantity or DataArray + # Ensure the code below has a DataArray, so its simpler + # We convert back at the end + orig_da = da + if isinstance(da, str): + da = str2pint(da) + if isinstance(da, units.Quantity): + da = xr.DataArray(da.magnitude, attrs={"units": f"{da.units:~cf}"}) try: - freq = xr.infer_freq(da[dim]) + freq = xr.infer_freq(time) except ValueError as err: if sampling_rate_from_coord: freq = None @@ -670,7 +687,7 @@ def _rate_and_amount_converter( ), dims=(dim,), name=dim, - attrs=da[dim].attrs, + attrs=time.attrs, ) else: m, u = multi, FREQ_UNITS[base] @@ -684,7 +701,7 @@ def _rate_and_amount_converter( # and `label` has been updated accordingly. dt = ( time.diff(dim, label=label) - .reindex({dim: da[dim]}, method="ffill") + .reindex({dim: time}, method="ffill") .astype(float) ) dt = dt / 1e9 # Convert to seconds @@ -717,15 +734,17 @@ def _rate_and_amount_converter( out = out.assign_attrs(standard_name=new_name) if out_units: - out = cast(xr.DataArray, convert_units_to(out, out_units)) + out = convert_units_to(out, out_units) + if not isinstance(orig_da, xr.DataArray): + out = units.Quantity(out.item(), out.attrs["units"]) return out @_register_conversion("amount2rate", "from") def rate2amount( - rate: xr.DataArray, - dim: str = "time", + rate: Quantified, + dim: str | xr.DataArray = "time", sampling_rate_from_coord: bool = False, out_units: str | None = None, ) -> xr.DataArray: @@ -739,10 +758,10 @@ def rate2amount( Parameters ---------- - rate : xr.DataArray + rate : xr.DataArray, pint.Quantity or string "Rate" variable, with units of "amount" per time. Ex: Precipitation in "mm / d". - dim : str - The time dimension. + dim : str or DataArray + The name of time dimension or the coordinate itself. sampling_rate_from_coord : boolean For data with irregular time coordinates. 
If True, the diff of the time coordinate will be used as the sampling rate, meaning each data point will be assumed to apply for the interval ending at the next point. See notes. @@ -757,7 +776,7 @@ def rate2amount( Returns ------- - xr.DataArray + xr.DataArray or Quantity Examples -------- @@ -805,8 +824,8 @@ def rate2amount( @_register_conversion("amount2rate", "to") def amount2rate( - amount: xr.DataArray, - dim: str = "time", + amount: Quantified, + dim: str | xr.DataArray = "time", sampling_rate_from_coord: bool = False, out_units: str | None = None, ) -> xr.DataArray: @@ -820,10 +839,10 @@ def amount2rate( Parameters ---------- - amount : xr.DataArray + amount : xr.DataArray, pint.Quantity or string "amount" variable. Ex: Precipitation amount in "mm". - dim : str - The time dimension. + dim : str or xr.DataArray + The name of the time dimension or the time coordinate itself. sampling_rate_from_coord : boolean For data with irregular time coordinates. If True, the diff of the time coordinate will be used as the sampling rate, @@ -840,7 +859,7 @@ def amount2rate( Returns ------- - xr.DataArray + xr.DataArray or Quantity See Also -------- @@ -1158,12 +1177,16 @@ def check_units( ) -def _check_output_has_units(out: xr.DataArray | tuple[xr.DataArray]) -> None: +def _check_output_has_units( + out: xr.DataArray | tuple[xr.DataArray] | xr.Dataset, +) -> None: """Perform very basic sanity check on the output. Indices are responsible for unit management. If this fails, it's a developer's error. """ - if not isinstance(out, tuple): + if isinstance(out, xr.Dataset): + out = out.data_vars.values() + elif not isinstance(out, tuple): out = (out,) for outd in out: From 9d80208d2411ccf4d0aecbf24ae22a8a753472ed Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 3 Oct 2024 18:23:44 -0400 Subject: [PATCH 09/14] Fix bad merge... 
--- tests/test_generic.py | 10 ++++---- tests/test_indices.py | 28 +-------------------- xclim/indices/generic.py | 53 ++++++++++++++++++++-------------------- 3 files changed, 33 insertions(+), 58 deletions(-) diff --git a/tests/test_generic.py b/tests/test_generic.py index 9caaf0de9..9260d5cc5 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -815,7 +815,7 @@ def test_simple(self, pr_series, use_dask): 0, 0, ] - ) + ) # noqa pr = pr_series(arr, start="2000-01-01", units="mm") if use_dask: pr = pr.chunk(-1) @@ -883,7 +883,7 @@ def test_diff_windows(self, pr_series, use_dask): 0, 0, ] - ) + ) # noqa pr = pr_series(arr, start="2000-01-01", units="mm") if use_dask: pr = pr.chunk(-1) @@ -949,7 +949,7 @@ def test_cftime(self, pr_series, use_dask): 0, 0, ] - ) + ) # noqa pr = pr_series(arr, start="2000-01-01", units="mm").convert_calendar("noleap") if use_dask: pr = pr.chunk(-1) @@ -1014,7 +1014,7 @@ def test_freq(self, pr_series, use_dask): 2, 3, 2, - ] + ] # noqa fev = [ 2, 2, @@ -1045,7 +1045,7 @@ def test_freq(self, pr_series, use_dask): 0, 0, 0, - ] + ] # noqa pr = pr_series(np.array(jan + fev), start="2000-01-01", units="mm") if use_dask: pr = pr.chunk(-1) diff --git a/tests/test_indices.py b/tests/test_indices.py index 894bb0819..cc6aabf9b 100644 --- a/tests/test_indices.py +++ b/tests/test_indices.py @@ -27,7 +27,7 @@ from xclim.core import ValidationError from xclim.core.calendar import percentile_doy from xclim.core.options import set_options -from xclim.core.units import convert_units_to, rate2amount, units +from xclim.core.units import convert_units_to, units K2C = 273.15 @@ -346,32 +346,6 @@ def test_bedd(self, method, end_date, deg_days, max_deg_days): if method == "icclim": np.testing.assert_array_equal(bedd, bedd_high_lat) - def test_freezing_rain_events(self, open_dataset): - times = pd.date_range("1950-01-01", "1950-01-31", freq="D") - da = xr.DataArray( - np.zeros(len(times)), - dims={"time"}, - coords={"time": times}, - 
attrs={"units": "kg m-2 s-1"}, - ) - - # Two sequences separated by 3 days, which will be split with window_stop = 3 - da[0:10] = 1 - da[13:20] = 1 - out = xci.freezing_rain_events( - da, thresh="0.5 kg m-2 s-1", window_start=3, window_stop=3 - ) - assert (out.run_lengths.values[0:2] == [10, 7]).all() - - # Two sequences separated by 2 days, which will form a single large event - da[12] = 1 - out = xci.freezing_rain_events( - da, thresh="0.5 kg m-2 s-1", window_start=3, window_stop=3 - ) - pram = rate2amount(da) - assert out.run_lengths.values[0] == 20 - assert (out.cumulative_precipitation - pram.sum()).sum() == 0 - def test_cool_night_index(self, open_dataset): ds = open_dataset("cmip5/tas_Amon_CanESM2_rcp85_r1i1p1_200701-200712.nc") ds = ds.rename(dict(tas="tasmin")) diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index c54232581..6b81261e2 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -17,11 +17,7 @@ from xarray.coding.cftime_offsets import _MONTH_ABBREVIATIONS # noqa from xclim.core import DayOfYearStr, Quantified -from xclim.core.calendar import ( - doy_to_days_since, - get_calendar, - select_time, -) +from xclim.core.calendar import doy_to_days_since, get_calendar, select_time from xclim.core.units import ( convert_units_to, declare_relative_units, @@ -31,6 +27,7 @@ to_agg_units, ) from xclim.indices import run_length as rl +from xclim.indices.helpers import resample_map __all__ = [ "aggregate_between_dates", @@ -94,14 +91,15 @@ def select_resample_op( The maximum value for each period. 
""" da = select_time(da, **indexer) - r = da.resample(time=freq) if isinstance(op, str): op = _xclim_ops.get(op, op) if isinstance(op, str): - out = getattr(r, op.replace("integral", "sum"))(dim="time", keep_attrs=True) + out = getattr(da.resample(time=freq), op.replace("integral", "sum"))( + dim="time", keep_attrs=True + ) else: with xr.set_options(keep_attrs=True): - out = r.map(op) + out = resample_map(da, "time", freq, op) op = op.__name__ if out_units is not None: return out.assign_attrs(units=out_units) @@ -738,7 +736,7 @@ def season( map_kwargs = {"window": window, "mid_date": mid_date} if stat in ["start", "end"]: map_kwargs["coord"] = "dayofyear" - out = cond.resample(time=freq).map(FUNC[stat], **map_kwargs) + out = resample_map(cond, "time", freq, FUNC[stat], map_kwargs=map_kwargs) if stat == "length": return to_agg_units(out, data, "count") # else, a date @@ -899,11 +897,12 @@ def first_occurrence( cond = compare(data, op, threshold, constrain) - out = cond.resample(time=freq).map( + out = resample_map( + cond, + "time", + freq, rl.first_run, - window=1, - dim="time", - coord="dayofyear", + map_kwargs=dict(window=1, dim="time", coord="dayofyear"), ) out.attrs["units"] = "" return out @@ -944,11 +943,12 @@ def last_occurrence( cond = compare(data, op, threshold, constrain) - out = cond.resample(time=freq).map( + out = resample_map( + cond, + "time", + freq, rl.last_run, - window=1, - dim="time", - coord="dayofyear", + map_kwargs=dict(window=1, dim="time", coord="dayofyear"), ) out.attrs["units"] = "" return out @@ -989,11 +989,12 @@ def spell_length( cond = compare(data, op, threshold) - out = cond.resample(time=freq).map( + out = resample_map( + cond, + "time", + freq, rl.rle_statistics, - reducer=reducer, - window=1, - dim="time", + map_kwargs=dict(reducer=reducer, window=1, dim="time"), ) return to_agg_units(out, data, "count") @@ -1333,12 +1334,12 @@ def first_day_threshold_reached( cond = compare(data, op, threshold, constrain=constrain) - out: 
xr.DataArray = cond.resample(time=freq).map( + out: xr.DataArray = resample_map( + cond, + "time", + freq, rl.first_run_after_date, - window=window, - date=after_date, - dim="time", - coord="dayofyear", + map_kwargs=dict(window=window, date=after_date, dim="time", coord="dayofyear"), ) out.attrs.update(units="", is_dayofyear=np.int32(1), calendar=get_calendar(data)) return out From f411144d8f2591168ad9d2418b4b3f3d8dcd94aa Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 7 Oct 2024 15:11:11 -0400 Subject: [PATCH 10/14] fix fmt, fix tests --- tests/test_generic.py | 203 ++---------------------------------- tests/test_run_length.py | 8 +- xclim/indices/_agro.py | 2 +- xclim/indices/generic.py | 6 +- xclim/indices/run_length.py | 7 +- 5 files changed, 21 insertions(+), 205 deletions(-) diff --git a/tests/test_generic.py b/tests/test_generic.py index 9260d5cc5..cf2b9c263 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -775,47 +775,7 @@ class TestThresholdedEvents: @pytest.mark.parametrize("use_dask", [True, False]) def test_simple(self, pr_series, use_dask): - arr = np.array( - [ - 0, - 0, - 0, - 1, - 2, - 3, - 0, - 3, - 3, - 10, - 0, - 0, - 0, - 0, - 0, - 1, - 2, - 2, - 2, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 3, - 3, - 2, - 0, - 0, - 0, - 2, - 0, - 0, - 0, - 0, - ] - ) # noqa + arr = np.array([0, 0, 0, 1, 2, 3, 0, 3, 3, 10, 0, 0, 0, 0, 0, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 1, 3, 3, 2, 0, 0, 0, 2, 0, 0, 0, 0]) # fmt: skip pr = pr_series(arr, start="2000-01-01", units="mm") if use_dask: pr = pr.chunk(-1) @@ -828,12 +788,12 @@ def test_simple(self, pr_series, use_dask): window=3, ) - assert out.event.size == np.ceil(arr.size / 6) + assert out.event.size == np.ceil(arr.size / (3 + 1)) out = out.load().dropna("event", how="all") - np.testing.assert_array_equal(out.event_length, [7, 4, 4]) - np.testing.assert_array_equal(out.event_effective_length, [6, 4, 4]) - np.testing.assert_array_equal(out.event_sum, [22, 7, 9]) + 
np.testing.assert_array_equal(out.event_length, [3, 3, 4, 4]) + np.testing.assert_array_equal(out.event_effective_length, [3, 3, 4, 4]) + np.testing.assert_array_equal(out.event_sum, [6, 16, 7, 9]) np.testing.assert_array_equal( out.event_start, np.array( @@ -843,47 +803,7 @@ def test_simple(self, pr_series, use_dask): @pytest.mark.parametrize("use_dask", [True, False]) def test_diff_windows(self, pr_series, use_dask): - arr = np.array( - [ - 0, - 0, - 0, - 1, - 2, - 3, - 0, - 3, - 3, - 10, - 0, - 0, - 0, - 0, - 0, - 1, - 2, - 2, - 2, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 3, - 3, - 2, - 0, - 0, - 0, - 2, - 0, - 0, - 0, - 0, - ] - ) # noqa + arr = np.array([0, 0, 0, 1, 2, 3, 0, 3, 3, 10, 0, 0, 0, 0, 0, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 1, 3, 3, 2, 0, 0, 0, 2, 0, 0, 0, 0]) # fmt: skip pr = pr_series(arr, start="2000-01-01", units="mm") if use_dask: pr = pr.chunk(-1) @@ -909,47 +829,7 @@ def test_diff_windows(self, pr_series, use_dask): @pytest.mark.parametrize("use_dask", [True, False]) def test_cftime(self, pr_series, use_dask): - arr = np.array( - [ - 0, - 0, - 0, - 1, - 2, - 3, - 0, - 3, - 3, - 10, - 0, - 0, - 0, - 0, - 0, - 1, - 2, - 2, - 2, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 3, - 3, - 2, - 0, - 0, - 0, - 2, - 0, - 0, - 0, - 0, - ] - ) # noqa + arr = np.array([0, 0, 0, 1, 2, 3, 0, 3, 3, 10, 0, 0, 0, 0, 0, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 1, 3, 3, 2, 0, 0, 0, 2, 0, 0, 0, 0]) # fmt: skip pr = pr_series(arr, start="2000-01-01", units="mm").convert_calendar("noleap") if use_dask: pr = pr.chunk(-1) @@ -961,6 +841,7 @@ def test_cftime(self, pr_series, use_dask): thresh="1 mm", op=">=", window=3, + window_stop=3, ) out = out.load().dropna("event", how="all") @@ -982,77 +863,15 @@ def test_cftime(self, pr_series, use_dask): @pytest.mark.parametrize("use_dask", [True, False]) def test_freq(self, pr_series, use_dask): - jan = [ - 0, - 0, - 0, - 1, - 2, - 3, - 0, - 3, - 3, - 10, - 0, - 0, - 0, - 0, - 0, - 0, - 2, - 2, - 2, - 2, - 2, - 2, - 0, - 0, - 0, - 0, - 0, - 3, - 2, - 3, - 
2, - ] # noqa - fev = [ - 2, - 2, - 1, - 0, - 0, - 0, - 3, - 3, - 4, - 5, - 2, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ] # noqa + jan = [0, 0, 0, 1, 2, 3, 0, 3, 3, 10, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 3, 2, 3, 2] # fmt: skip + fev = [2, 2, 1, 0, 0, 0, 3, 3, 4, 5, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] # fmt: skip pr = pr_series(np.array(jan + fev), start="2000-01-01", units="mm") if use_dask: pr = pr.chunk(-1) with assert_lazy: out = generic.thresholded_events( - pr, thresh="1 mm", op=">=", window=3, freq="MS" + pr, thresh="1 mm", op=">=", window=3, freq="MS", window_stop=3 ) assert out.event_length.shape == (2, 6) out = out.load().dropna("event", how="all") diff --git a/tests/test_run_length.py b/tests/test_run_length.py index a109f83e6..0be7aa507 100644 --- a/tests/test_run_length.py +++ b/tests/test_run_length.py @@ -126,7 +126,7 @@ def test_rle(ufunc, use_dask, index): @pytest.mark.parametrize("use_dask", [True, False]) @pytest.mark.parametrize("index", ["first", "last"]) -def test_extract_events_identity(use_dask, index): +def test_runs_with_holes_identity(use_dask, index): # implement more tests, this is just to show that this reproduces the behaviour # of rle values = np.zeros((10, 365, 4, 4)) @@ -137,19 +137,19 @@ def test_extract_events_identity(use_dask, index): if use_dask: da = da.chunk({"a": 1, "b": 2}) - events = rl.extract_events(da != 0, 1, da == 0, 1) + events = rl.runs_with_holes(da != 0, 1, da == 0, 1) expected = da np.testing.assert_array_equal(events, expected) -def test_extract_events(): +def test_runs_with_holes(): values = np.zeros(365) time = pd.date_range("2000-01-01", periods=365, freq="D") a = [0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0] values[0 : len(a)] = a da = xr.DataArray(values, coords={"time": time}, dims=("time")) - events = rl.extract_events(da == 1, 1, da == 0, 3) + events = rl.runs_with_holes(da == 1, 1, da == 
0, 3) expected = values * 0 expected[1:11] = 1 diff --git a/xclim/indices/_agro.py b/xclim/indices/_agro.py index d26b0a918..eb44fc29d 100644 --- a/xclim/indices/_agro.py +++ b/xclim/indices/_agro.py @@ -1037,7 +1037,7 @@ def _get_first_run_start(_pram): raise ValueError(f"Unknown method_dry_start: {method_dry_start}.") # First and second condition combined in a run length - events = rl.extract_events(da_start, 1, da_stop, window_dry) + events = rl.runs_with_holes(da_start, 1, da_stop, window_dry) run_positions = rl.rle(events) >= (window_not_dry_start + window_wet_start) return _get_first_run(run_positions, date_min_start, date_max_start) diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index 6b81261e2..44071b65f 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -1488,7 +1488,7 @@ def thresholded_events( window: int, thresh_stop: Quantified | None = None, op_stop: str | None = None, - window_stop: int | None = None, + window_stop: int = 1, freq: str | None = None, ) -> xr.Dataset: r"""Thresholded events. @@ -1516,7 +1516,7 @@ def thresholded_events( op_stop : {">", "gt", "<", "lt", ">=", "ge", "<=", "le", "==", "eq", "!=", "ne"}, optional Logical operator for the end of an event. Defaults to the opposite of `op`. window_stop: int, optional - Number of time steps where the end condition must be true to end an event. Defaults to `window`. + Number of time steps where the end condition must be true to end an event. Defaults to 1. freq: str, optional A frequency to divide the data into periods. If absent, the output has not time dimension. If given, the events are searched within in each resample period independently. @@ -1529,8 +1529,6 @@ def thresholded_events( event_sum: The sum within each event, only considering the steps where start condition is true. event_start: The datetime of the start of the run. 
""" - # condition to respect for `window_start` time steps to start a run - window_stop = window_stop or window thresh = convert_units_to(thresh, data) # Start and end conditions diff --git a/xclim/indices/run_length.py b/xclim/indices/run_length.py index 38e6a0d2f..732bcaf38 100644 --- a/xclim/indices/run_length.py +++ b/xclim/indices/run_length.py @@ -1778,7 +1778,7 @@ def find_events( condition: xr.DataArray, window: int, condition_stop: xr.DataArray | None = None, - window_stop: int | None = None, + window_stop: int = 1, data: xr.DataArray | None = None, freq: str | None = None, ): @@ -1799,9 +1799,9 @@ def find_events( condition_stop : DataArray of boolean values, optional The stopping boolean mask, true where the end condition of the event is fulfilled. Defaults to the opposite of ``condition``. - window_stop : int, optional + window_stop : int The number of consecutive True values in ``condition_stop`` for an event to end. - Defaults to ``window``. + Defaults to 1. data: DataArray, optional The actual data. If present, its sum within each event is added to the output. freq: str, optional @@ -1816,7 +1816,6 @@ def find_events( event_start: The datetime of the start of the run. event_sum: The sum within each event, only considering the steps where start condition is true. Only present if ``data`` is given. 
""" - window_stop = window_stop or window if condition_stop is None: condition_stop = ~condition From cc02b198656054cfa815fe4d418446eff72142fd Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 7 Oct 2024 16:54:19 -0400 Subject: [PATCH 11/14] Add min_gap to spell length stats --- CHANGELOG.rst | 2 +- tests/test_temperature.py | 14 ++++++++++++ xclim/indicators/atmos/_temperature.py | 9 +++++--- xclim/indices/generic.py | 31 +++++++++++++++++++++++--- 4 files changed, 49 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 1ff536974..201ca9e8a 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -13,7 +13,7 @@ Announcements New indicators ^^^^^^^^^^^^^^ -* New ``heat_spell_frequency``, ``heat_spell_max_length`` and ``heat_spell_total_length`` : spell length statistics on a bivariate condition that uses the average over a window by default. (:pull:`1885`). +* New ``heat_spell_frequency``, ``heat_spell_max_length`` and ``heat_spell_total_length`` : spell length statistics on a bivariate condition that uses the average over a window by default. (:pull:`1885`, :pull:`1778`). * New ``chill_portion`` and ``chill_unit``: chill portion based on the Dynamic Model and chill unit based on the Utah model indicators. (:issue:`1753`, :pull:`1909`). 
New features and enhancements diff --git a/tests/test_temperature.py b/tests/test_temperature.py index 45f3c94b7..a16958a9f 100644 --- a/tests/test_temperature.py +++ b/tests/test_temperature.py @@ -640,6 +640,20 @@ def test_1d(self, tasmax_series, tasmin_series): ) np.testing.assert_allclose(hsf.values[:1], 0) + def test_gap(self, tasmax_series, tasmin_series): + tn1 = np.zeros(366) + tx1 = np.zeros(366) + tn1[:10] = np.array([20, 23, 23, 23, 20, 20, 23, 23, 23, 23]) + tx1[:10] = np.array([29, 31, 31, 31, 28, 28, 31, 31, 31, 31]) + + tn = tasmin_series(tn1 + K2C, start="1/1/2000") + tx = tasmax_series(tx1 + K2C, start="1/1/2000") + + hsf = atmos.heat_spell_frequency( + tn, tx, thresh_tasmin="22.1 C", thresh_tasmax="30.1 C", freq="YS", min_gap=3 + ) + np.testing.assert_allclose(hsf.values[:1], 1) + class TestHeatSpellMaxLength: def test_1d(self, tasmax_series, tasmin_series): diff --git a/xclim/indicators/atmos/_temperature.py b/xclim/indicators/atmos/_temperature.py index 46d6903ee..eb8ee8470 100644 --- a/xclim/indicators/atmos/_temperature.py +++ b/xclim/indicators/atmos/_temperature.py @@ -291,7 +291,8 @@ class TempHourlyWithIndexing(ResamplingIndicatorWithIndexing): long_name="Number of heat spells", description="{freq} number of heat spells events. A heat spell occurs when the {window}-day " "averages of daily minimum and maximum temperatures each exceed {thresh_tasmin} and {thresh_tasmax}. " - "All days of the {window}-day period are considered part of the spell.", + "All days of the {window}-day period are considered part of the spell. Gaps of under {min_gap} days are allowed " + "within a spell.", abstract="Number of heat spells. A heat spell occurs when rolling averages of daily minimum and maximum temperatures exceed given " "thresholds for a number of days.", cell_methods="", @@ -325,7 +326,8 @@ class TempHourlyWithIndexing(ResamplingIndicatorWithIndexing): long_name="Longest heat spell", description="{freq} maximum length of heat spells. 
A heat spell occurs when the {window}-day " "averages of daily minimum and maximum temperatures each exceed {thresh_tasmin} and {thresh_tasmax}. " - "All days of the {window}-day period are considered part of the spell.", + "All days of the {window}-day period are considered part of the spell. Gaps of under {min_gap} days are allowed " + "within a spell.", abstract="The longest heat spell of a period. A heat spell occurs when rolling averages of daily minimum and maximum temperatures exceed given " "thresholds for a number of days.", compute=indices.generic.bivariate_spell_length_statistics, @@ -357,7 +359,8 @@ class TempHourlyWithIndexing(ResamplingIndicatorWithIndexing): long_name="Total length of heat spells.", description="{freq} total length of heat spell events. " "A heat spell occurs when the {window}-day averages of daily minimum and maximum temperatures " - "each exceed {thresh_tasmin} and {thresh_tasmax}. All days of the {window}-day period are considered part of the spell.", + "each exceed {thresh_tasmin} and {thresh_tasmax}. All days of the {window}-day period are considered part of the spell." + "Gaps of under {min_gap} days are allowed within a spell.", abstract="Total length of heat spells. A heat spell occurs when rolling averages of daily minimum and maximum temperatures exceed given " "thresholds for a number of days.", compute=indices.generic.bivariate_spell_length_statistics, diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index 44071b65f..9595b93b0 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -362,6 +362,7 @@ def spell_mask( win_reducer: str, op: str, thresh: float | Sequence[float], + min_gap: int = 1, weights: Sequence[float] = None, var_reducer: str = "all", ) -> xr.DataArray: @@ -384,6 +385,9 @@ def spell_mask( The threshold to compare the rolling statistics against, as ``window_stats op threshold``. If data is a list, this must be a list of the same length with a threshold for each variable. 
This function does not handle units and can't accept Quantified objects. + min_gap: int + The shortest possible gap between two spells. Spells closer than this are merged by assigning + the gap steps to the merged spell. weights: sequence of floats A list of weights of the same length as the window. Only supported if `win_reducer` is "mean". @@ -453,9 +457,17 @@ def spell_mask( if not np.isscalar(thresh): mask = getattr(mask, var_reducer)("variable") # True for all days part of a spell that respected the condition (shift because of the two rollings) - is_in_spell = (mask.rolling(time=window).sum() >= 1).shift(time=-(window - 1)) + is_in_spell = (mask.rolling(time=window).sum() >= 1).shift( + time=-(window - 1), fill_value=False + ) # Cut back to the original size is_in_spell = is_in_spell.isel(time=slice(0, data.time.size)) + + if min_gap > 1: + is_in_spell = rl.runs_with_holes(is_in_spell, 1, ~is_in_spell, min_gap).astype( + bool + ) + return is_in_spell @@ -467,12 +479,15 @@ def _spell_length_statistics( op: str, spell_reducer: str | Sequence[str], freq: str, + min_gap: int = 1, resample_before_rl: bool = True, **indexer, ) -> xr.DataArray | Sequence[xr.DataArray]: if isinstance(spell_reducer, str): spell_reducer = [spell_reducer] - is_in_spell = spell_mask(data, window, win_reducer, op, thresh).astype(np.float32) + is_in_spell = spell_mask( + data, window, win_reducer, op, thresh, min_gap=min_gap + ).astype(np.float32) is_in_spell = select_time(is_in_spell, **indexer) outs = [] @@ -512,6 +527,7 @@ def spell_length_statistics( op: str, spell_reducer: str, freq: str, + min_gap: int = 1, resample_before_rl: bool = True, **indexer, ): @@ -537,6 +553,9 @@ def spell_length_statistics( Statistic on the spell lengths. If a list, multiple statistics are computed. freq : str Resampling frequency. + min_gap : int + The shortest possible gap between two spells. Spells closer than this are merged by assigning + the gap steps to the merged spell. 
resample_before_rl : bool Determines if the resampling should take place before or after the run length encoding (or a similar algorithm) is applied to runs. @@ -588,7 +607,8 @@ def spell_length_statistics( op, spell_reducer, freq, - resample_before_rl, + min_gap=min_gap, + resample_before_rl=resample_before_rl, **indexer, ) @@ -604,6 +624,7 @@ def bivariate_spell_length_statistics( op: str, spell_reducer: str, freq: str, + min_gap: int = 1, resample_before_rl: bool = True, **indexer, ): @@ -633,6 +654,9 @@ def bivariate_spell_length_statistics( Statistic on the spell lengths. If a list, multiple statistics are computed. freq : str Resampling frequency. + min_gap : int + The shortest possible gap between two spells. Spells closer than this are merged by assigning + the gap steps to the merged spell. resample_before_rl : bool Determines if the resampling should take place before or after the run length encoding (or a similar algorithm) is applied to runs. @@ -656,6 +680,7 @@ def bivariate_spell_length_statistics( op, spell_reducer, freq, + min_gap, resample_before_rl, **indexer, ) From 6080aa34c1a9e605b02bc9011870fbe30595ff7f Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Tue, 8 Oct 2024 14:34:09 -0400 Subject: [PATCH 12/14] fix test --- tests/test_generic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_generic.py b/tests/test_generic.py index cf2b9c263..c1c4f9f4e 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -797,7 +797,8 @@ def test_simple(self, pr_series, use_dask): np.testing.assert_array_equal( out.event_start, np.array( - ["2000-01-04", "2000-01-16", "2000-01-26"], dtype="datetime64[ns]" + ["2000-01-04", "2000-01-08", "2000-01-16", "2000-01-26"], + dtype="datetime64[ns]", ), ) From d32cf58d86f6cbe7341fa74847c312d9bc1f208f Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 9 Oct 2024 16:04:37 -0400 Subject: [PATCH 13/14] Apply suggestions from code review MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Éric Dupuis <71575674+coxipi@users.noreply.github.com> Co-authored-by: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> --- tests/test_run_length.py | 3 +-- xclim/indices/run_length.py | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/test_run_length.py b/tests/test_run_length.py index b4b294d96..0c64e3a84 100644 --- a/tests/test_run_length.py +++ b/tests/test_run_length.py @@ -127,8 +127,7 @@ def test_rle(ufunc, use_dask, index): @pytest.mark.parametrize("use_dask", [True, False]) @pytest.mark.parametrize("index", ["first", "last"]) def test_runs_with_holes_identity(use_dask, index): - # implement more tests, this is just to show that this reproduces the behaviour - # of rle + # This test reproduces the behaviour of `rle` values = np.zeros((10, 365, 4, 4)) time = pd.date_range("2000-01-01", periods=365, freq="D") values[:, 1:11, ...] = 1 diff --git a/xclim/indices/run_length.py b/xclim/indices/run_length.py index 757545014..046ef86e9 100644 --- a/xclim/indices/run_length.py +++ b/xclim/indices/run_length.py @@ -1848,7 +1848,7 @@ def find_events( ---------- condition : DataArray of boolean values The boolean mask, true where the start condition of the event is fulfilled. - window: int + window : int The number of consecutive True values for an event to start. condition_stop : DataArray of boolean values, optional The stopping boolean mask, true where the end condition of the event is fulfilled. @@ -1856,9 +1856,9 @@ def find_events( window_stop : int The number of consecutive True values in ``condition_stop`` for an event to end. Defaults to 1. - data: DataArray, optional + data : DataArray, optional The actual data. If present, its sum within each event is added to the output. - freq: str, optional + freq : str, optional A frequency to divide the data into periods. If absent, the output has not time dimension. 
If given, the events are searched within in each resample period independently. From fb6cf7d23d40921cea59fbccd08dd4675c19f141 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 9 Oct 2024 16:05:18 -0400 Subject: [PATCH 14/14] Suggestions from review --- xclim/indicators/atmos/_temperature.py | 6 +++--- xclim/indices/run_length.py | 9 ++++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/xclim/indicators/atmos/_temperature.py b/xclim/indicators/atmos/_temperature.py index 1d7c16ed4..2f0cc1e87 100644 --- a/xclim/indicators/atmos/_temperature.py +++ b/xclim/indicators/atmos/_temperature.py @@ -307,7 +307,7 @@ class TempHourlyWithIndexing(ResamplingIndicatorWithIndexing): long_name="Number of heat spells", description="{freq} number of heat spells events. A heat spell occurs when the {window}-day " "averages of daily minimum and maximum temperatures each exceed {thresh_tasmin} and {thresh_tasmax}. " - "All days of the {window}-day period are considered part of the spell. Gaps of under {min_gap} days are allowed " + "All days of the {window}-day period are considered part of the spell. Gaps of fewer than {min_gap} day(s) are allowed " "within a spell.", abstract="Number of heat spells. A heat spell occurs when rolling averages of daily minimum and maximum temperatures exceed given " "thresholds for a number of days.", @@ -342,7 +342,7 @@ class TempHourlyWithIndexing(ResamplingIndicatorWithIndexing): long_name="Longest heat spell", description="{freq} maximum length of heat spells. A heat spell occurs when the {window}-day " "averages of daily minimum and maximum temperatures each exceed {thresh_tasmin} and {thresh_tasmax}. " - "All days of the {window}-day period are considered part of the spell. Gaps of under {min_gap} days are allowed " + "All days of the {window}-day period are considered part of the spell. Gaps of fewer than {min_gap} day(s) are allowed " "within a spell.", abstract="The longest heat spell of a period. 
A heat spell occurs when rolling averages of daily minimum and maximum temperatures exceed given " "thresholds for a number of days.", @@ -376,7 +376,7 @@ class TempHourlyWithIndexing(ResamplingIndicatorWithIndexing): description="{freq} total length of heat spell events. " "A heat spell occurs when the {window}-day averages of daily minimum and maximum temperatures " "each exceed {thresh_tasmin} and {thresh_tasmax}. All days of the {window}-day period are considered part of the spell." - "Gaps of under {min_gap} days are allowed within a spell.", + "Gaps of fewer than {min_gap} day(s) are allowed within a spell.", abstract="Total length of heat spells. A heat spell occurs when rolling averages of daily minimum and maximum temperatures exceed given " "thresholds for a number of days.", compute=indices.generic.bivariate_spell_length_statistics, diff --git a/xclim/indices/run_length.py b/xclim/indices/run_length.py index 046ef86e9..5d5e71d97 100644 --- a/xclim/indices/run_length.py +++ b/xclim/indices/run_length.py @@ -1747,9 +1747,12 @@ def suspicious_run( def _find_events(da_start, da_stop, data, window_start, window_stop): - # Get basic blocks to work with, our runs with holes and the lengths of those runs - # Series of ones indicating where we have continuous runs of freezing rain with pauses - # not exceeding `window_stop` + """Actual finding of events for each period. + + Get basic blocks to work with, our runs with holes and the lengths of those runs. + Series of ones indicating where we have continuous runs with pauses + not exceeding `window_stop` + """ runs = runs_with_holes(da_start, window_start, da_stop, window_stop) # Compute the length of freezing rain events