diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ed8eef321..5747f576e 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -13,7 +13,8 @@ New features and enhancements Bug fixes ^^^^^^^^^ -* Fixed the indexer bug in the `xclim.indices.standardized_index_fit_params` when multiple or non-array indexers are specified and fitted parameters are reloaded from netCDF. (:issue:`1842`, :pull:`1843`). +* Fixed the indexer bug in the ``xclim.indices.standardized_index_fit_params`` when multiple or non-array indexers are specified and fitted parameters are reloaded from netCDF. (:issue:`1842`, :pull:`1843`). +* Addressed a bug found in ``wet_spell_*`` indicators that was contributing to erroneous results. A new generic spell length statistic function ``xclim.indices.generic.spell_length_statistics`` is now used in wet and dry spells indicators. (:issue:`1834`, :pull:`1838`). Internal changes ^^^^^^^^^^^^^^^^ diff --git a/tests/conftest.py b/tests/conftest.py index 133cea41e..943788f04 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -382,9 +382,10 @@ def add_imports(xdoctest_namespace, threadsafe_data_dir) -> None: @pytest.fixture(autouse=True, scope="function") -def add_example_dataarray(xdoctest_namespace, tas_series) -> None: +def add_example_dataarray(xdoctest_namespace, tas_series, pr_series) -> None: ns = xdoctest_namespace ns["tas"] = tas_series(np.random.rand(365) * 20 + 253.15) + ns["pr"] = pr_series(np.random.rand(365) * 5) @pytest.fixture(autouse=True, scope="session") diff --git a/tests/test_indices.py b/tests/test_indices.py index 702955206..90fb45b3e 100644 --- a/tests/test_indices.py +++ b/tests/test_indices.py @@ -3402,7 +3402,7 @@ def test_water_budget(pr_series, evspsblpot_series): 3, 3, 7, - (2, 12, 20, 12, 20), + (1, 12, 20, 12, 20), ), ( [0.01] * 6 @@ -3693,7 +3693,7 @@ def test_hardiness_zones(tasmin_series, tmin, meth, zone): @pytest.mark.parametrize( - "pr,thresh1,thresh2,window,outs", + "pr,threshmin,threshsum,window,outs", [ ( [1.01] * 6 @@ -3706,72 +3706,84 @@ def test_hardiness_zones(tasmin_series, tmin, meth, zone): 3, 3, 7, - (3, 0, 20, 0, 20), + (1, 20, 0, 20, 0), ), ( - [0.01] * 6 - + [1.01] * 3 - + [0.51] * 2 - + [0.75] * 2 - + [0.51] - + [0.01] * 3 - + [0.01] * 3, + [0.01] * 40 + [1.01] * 10 + [0.01] * 40 + [1.01] * 20 + [0.01] * 40, + 1, + 2, 3, - 3, - 7, - (1, 6, 20, 4, 20), + (2, 34, 30, 22, 20), + ), + ( + [0.01] * 40 + [1.01] * 10 + [0.01] * 40 + [2.01] * 20 + [0.01] * 40, + 2, + 14, + 14, + (1, 34, 20, 34, 20), ), - ([3.01] * 358 + [0.99] * 14 + [3.01] * 358, 1, 14, 14, (1, 0, 0, 0, 0)), ], ) -def test_wet_spell(pr_series, pr, thresh1, thresh2, window, outs): +def test_wet_spell(pr_series, pr, threshmin, threshsum, window, outs): pr = pr_series(np.array(pr), start="1981-01-01", units="mm/day") - out_events, out_total_d_sum, out_total_d_max, out_max_d_sum, out_max_d_max = outs + out_events, out_total_d_sum, out_total_d_min, out_max_d_sum, out_max_d_min = outs events = xci.wet_spell_frequency( - pr, thresh=f"{thresh1} mm", window=window, freq="YS" + pr, thresh=f"{threshsum} mm", window=window, freq="YS", op="sum" ) total_d_sum = xci.wet_spell_total_length( pr, - thresh=f"{thresh2} mm", + thresh=f"{threshsum} mm", window=window, op="sum", freq="YS", ) - total_d_max = xci.wet_spell_total_length( - pr, thresh=f"{thresh1} mm", window=window, op="max", freq="YS" + total_d_min = xci.wet_spell_total_length( + pr, thresh=f"{threshmin} mm", window=window, op="min", freq="YS" ) max_d_sum = xci.wet_spell_max_length( pr, - thresh=f"{thresh2} mm", + thresh=f"{threshsum} 
mm", window=window, op="sum", freq="YS", ) - max_d_max = xci.wet_spell_max_length( - pr, thresh=f"{thresh1} mm", window=window, op="max", freq="YS" + max_d_min = xci.wet_spell_max_length( + pr, thresh=f"{threshmin} mm", window=window, op="min", freq="YS" ) np.testing.assert_allclose(events[0], [out_events], rtol=1e-1) np.testing.assert_allclose(total_d_sum[0], [out_total_d_sum], rtol=1e-1) - np.testing.assert_allclose(total_d_max[0], [out_total_d_max], rtol=1e-1) + np.testing.assert_allclose(total_d_min[0], [out_total_d_min], rtol=1e-1) np.testing.assert_allclose(max_d_sum[0], [out_max_d_sum], rtol=1e-1) - np.testing.assert_allclose(max_d_max[0], [out_max_d_max], rtol=1e-1) + np.testing.assert_allclose(max_d_min[0], [out_max_d_min], rtol=1e-1) def test_wet_spell_total_length_indexer(pr_series): - pr = pr_series([1] * 5 + [0] * 10 + [1] * 350, start="1900-01-01", units="mm/d") + pr = pr_series([1.01] * 5 + [0] * 360, start="1901-01-01", units="mm/d") out = xci.wet_spell_total_length( - pr, window=7, op="sum", thresh="3 mm", freq="MS", date_bounds=("01-10", "12-31") + pr, + window=10, + op="sum", + thresh="5 mm", + freq="MS", + date_bounds=("01-08", "12-31"), ) + # if indexing was done before spell finding, everything would be 0 np.testing.assert_allclose(out, [3] + [0] * 11) def test_wet_spell_max_length_indexer(pr_series): - pr = pr_series([1] * 5 + [0] * 10 + [1] * 350, start="1900-01-01", units="mm/d") + pr = pr_series([1.01] * 5 + [0] * 360, start="1901-01-01", units="mm/d") out = xci.wet_spell_max_length( - pr, window=7, op="sum", thresh="3 mm", freq="MS", date_bounds=("01-10", "12-31") + pr, + window=10, + op="sum", + thresh="5 mm", + freq="MS", + date_bounds=("01-08", "12-31"), ) + # if indexing was done before spell finding, everything would be 0 np.testing.assert_allclose(out, [3] + [0] * 11) @@ -3785,7 +3797,7 @@ def test_wet_spell_frequency_op(pr_series): test_max = xci.wet_spell_frequency(pr, thresh="1 mm", window=3, freq="MS", op="max") np.testing.assert_allclose(test_sum[0], [3], rtol=1e-1) - np.testing.assert_allclose(test_max[0], [4], rtol=1e-1) + np.testing.assert_allclose(test_max[0], [3], rtol=1e-1) class TestSfcWindMax: diff --git a/tests/test_precip.py b/tests/test_precip.py index ba470d985..fcf091c51 100644 --- a/tests/test_precip.py +++ b/tests/test_precip.py @@ -720,10 +720,10 @@ def test_dry_spell_frequency_op(open_dataset): ) np.testing.assert_allclose( - test_sum[0, :14], [1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0], rtol=1e-1 + test_sum[0, :14], [1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0], rtol=1e-1 ) np.testing.assert_allclose( - test_max[0, :14], [1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 2, 1], rtol=1e-1 + test_max[0, :14], [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 2, 1], rtol=1e-1 ) assert ( "The monthly number of dry periods of 7 day(s) or more, " diff --git a/xclim/indices/_threshold.py b/xclim/indices/_threshold.py index ecf8d1ee3..0ebd484c2 100644 --- a/xclim/indices/_threshold.py +++ b/xclim/indices/_threshold.py @@ -7,7 +7,7 @@ import numpy as np import xarray -from xclim.core.calendar import doy_from_string, get_calendar, select_time +from xclim.core.calendar import doy_from_string, get_calendar from xclim.core.missing import at_least_n_valid from xclim.core.units import ( convert_units_to, @@ -26,6 +26,7 @@ cumulative_difference, domain_count, first_day_threshold_reached, + spell_length_statistics, threshold_count, ) @@ -3153,8 +3154,9 @@ def dry_spell_frequency( freq: str = "YS", resample_before_rl: bool = True, op: str = "sum", + **indexer, ) -> xarray.DataArray: 
- """Return the number of dry periods of n days and more. + r"""Return the number of dry periods of n days and more. Periods during which the accumulated or maximal daily precipitation amount on a window of n days is under threshold. @@ -3168,22 +3170,30 @@ def dry_spell_frequency( window : int Minimum length of the spells. freq : str - Resampling frequency. + Resampling frequency. resample_before_rl : bool - Determines if the resampling should take place before or after the run - length encoding (or a similar algorithm) is applied to runs. - op: {"sum","max"} - Operation to perform on the window. - Default is "sum", which checks that the sum of accumulated precipitation over the whole window is less than the - threshold. - "max" checks that the maximal daily precipitation amount within the window is less than the threshold. - This is the same as verifying that each individual day is below the threshold. + Determines if the resampling should take place before or after the run + length encoding (or a similar algorithm) is applied to runs. + op: {"sum", "max", "min", "mean"} + Operation to perform on the window. + Default is "sum", which checks that the sum of accumulated precipitation over the whole window is less than the + threshold. + "max" checks that the maximal daily precipitation amount within the window is less than the threshold. + This is the same as verifying that each individual day is below the threshold. + \*\*indexer + Indexing parameters to compute the indicator on a temporal subset of the data. + It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. + Indexing is done after finding the dry days, but before finding the spells. Returns ------- xarray.DataArray, [unitless] The {freq} number of dry periods of minimum {window} days. + See Also + -------- + xclim.indices.generic.spell_length_statistics + Examples -------- >>> from xclim.indices import dry_spell_frequency @@ -3192,19 +3202,17 @@ def dry_spell_frequency( >>> dsf = dry_spell_frequency(pr=pr, op="max") """ pram = rate2amount(convert_units_to(pr, "mm/d", context="hydro"), out_units="mm") - thresh = convert_units_to(thresh, pram, context="hydro") - - agg_pr = getattr(pram.rolling(time=window, center=True), op)() - cond = agg_pr < thresh - out = rl.resample_and_rl( - cond, - resample_before_rl, - rl.windowed_run_events, - window=1, + return spell_length_statistics( + pram, + threshold=thresh, + op="<", + window=window, + win_reducer=op, + spell_reducer="count", freq=freq, + resample_before_rl=resample_before_rl, + **indexer, ) - out.attrs["units"] = "" - return out @declare_units(pr="[precipitation]", thresh="[length]") @@ -3217,7 +3225,7 @@ def dry_spell_total_length( resample_before_rl: bool = True, **indexer, ) -> xarray.DataArray: - """Total length of dry spells. + r"""Total length of dry spells. Total number of days in dry periods of a minimum length, during which the maximum or accumulated precipitation within a window of the same length is under a threshold. @@ -3230,11 +3238,18 @@ def dry_spell_total_length( Accumulated precipitation value under which a period is considered dry. window : int Number of days when the maximum or accumulated precipitation is under threshold. - op : {"max", "sum"} - Reduce operation. + op : {"sum", "max", "min", "mean"} + Operation to perform on the window. + Default is "sum", which checks that the sum of accumulated precipitation over the whole window is less than the + threshold. 
+ "max" checks that the maximal daily precipitation amount within the window is less than the threshold. + This is the same as verifying that each individual day is below the threshold. freq : str Resampling frequency. - indexer + resample_before_rl : bool + Determines if the resampling should take place before or after the run + length encoding (or a similar algorithm) is applied to runs. + \*\*indexer Indexing parameters to compute the indicator on a temporal subset of the data. It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. Indexing is done after finding the dry days, but before finding the spells. @@ -3244,6 +3259,10 @@ def dry_spell_total_length( xarray.DataArray, [days] The {freq} total number of days in dry periods of minimum {window} days. + See Also + -------- + xclim.indices.generic.spell_length_statistics + Notes ----- The algorithm assumes days before and after the timeseries are "wet", meaning that the condition for being @@ -3254,23 +3273,17 @@ def dry_spell_total_length( computation, compared to only three). """ pram = rate2amount(convert_units_to(pr, "mm/d", context="hydro"), out_units="mm") - thresh = convert_units_to(thresh, pram, context="hydro") - - pram_pad = pram.pad(time=(0, window)) - mask = getattr(pram_pad.rolling(time=window), op)() < thresh - dry = (mask.rolling(time=window).sum() >= 1).shift(time=-(window - 1)) - dry = dry.isel(time=slice(0, pram.time.size)).astype(float) - - dry = select_time(dry, **indexer) - - out = rl.resample_and_rl( - dry, - resample_before_rl, - rl.windowed_run_count, - window=1, + return spell_length_statistics( + pram, + threshold=thresh, + op="<", + window=window, + win_reducer=op, + spell_reducer="sum", freq=freq, + resample_before_rl=resample_before_rl, + **indexer, ) - return to_agg_units(out, pram, "count") @declare_units(pr="[precipitation]", thresh="[length]") @@ -3283,7 +3296,7 @@ def dry_spell_max_length( resample_before_rl: bool = True, **indexer, ) -> xarray.DataArray: - """Longest dry spell. + r"""Longest dry spell. Maximum number of consecutive days in a dry period of minimum length, during which the maximum or accumulated precipitation within a window of the same length is under a threshold. @@ -3300,11 +3313,18 @@ def dry_spell_max_length( Reduce operation. freq : str Resampling frequency. - indexer + resample_before_rl : bool + Determines if the resampling should take place before or after the run + length encoding (or a similar algorithm) is applied to runs. + \*\*indexer Indexing parameters to compute the indicator on a temporal subset of the data. It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. Indexing is done after finding the dry days, but before finding the spells. + See Also + -------- + xclim.indices.generic.spell_length_statistics + Returns ------- xarray.DataArray, [days] @@ -3320,22 +3340,17 @@ def dry_spell_max_length( computation, compared to only three). 
""" pram = rate2amount(convert_units_to(pr, "mm/d", context="hydro"), out_units="mm") - thresh = convert_units_to(thresh, pram, context="hydro") - - pram_pad = pram.pad(time=(0, window)) - mask = getattr(pram_pad.rolling(time=window), op)() < thresh - dry = (mask.rolling(time=window).sum() >= 1).shift(time=-(window - 1)) - dry = dry.isel(time=slice(0, pram.time.size)).astype(float) - - dry = select_time(dry, **indexer) - - out = rl.resample_and_rl( - dry, - resample_before_rl, - rl.longest_run, + return spell_length_statistics( + pram, + threshold=thresh, + op="<", + window=window, + win_reducer=op, + spell_reducer="max", freq=freq, + resample_before_rl=resample_before_rl, + **indexer, ) - return to_agg_units(out, pram, "count") @declare_units(pr="[precipitation]", thresh="[length]") @@ -3346,10 +3361,11 @@ def wet_spell_frequency( freq: str = "YS", resample_before_rl: bool = True, op: str = "sum", + **indexer, ) -> xarray.DataArray: - """Return the number of wet periods of n days and more. + r"""Return the number of wet periods of n days and more. - Periods during which the accumulated or maximal daily precipitation amount on a window of n days is over threshold. + Periods during which the accumulated, minimal, or maximal daily precipitation amount on a window of n days is over threshold. Parameters ---------- @@ -3361,16 +3377,24 @@ def wet_spell_frequency( window : int Minimum length of the spells. freq : str - Resampling frequency. + Resampling frequency. resample_before_rl : bool - Determines if the resampling should take place before or after the run - length encoding (or a similar algorithm) is applied to runs. - op: {"sum","max"} - Operation to perform on the window. - Default is "sum", which checks that the sum of accumulated precipitation over the whole window is more than the - threshold. - "max" checks that the maximal daily precipitation amount within the window is more than the threshold. - This is the same as verifying that each individual day is above the threshold. + Determines if the resampling should take place before or after the run + length encoding (or a similar algorithm) is applied to runs. + op : {"sum","min", "max", "mean"} + Operation to perform on the window. + Default is "sum", which checks that the sum of accumulated precipitation over the whole window is more than the + threshold. + "min" checks that the maximal daily precipitation amount within the window is more than the threshold. + This is the same as verifying that each individual day is above the threshold. + \*\*indexer + Indexing parameters to compute the indicator on a temporal subset of the data. + It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. + Indexing is done after finding the wet days, but before finding the spells. 
+ + See Also + -------- + xclim.indices.generic.spell_length_statistics Returns ------- @@ -3382,22 +3406,20 @@ def wet_spell_frequency( >>> from xclim.indices import wet_spell_frequency >>> pr = open_dataset(path_to_pr_file).pr >>> dsf = wet_spell_frequency(pr=pr, op="sum") - >>> dsf = wet_spell_frequency(pr=pr, op="max") + >>> dsf = wet_spell_frequency(pr=pr, op="min") """ pram = rate2amount(convert_units_to(pr, "mm/d", context="hydro"), out_units="mm") - thresh = convert_units_to(thresh, pram, context="hydro") - - agg_pr = getattr(pram.rolling(time=window, center=True), op)() - cond = agg_pr >= thresh - out = rl.resample_and_rl( - cond, - resample_before_rl, - rl.windowed_run_events, - window=1, + return spell_length_statistics( + pram, + threshold=thresh, + op=">=", + window=window, + win_reducer=op, + spell_reducer="count", freq=freq, + resample_before_rl=resample_before_rl, + **indexer, ) - out.attrs["units"] = "" - return out @declare_units(pr="[precipitation]", thresh="[length]") @@ -3410,9 +3432,9 @@ def wet_spell_total_length( resample_before_rl: bool = True, **indexer, ) -> xarray.DataArray: - """Total length of dry spells. + r"""Total length of wet spells. - Total number of days in wet periods of a minimum length, during which the maximum or + Total number of days in wet periods of a minimum length, during which the minimum or accumulated precipitation within a window of the same length is over a threshold. Parameters @@ -3420,17 +3442,27 @@ def wet_spell_total_length( pr : xarray.DataArray Daily precipitation. thresh : Quantified - Accumulated precipitation value over which a period is considered dry. + Accumulated precipitation value over which a period is considered wet. window : int Number of days when the maximum or accumulated precipitation is over threshold. - op : {"max", "sum"} + op : {"min", "sum", "max", "mean"} Reduce operation. + `min` means that all days within the minimum window must exceed the threshold. + `sum` means that the accumulated precipitation within the window must exceed the threshold. + In all cases, the whole window is marked a part of a wet spell. freq : str Resampling frequency. - indexer + resample_before_rl : bool + Determines if the resampling should take place before or after the run + length encoding (or a similar algorithm) is applied to runs. + \*\*indexer Indexing parameters to compute the indicator on a temporal subset of the data. It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. - Indexing is done after finding the dry days, but before finding the spells. + Indexing is done after finding the wet days, but before finding the spells. + + See Also + -------- + xclim.indices.generic.spell_length_statistics Returns ------- @@ -3447,23 +3479,17 @@ def wet_spell_total_length( computation, compared to only three). 
""" pram = rate2amount(convert_units_to(pr, "mm/d", context="hydro"), out_units="mm") - thresh = convert_units_to(thresh, pram, context="hydro") - - pram_pad = pram.pad(time=(0, window)) - mask = getattr(pram_pad.rolling(time=window), op)() >= thresh - wet = (mask.rolling(time=window).sum() < 1).shift(time=-(window - 1)) - wet = wet.isel(time=slice(0, pram.time.size)).astype(float) - - wet = select_time(wet, **indexer) - - out = rl.resample_and_rl( - wet, - resample_before_rl, - rl.windowed_run_count, - window=1, + return spell_length_statistics( + pram, + threshold=thresh, + op=">=", + window=window, + win_reducer=op, + spell_reducer="sum", freq=freq, + resample_before_rl=resample_before_rl, + **indexer, ) - return to_agg_units(out, pram, "count") @declare_units(pr="[precipitation]", thresh="[length]") @@ -3476,9 +3502,9 @@ def wet_spell_max_length( resample_before_rl: bool = True, **indexer, ) -> xarray.DataArray: - """Longest wet spell. + r"""Longest wet spell. - Maximum number of consecutive days in a wet period of minimum length, during which the maximum or + Maximum number of consecutive days in a wet period of minimum length, during which the minimum or accumulated precipitation within a window of the same length is over a threshold. Parameters @@ -3486,17 +3512,27 @@ def wet_spell_max_length( pr : xarray.DataArray Daily precipitation. thresh : Quantified - Accumulated precipitation value over which a period is considered dry. + Accumulated precipitation value over which a period is considered wet. window : int Number of days when the maximum or accumulated precipitation is over threshold. - op : {"max", "sum"} + op : {"min", "sum", "max", "mean"} Reduce operation. + `min` means that all days within the minimum window must exceed the threshold. + `sum` means that the accumulated precipitation within the window must exceed the threshold. + In all cases, the whole window is marked a part of a wet spell. freq : str Resampling frequency. - indexer + resample_before_rl: bool + Determines if the resampling should take place before or after the run + length encoding (or a similar algorithm) is applied to runs. + \*\*indexer Indexing parameters to compute the indicator on a temporal subset of the data. It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. - Indexing is done after finding the dry days, but before finding the spells. + Indexing is done after finding the wet days, but before finding the spells. + + See Also + -------- + xclim.indices.generic.spell_length_statistics Returns ------- @@ -3513,19 +3549,14 @@ def wet_spell_max_length( computation, compared to only three). 
""" pram = rate2amount(convert_units_to(pr, "mm/d", context="hydro"), out_units="mm") - thresh = convert_units_to(thresh, pram, context="hydro") - - pram_pad = pram.pad(time=(0, window)) - mask = getattr(pram_pad.rolling(time=window), op)() >= thresh - wet = (mask.rolling(time=window).sum() < 1).shift(time=-(window - 1)) - wet = wet.isel(time=slice(0, pram.time.size)).astype(float) - - wet = select_time(wet, **indexer) - - out = rl.resample_and_rl( - wet, - resample_before_rl, - rl.longest_run, + return spell_length_statistics( + pram, + threshold=thresh, + op=">=", + window=window, + win_reducer=op, + spell_reducer="max", freq=freq, + resample_before_rl=resample_before_rl, + **indexer, ) - return to_agg_units(out, pram, "count") diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index a0e919f62..39831bc1f 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -352,6 +352,129 @@ def get_daily_events( return events +@declare_relative_units(threshold="") +def spell_length_statistics( + data: xarray.DataArray, + threshold: Quantified, + window: int, + win_reducer: str, + op: str, + spell_reducer: str, + freq: str, + resample_before_rl: bool = True, + **indexer, +): + r"""Statistics on spells lengths. + + A spell is when a statistic (`win_reducer`) over a minimum number (`window`) of consecutive timesteps respects a condition (`op` `thresh`). + This returns a statistic over the spell's count or length. + + Parameters + ---------- + data : xr.DataArray + Input data. + threshold : Quantified + Threshold to test against. + window : int + Minimum length of a spell. + win_reducer : {'min', 'max', 'sum', 'mean'} + Reduction along the spell length to compute the spell value. + Note that this does not matter when `window` is 1. + op : {">", "gt", "<", "lt", ">=", "ge", "<=", "le", "==", "eq", "!=", "ne"} + Logical operator. Ex: spell_value > thresh. + spell_reducer : {'max', 'sum', 'count'} + Statistic on the spell lengths. + freq : str + Resampling frequency. + resample_before_rl : bool + Determines if the resampling should take place before or after the run + length encoding (or a similar algorithm) is applied to runs. + \*\*indexer + Indexing parameters to compute the indicator on a temporal subset of the data. + It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. + Indexing is done after finding the days part of a spell, but before taking the spell statistics. + + Examples + -------- + >>> spell_length_statistics( + ... tas, + ... threshold="35 °C", + ... window=7, + ... op=">", + ... win_reducer="min", + ... spell_reducer="sum", + ... freq="YS", + ... ) + + Here, a day is part of a spell if it is in any seven (7) day period where the minimum temperature is over 35°C. + We then return the annual sum of the spell lengths, so the total number of days in such spells. + >>> from xclim.core.units import rate2amount + >>> pram = rate2amount(pr, out_units="mm") + >>> spell_length_statistics( + ... pram, + ... threshold="20 mm", + ... window=5, + ... op=">=", + ... win_reducer="sum", + ... spell_reducer="max", + ... freq="YS", + ... ) + + Here, a day is part of a spell if it is in any five (5) day period where the total accumulated precipitation reaches + or exceeds 20 mm. We then return the length of the longest of such spells. 
+    """
+    thresh = convert_units_to(
+        threshold,
+        data,
+        context=infer_context(standard_name=data.attrs.get("standard_name")),
+    )
+
+    if window == 1:  # Fast path
+        is_in_spell = compare(data, op, thresh)
+    elif (win_reducer == "min" and op in [">", ">=", "ge", "gt"]) or (
+        win_reducer == "max" and op in ["<", "<=", "le", "lt"]
+    ):
+        # Fast path for specific cases, this yields a smaller dask graph (rolling twice is expensive!)
+        # For these two cases, a day can't be part of a spell if it doesn't respect the condition itself
+        mask = compare(data, op, thresh)
+        # We need to filter out the spells shorter than "window"
+        # find sequences of consecutive respected constraints
+        cs_s = rl._cumsum_reset_on_zero(mask)
+        # end of these sequences
+        cs_s = cs_s.where(mask.shift({"time": -1}, fill_value=0) == 0)
+        # propagate these ends of sequences
+        # the `.where(mask>0, 0)` acts as a stopper
+        is_in_spell = cs_s.where(cs_s >= window).where(mask > 0, 0).bfill("time") > 0
+    else:
+        data_pad = data.pad(time=(0, window))
+        # The spell-wise value to test
+        # For example "win_reducer='sum'", we want the sum over the minimum spell length (window) to be above the thresh
+        spell_value = getattr(data_pad.rolling(time=window), win_reducer)()
+        # True at the end of a spell respecting the condition
+        mask = compare(spell_value, op, thresh)
+        # True for all days part of a spell that respected the condition (shift because of the two rollings)
+        is_in_spell = (mask.rolling(time=window).sum() >= 1).shift(time=-(window - 1))
+        # Cut back to the original size
+        is_in_spell = is_in_spell.isel(time=slice(0, data.time.size)).astype(float)
+
+    is_in_spell = select_time(is_in_spell, **indexer)
+
+    out = rl.resample_and_rl(
+        is_in_spell,
+        resample_before_rl,
+        rl.rle_statistics,
+        reducer=spell_reducer,
+        # The code above already ensured only spells of the minimum length are selected
+        window=1,
+        freq=freq,
+    )
+
+    if spell_reducer == "count":
+        return out.assign_attrs(units="")
+    # All other cases are statistics of the number of timesteps
+    return to_agg_units(out, data, "count")
+
+
 # CF-INDEX-META Indices
diff --git a/xclim/indices/run_length.py b/xclim/indices/run_length.py
index b3015230a..0aad6f947 100644
--- a/xclim/indices/run_length.py
+++ b/xclim/indices/run_length.py
@@ -1129,7 +1129,7 @@ def statistics_run_1d(arr: Sequence[bool], reducer: str, window: int) -> int:
     ----------
     arr : Sequence[bool]
         Input array (bool)
-    reducer : {'mean', 'sum', 'min', 'max', 'std'}
+    reducer : {"mean", "sum", "min", "max", "std", "count"}
         Reducing function name.
     window : int
         Minimal length of runs to be included in the statistics
@@ -1142,6 +1142,8 @@ def statistics_run_1d(arr: Sequence[bool], reducer: str, window: int) -> int:
     v, rl = rle_1d(arr)[:2]
     if not np.any(v) or np.all(v * rl < window):
         return 0
+    if reducer == "count":
+        return (v * rl >= window).sum()
     func = getattr(np, f"nan{reducer}")
     return func(np.where(v * rl >= window, rl, np.NaN))

@@ -1358,9 +1360,14 @@ def _index_from_1d_array(indices, array):
     da2 = xr.DataArray(da.data, dims=(tmpname,), name=None)
     # for each chunk of index, take corresponding values from da
     out = index.map_blocks(_index_from_1d_array, args=(da2,)).rename(da.name)
+    # map_blocks chunks the aux coords. Replace them with the non-chunked ones from the original array.
+    # This avoids unwanted loading of the aux coords in a resample(...).map(), for example
+    for name, crd in out.coords.items():
+        if uses_dask(crd) and name in index.coords and index[name].size == crd.size:
+            out = out.assign_coords(**{name: index[name]})
     # mask where index was NaN. Drop any auxiliary coord, they are already on `out`.
     # Chunked aux coord would have the same name on both sides and xarray will want to check if they are equal, which means loading them
-    # making lazy_indexing not lazy.
+    # making lazy_indexing not lazy. Same issue as above.
     out = out.where(
         ~invalid.drop_vars(
             [crd for crd in invalid.coords if crd not in invalid.dims]