From 1dfe0f06c2cc2236f08e60d1e50b6de44bdb0384 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 11 Jul 2024 17:13:39 -0400 Subject: [PATCH 01/13] Fix wetspell tests - count in rle_stats 1d --- tests/test_indices.py | 72 ++++++++------ xclim/indices/_threshold.py | 189 +++++++++++++++++++----------------- xclim/indices/generic.py | 109 +++++++++++++++++++++ xclim/indices/run_length.py | 4 +- 4 files changed, 256 insertions(+), 118 deletions(-) diff --git a/tests/test_indices.py b/tests/test_indices.py index 9629d22d5..330b3c6e5 100644 --- a/tests/test_indices.py +++ b/tests/test_indices.py @@ -3382,7 +3382,7 @@ def test_water_budget(pr_series, evspsblpot_series): 3, 3, 7, - (2, 12, 20, 12, 20), + (1, 12, 20, 12, 20), ), ( [0.01] * 6 @@ -3673,7 +3673,7 @@ def test_hardiness_zones(tasmin_series, tmin, meth, zone): @pytest.mark.parametrize( - "pr,thresh1,thresh2,window,outs", + "pr,threshmin,threshsum,window,outs", [ ( [1.01] * 6 @@ -3686,72 +3686,84 @@ def test_hardiness_zones(tasmin_series, tmin, meth, zone): 3, 3, 7, - (3, 0, 20, 0, 20), + (1, 20, 0, 20, 0), ), ( - [0.01] * 6 - + [1.01] * 3 - + [0.51] * 2 - + [0.75] * 2 - + [0.51] - + [0.01] * 3 - + [0.01] * 3, + [0.01] * 40 + [1.01] * 10 + [0.01] * 40 + [1.01] * 20 + [0.01] * 40, + 1, + 2, 3, - 3, - 7, - (1, 6, 20, 4, 20), + (2, 34, 30, 22, 20), + ), + ( + [0.01] * 40 + [1.01] * 10 + [0.01] * 40 + [2.01] * 20 + [0.01] * 40, + 2, + 14, + 14, + (1, 34, 20, 34, 20), ), - ([3.01] * 358 + [0.99] * 14 + [3.01] * 358, 1, 14, 14, (1, 0, 0, 0, 0)), ], ) -def test_wet_spell(pr_series, pr, thresh1, thresh2, window, outs): +def test_wet_spell(pr_series, pr, threshmin, threshsum, window, outs): pr = pr_series(np.array(pr), start="1981-01-01", units="mm/day") - out_events, out_total_d_sum, out_total_d_max, out_max_d_sum, out_max_d_max = outs + out_events, out_total_d_sum, out_total_d_min, out_max_d_sum, out_max_d_min = outs events = xci.wet_spell_frequency( - pr, thresh=f"{thresh1} mm", window=window, freq="YS" + pr, 
thresh=f"{threshsum} mm", window=window, freq="YS", op="sum" ) total_d_sum = xci.wet_spell_total_length( pr, - thresh=f"{thresh2} mm", + thresh=f"{threshsum} mm", window=window, op="sum", freq="YS", ) - total_d_max = xci.wet_spell_total_length( - pr, thresh=f"{thresh1} mm", window=window, op="max", freq="YS" + total_d_min = xci.wet_spell_total_length( + pr, thresh=f"{threshmin} mm", window=window, op="min", freq="YS" ) max_d_sum = xci.wet_spell_max_length( pr, - thresh=f"{thresh2} mm", + thresh=f"{threshsum} mm", window=window, op="sum", freq="YS", ) - max_d_max = xci.wet_spell_max_length( - pr, thresh=f"{thresh1} mm", window=window, op="max", freq="YS" + max_d_min = xci.wet_spell_max_length( + pr, thresh=f"{threshmin} mm", window=window, op="min", freq="YS" ) np.testing.assert_allclose(events[0], [out_events], rtol=1e-1) np.testing.assert_allclose(total_d_sum[0], [out_total_d_sum], rtol=1e-1) - np.testing.assert_allclose(total_d_max[0], [out_total_d_max], rtol=1e-1) + np.testing.assert_allclose(total_d_min[0], [out_total_d_min], rtol=1e-1) np.testing.assert_allclose(max_d_sum[0], [out_max_d_sum], rtol=1e-1) - np.testing.assert_allclose(max_d_max[0], [out_max_d_max], rtol=1e-1) + np.testing.assert_allclose(max_d_min[0], [out_max_d_min], rtol=1e-1) def test_wet_spell_total_length_indexer(pr_series): - pr = pr_series([1] * 5 + [0] * 10 + [1] * 350, start="1900-01-01", units="mm/d") + pr = pr_series([1.01] * 5 + [0] * 360, start="1901-01-01", units="mm/d") out = xci.wet_spell_total_length( - pr, window=7, op="sum", thresh="3 mm", freq="MS", date_bounds=("01-10", "12-31") + pr, + window=10, + op="sum", + thresh="5 mm", + freq="MS", + date_bounds=("01-08", "12-31"), ) + # if indexing was done before spell finding, everything would be 0 np.testing.assert_allclose(out, [3] + [0] * 11) def test_wet_spell_max_length_indexer(pr_series): - pr = pr_series([1] * 5 + [0] * 10 + [1] * 350, start="1900-01-01", units="mm/d") + pr = pr_series([1.01] * 5 + [0] * 360, 
start="1901-01-01", units="mm/d") out = xci.wet_spell_max_length( - pr, window=7, op="sum", thresh="3 mm", freq="MS", date_bounds=("01-10", "12-31") + pr, + window=10, + op="sum", + thresh="5 mm", + freq="MS", + date_bounds=("01-08", "12-31"), ) + # if indexing was done before spell finding, everything would be 0 np.testing.assert_allclose(out, [3] + [0] * 11) @@ -3765,7 +3777,7 @@ def test_wet_spell_frequency_op(pr_series): test_max = xci.wet_spell_frequency(pr, thresh="1 mm", window=3, freq="MS", op="max") np.testing.assert_allclose(test_sum[0], [3], rtol=1e-1) - np.testing.assert_allclose(test_max[0], [4], rtol=1e-1) + np.testing.assert_allclose(test_max[0], [3], rtol=1e-1) class TestSfcWindMax: diff --git a/xclim/indices/_threshold.py b/xclim/indices/_threshold.py index ecf8d1ee3..3f88c4a0c 100644 --- a/xclim/indices/_threshold.py +++ b/xclim/indices/_threshold.py @@ -7,7 +7,7 @@ import numpy as np import xarray -from xclim.core.calendar import doy_from_string, get_calendar, select_time +from xclim.core.calendar import doy_from_string, get_calendar from xclim.core.missing import at_least_n_valid from xclim.core.units import ( convert_units_to, @@ -26,6 +26,7 @@ cumulative_difference, domain_count, first_day_threshold_reached, + spell_length_statistics, threshold_count, ) @@ -3153,6 +3154,7 @@ def dry_spell_frequency( freq: str = "YS", resample_before_rl: bool = True, op: str = "sum", + **indexer, ) -> xarray.DataArray: """Return the number of dry periods of n days and more. @@ -3184,6 +3186,10 @@ def dry_spell_frequency( xarray.DataArray, [unitless] The {freq} number of dry periods of minimum {window} days. 
+ See Also + -------- + xclim.indices.generic.spell_length_statistics + Examples -------- >>> from xclim.indices import dry_spell_frequency @@ -3192,19 +3198,17 @@ def dry_spell_frequency( >>> dsf = dry_spell_frequency(pr=pr, op="max") """ pram = rate2amount(convert_units_to(pr, "mm/d", context="hydro"), out_units="mm") - thresh = convert_units_to(thresh, pram, context="hydro") - - agg_pr = getattr(pram.rolling(time=window, center=True), op)() - cond = agg_pr < thresh - out = rl.resample_and_rl( - cond, - resample_before_rl, - rl.windowed_run_events, - window=1, + return spell_length_statistics( + pram, + threshold=thresh, + op="<", + window=window, + win_reducer=op, + spell_reducer="count", freq=freq, + resample_before_rl=resample_before_rl, + **indexer, ) - out.attrs["units"] = "" - return out @declare_units(pr="[precipitation]", thresh="[length]") @@ -3244,6 +3248,10 @@ def dry_spell_total_length( xarray.DataArray, [days] The {freq} total number of days in dry periods of minimum {window} days. + See Also + -------- + xclim.indices.generic.spell_length_statistics + Notes ----- The algorithm assumes days before and after the timeseries are "wet", meaning that the condition for being @@ -3254,23 +3262,17 @@ def dry_spell_total_length( computation, compared to only three). 
""" pram = rate2amount(convert_units_to(pr, "mm/d", context="hydro"), out_units="mm") - thresh = convert_units_to(thresh, pram, context="hydro") - - pram_pad = pram.pad(time=(0, window)) - mask = getattr(pram_pad.rolling(time=window), op)() < thresh - dry = (mask.rolling(time=window).sum() >= 1).shift(time=-(window - 1)) - dry = dry.isel(time=slice(0, pram.time.size)).astype(float) - - dry = select_time(dry, **indexer) - - out = rl.resample_and_rl( - dry, - resample_before_rl, - rl.windowed_run_count, - window=1, + return spell_length_statistics( + pram, + threshold=thresh, + op="<", + window=window, + win_reducer=op, + spell_reducer="sum", freq=freq, + resample_before_rl=resample_before_rl, + **indexer, ) - return to_agg_units(out, pram, "count") @declare_units(pr="[precipitation]", thresh="[length]") @@ -3305,6 +3307,10 @@ def dry_spell_max_length( It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. Indexing is done after finding the dry days, but before finding the spells. + See Also + -------- + xclim.indices.generic.spell_length_statistics + Returns ------- xarray.DataArray, [days] @@ -3320,22 +3326,17 @@ def dry_spell_max_length( computation, compared to only three). 
""" pram = rate2amount(convert_units_to(pr, "mm/d", context="hydro"), out_units="mm") - thresh = convert_units_to(thresh, pram, context="hydro") - - pram_pad = pram.pad(time=(0, window)) - mask = getattr(pram_pad.rolling(time=window), op)() < thresh - dry = (mask.rolling(time=window).sum() >= 1).shift(time=-(window - 1)) - dry = dry.isel(time=slice(0, pram.time.size)).astype(float) - - dry = select_time(dry, **indexer) - - out = rl.resample_and_rl( - dry, - resample_before_rl, - rl.longest_run, + return spell_length_statistics( + pram, + threshold=thresh, + op="<", + window=window, + win_reducer=op, + spell_reducer="max", freq=freq, + resample_before_rl=resample_before_rl, + **indexer, ) - return to_agg_units(out, pram, "count") @declare_units(pr="[precipitation]", thresh="[length]") @@ -3346,10 +3347,11 @@ def wet_spell_frequency( freq: str = "YS", resample_before_rl: bool = True, op: str = "sum", + **indexer, ) -> xarray.DataArray: """Return the number of wet periods of n days and more. - Periods during which the accumulated or maximal daily precipitation amount on a window of n days is over threshold. + Periods during which the accumulated, minimal orr maximal daily precipitation amount on a window of n days is over threshold. Parameters ---------- @@ -3365,13 +3367,17 @@ def wet_spell_frequency( resample_before_rl : bool Determines if the resampling should take place before or after the run length encoding (or a similar algorithm) is applied to runs. - op: {"sum","max"} + op: {"sum","min"} Operation to perform on the window. Default is "sum", which checks that the sum of accumulated precipitation over the whole window is more than the threshold. - "max" checks that the maximal daily precipitation amount within the window is more than the threshold. + "min" checks that the maximal daily precipitation amount within the window is more than the threshold. This is the same as verifying that each individual day is above the threshold. 
+ See Also + -------- + xclim.indices.generic.spell_length_statistics + Returns ------- xarray.DataArray, [unitless] @@ -3382,22 +3388,20 @@ def wet_spell_frequency( >>> from xclim.indices import wet_spell_frequency >>> pr = open_dataset(path_to_pr_file).pr >>> dsf = wet_spell_frequency(pr=pr, op="sum") - >>> dsf = wet_spell_frequency(pr=pr, op="max") + >>> dsf = wet_spell_frequency(pr=pr, op="min") """ pram = rate2amount(convert_units_to(pr, "mm/d", context="hydro"), out_units="mm") - thresh = convert_units_to(thresh, pram, context="hydro") - - agg_pr = getattr(pram.rolling(time=window, center=True), op)() - cond = agg_pr >= thresh - out = rl.resample_and_rl( - cond, - resample_before_rl, - rl.windowed_run_events, - window=1, + return spell_length_statistics( + pram, + threshold=thresh, + op=">=", + window=window, + win_reducer=op, + spell_reducer="count", freq=freq, + resample_before_rl=resample_before_rl, + **indexer, ) - out.attrs["units"] = "" - return out @declare_units(pr="[precipitation]", thresh="[length]") @@ -3423,15 +3427,26 @@ def wet_spell_total_length( Accumulated precipitation value over which a period is considered dry. window : int Number of days when the maximum or accumulated precipitation is over threshold. - op : {"max", "sum"} + op : {"min", "max", "sum"} Reduce operation. + `min` means that all days within the minimum window must exceed the threshold. + `max` means that at least one day within the window must exceed the threshold. + `sum` means that the accumulated precipitation within the window must exceed the threshold. + In all cases, the whole window is marked a part of a wet spell. freq : str Resampling frequency. + resample_before_rl: bool + Determines if the resampling should take place before or after the run + length encoding (or a similar algorithm) is applied to runs. indexer Indexing parameters to compute the indicator on a temporal subset of the data. 
It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. Indexing is done after finding the dry days, but before finding the spells. + See Also + -------- + xclim.indices.generic.spell_length_statistics + Returns ------- xarray.DataArray, [days] @@ -3447,23 +3462,17 @@ def wet_spell_total_length( computation, compared to only three). """ pram = rate2amount(convert_units_to(pr, "mm/d", context="hydro"), out_units="mm") - thresh = convert_units_to(thresh, pram, context="hydro") - - pram_pad = pram.pad(time=(0, window)) - mask = getattr(pram_pad.rolling(time=window), op)() >= thresh - wet = (mask.rolling(time=window).sum() < 1).shift(time=-(window - 1)) - wet = wet.isel(time=slice(0, pram.time.size)).astype(float) - - wet = select_time(wet, **indexer) - - out = rl.resample_and_rl( - wet, - resample_before_rl, - rl.windowed_run_count, - window=1, + return spell_length_statistics( + pram, + threshold=thresh, + op=">=", + window=window, + win_reducer=op, + spell_reducer="sum", freq=freq, + resample_before_rl=resample_before_rl, + **indexer, ) - return to_agg_units(out, pram, "count") @declare_units(pr="[precipitation]", thresh="[length]") @@ -3489,15 +3498,26 @@ def wet_spell_max_length( Accumulated precipitation value over which a period is considered dry. window : int Number of days when the maximum or accumulated precipitation is over threshold. - op : {"max", "sum"} + op : {"min", "max", "sum"} Reduce operation. + `min` means that all days within the minimum window must exceed the threshold. + `max` means that at least one day within the window must exceed the threshold. + `sum` means that the accumulated precipitation within the window must exceed the threshold. + In all cases, the whole window is marked a part of a wet spell. freq : str Resampling frequency. + resample_before_rl: bool + Determines if the resampling should take place before or after the run + length encoding (or a similar algorithm) is applied to runs. 
indexer Indexing parameters to compute the indicator on a temporal subset of the data. It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. Indexing is done after finding the dry days, but before finding the spells. + See Also + -------- + xclim.indices.generic.spell_length_statistics + Returns ------- xarray.DataArray, [days] @@ -3513,19 +3533,14 @@ def wet_spell_max_length( computation, compared to only three). """ pram = rate2amount(convert_units_to(pr, "mm/d", context="hydro"), out_units="mm") - thresh = convert_units_to(thresh, pram, context="hydro") - - pram_pad = pram.pad(time=(0, window)) - mask = getattr(pram_pad.rolling(time=window), op)() >= thresh - wet = (mask.rolling(time=window).sum() < 1).shift(time=-(window - 1)) - wet = wet.isel(time=slice(0, pram.time.size)).astype(float) - - wet = select_time(wet, **indexer) - - out = rl.resample_and_rl( - wet, - resample_before_rl, - rl.longest_run, + return spell_length_statistics( + pram, + threshold=thresh, + op=">=", + window=window, + win_reducer=op, + spell_reducer="max", freq=freq, + resample_before_rl=resample_before_rl, + **indexer, ) - return to_agg_units(out, pram, "count") diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index a0e919f62..76cb3afea 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -352,6 +352,115 @@ def get_daily_events( return events +@declare_relative_units(threshold="") +def spell_length_statistics( + data: xarray.DataArray, + threshold: Quantified, + window: int, + win_reducer: str, + op: str, + spell_reducer: str, + freq: str, + resample_before_rl: bool = True, + **indexer, +): + """Statistics on spells lengths. + + A spell is when a statistic (`win_reducer`) over a minimum number (`window`) of consecutive timesteps respects a condition (`op` `thresh`). + This returns a statistic over the spell's count or length. + + Parameters + ---------- + data: xr.DataArray + Input data. 
+ threshold: Quantified + Threshold to test against. + window: int + Minimum length of a spell. + win_reducer: {'min', 'max', 'sum', 'mean'} + Reduction along the spell length to compute the spell value. + Note that this does not matter when `window` is 1. + op : {">", "gt", "<", "lt", ">=", "ge", "<=", "le", "==", "eq", "!=", "ne"} + Logical operator. Ex: spell_value > thresh. + spell_reducer: {'max', 'sum', 'count'} + Statistic on the spell lengths. + freq: str + Resampling frequency. + resample_before_rl: bool + Determines if the resampling should take place before or after the run + length encoding (or a similar algorithm) is applied to runs. + indexer: + Indexing parameters to compute the indicator on a temporal subset of the data. + It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. + Indexing is done after finding the days part of a spell, but before taking the spell statistics. + + Examples + -------- + >>> spell_length_statistics( + ... tas, + ... threshold="35 °C", + ... window=7, + ... op=">", + ... win_reducer="min", + ... spell_reducer="sum", + ... freq="YS", + ... ) + + Here, a day is part of a spell if it is in any 7 day period where the minimum temperature is over 35°C. We then + return the annual sum of the spell lengths, so the total number of days in such spells. + + >>> pram = rate2amount(pr) + >>> spell_length_statistics( + ... pram, + ... threshold="20 mm", + ... window=5, + ... op=">=", + ... win_reducer="sum", + ... spell_reducer="max", + ... freq="YS", + ... ) + + Here, a day is part of a spell if it is in any 5 day period where the total accumulated precipitation reaches or exceeds + 20 mm. We then return the length of the longest of such spells. 
+ """ + thresh = convert_units_to( + threshold, + data, + context=infer_context(standard_name=data.attrs.get("standard_name")), + ) + + if window == 1: # Fast path + is_in_spell = compare(data, op, thresh) + else: + data_pad = data.pad(time=(0, window)) + # The spell-wise value to test + # For example "win_reducer='sum'", we want the sum over the minimum spell length (window) to be above the thresh + spell_value = getattr(data_pad.rolling(time=window), win_reducer)() + # True at the end of a spell respecting the condition + mask = compare(spell_value, op, thresh) + # True for all days part of a spell that respected the condition (shift because of the two rollings) + is_in_spell = (mask.rolling(time=window).sum() >= 1).shift(time=-(window - 1)) + # Cut back to the original size + is_in_spell = is_in_spell.isel(time=slice(0, data.time.size)).astype(float) + + is_in_spell = select_time(is_in_spell, **indexer) + + out = rl.resample_and_rl( + is_in_spell, + resample_before_rl, + rl.rle_statistics, + reducer=spell_reducer, + # The code above already ensured only spell of the minimum length are selected + window=1, + freq=freq, + ) + + if spell_reducer == "count": + return out.assign_attrs(units="1") + # All other cases are statistics of the number of timesteps + return to_agg_units(out, data, "count") + + # CF-INDEX-META Indices diff --git a/xclim/indices/run_length.py b/xclim/indices/run_length.py index b3015230a..e89eff640 100644 --- a/xclim/indices/run_length.py +++ b/xclim/indices/run_length.py @@ -1129,7 +1129,7 @@ def statistics_run_1d(arr: Sequence[bool], reducer: str, window: int) -> int: ---------- arr : Sequence[bool] Input array (bool) - reducer : {'mean', 'sum', 'min', 'max', 'std'} + reducer : {'mean', 'sum', 'min', 'max', 'std', 'count'} Reducing function name. 
window : int Minimal length of runs to be included in the statistics @@ -1142,6 +1142,8 @@ def statistics_run_1d(arr: Sequence[bool], reducer: str, window: int) -> int: v, rl = rle_1d(arr)[:2] if not np.any(v) or np.all(v * rl < window): return 0 + if reducer == "count": + return (v * rl >= window).sum() func = getattr(np, f"nan{reducer}") return func(np.where(v * rl >= window, rl, np.NaN)) From c7a2618960b2a41a0c1a0d648becd743d704f556 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 11 Jul 2024 17:19:04 -0400 Subject: [PATCH 02/13] upd changelog --- CHANGELOG.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 2d7d7b285..9324bf497 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,7 +4,11 @@ Changelog v0.52.0 (unreleased) -------------------- -Contributors to this version: David Huard (:user:`huard`). +Contributors to this version: David Huard (:user:`huard`), Pascal Bourgault (:user:`aulemahal`). + +Bug fixes +^^^^^^^^^ +* Wet spell indicators fixed. New generic spell length statistic function ``xclim.indices.generic.spell_length_statistics`` used in wet and dry spells indicators. (:issue:`1834`, :pull:`1838`). 
Internal changes ^^^^^^^^^^^^^^^^ From 09e9d629d32e98d281d1cdf9fedc4cc63bf5b241 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 11 Jul 2024 17:50:10 -0400 Subject: [PATCH 03/13] fix dry spell freq test --- tests/test_precip.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_precip.py b/tests/test_precip.py index ba470d985..fcf091c51 100644 --- a/tests/test_precip.py +++ b/tests/test_precip.py @@ -720,10 +720,10 @@ def test_dry_spell_frequency_op(open_dataset): ) np.testing.assert_allclose( - test_sum[0, :14], [1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0], rtol=1e-1 + test_sum[0, :14], [1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0], rtol=1e-1 ) np.testing.assert_allclose( - test_max[0, :14], [1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 2, 1], rtol=1e-1 + test_max[0, :14], [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 2, 1], rtol=1e-1 ) assert ( "The monthly number of dry periods of 7 day(s) or more, " From b752738cca9a670c731b6a5ec68e446685563013 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Tue, 16 Jul 2024 10:33:58 -0400 Subject: [PATCH 04/13] Apply suggestions from code review Co-authored-by: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> --- CHANGELOG.rst | 2 +- xclim/indices/_threshold.py | 6 +++--- xclim/indices/generic.py | 18 +++++++++--------- xclim/indices/run_length.py | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 9324bf497..2d548c408 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -8,7 +8,7 @@ Contributors to this version: David Huard (:user:`huard`), Pascal Bourgault (:us Bug fixes ^^^^^^^^^ -* Wet spell indicators fixed. New generic spell length statistic function ``xclim.indices.generic.spell_length_statistics`` used in wet and dry spells indicators. (:issue:`1834`, :pull:`1838`). +* Addressed a bug found in ``wet_spell_*`` indicators that was contributing to erroneous results. 
A new generic spell length statistic function ``xclim.indices.generic.spell_length_statistics`` is now used in wet and dry spells indicators. (:issue:`1834`, :pull:`1838`). Internal changes ^^^^^^^^^^^^^^^^ diff --git a/xclim/indices/_threshold.py b/xclim/indices/_threshold.py index 3f88c4a0c..f8c4f1d3b 100644 --- a/xclim/indices/_threshold.py +++ b/xclim/indices/_threshold.py @@ -3351,7 +3351,7 @@ def wet_spell_frequency( ) -> xarray.DataArray: """Return the number of wet periods of n days and more. - Periods during which the accumulated, minimal orr maximal daily precipitation amount on a window of n days is over threshold. + Periods during which the accumulated, minimal, or maximal daily precipitation amount on a window of n days is over threshold. Parameters ---------- @@ -3438,7 +3438,7 @@ def wet_spell_total_length( resample_before_rl: bool Determines if the resampling should take place before or after the run length encoding (or a similar algorithm) is applied to runs. - indexer + \*\*indexer Indexing parameters to compute the indicator on a temporal subset of the data. It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. Indexing is done after finding the dry days, but before finding the spells. @@ -3509,7 +3509,7 @@ def wet_spell_max_length( resample_before_rl: bool Determines if the resampling should take place before or after the run length encoding (or a similar algorithm) is applied to runs. - indexer + \*\*indexer Indexing parameters to compute the indicator on a temporal subset of the data. It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. Indexing is done after finding the dry days, but before finding the spells. 
diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index 76cb3afea..4949b89a5 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -364,32 +364,32 @@ def spell_length_statistics( resample_before_rl: bool = True, **indexer, ): - """Statistics on spells lengths. + r"""Statistics on spells lengths. A spell is when a statistic (`win_reducer`) over a minimum number (`window`) of consecutive timesteps respects a condition (`op` `thresh`). This returns a statistic over the spell's count or length. Parameters ---------- - data: xr.DataArray + data : xr.DataArray Input data. - threshold: Quantified + threshold : Quantified Threshold to test against. - window: int + window : int Minimum length of a spell. - win_reducer: {'min', 'max', 'sum', 'mean'} + win_reducer : {'min', 'max', 'sum', 'mean'} Reduction along the spell length to compute the spell value. Note that this does not matter when `window` is 1. op : {">", "gt", "<", "lt", ">=", "ge", "<=", "le", "==", "eq", "!=", "ne"} Logical operator. Ex: spell_value > thresh. - spell_reducer: {'max', 'sum', 'count'} + spell_reducer : {'max', 'sum', 'count'} Statistic on the spell lengths. - freq: str + freq : str Resampling frequency. - resample_before_rl: bool + resample_before_rl : bool Determines if the resampling should take place before or after the run length encoding (or a similar algorithm) is applied to runs. - indexer: + \*\*indexer Indexing parameters to compute the indicator on a temporal subset of the data. It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. Indexing is done after finding the days part of a spell, but before taking the spell statistics. 
diff --git a/xclim/indices/run_length.py b/xclim/indices/run_length.py index e89eff640..1edd62a22 100644 --- a/xclim/indices/run_length.py +++ b/xclim/indices/run_length.py @@ -1129,7 +1129,7 @@ def statistics_run_1d(arr: Sequence[bool], reducer: str, window: int) -> int: ---------- arr : Sequence[bool] Input array (bool) - reducer : {'mean', 'sum', 'min', 'max', 'std', 'count'} + reducer : {"mean", "sum", "min", "max", "std", "count"} Reducing function name. window : int Minimal length of runs to be included in the statistics From 8c11f2ae45eb81eeec0bd0d503cbde49244a8a4d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 16 Jul 2024 14:34:48 +0000 Subject: [PATCH 05/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xclim/indices/_threshold.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xclim/indices/_threshold.py b/xclim/indices/_threshold.py index f8c4f1d3b..c1c540583 100644 --- a/xclim/indices/_threshold.py +++ b/xclim/indices/_threshold.py @@ -3414,7 +3414,7 @@ def wet_spell_total_length( resample_before_rl: bool = True, **indexer, ) -> xarray.DataArray: - """Total length of dry spells. + r"""Total length of dry spells. Total number of days in wet periods of a minimum length, during which the maximum or accumulated precipitation within a window of the same length is over a threshold. @@ -3485,7 +3485,7 @@ def wet_spell_max_length( resample_before_rl: bool = True, **indexer, ) -> xarray.DataArray: - """Longest wet spell. + r"""Longest wet spell. Maximum number of consecutive days in a wet period of minimum length, during which the maximum or accumulated precipitation within a window of the same length is over a threshold. 
From 8b75acd1cdeb2e5b989353825ae0e6ec2466b8e9 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Tue, 16 Jul 2024 16:34:30 -0400 Subject: [PATCH 06/13] Merge - changes from review - add fast path --- xclim/indices/_threshold.py | 90 ++++++++++++++++++++++--------------- xclim/indices/generic.py | 16 ++++++- 2 files changed, 68 insertions(+), 38 deletions(-) diff --git a/xclim/indices/_threshold.py b/xclim/indices/_threshold.py index c1c540583..04133959a 100644 --- a/xclim/indices/_threshold.py +++ b/xclim/indices/_threshold.py @@ -3156,7 +3156,7 @@ def dry_spell_frequency( op: str = "sum", **indexer, ) -> xarray.DataArray: - """Return the number of dry periods of n days and more. + r"""Return the number of dry periods of n days and more. Periods during which the accumulated or maximal daily precipitation amount on a window of n days is under threshold. @@ -3170,16 +3170,20 @@ def dry_spell_frequency( window : int Minimum length of the spells. freq : str - Resampling frequency. + Resampling frequency. resample_before_rl : bool - Determines if the resampling should take place before or after the run - length encoding (or a similar algorithm) is applied to runs. - op: {"sum","max"} - Operation to perform on the window. - Default is "sum", which checks that the sum of accumulated precipitation over the whole window is less than the - threshold. - "max" checks that the maximal daily precipitation amount within the window is less than the threshold. - This is the same as verifying that each individual day is below the threshold. + Determines if the resampling should take place before or after the run + length encoding (or a similar algorithm) is applied to runs. + op: {"sum", "max", "min", "mean"} + Operation to perform on the window. + Default is "sum", which checks that the sum of accumulated precipitation over the whole window is less than the + threshold. + "max" checks that the maximal daily precipitation amount within the window is less than the threshold. 
+ This is the same as verifying that each individual day is below the threshold. + \*\*indexer + Indexing parameters to compute the indicator on a temporal subset of the data. + It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. + Indexing is done after finding the dry days, but before finding the spells. Returns ------- @@ -3221,7 +3225,7 @@ def dry_spell_total_length( resample_before_rl: bool = True, **indexer, ) -> xarray.DataArray: - """Total length of dry spells. + r"""Total length of dry spells. Total number of days in dry periods of a minimum length, during which the maximum or accumulated precipitation within a window of the same length is under a threshold. @@ -3234,11 +3238,18 @@ def dry_spell_total_length( Accumulated precipitation value under which a period is considered dry. window : int Number of days when the maximum or accumulated precipitation is under threshold. - op : {"max", "sum"} - Reduce operation. + op: {"sum", "max", "min", "mean"} + Operation to perform on the window. + Default is "sum", which checks that the sum of accumulated precipitation over the whole window is less than the + threshold. + "max" checks that the maximal daily precipitation amount within the window is less than the threshold. + This is the same as verifying that each individual day is below the threshold. freq : str Resampling frequency. - indexer + resample_before_rl : bool + Determines if the resampling should take place before or after the run + length encoding (or a similar algorithm) is applied to runs. + \*\*indexer Indexing parameters to compute the indicator on a temporal subset of the data. It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. Indexing is done after finding the dry days, but before finding the spells. @@ -3285,7 +3296,7 @@ def dry_spell_max_length( resample_before_rl: bool = True, **indexer, ) -> xarray.DataArray: - """Longest dry spell. + r"""Longest dry spell. 
Maximum number of consecutive days in a dry period of minimum length, during which the maximum or accumulated precipitation within a window of the same length is under a threshold. @@ -3302,7 +3313,10 @@ def dry_spell_max_length( Reduce operation. freq : str Resampling frequency. - indexer + resample_before_rl : bool + Determines if the resampling should take place before or after the run + length encoding (or a similar algorithm) is applied to runs. + \*\*indexer Indexing parameters to compute the indicator on a temporal subset of the data. It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. Indexing is done after finding the dry days, but before finding the spells. @@ -3349,7 +3363,7 @@ def wet_spell_frequency( op: str = "sum", **indexer, ) -> xarray.DataArray: - """Return the number of wet periods of n days and more. + r"""Return the number of wet periods of n days and more. Periods during which the accumulated, minimal, or maximal daily precipitation amount on a window of n days is over threshold. @@ -3363,16 +3377,20 @@ def wet_spell_frequency( window : int Minimum length of the spells. freq : str - Resampling frequency. + Resampling frequency. resample_before_rl : bool - Determines if the resampling should take place before or after the run - length encoding (or a similar algorithm) is applied to runs. - op: {"sum","min"} - Operation to perform on the window. - Default is "sum", which checks that the sum of accumulated precipitation over the whole window is more than the - threshold. - "min" checks that the maximal daily precipitation amount within the window is more than the threshold. - This is the same as verifying that each individual day is above the threshold. + Determines if the resampling should take place before or after the run + length encoding (or a similar algorithm) is applied to runs. + op: {"sum","min", "max", "mean"} + Operation to perform on the window. 
+ Default is "sum", which checks that the sum of accumulated precipitation over the whole window is more than the + threshold. + "min" checks that the minimal daily precipitation amount within the window is more than the threshold. + This is the same as verifying that each individual day is above the threshold. + \*\*indexer + Indexing parameters to compute the indicator on a temporal subset of the data. + It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. + Indexing is done after finding the wet days, but before finding the spells. See Also -------- @@ -3414,9 +3432,9 @@ def wet_spell_total_length( resample_before_rl: bool = True, **indexer, ) -> xarray.DataArray: - r"""Total length of dry spells. + r"""Total length of wet spells. - Total number of days in wet periods of a minimum length, during which the maximum or + Total number of days in wet periods of a minimum length, during which the minimum or accumulated precipitation within a window of the same length is over a threshold. Parameters @@ -3424,13 +3442,12 @@ def wet_spell_total_length( pr : xarray.DataArray Daily precipitation. thresh : Quantified - Accumulated precipitation value over which a period is considered dry. + Accumulated precipitation value over which a period is considered wet. window : int Number of days when the maximum or accumulated precipitation is over threshold. - op : {"min", "max", "sum"} + op : {"min", "sum", "max", "mean"} Reduce operation. `min` means that all days within the minimum window must exceed the threshold. - `max` means that at least one day within the window must exceed the threshold. `sum` means that the accumulated precipitation within the window must exceed the threshold. In all cases, the whole window is marked a part of a wet spell. freq : str @@ -3441,7 +3458,7 @@ def wet_spell_total_length( \*\*indexer Indexing parameters to compute the indicator on a temporal subset of the data.
It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. - Indexing is done after finding the dry days, but before finding the spells. + Indexing is done after finding the wet days, but before finding the spells. See Also -------- @@ -3487,7 +3504,7 @@ def wet_spell_max_length( ) -> xarray.DataArray: r"""Longest wet spell. - Maximum number of consecutive days in a wet period of minimum length, during which the maximum or + Maximum number of consecutive days in a wet period of minimum length, during which the minimum or accumulated precipitation within a window of the same length is over a threshold. Parameters @@ -3495,13 +3512,12 @@ def wet_spell_max_length( pr : xarray.DataArray Daily precipitation. thresh : Quantified - Accumulated precipitation value over which a period is considered dry. + Accumulated precipitation value over which a period is considered wet. window : int Number of days when the maximum or accumulated precipitation is over threshold. - op : {"min", "max", "sum"} + op : {"min", "sum", "max", "mean"} Reduce operation. `min` means that all days within the minimum window must exceed the threshold. - `max` means that at least one day within the window must exceed the threshold. `sum` means that the accumulated precipitation within the window must exceed the threshold. In all cases, the whole window is marked a part of a wet spell. freq : str @@ -3512,7 +3528,7 @@ def wet_spell_max_length( \*\*indexer Indexing parameters to compute the indicator on a temporal subset of the data. It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. - Indexing is done after finding the dry days, but before finding the spells. + Indexing is done after finding the wet days, but before finding the spells. 
See Also -------- diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index 4949b89a5..d06b63c8d 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -431,6 +431,20 @@ def spell_length_statistics( if window == 1: # Fast path is_in_spell = compare(data, op, thresh) + elif (win_reducer == "min" and op in [">", ">=", "ge", "gt"]) or ( + win_reducer == "max" and op in ["<", "<=", "le", "lt"] + ): + # Fast path for specific cases, this yields a smaller dask graph (rolling twice is expensive!) + # For these two cases, a day can't be part of a spell if it doesn't respect the condition itself + mask = compare(data, op, thresh) + # We need to filter out the spells shorter than "window" + # find sequences of consecutive respected constraints + cs_s = rl._cumsum_reset_on_zero(mask) + # end of these sequences + cs_s = cs_s.where(mask.shift({"time": -1}, fill_value=0) == 0) + # propagate these end of sequences + # the `.where(mask>0, 0)` acts as a stopper + is_in_spell = cs_s.where(cs_s >= window).where(mask > 0, 0).bfill("time") > 0 else: data_pad = data.pad(time=(0, window)) # The spell-wise value to test @@ -456,7 +470,7 @@ def spell_length_statistics( ) if spell_reducer == "count": - return out.assign_attrs(units="1") + return out.assign_attrs(units="") # All other cases are statistics of the number of timesteps return to_agg_units(out, data, "count") From c4ceb9d764979012f53363caf1f6fbff4b93630f Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 17 Jul 2024 15:39:18 -0400 Subject: [PATCH 07/13] Make resample.map(lazy_indexing) lazy again --- xclim/indices/run_length.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/xclim/indices/run_length.py b/xclim/indices/run_length.py index 1edd62a22..0aad6f947 100644 --- a/xclim/indices/run_length.py +++ b/xclim/indices/run_length.py @@ -1360,9 +1360,14 @@ def _index_from_1d_array(indices, array): da2 = xr.DataArray(da.data, dims=(tmpname,), name=None) # for each chunk of
index, take corresponding values from da out = index.map_blocks(_index_from_1d_array, args=(da2,)).rename(da.name) + # Map blocks chunks aux coords. Replace them by non-chunked from the original array. + # This avoids unwanted loading of the aux coord in a resample.map, for example + for name, crd in out.coords.items(): + if uses_dask(crd) and name in index.coords and index[name].size == crd.size: + out = out.assign_coords(**{name: index[name]}) # mask where index was NaN. Drop any auxiliary coord, they are already on `out`. # Chunked aux coord would have the same name on both sides and xarray will want to check if they are equal, which means loading them - # making lazy_indexing not lazy. + # making lazy_indexing not lazy. same issue as above out = out.where( ~invalid.drop_vars( [crd for crd in invalid.coords if crd not in invalid.dims] From 99f0f9cba5f84c293a53cdd8e2842cd1caf7107b Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 24 Jul 2024 16:02:37 -0400 Subject: [PATCH 08/13] Apply suggestions from code review Co-authored-by: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> --- xclim/indices/_threshold.py | 6 +++--- xclim/indices/generic.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/xclim/indices/_threshold.py b/xclim/indices/_threshold.py index 04133959a..0ebd484c2 100644 --- a/xclim/indices/_threshold.py +++ b/xclim/indices/_threshold.py @@ -3238,7 +3238,7 @@ def dry_spell_total_length( Accumulated precipitation value under which a period is considered dry. window : int Number of days when the maximum or accumulated precipitation is under threshold. - op: {"sum", "max", "min", "mean"} + op : {"sum", "max", "min", "mean"} Operation to perform on the window. Default is "sum", which checks that the sum of accumulated precipitation over the whole window is less than the threshold. 
@@ -3381,7 +3381,7 @@ def wet_spell_frequency( resample_before_rl : bool Determines if the resampling should take place before or after the run length encoding (or a similar algorithm) is applied to runs. - op: {"sum","min", "max", "mean"} + op : {"sum","min", "max", "mean"} Operation to perform on the window. Default is "sum", which checks that the sum of accumulated precipitation over the whole window is more than the threshold. @@ -3452,7 +3452,7 @@ def wet_spell_total_length( In all cases, the whole window is marked a part of a wet spell. freq : str Resampling frequency. - resample_before_rl: bool + resample_before_rl : bool Determines if the resampling should take place before or after the run length encoding (or a similar algorithm) is applied to runs. \*\*indexer diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index d06b63c8d..bf2f0657b 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -406,7 +406,7 @@ def spell_length_statistics( ... freq="YS", ... ) - Here, a day is part of a spell if it is in any 7 day period where the minimum temperature is over 35°C. We then + Here, a day is part of a spell if it is in any seven (7) day period where the minimum temperature is over 35°C. We then return the annual sum of the spell lengths, so the total number of days in such spells. >>> pram = rate2amount(pr) @@ -420,7 +420,7 @@ def spell_length_statistics( ... freq="YS", ... ) - Here, a day is part of a spell if it is in any 5 day period where the total accumulated precipitation reaches or exceeds + Here, a day is part of a spell if it is in any five (5) day period where the total accumulated precipitation reaches or exceeds 20 mm. We then return the length of the longest of such spells. 
""" thresh = convert_units_to( From a0eca649703cfa072bb46cadffde197f039bb4fc Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 24 Jul 2024 16:11:11 -0400 Subject: [PATCH 09/13] Update docstring --- xclim/indices/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index bf2f0657b..d5e348df0 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -396,7 +396,7 @@ def spell_length_statistics( Examples -------- - >>> spell_statistics( + >>> spell_length_statistics( ... tas, ... thresh="35 °C", ... window=7, @@ -410,7 +410,7 @@ def spell_length_statistics( return the annual sum of the spell lengths, so the total number of days in such spells. >>> pram = rate2amount(pr) - >>> spell_statistics( + >>> spell_length_statistics( ... pram, ... thresh="20 mm", ... window=5, From 55028c0f58a1f8fda7ac05d7baa8ecf280c1ea02 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 24 Jul 2024 16:36:24 -0400 Subject: [PATCH 10/13] Update generic.py --- xclim/indices/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index d5e348df0..9e3bf7b40 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -398,7 +398,7 @@ def spell_length_statistics( -------- >>> spell_length_statistics( ... tas, - ... thresh="35 °C", + ... threshold="35 °C", ... window=7, ... op=">", ... win_reducer="min", @@ -412,7 +412,7 @@ def spell_length_statistics( >>> pram = rate2amount(pr) >>> spell_length_statistics( ... pram, - ... thresh="20 mm", + ... threshold="20 mm", ... window=5, ... op=">=", ... 
win_reducer="sum", From c8c7c852ed8a4c3cf9bd81c81ce8c8d425cb8c76 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 25 Jul 2024 16:34:10 -0400 Subject: [PATCH 11/13] fix docstring import --- xclim/indices/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index 9e3bf7b40..e40ea5b20 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -408,7 +408,7 @@ def spell_length_statistics( Here, a day is part of a spell if it is in any seven (7) day period where the minimum temperature is over 35°C. We then return the annual sum of the spell lengths, so the total number of days in such spells. - + >>> from xclim.core.units import rate2amount >>> pram = rate2amount(pr) >>> spell_length_statistics( ... pram, From ff795cb8560bec1eb63628d494b9eb106ba9b727 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 25 Jul 2024 16:59:09 -0400 Subject: [PATCH 12/13] add pr dummy variable to doctest namespace --- tests/conftest.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 133cea41e..943788f04 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -382,9 +382,10 @@ def add_imports(xdoctest_namespace, threadsafe_data_dir) -> None: @pytest.fixture(autouse=True, scope="function") -def add_example_dataarray(xdoctest_namespace, tas_series) -> None: +def add_example_dataarray(xdoctest_namespace, tas_series, pr_series) -> None: ns = xdoctest_namespace ns["tas"] = tas_series(np.random.rand(365) * 20 + 253.15) + ns["pr"] = pr_series(np.random.rand(365) * 5) @pytest.fixture(autouse=True, scope="session") From 9daaa7fd78868db51f7bee6e4d73b4008da11d3a Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Thu, 25 Jul 2024 17:15:45 -0400 Subject: [PATCH 13/13] this function likes to know what you want 
out of it --- xclim/indices/generic.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/xclim/indices/generic.py b/xclim/indices/generic.py index e40ea5b20..39831bc1f 100644 --- a/xclim/indices/generic.py +++ b/xclim/indices/generic.py @@ -406,10 +406,10 @@ def spell_length_statistics( ... freq="YS", ... ) - Here, a day is part of a spell if it is in any seven (7) day period where the minimum temperature is over 35°C. We then - return the annual sum of the spell lengths, so the total number of days in such spells. + Here, a day is part of a spell if it is in any seven (7) day period where the minimum temperature is over 35°C. + We then return the annual sum of the spell lengths, so the total number of days in such spells. >>> from xclim.core.units import rate2amount - >>> pram = rate2amount(pr) + >>> pram = rate2amount(pr, out_units="mm") >>> spell_length_statistics( ... pram, ... threshold="20 mm", @@ -420,8 +420,8 @@ def spell_length_statistics( ... freq="YS", ... ) - Here, a day is part of a spell if it is in any five (5) day period where the total accumulated precipitation reaches or exceeds - 20 mm. We then return the length of the longest of such spells. + Here, a day is part of a spell if it is in any five (5) day period where the total accumulated precipitation reaches + or exceeds 20 mm. We then return the length of the longest of such spells. """ thresh = convert_units_to( threshold,