From 18f918345afb6a8fe81d5f32e1af1b1b425ee617 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Wed, 17 Apr 2024 16:58:55 -0400 Subject: [PATCH 1/4] remove columns that are all NaNs --- tests/test_ensembles.py | 8 ++++++++ xclim/ensembles/_reduce.py | 3 +++ 2 files changed, 11 insertions(+) diff --git a/tests/test_ensembles.py b/tests/test_ensembles.py index 63d62e6d4..cc84e0785 100644 --- a/tests/test_ensembles.py +++ b/tests/test_ensembles.py @@ -566,6 +566,14 @@ def test_make_criteria(self, tas_series): uncrit = crit.unstack("criteria") assert set(uncrit.dims) == {"realization", "lat", "time"} + crit = ensembles.make_criteria(ds.where(ds.var_a > 0)) + assert crit.dims == ("realization", "criteria") + assert crit.criteria.size == 12 + np.testing.assert_array_equal(crit.isnull().sum(), 0) + np.testing.assert_array_equal(crit.min(), 1) + uncrit = crit.unstack("criteria") + assert set(uncrit.dims) == {"realization", "lat", "time"} + # ## Tests for Robustness ## @pytest.fixture diff --git a/xclim/ensembles/_reduce.py b/xclim/ensembles/_reduce.py index cdcc3599d..3884487c3 100644 --- a/xclim/ensembles/_reduce.py +++ b/xclim/ensembles/_reduce.py @@ -113,6 +113,9 @@ def _make_crit(da): else: # Easy peasy, skip all the convoluted stuff crit = _make_crit(ds) + + # drop criteria that are all NaN + crit = crit.dropna(dim="criteria", how="all") return crit.rename("criteria") From dbfd5f8cf51bf7731eda2dc3a891607520d70337 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Wed, 17 Apr 2024 17:14:33 -0400 Subject: [PATCH 2/4] upd changes --- CHANGES.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 5f953b2b7..ce4303cc3 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,7 +4,7 @@ Changelog v0.49.0 (unreleased) -------------------- -Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Pascal Bourgault (:user:`aulemahal`), Juliette Lavoie (:user:`juliettelavoie`). 
+Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Pascal Bourgault (:user:`aulemahal`), Juliette Lavoie (:user:`juliettelavoie`), Gabriel Rondeau-Genesse (:user:`RondeauG`). Announcements ^^^^^^^^^^^^^ @@ -17,6 +17,7 @@ Bug fixes * Fixed bug with loess smoothing for an array full of NaNs. (:pull:`1699`). * Fixed and adapted ``time_bnds`` to the newest xarray. (:pull:`1700`). * Fixed "agreement fraction" in ``robustness_fractions`` to distinguish between negative change and no change. Added "negative" and "changed negative" fractions (:issue:`1690`, :pull:`1711`). +* ``make_criteria`` now drops criteria (columns) that are NaN across all realizations. (:pull:`1713`). Internal changes ^^^^^^^^^^^^^^^^ From 27cc79b07cf4fc3b272878486445c2cdc3020cb9 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Thu, 18 Apr 2024 09:39:18 -0400 Subject: [PATCH 3/4] suggestion from code review --- xclim/ensembles/_reduce.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xclim/ensembles/_reduce.py b/xclim/ensembles/_reduce.py index 3884487c3..404188fd2 100644 --- a/xclim/ensembles/_reduce.py +++ b/xclim/ensembles/_reduce.py @@ -57,6 +57,8 @@ def make_criteria(ds: xarray.Dataset | xarray.DataArray): `ds2` will have all variables with the same dimensions, so if the original dataset had variables with different dimensions, the added dimensions are filled with NaNs. + Also, note that criteria that are all NaN (such as lat/lon coordinates with no data) are dropped from `crit` to avoid issues with + the clustering algorithms, so it might not be possible to fully reconstruct the original dataset. The `to_dataset` part can be skipped if the original input was a DataArray. 
""" From fe5a209ed465f3975c1b7b84f1436fa0f41d6f2b Mon Sep 17 00:00:00 2001 From: RondeauG Date: Thu, 18 Apr 2024 15:18:14 -0400 Subject: [PATCH 4/4] fix tests --- tests/test_ensembles.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_ensembles.py b/tests/test_ensembles.py index cc84e0785..91fafb92e 100644 --- a/tests/test_ensembles.py +++ b/tests/test_ensembles.py @@ -571,8 +571,9 @@ def test_make_criteria(self, tas_series): assert crit.criteria.size == 12 np.testing.assert_array_equal(crit.isnull().sum(), 0) np.testing.assert_array_equal(crit.min(), 1) - uncrit = crit.unstack("criteria") + uncrit = crit.unstack("criteria").to_dataset("variables") assert set(uncrit.dims) == {"realization", "lat", "time"} + assert uncrit.time.size == 3 # ## Tests for Robustness ##