From fb624bd29c33a9f570da68d7a1e323c346cd8a64 Mon Sep 17 00:00:00 2001 From: Tom Vo Date: Tue, 2 Jan 2024 13:49:45 -0800 Subject: [PATCH 1/2] Update unweighted temporal averages to not require bounds (#579) - Time bounds are required for generating weights for weighted averages Co-authored-by: tomvothecoder --- xcdat/temporal.py | 47 ++++++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/xcdat/temporal.py b/xcdat/temporal.py index a5611e42..a0daf9a4 100644 --- a/xcdat/temporal.py +++ b/xcdat/temporal.py @@ -757,21 +757,17 @@ def _averager( # Preprocess the dataset based on method argument values. ds = self._preprocess_dataset(ds) - # Get the data variable and the required time axis metadata. - dv = _get_data_var(ds, data_var) - time_bounds = ds.bounds.get_bounds("T", var_key=dv.name) - if self._mode == "average": - dv = self._average(dv, time_bounds) + dv_avg = self._average(ds, data_var) elif self._mode in ["group_average", "climatology", "departures"]: - dv = self._group_average(dv, time_bounds) + dv_avg = self._group_average(ds, data_var) # The original time dimension is dropped from the dataset because # it becomes obsolete after the data variable is averaged. When the # averaged data variable is added to the dataset, the new time dimension # and its associated coordinates are also added. ds = ds.drop_dims(self.dim) # type: ignore - ds[dv.name] = dv + ds[dv_avg.name] = dv_avg if keep_weights: ds = self._keep_weights(ds) @@ -1075,28 +1071,28 @@ def _drop_leap_days(self, ds: xr.Dataset): ) return ds - def _average( - self, data_var: xr.DataArray, time_bounds: xr.DataArray - ) -> xr.DataArray: + def _average(self, ds: xr.Dataset, data_var: str) -> xr.DataArray: """Averages a data variable with the time dimension removed. Parameters ---------- - data_var : xr.DataArray - The data variable. - time_bounds : xr.DataArray - The time bounds. + ds : xr.Dataset + The dataset. + data_var : str + The key of the data variable. Returns ------- xr.DataArray - The averages for a data variable with the time dimension removed. + The data variable averaged with the time dimension removed. """ - dv = data_var.copy() + dv = _get_data_var(ds, data_var) with xr.set_options(keep_attrs=True): if self._weighted: + time_bounds = ds.bounds.get_bounds("T", var_key=data_var) self._weights = self._get_weights(time_bounds) + dv = dv.weighted(self._weights).mean(dim=self.dim) # type: ignore else: dv = dv.mean(dim=self.dim) # type: ignore @@ -1105,31 +1101,31 @@ def _average( return dv - def _group_average( - self, data_var: xr.DataArray, time_bounds: xr.DataArray - ) -> xr.DataArray: + def _group_average(self, ds: xr.Dataset, data_var: str) -> xr.DataArray: """Averages a data variable by time group. Parameters ---------- - data_var : xr.DataArray - The data variable. - time_bounds : xr.DataArray - The time bounds. + ds : xr.Dataset + The dataset. + data_var : str + The key of the data variable. Returns ------- xr.DataArray The data variable averaged by time group. """ - dv = data_var.copy() + dv = _get_data_var(ds, data_var) # Label the time coordinates for grouping weights and the data variable # values. self._labeled_time = self._label_time_coords(dv[self.dim]) if self._weighted: + time_bounds = ds.bounds.get_bounds("T", var_key=data_var) self._weights = self._get_weights(time_bounds) + # Weight the data variable. dv *= self._weights @@ -1145,8 +1141,9 @@ def _group_average( # included to take into account zero weight for missing data. with xr.set_options(keep_attrs=True): dv = self._group_data(dv).sum() / self._group_data(weights).sum() + # Restore the data variable's name. - dv.name = data_var.name + dv.name = data_var else: dv = self._group_data(dv).mean() From fbf1db68fd6328d5a534e0c1e57e22e6fdd70c76 Mon Sep 17 00:00:00 2001 From: Tom Vo Date: Tue, 16 Jan 2024 11:41:12 -0800 Subject: [PATCH 2/2] Fix pre-commit mypy issues (#588) --- pyproject.toml | 2 +- xcdat/temporal.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 116e2f52..6ae6def0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,7 @@ markers = ["flaky", "network"] [tool.mypy] # Docs: https://mypy.readthedocs.io/en/stable/config_file.html -python_version = 3.10 +python_version = "3.10" check_untyped_defs = true ignore_missing_imports = true warn_unused_ignores = true diff --git a/xcdat/temporal.py b/xcdat/temporal.py index a0daf9a4..05e7db57 100644 --- a/xcdat/temporal.py +++ b/xcdat/temporal.py @@ -732,7 +732,7 @@ def departures( # The original time dimension name is restored after grouped # arithmetic, so the labeled time dimension name is no longer needed # and therefore dropped. - ds_obs = ds_obs.drop_vars(self._labeled_time.name) + ds_obs = ds_obs.drop_vars(str(self._labeled_time.name)) if weighted and keep_weights: self._weights = ds_climo.time_wts