Skip to content

Commit 15f78c1

Browse files
Resolve "Adjustments using adjust require the input data of the control period to have the same size for the time dimension" (#67)
1 parent 1d7e0b8 commit 15f78c1

9 files changed

+241
-16
lines changed

CHANGELOG.md

+35-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,36 @@
11
# Changelog
22

3-
## [v2.0.0](https://github.com/btschwertfeger/python-cmethods/tree/v2.0.0) (2023-01-23)
3+
## [Unreleased](https://github.com/btschwertfeger/python-cmethods/tree/HEAD)
4+
5+
[Full Changelog](https://github.com/btschwertfeger/python-cmethods/compare/v2.0.2...HEAD)
6+
7+
**Merged pull requests:**
8+
9+
- Fix typos and update pre-commit hooks [\#64](https://github.com/btschwertfeger/python-cmethods/pull/64) ([btschwertfeger](https://github.com/btschwertfeger))
10+
11+
## [v2.0.2](https://github.com/btschwertfeger/python-cmethods/tree/v2.0.2) (2024-02-02)
12+
13+
[Full Changelog](https://github.com/btschwertfeger/python-cmethods/compare/v2.0.1...v2.0.2)
14+
15+
**Merged pull requests:**
16+
17+
- Update documentation -- QM and QDM formulas [\#62](https://github.com/btschwertfeger/python-cmethods/pull/62) ([btschwertfeger](https://github.com/btschwertfeger))
18+
- Bump GitHub action versions [\#59](https://github.com/btschwertfeger/python-cmethods/pull/59) ([btschwertfeger](https://github.com/btschwertfeger))
19+
20+
## [v2.0.1](https://github.com/btschwertfeger/python-cmethods/tree/v2.0.1) (2024-02-01)
21+
22+
[Full Changelog](https://github.com/btschwertfeger/python-cmethods/compare/v2.0.0...v2.0.1)
23+
24+
**Closed issues:**
25+
26+
- The latest documentation still describes the legacy max_scaling_factor [\#60](https://github.com/btschwertfeger/python-cmethods/issues/60)
27+
28+
**Merged pull requests:**
29+
30+
- adjust CI workflows [\#58](https://github.com/btschwertfeger/python-cmethods/pull/58) ([btschwertfeger](https://github.com/btschwertfeger))
31+
- Resolve "The latest documentation still describes the legacy max scaling factor" [\#61](https://github.com/btschwertfeger/python-cmethods/pull/61) ([btschwertfeger](https://github.com/btschwertfeger))
32+
33+
## [v2.0.0](https://github.com/btschwertfeger/python-cmethods/tree/v2.0.0) (2024-01-23)
434

535
[Full Changelog](https://github.com/btschwertfeger/python-cmethods/compare/v1.0.3...v2.0.0)
636

@@ -13,6 +43,10 @@
1343
- Optimization for `adjust_3d` [\#47](https://github.com/btschwertfeger/python-cmethods/issues/47)
1444
- Find a solution to process large data sets more efficient [\#6](https://github.com/btschwertfeger/python-cmethods/issues/6)
1545

46+
**Merged pull requests:**
47+
48+
- Fix the CodeQL workflow execution; prepare v2.0.0 release [\#50](https://github.com/btschwertfeger/python-cmethods/pull/50) ([btschwertfeger](https://github.com/btschwertfeger))
49+
1650
## [v1.0.3](https://github.com/btschwertfeger/python-cmethods/tree/v1.0.3) (2023-08-09)
1751

1852
[Full Changelog](https://github.com/btschwertfeger/python-cmethods/compare/v1.0.2...v1.0.3)

README.md

-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
[![License: GPL v3](https://img.shields.io/badge/License-GPLv3-orange.svg)](https://www.gnu.org/licenses/gpl-3.0)
88
[![Downloads](https://pepy.tech/badge/python-cmethods)](https://pepy.tech/project/python-cmethods)
99

10-
![CodeQL](https://github.com/btschwertfeger/python-cmethods/actions/workflows/codeql.yaml/badge.svg)
1110
[![CI/CD](https://github.com/btschwertfeger/python-cmethods/actions/workflows/cicd.yaml/badge.svg?branch=master)](https://github.com/btschwertfeger/python-cmethods/actions/workflows/cicd.yaml)
1211
[![codecov](https://codecov.io/github/btschwertfeger/python-cmethods/branch/master/graph/badge.svg?token=OSO4PAABPD)](https://codecov.io/github/btschwertfeger/python-cmethods)
1312

cmethods/core.py

+51-8
Original file line numberDiff line numberDiff line change
@@ -50,26 +50,45 @@ def apply_ufunc(
5050
if method not in __METHODS_FUNC__:
5151
raise UnknownMethodError(method, __METHODS_FUNC__.keys())
5252

53+
if kwargs.get("input_core_dims"):
54+
if not isinstance(kwargs["input_core_dims"], dict):
55+
raise TypeError("input_core_dims must be an object of type 'dict'")
56+
if not len(kwargs["input_core_dims"]) == 3 or any(
57+
not isinstance(value, str) for value in kwargs["input_core_dims"].values()
58+
):
59+
raise ValueError(
60+
"input_core_dims must have three key-value pairs like: "
61+
'{"obs": "time", "simh": "time", "simp": "time"}',
62+
)
63+
64+
input_core_dims = kwargs["input_core_dims"]
65+
else:
66+
input_core_dims = {"obs": "time", "simh": "time", "simp": "time"}
67+
5368
result: XRData = xr.apply_ufunc(
5469
__METHODS_FUNC__[method],
5570
obs,
5671
simh,
5772
# Need to spoof a fake time axis since 'time' coord on full dataset is different
5873
# than 'time' coord on training dataset.
59-
simp.rename({"time": "t2"}),
74+
simp.rename({input_core_dims["simp"]: "__t_simp__"}),
6075
dask="parallelized",
6176
vectorize=True,
6277
# This will vectorize over the time dimension, so will submit each grid cell
6378
# independently
64-
input_core_dims=[["time"], ["time"], ["t2"]],
79+
input_core_dims=[
80+
[input_core_dims["obs"]],
81+
[input_core_dims["simh"]],
82+
["__t_simp__"],
83+
],
6584
# Need to denote that the final output dataset will be labeled with the
6685
# spoofed time coordinate
67-
output_core_dims=[["t2"]],
86+
output_core_dims=[["__t_simp__"]],
6887
kwargs=dict(kwargs),
6988
)
7089

7190
# Rename to proper coordinate name.
72-
result = result.rename({"t2": "time"})
91+
result = result.rename({"__t_simp__": input_core_dims["simp"]})
7392

7493
# ufunc will put the core dimension to the end (time), so want to preserve original
7594
# order where time is commonly first.
@@ -90,6 +109,14 @@ def adjust(
90109
91110
See https://python-cmethods.readthedocs.io/en/latest/src/methods.html
92111
112+
113+
By default, the time dimension of ``obs``, ``simh`` and ``simp`` must be named ``time``.
114+
115+
If the sizes of time dimensions of the input data sets differ, you have to
116+
pass the hidden ``input_core_dims`` parameter, see
117+
https://python-cmethods.readthedocs.io/en/latest/src/getting_started.html#advanced-usage
118+
for more information.
119+
93120
:param method: Technique to apply
94121
:type method: str
95122
:param obs: The reference/observational data set
@@ -127,14 +154,30 @@ def adjust(
127154
)
128155

129156
# Grouped correction | scaling-based technique
130-
group: str = kwargs["group"]
157+
group: str | dict[str, str] = kwargs["group"]
158+
if isinstance(group, str):
159+
# only for same sized time dimensions
160+
obs_group = group
161+
simh_group = group
162+
simp_group = group
163+
elif isinstance(group, dict):
164+
if any(key not in {"obs", "simh", "simp"} for key in group):
165+
raise ValueError(
166+
"group must either be a string like 'time' or a dict like "
167+
'{"obs": "time.month", "simh": "t_simh.month", "simp": "time.month"}',
168+
)
169+
# for different sized time dimensions
170+
obs_group = group["obs"]
171+
simh_group = group["simh"]
172+
simp_group = group["simp"]
173+
131174
del kwargs["group"]
132175

133176
result: Optional[XRData] = None
134177
for (_, obs_gds), (_, simh_gds), (_, simp_gds) in zip(
135-
obs.groupby(group),
136-
simh.groupby(group),
137-
simp.groupby(group),
178+
obs.groupby(obs_group),
179+
simh.groupby(simh_group),
180+
simp.groupby(simp_group),
138181
):
139182
monthly_result = apply_ufunc(
140183
method,

cmethods/distribution.py

+10
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ def quantile_mapping(
6262

6363
cdf_obs = get_cdf(obs, xbins)
6464
cdf_simh = get_cdf(simh, xbins)
65+
cdf_simh = np.interp(
66+
cdf_simh,
67+
(cdf_simh.min(), cdf_simh.max()),
68+
(cdf_obs.min(), cdf_obs.max()),
69+
)
6570

6671
if kind in ADDITIVE:
6772
epsilon = np.interp(simp, xbins, cdf_simh) # Eq. 1
@@ -124,6 +129,11 @@ def detrended_quantile_mapping(
124129

125130
cdf_obs = get_cdf(obs, xbins)
126131
cdf_simh = get_cdf(simh, xbins)
132+
cdf_simh = np.interp(
133+
cdf_simh,
134+
(cdf_simh.min(), cdf_simh.max()),
135+
(cdf_obs.min(), cdf_obs.max()),
136+
)
127137

128138
# detrended => shift mean of $X_{sim,p}$ to range of $X_{sim,h}$ to adjust extremes
129139
res = np.zeros(len(simp.values))

doc/links.rst

-3
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,6 @@
2727
.. |Downloads badge| image:: https://static.pepy.tech/personalized-badge/python-cmethods?period=total&units=abbreviation&left_color=grey&right_color=orange&left_text=downloads
2828
:target: https://pepy.tech/project/python-cmethods
2929

30-
.. |CodeQL badge| image:: https://github.com/btschwertfeger/python-cmethods/actions/workflows/codeql.yaml/badge.svg?branch=master
31-
:target: https://github.com/btschwertfeger/python-cmethods/actions/workflows/codeql.yaml
32-
3330
.. |CI/CD badge| image:: https://github.com/btschwertfeger/python-cmethods/actions/workflows/cicd.yaml/badge.svg?branch=master
3431
:target: https://github.com/btschwertfeger/python-cmethods/actions/workflows/cicd.yaml
3532

doc/src/getting_started.rst

+57
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,60 @@ method specific documentation.
5656
n_quaniles=1000,
5757
kind="+",
5858
)
59+
60+
61+
Advanced Usage
62+
--------------
63+
64+
In some cases the time dimensions of the input data sets have different sizes. In
65+
such cases, the hidden parameter ``input_core_dims`` must be passed to the
66+
``adjust`` call.
67+
68+
It defines the dimension names of the input data sets, e.g. if the time
69+
dimensions of ``obs`` and ``simp`` have the same length, but the time dimension of
70+
``simh`` is somewhat smaller, you have to define this as follows:
71+
72+
73+
.. code-block:: python
74+
:linenos:
75+
:caption: Bias Adjustments for data sets with different time dimension lengths pt. 1
76+
77+
from cmethods import adjust
78+
import xarray as xr
79+
80+
obs = xr.open_dataset("examples/input_data/observations.nc")["tas"]
81+
simp = xr.open_dataset("examples/input_data/control.nc")["tas"]
82+
simh = simp.copy(deep=True)[3650:]
83+
84+
bc = adjust(
85+
method="quantile_mapping",
86+
obs=obs,
87+
simh=simh.rename({"time": "t_simh"}),
88+
simp=simp,
89+
kind="+",
90+
input_core_dims={"obs": "time", "simh": "t_simh", "simp": "time"}
91+
)
92+
93+
In case you are applying a scaling-based technique using grouping, you have to
94+
adjust the group names according to the time dimension names.
95+
96+
.. code-block:: python
97+
:linenos:
98+
:caption: Bias Adjustments for data sets with different time dimension lengths pt. 2
99+
100+
from cmethods import adjust
101+
import xarray as xr
102+
103+
obs = xr.open_dataset("examples/input_data/observations.nc")["tas"]
104+
simp = xr.open_dataset("examples/input_data/control.nc")["tas"]
105+
simh = simp.copy(deep=True)[3650:]
106+
107+
bc = adjust(
108+
method="linear_scaling",
109+
obs=obs,
110+
simh=simh.rename({"time": "t_simh"}),
111+
simp=simp,
112+
kind="+",
113+
group={"obs": "time.month", "simh": "t_simh.month", "simp": "time.month"},
114+
input_core_dims={"obs": "time", "simh": "t_simh", "simp": "time"}
115+
)

doc/src/introduction.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ Introduction
44
=============
55

66
|GitHub badge| |License badge| |PyVersions badge| |Downloads badge|
7-
|CodeQL badge| |CI/CD badge| |codecov badge|
7+
|CI/CD badge| |codecov badge|
88
|Release date badge| |Release version badge| |DOI badge| |Docs stable|
99

1010
About

tests/test_methods.py

+85
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,55 @@ def test_3d_scaling(
107107
assert is_3d_rmse_better(result=result[kind], obsp=obsp, simp=simp)
108108

109109

110+
@pytest.mark.parametrize(
111+
("method", "kind"),
112+
[
113+
("linear_scaling", "+"),
114+
("variance_scaling", "+"),
115+
("linear_scaling", "*"),
116+
],
117+
)
118+
def test_3d_scaling_different_time_span(
119+
datasets: dict,
120+
method: str,
121+
kind: str,
122+
) -> None:
123+
obsh: XRData_t = datasets[kind]["obsh"]
124+
obsp: XRData_t = datasets[kind]["obsp"]
125+
simh: XRData_t = datasets[kind]["simh"]
126+
simp: XRData_t = datasets[kind]["simp"]
127+
simh = simh.sel(time=slice(simh.time[1], None)).rename({"time": "t_simh"})
128+
129+
time_names = {"obs": "time", "simh": "t_simh", "simp": "time"}
130+
131+
# not grouped
132+
result: XRData_t = adjust(
133+
method=method,
134+
obs=obsh,
135+
simh=simh,
136+
simp=simp,
137+
kind=kind,
138+
input_core_dims=time_names,
139+
)
140+
141+
assert isinstance(result, XRData_t)
142+
assert is_3d_rmse_better(result=result[kind], obsp=obsp, simp=simp)
143+
144+
# grouped
145+
result: XRData_t = adjust(
146+
method=method,
147+
obs=obsh,
148+
simh=simh,
149+
simp=simp,
150+
kind=kind,
151+
group={"obs": "time.month", "simh": "t_simh.month", "simp": "time.month"},
152+
input_core_dims=time_names,
153+
)
154+
155+
assert isinstance(result, XRData_t)
156+
assert is_3d_rmse_better(result=result[kind], obsp=obsp, simp=simp)
157+
158+
110159
@pytest.mark.parametrize(
111160
("method", "kind"),
112161
[
@@ -171,6 +220,42 @@ def test_3d_distribution(
171220
assert is_3d_rmse_better(result=result[kind], obsp=obsp, simp=simp)
172221

173222

223+
@pytest.mark.parametrize(
224+
("method", "kind"),
225+
[
226+
("quantile_mapping", "+"),
227+
("quantile_delta_mapping", "+"),
228+
("quantile_mapping", "*"),
229+
("quantile_delta_mapping", "*"),
230+
],
231+
)
232+
def test_3d_distribution_different_time_span(
233+
datasets: dict,
234+
method: str,
235+
kind: str,
236+
) -> None:
237+
obsh: XRData_t = datasets[kind]["obsh"]
238+
obsp: XRData_t = datasets[kind]["obsp"]
239+
simh: XRData_t = datasets[kind]["simh"]
240+
simp: XRData_t = datasets[kind]["simp"]
241+
242+
simh = simh.sel(time=slice(simh.time[1], None)).rename({"time": "t_simh"})
243+
time_names = {"obs": "time", "simh": "t_simh", "simp": "time"}
244+
245+
result: XRData_t = adjust(
246+
method=method,
247+
obs=obsh,
248+
simh=simh,
249+
simp=simp,
250+
kind=kind,
251+
n_quantiles=N_QUANTILES,
252+
input_core_dims=time_names,
253+
)
254+
255+
assert isinstance(result, XRData_t)
256+
assert is_3d_rmse_better(result=result[kind], obsp=obsp, simp=simp)
257+
258+
174259
def test_1d_detrended_quantile_mapping_add(datasets: dict) -> None:
175260
kind: str = "+"
176261
obsh: XRData_t = datasets[kind]["obsh"][:, 0, 0]

tests/test_utils.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,10 @@
3939
def test_quantile_mapping_single_nan() -> None:
4040
obs, simh, simp = list(np.arange(10)), list(np.arange(10)), list(np.arange(10))
4141
obs[0] = np.nan
42-
expected = np.array([0.0, 1.9, 2.9, 3.9, 4.9, 5.9, 6.9, 7.9, 8.9, 9.0])
42+
expected = np.array([0.0, 1.8, 2.7, 3.6, 4.5, 5.4, 6.3, 7.2, 8.1, 9.0])
4343

4444
res = quantile_mapping(obs=obs, simh=simh, simp=simp, n_quantiles=5)
45-
assert np.allclose(res, expected)
45+
assert np.allclose(res, expected), res
4646

4747

4848
@pytest.mark.filterwarnings("ignore:All-NaN slice encountered")

0 commit comments

Comments
 (0)