Skip to content

Commit 9ae86de

Browse files
fixed detrended quantile mapping for ratio based variables
1 parent 1ef323f commit 9ae86de

File tree

7 files changed

+722
-193
lines changed

7 files changed

+722
-193
lines changed

README.md

+9-6
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,15 @@ Collection of different scale- and distribution-based bias adjustment techniques
88

99
Bias adjustment procedures in Python are very slow, so they should not be used on large data sets.
1010
A C++ implementation that works way faster can be found here: [https://github.com/btschwertfeger/Bias-Adjustment-Cpp](https://github.com/btschwertfeger/Bias-Adjustment-Cpp).
11+
____
12+
## Available methods:
13+
- Linear Scaling (additive and multiplicative)
14+
- Variance Scaling (additive)
15+
- Delta (Change) Method (additive and multiplicative)
16+
- Quantile Mapping (additive)
17+
- Detrended Quantile Mapping (additive and multiplicative)
18+
- Quantile Delta Mapping (additive and multuplicative)
1119

12-
## Available methods
13-
- Linear Scaling
14-
- Variance Scaling
15-
- Delta (Change) Method
16-
- Quantile and Detrended Quantile Mapping
17-
- Quantile Delta Mapping
1820
____
1921
## Usage
2022

@@ -24,6 +26,7 @@ python3 -m pip install python-cmethods
2426
```
2527
### Import and application
2628
```python
29+
import xarray as xr
2730
from cmethods.CMethods import CMethods
2831
cm = CMethods()
2932

cmethods/CMethods.py

+63-67
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,9 @@ class UnknownMethodError(Exception):
4444
def __init__(self, method: str, available_methods: list):
4545
super().__init__(f'Unknown method "{method}"! Available methods: {available_methods}')
4646

47-
CUSTOM_METHODS = [
48-
'linear_scaling', 'variance_scaling', 'delta_method',
49-
'quantile_mapping', 'quantile_delta_mapping'
50-
]
47+
SCALING_METHODS = ['linear_scaling', 'variance_scaling', 'delta_method']
48+
DISTRIBUTION_METHODS = ['quantile_mapping', 'quantile_delta_mapping']
49+
CUSTOM_METHODS = SCALING_METHODS + DISTRIBUTION_METHODS
5150
METHODS = CUSTOM_METHODS #+ XCLIM_SDBA_METHODS
5251

5352
def __init__(self):
@@ -122,6 +121,8 @@ def adjust_2d(cls,
122121
simh = simh.transpose('lat', 'lon', 'time')
123122
simp = simp.transpose('lat', 'lon', 'time')
124123

124+
if group == None and method in SCALING_METHODS: group = 'time.month'
125+
125126
result = simp.copy(deep=True).load()
126127
len_lat, len_lon = len(obs.lat), len(obs.lon)
127128

@@ -169,7 +170,6 @@ def pool_adjust(cls, params) -> xr.core.dataarray.DataArray:
169170
n_quantiles = params.get('n_quantiles', 100)
170171
kind = params.get('kind', '+')
171172
group = params.get('group', None)
172-
save_model = params.get('save_model', None)
173173
kwargs = params.get('kwargs', {})
174174

175175
result = simp.copy(deep=True).load()
@@ -393,7 +393,7 @@ def delta_method(cls,
393393
----- R E F E R E N C E S -----
394394
Beyer, R. and Krapp, M. and Manica, A.: An empirical evaluation of bias correction methods for palaeoclimate simulations (https://doi.org/10.5194/cp-16-1493-2020)
395395
396-
and
396+
and
397397
https://svn.oss.deltares.nl/repos/openearthtools/trunk/python/applications/hydrotools/hydrotools/statistics/bias_correction.py
398398
399399
'''
@@ -435,67 +435,65 @@ def quantile_mapping(cls,
435435
Add (+):
436436
(1.) X^{*QM}_{sim,p}(i) = F^{-1}_{obs,h} \left\{F_{sim,h}\left[X_{sim,p}(i)\right]\right\}
437437
Mult (*):
438-
(1.) --//--
438+
(2.) X^{*QM}_{sim,p}(i) = F^{-1}_{obs,h}
439+
\Biggl\{
440+
F_{sim,h}\left[
441+
\frac{
442+
\mu{X_{sim,h}} \mu{X_{sim,p}(i)}
443+
}{
444+
\mu{X_{sim,p}(i)}
445+
}
446+
\right]
447+
\Biggr\}
448+
\frac{
449+
\mu{X_{sim,p}(i)}
450+
}{
451+
\mu{X_{sim,h}}
452+
}
439453
440454
----- R E F E R E N C E S -----
441455
Alex J. Cannon and Stephen R. Sobie and Trevor Q. Murdock Bias Correction of GCM Precipitation by Quantile Mapping: How Well Do Methods Preserve Changes in Quantiles and Extremes?
442456
https://doi.org/10.1175/JCLI-D-14-00754.1)
443-
and
444-
'deleted profile' OR Adrian Tompkins [email protected], posted on November 8, 2016 at
445-
https://www.researchgate.net/post/Does-anyone-know-about-bias-correction-and-quantile-mapping-in-PYTHON
446457
'''
447458

448459
if group != None: return cls.grouped_correction(method='quantile_mapping', obs=obs, simh=simh, simp=simp, group=group, n_quantiles=n_quantiles, kind=kind, **kwargs)
449-
elif kind == '+':
450-
res = simp.copy(deep=True)
451-
obs, simh, simp = np.array(obs), np.array(simh), np.array(simp)
452-
453-
global_max = max(np.amax(obs), np.amax(simh))
454-
global_min = min(np.amin(obs), np.amin(simh))
455-
wide = abs(global_max - global_min) / n_quantiles
456-
xbins = np.arange(global_min, global_max + wide, wide)
457-
458-
cdf_obs = cls.get_cdf(obs, xbins)
459-
cdf_simh = cls.get_cdf(simh, xbins)
460-
461-
if kwargs.get('detrended', False):
462-
'''detrended => shift mean of $T_{sim,p}$ to range of $T_{sim,h}$ to adjust extremes'''
463-
for month, idxs in res.groupby('time.month').groups.items():
464-
m_simh, m_simp = [], []
465-
for idx in idxs:
466-
m_simh.append(simh[idx])
467-
m_simp.append(simp[idx])
468-
469-
m_simh_mean = np.nanmean(m_simh)
470-
m_simp_mean = np.nanmean(m_simp)
471-
472-
epsilon = np.interp((m_simp - m_simp_mean) + m_simh_mean, xbins, cdf_simh)
473-
X = (cls.get_inverse_of_cdf(cdf_obs, epsilon, xbins) + m_simp_mean) - m_simh_mean
474-
for i, idx in enumerate(idxs): res.values[idx] = X[i]
475-
else:
476-
epsilon = np.interp(simp, xbins, cdf_simh) # Eq. 1
477-
res.values = cls.get_inverse_of_cdf(cdf_obs, epsilon, xbins) # Eq. 1
460+
res = simp.copy(deep=True)
461+
obs, simh, simp = np.array(obs), np.array(simh), np.array(simp)
462+
463+
global_max = max(np.amax(obs), np.amax(simh))
464+
global_min = min(np.amin(obs), np.amin(simh))
465+
wide = abs(global_max - global_min) / n_quantiles
466+
xbins = np.arange(global_min, global_max + wide, wide)
467+
468+
cdf_obs = cls.get_cdf(obs, xbins)
469+
cdf_simh = cls.get_cdf(simh, xbins)
470+
471+
if kwargs.get('detrended', False) or kind in [ '*', 'mult' ]:
472+
'''detrended => shift mean of $X_{sim,p}$ to range of $X_{sim,h}$ to adjust extremes'''
473+
for month, idxs in res.groupby('time.month').groups.items():
474+
m_simh, m_simp = [], []
475+
for idx in idxs:
476+
m_simh.append(simh[idx])
477+
m_simp.append(simp[idx])
478+
479+
m_simh = np.array(m_simh)
480+
m_simp = np.array(m_simp)
481+
m_simh_mean = np.nanmean(m_simh)
482+
m_simp_mean = np.nanmean(m_simp)
483+
484+
if kind == '+':
485+
epsilon = np.interp(m_simp - m_simp_mean + m_simh_mean, xbins, cdf_simh) # Eq. 1
486+
X = cls.get_inverse_of_cdf(cdf_obs, epsilon, xbins) + m_simp_mean - m_simh_mean # Eq. 1
487+
else:
488+
epsilon = np.interp((m_simh_mean * m_simp) / m_simp_mean, xbins, cdf_simh, left=.0, right=999.0) # Eq. 2
489+
X = np.interp(epsilon, cdf_obs, xbins, left=.0, right=999.9) * (m_simp_mean / m_simh_mean) # Eq. 2
490+
#x = cm.get_inverse_of_cdf(cdf_obs, epsilon, xbins) * (m_simp_mean / m_simh_mean)
491+
for i, idx in enumerate(idxs): res.values[idx] = X[i]
492+
return res
493+
elif kind in [ '+', 'add' ]: # additive, no detrend
494+
epsilon = np.interp(simp, xbins, cdf_simh) # Eq. 1
495+
res.values = cls.get_inverse_of_cdf(cdf_obs, epsilon, xbins) # Eq. 1
478496
return res
479-
480-
elif kind == '*':
481-
''' Inspired by Adrian Tompkins [email protected] posted here:
482-
https://www.researchgate.net/post/Does-anyone-know-about-bias-correction-and-quantile-mapping-in-PYTHON
483-
'''
484-
485-
obs, simh, simp = np.sort(obs), np.sort(simh), np.array(simp)
486-
global_max = max(np.amax(obs), np.amax(simh))
487-
wide = global_max / n_quantiles
488-
xbins = np.arange(0.0, global_max + wide, wide)
489-
490-
pdf_obs, bins = np.histogram(obs, bins=xbins)
491-
pdf_simh, bins = np.histogram(simh, bins=xbins)
492-
493-
cdf_obs = np.insert(np.cumsum(pdf_obs), 0, 0.0)
494-
cdf_simh = np.insert(np.cumsum(pdf_simh), 0, 0.0)
495-
496-
epsilon = np.interp(simp, xbins, cdf_simh, left=0.0, right=999.0)
497-
return np.interp(epsilon, cdf_obs, xbins, left=0.0, right=-999.0)
498-
499497
else: raise ValueError('Not implemented!')
500498

501499
# ? -----========= E M P I R I C A L - Q U A N T I L E - M A P P I N G =========------
@@ -554,9 +552,7 @@ def empirical_quantile_mapping(cls,
554552
# n_quantiles = n_quantiles,
555553
# extrapolate = extrapolate
556554
# )
557-
# else:
558-
# obs, simh, simp = np.array(obs), np.array(simh), np.array(simp)
559-
# return corrected
555+
# else: pass
560556

561557
# ? -----========= Q U A N T I L E - D E L T A - M A P P I N G =========------
562558
@classmethod
@@ -638,7 +634,7 @@ def quantile_delta_mapping(cls,
638634
obs, simh, simp = np.array(obs), np.array(simh), np.array(simp)
639635
global_max = max(np.amax(obs), np.amax(simh))
640636
wide = global_max / n_quantiles
641-
xbins = np.arange(kwargs.get('global_min', 0.0), global_max + wide, wide)
637+
xbins = np.arange(kwargs.get('global_min', .0), global_max + wide, wide)
642638

643639
cdf_obs = cls.get_cdf(obs,xbins)
644640
cdf_simh = cls.get_cdf(simh,xbins)
@@ -672,14 +668,14 @@ def get_inverse_of_cdf(base_cdf, insert_cdf, xbins) -> np.array:
672668
@staticmethod
673669
def load_data(
674670
obs_fpath: str,
675-
contr_fpath: str,
676-
scen_fpath: str,
671+
simh_fpath: str,
672+
simp_fpath: str,
677673
use_cftime: bool=False,
678674
chunks=None
679675
) -> (xr.core.dataarray.Dataset, xr.core.dataarray.Dataset, xr.core.dataarray.Dataset):
680676
'''Load and return loaded netcdf datasets'''
681677
obs = xr.open_dataset(obs_fpath, use_cftime=use_cftime, chunks=chunks)
682-
simh = xr.open_dataset(contr_fpath, use_cftime=use_cftime, chunks=chunks)
683-
simp = xr.open_dataset(scen_fpath, use_cftime=use_cftime, chunks=chunks)
678+
simh = xr.open_dataset(simh_fpath, use_cftime=use_cftime, chunks=chunks)
679+
simp = xr.open_dataset(simp_fpath, use_cftime=use_cftime, chunks=chunks)
684680

685681
return obs, simh, simp

examples/do_bias_correction.py

+3-7
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
# * ----- L O G G I N G -----
1717
formatter = logging.Formatter(
1818
fmt='%(asctime)s %(module)s,line: %(lineno)d %(levelname)8s | %(message)s',
19-
datefmt='%Y-%m-%d %H:%M:%S'
19+
datefmt='%Y-%m-%d %H:%M:%S'
2020
)
2121

2222
log = logging.getLogger()
@@ -37,7 +37,6 @@
3737

3838
parser.add_argument('-g', '--group', dest='group', type=str, default=None, help='Value grouping, default: time, (options: time.month, time.dayofyear, time.year')
3939
parser.add_argument('-k', '--kind', dest='kind', type=str, default='+', help='+ or *, default: +')
40-
parser.add_argument('-w', '--window', dest='window', type=int, default=1, help='Window of grouping')
4140
parser.add_argument('-n', '--nquantiles', dest='n_quantiles', type=int, default=100, help='Nr. of Quantiles to use')
4241

4342
parser.add_argument('-p', '--processes', dest='p', type=int, default=1, help='Multiprocessing with n processes, default: 1')
@@ -52,7 +51,6 @@
5251
unit = params['unit']
5352
group = params['group']
5453
kind = params['kind']
55-
window = params['window']
5654
n_quantiles = params['n_quantiles']
5755
n_jobs = params['p']
5856

@@ -75,9 +73,8 @@ def main() -> None:
7573
end_date: str = ds_simp['time'][-1].dt.strftime('%Y%m%d').values.ravel()[0]
7674

7775
descr1, descr2 = '', ''
78-
if method in cm.XCLIM_SDBA_METHODS or method in ['quantile_mapping', 'quantile_delta_mapping']:
76+
if method in cm.DISTRIBUTION_METHODS:
7977
descr1 = f'_quantiles-{n_quantiles}'
80-
descr2 = f'_window-{window}'
8178

8279
# ----- Adjustment -----
8380
log.info(f'Starting {method} adjustment')
@@ -89,7 +86,6 @@ def main() -> None:
8986
n_quantiles = n_quantiles,
9087
kind = kind,
9188
group = group,
92-
window = window,
9389
n_jobs = n_jobs
9490
)
9591
log.info('Saving now')
@@ -103,4 +99,4 @@ def main() -> None:
10399
main()
104100

105101

106-
# * ----- ----- E O F ----- -----
102+
# * ----- ----- E O F ----- -----
File renamed without changes.
File renamed without changes.
File renamed without changes.

0 commit comments

Comments
 (0)