Skip to content

Commit

Permalink
Merge pull request #163 from NREL/gb/skill
Browse files Browse the repository at this point in the history
Gb/skill
  • Loading branch information
grantbuster authored Aug 25, 2023
2 parents f3803a8 + 5bcb930 commit e1ae1b5
Show file tree
Hide file tree
Showing 2 changed files with 209 additions and 24 deletions.
183 changes: 161 additions & 22 deletions sup3r/bias/bias_calc.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Utilities to calculate the bias correction factors for biased data that is
going to be fed into the sup3r downscaling models. This is typically used to
bias correct GCM data vs. some historical record like the WTK or NSRDB."""
import copy
import json
import logging
import os
Expand All @@ -14,7 +15,7 @@
from rex.utilities.fun_utils import get_fun_call_str
from scipy.ndimage.filters import gaussian_filter
from scipy.spatial import KDTree
from scipy.stats import ks_2samp
from scipy import stats

import sup3r.preprocessing.data_handling
from sup3r.utilities import VERSION_RECORD, ModuleName
Expand Down Expand Up @@ -151,7 +152,7 @@ def compare_dists(base_data, bias_data, adder=0, scalar=1):
out : float
KS test statistic
"""
out = ks_2samp(base_data, bias_data * scalar + adder)
out = stats.ks_2samp(base_data, bias_data * scalar + adder)
return out.statistic

@classmethod
Expand Down Expand Up @@ -492,8 +493,30 @@ class LinearCorrection(DataRetrievalBase):
available data (no season bias correction)
"""

# size of the time dimension, 1 is no time-based bias correction
NT = 1
"""size of the time dimension, 1 is no time-based bias correction"""

def __init__(self, *args, **kwargs):
"""
Parameters
----------
*args : list
Same positional args as DataRetrievalBase
**kwargs : dict
Same keyword args as DataRetrievalBase
"""
super().__init__(*args, **kwargs)

keys = [f'{self.bias_feature}_scalar',
f'{self.bias_feature}_adder',
f'bias_{self.bias_feature}_mean',
f'bias_{self.bias_feature}_std',
f'base_{self.base_dset}_mean',
f'base_{self.base_dset}_std',
]
self.out = {k: np.full((*self.bias_gid_raster.shape, self.NT),
np.nan, np.float32)
for k in keys}

@staticmethod
def get_linear_correction(bias_data, base_data, bias_feature, base_dset):
Expand Down Expand Up @@ -597,7 +620,7 @@ def fill_and_smooth(self, out, fill_extend=True, smooth_extend=0,
"""
for key, arr in out.items():
nan_mask = np.isnan(arr[..., 0])
for idt in range(self.NT):
for idt in range(arr.shape[-1]):

arr_smooth = arr[..., idt]

Expand Down Expand Up @@ -705,17 +728,6 @@ def run(self, knn, threshold=0.6, fp_out=None, max_workers=None,
"""
logger.debug('Starting linear correction calculation...')

keys = [f'{self.bias_feature}_scalar',
f'{self.bias_feature}_adder',
f'bias_{self.bias_feature}_mean',
f'bias_{self.bias_feature}_std',
f'base_{self.base_dset}_mean',
f'base_{self.base_dset}_std',
]
out = {k: np.full((*self.bias_gid_raster.shape, self.NT),
np.nan, np.float32)
for k in keys}

logger.info('Initialized scalar / adder with shape: {}'
.format(self.bias_gid_raster.shape))

Expand All @@ -735,7 +747,7 @@ def run(self, knn, threshold=0.6, fp_out=None, max_workers=None,
daily_reduction,
self.bias_ti, self.decimals)
for key, arr in single_out.items():
out[key][raster_loc] = arr
self.out[key][raster_loc] = arr

logger.info('Completed bias calculations for {} out of {} '
'sites'.format(i + 1, len(self.bias_meta)))
Expand Down Expand Up @@ -765,19 +777,19 @@ def run(self, knn, threshold=0.6, fp_out=None, max_workers=None,
raster_loc = futures[future]
single_out = future.result()
for key, arr in single_out.items():
out[key][raster_loc] = arr
self.out[key][raster_loc] = arr

logger.info('Completed bias calculations for {} out of {} '
'sites'.format(i + 1, len(futures)))

logger.info('Finished calculating bias correction factors.')

out = self.fill_and_smooth(out, fill_extend, smooth_extend,
smooth_interior)
self.out = self.fill_and_smooth(self.out, fill_extend, smooth_extend,
smooth_interior)

self.write_outputs(fp_out, out)
self.write_outputs(fp_out, self.out)

return out
return copy.deepcopy(self.out)


class MonthlyLinearCorrection(LinearCorrection):
Expand All @@ -786,8 +798,8 @@ class MonthlyLinearCorrection(LinearCorrection):
This calculation operates on single bias sites on a montly basis
"""

# size of the time dimension, 12 is monthly bias correction
NT = 12
"""size of the time dimension, 12 is monthly bias correction"""

@classmethod
def _run_single(cls, bias_data, base_fps, bias_feature, base_dset,
Expand Down Expand Up @@ -823,3 +835,130 @@ def _run_single(cls, bias_data, base_fps, bias_feature, base_dset,
out[k][month - 1] = v

return out


class SkillAssessment(MonthlyLinearCorrection):
"""Calculate historical skill of one dataset compared to another."""

PERCENTILES = (1, 5, 25, 50, 75, 95, 99)
"""Data percentiles to report."""

def __init__(self, *args, **kwargs):
"""
Parameters
----------
*args : list
Same positional args as DataRetrievalBase
**kwargs : dict
Same keyword args as DataRetrievalBase
"""
super().__init__(*args, **kwargs)

monthly_keys = [f'{self.bias_feature}_scalar',
f'{self.bias_feature}_adder',
f'bias_{self.bias_feature}_mean_monthly',
f'bias_{self.bias_feature}_std_monthly',
f'base_{self.base_dset}_mean_monthly',
f'base_{self.base_dset}_std_monthly',
]

annual_keys = [f'{self.bias_feature}_ks_stat',
f'{self.bias_feature}_ks_p',
f'{self.bias_feature}_bias',
f'bias_{self.bias_feature}_mean',
f'bias_{self.bias_feature}_std',
f'bias_{self.bias_feature}_skew',
f'bias_{self.bias_feature}_kurtosis',
f'base_{self.base_dset}_mean',
f'base_{self.base_dset}_std',
f'base_{self.base_dset}_skew',
f'base_{self.base_dset}_kurtosis',
]

self.out = {k: np.full((*self.bias_gid_raster.shape, self.NT),
np.nan, np.float32)
for k in monthly_keys}

arr = np.full((*self.bias_gid_raster.shape, 1), np.nan, np.float32)
for k in annual_keys:
self.out[k] = arr.copy()

for p in self.PERCENTILES:
base_k = f'base_{self.base_dset}_percentile_{p}'
bias_k = f'bias_{self.bias_feature}_percentile_{p}'
self.out[base_k] = arr.copy()
self.out[bias_k] = arr.copy()

@classmethod
def _run_skill_eval(cls, bias_data, base_data, bias_feature, base_dset):
"""Run skill assessment metrics on 1D datasets at a single site.
Note we run the KS test on the mean=0 distributions as per:
S. Brands et al. 2013 https://doi.org/10.1007/s00382-013-1742-8
"""

out = {}
bias_mean = np.nanmean(bias_data)
base_mean = np.nanmean(base_data)
out[f'{bias_feature}_bias'] = (bias_mean - base_mean)

out[f'bias_{bias_feature}_mean'] = bias_mean
out[f'bias_{bias_feature}_std'] = np.nanstd(bias_data)
out[f'bias_{bias_feature}_skew'] = stats.skew(bias_data)
out[f'bias_{bias_feature}_kurtosis'] = stats.kurtosis(bias_data)

out[f'base_{base_dset}_mean'] = base_mean
out[f'base_{base_dset}_std'] = np.nanstd(base_data)
out[f'base_{base_dset}_skew'] = stats.skew(base_data)
out[f'base_{base_dset}_kurtosis'] = stats.kurtosis(base_data)

ks_out = stats.ks_2samp(base_data - base_mean, bias_data - bias_mean)
out[f'{bias_feature}_ks_stat'] = ks_out.statistic
out[f'{bias_feature}_ks_p'] = ks_out.pvalue

for p in cls.PERCENTILES:
base_k = f'base_{base_dset}_percentile_{p}'
bias_k = f'bias_{bias_feature}_percentile_{p}'
out[base_k] = np.percentile(base_data, p)
out[bias_k] = np.percentile(bias_data, p)

return out

@classmethod
def _run_single(cls, bias_data, base_fps, bias_feature, base_dset,
base_gid, base_handler, daily_reduction, bias_ti,
decimals):
"""Do a skill assessment at a single site"""

base_data, base_ti = cls.get_base_data(base_fps, base_dset,
base_gid, base_handler,
daily_reduction=daily_reduction,
decimals=decimals)

arr = np.full(cls.NT, np.nan, dtype=np.float32)
out = {f'bias_{bias_feature}_mean_monthly': arr.copy(),
f'bias_{bias_feature}_std_monthly': arr.copy(),
f'base_{base_dset}_mean_monthly': arr.copy(),
f'base_{base_dset}_std_monthly': arr.copy(),
f'{bias_feature}_scalar': arr.copy(),
f'{bias_feature}_adder': arr.copy(),
}

out.update(cls._run_skill_eval(bias_data, base_data,
bias_feature, base_dset))

for month in range(1, 13):
bias_mask = bias_ti.month == month
base_mask = base_ti.month == month

if any(bias_mask) and any(base_mask):
mout = cls.get_linear_correction(bias_data[bias_mask],
base_data[base_mask],
bias_feature,
base_dset)
for k, v in mout.items():
if not k.endswith(('_scalar', '_adder')):
k += '_monthly'
out[k][month - 1] = v

return out
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@
import numpy as np
import pytest
import xarray as xr
from scipy import stats

from sup3r import CONFIG_DIR, TEST_DATA_DIR
from sup3r.bias.bias_calc import LinearCorrection, MonthlyLinearCorrection
from sup3r.bias.bias_calc import (LinearCorrection, MonthlyLinearCorrection,
SkillAssessment)
from sup3r.bias.bias_transforms import local_linear_bc, monthly_local_linear_bc
from sup3r.models import Sup3rGan
from sup3r.pipeline.forward_pass import ForwardPass, ForwardPassStrategy
Expand All @@ -31,13 +33,14 @@ def test_smooth_interior_bc():

calc = LinearCorrection(FP_NSRDB, FP_CC, 'ghi', 'rsds',
TARGET, SHAPE, bias_handler='DataHandlerNCforCC')

out = calc.run(knn=1, threshold=0.6, fill_extend=False, max_workers=1)
og_scalar = out['rsds_scalar']
og_adder = out['rsds_adder']
nan_mask = np.isnan(og_scalar)
assert np.isnan(og_adder[nan_mask]).all()

calc = LinearCorrection(FP_NSRDB, FP_CC, 'ghi', 'rsds',
TARGET, SHAPE, bias_handler='DataHandlerNCforCC')
out = calc.run(knn=1, threshold=0.6, fill_extend=True, smooth_interior=0,
max_workers=1)
scalar = out['rsds_scalar']
Expand All @@ -49,10 +52,13 @@ def test_smooth_interior_bc():
assert not np.isnan(scalar[nan_mask]).any()

# make sure smoothing affects the interior pixels but not the exterior
calc = LinearCorrection(FP_NSRDB, FP_CC, 'ghi', 'rsds',
TARGET, SHAPE, bias_handler='DataHandlerNCforCC')
out = calc.run(knn=1, threshold=0.6, fill_extend=True, smooth_interior=1,
max_workers=1)
smooth_scalar = out['rsds_scalar']
smooth_adder = out['rsds_adder']

assert not np.allclose(smooth_scalar[~nan_mask], scalar[~nan_mask])
assert not np.allclose(smooth_adder[~nan_mask], adder[~nan_mask])
assert np.allclose(smooth_scalar[nan_mask], scalar[nan_mask])
Expand Down Expand Up @@ -102,6 +108,8 @@ def test_linear_bc():
assert np.isnan(adder[nan_mask]).all()

# make sure the NN fill works for out-of-bounds pixels
calc = LinearCorrection(FP_NSRDB, FP_CC, 'ghi', 'rsds',
TARGET, SHAPE, bias_handler='DataHandlerNCforCC')
out = calc.run(knn=1, threshold=0.6, fill_extend=True, max_workers=1)
scalar = out['rsds_scalar']
adder = out['rsds_adder']
Expand All @@ -114,6 +122,8 @@ def test_linear_bc():
assert not np.isnan(adder[nan_mask]).any()

# make sure smoothing affects the out-of-bounds pixels but not the in-bound
calc = LinearCorrection(FP_NSRDB, FP_CC, 'ghi', 'rsds',
TARGET, SHAPE, bias_handler='DataHandlerNCforCC')
out = calc.run(knn=1, threshold=0.6, fill_extend=True, smooth_extend=2,
max_workers=1)
smooth_scalar = out['rsds_scalar']
Expand All @@ -124,6 +134,8 @@ def test_linear_bc():
assert not np.allclose(smooth_adder[nan_mask], adder[nan_mask])

# parallel test
calc = LinearCorrection(FP_NSRDB, FP_CC, 'ghi', 'rsds',
TARGET, SHAPE, bias_handler='DataHandlerNCforCC')
out = calc.run(knn=1, threshold=0.6, fill_extend=True, smooth_extend=2,
max_workers=2)
par_scalar = out['rsds_scalar']
Expand Down Expand Up @@ -430,3 +442,37 @@ def test_qa_integration():
data_bc = qa.get_source_dset(feature, feature)

assert np.allclose(data_bc, data_truth, equal_nan=True)


def test_skill_assessment():
"""Test the skill assessment of a climate model vs. historical data"""
calc = SkillAssessment(FP_NSRDB, FP_CC, 'ghi', 'rsds', TARGET, SHAPE,
bias_handler='DataHandlerNCforCC')

# test a known in-bounds gid
bias_gid = 5
dist, base_gid = calc.get_base_gid(bias_gid, 1)
bias_data = calc.get_bias_data(bias_gid)
base_data, _ = calc.get_base_data(calc.base_fps, calc.base_dset,
base_gid, calc.base_handler,
daily_reduction='avg')
bias_coord = calc.bias_meta.loc[bias_gid, ['latitude', 'longitude']]
base_coord = calc.base_meta.loc[base_gid, ['latitude', 'longitude']]
true_dist = bias_coord.values - base_coord.values
true_dist = np.hypot(true_dist[0], true_dist[1])
assert np.allclose(true_dist, dist)
assert true_dist < 0.1
iloc = np.where(calc.bias_gid_raster == bias_gid)
iloc += (0, )

out = calc.run(knn=1, threshold=0.6, fill_extend=True, max_workers=1)

base_mean = base_data.mean()
bias_mean = bias_data.mean()
assert np.allclose(out['base_ghi_mean'][iloc], base_mean)
assert np.allclose(out['bias_rsds_mean'][iloc], bias_mean)
assert np.allclose(out['rsds_bias'][iloc], bias_mean - base_mean)

ks = stats.ks_2samp(base_data - base_mean, bias_data - bias_mean)
assert np.allclose(out['rsds_ks_stat'][iloc], ks.statistic)
assert np.allclose(out['rsds_ks_p'][iloc], ks.pvalue)

0 comments on commit e1ae1b5

Please sign in to comment.