Skip to content

Commit

Permalink
Fix issues in pp query (#692)
Browse files Browse the repository at this point in the history
* fix hr -> 1hr freq conversion in pp query
try using regex string contains standard_name in query

* add check for parameter type to xr_parser approximate_attribute_value

* remove regex from pp query standard_name

* add check that bounds is populated in cf accessor, then check coord attrs and only run coord bounds check if bounds are not None in xr_parser
  • Loading branch information
wrongkindofdoctor authored Sep 30, 2024
1 parent f7ec46f commit e1a590e
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 31 deletions.
25 changes: 12 additions & 13 deletions src/preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from src.util import datelabel as dl
import cftime
import intake
import math
import numpy as np
import xarray as xr
import collections
Expand Down Expand Up @@ -922,31 +921,31 @@ def query_catalog(self,

for case_name, case_d in case_dict.items():
# path_regex = re.compile(r'(?i)(?<!\\S){}(?!\\S+)'.format(case_name))
path_regex = re.compile(r'({})'.format(case_name))
# path_regex = '*' + case_name + '*'
path_regex = [re.compile(r'({})'.format(case_name))]

for var in case_d.varlist.iter_vars():
realm_regex = var.realm + '*'
var_id = var.translation.name
standard_name = var.translation.standard_name
if var.translation.convention == 'no_translation':
date_range = var.T.range
var_id = var.name
standard_name = var.standard_name
date_range = var.T.range
var_id = var.name
standard_name = var.standard_name
if var.translation.convention is not None:
var_id = var.translation.name
standard_name = var.translation.standard_name
date_range = var.translation.T.range
if var.is_static:
date_range = None
freq = "fx"
else:
date_range = var.translation.T.range
freq = var.T.frequency
if freq == 'hr':
freq = '1hr'
if not isinstance(freq, str):
freq = freq.format_local()
if freq == 'hr':
freq = '1hr'

# define initial query dictionary with variable settings requirements that do not change if
# the variable is translated
case_d.query['frequency'] = freq
case_d.query['path'] = [path_regex]
case_d.query['path'] = path_regex
case_d.query['realm'] = realm_regex
case_d.query['standard_name'] = standard_name
case_d.query['variable_id'] = var_id
Expand Down
54 changes: 36 additions & 18 deletions src/xr_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -722,7 +722,13 @@ def approximate_attribute_value(self, our_name: str, ds_name: str) -> bool:
"""Determine if the dataset attribute value is an approximate match to the expected attribute value"""
exclude = ["with", "on", "in", "of", "at", "near"]
our_name_split = [i for i in our_name.split('_') if i not in exclude]
ds_name_split = [i for i in ds_name.split('_') if i not in exclude]
if isinstance(ds_name, str):
ds_name_split = [i for i in ds_name.split('_') if i not in exclude]
elif isinstance(ds_name, list):
for n in ds_name:
print(n)
ds_name_split = [i for i in ds_name[0].split('_')]

isect = set(our_name_split).intersection(ds_name_split)

if len(isect) >= len(our_name_split) - 2 and len(isect) > 0:
Expand Down Expand Up @@ -868,6 +874,8 @@ def reconcile_attr(self, our_var, ds_var, our_attr_name, ds_attr_name=None,
"""Compare attribute of a :class:`~src.data_model.DMVariable` (*our_var*)
with what's set in the xarray.Dataset (*ds_var*).
"""
if ds_var is None:
return
if ds_attr_name is None:
ds_attr_name = our_attr_name
our_attr = getattr(our_var, our_attr_name)
Expand Down Expand Up @@ -1063,28 +1071,38 @@ def reconcile_coord_bounds(self, our_coord, ds, ds_coord_name):
expectations based on the model's convention (*our_var*), for the bounds
on the dimension coordinate *our_coord*.
"""
try:
if len(ds.cf.bounds) > 0:
bounds = ds.cf.get_bounds(ds_coord_name)
except KeyError:
# cf accessor could't find associated bounds variable
elif hasattr(ds[ds_coord_name], 'attrs'):
if ds[ds_coord_name].attrs.get('bounds', None):
bounds = ds[ds_coord_name].bounds
if isinstance(bounds, str):
our_coord.bounds_var = None
return
else:
our_coord.bounds_var = None
return
else:
# cf accessor couldn't find associated bounds variable
bounds = None
our_coord.bounds_var = None
return

# Inherit standard_name from our_coord if not present (regardless of
# skip_std_name), overwriting metadata on bounds if different
self.reconcile_attr(our_coord, bounds, 'standard_name',
fill_ours=False, fill_ds=True, overwrite_ours=False
)
# Inherit units from our_coord if not present (regardless of skip_units),
# overwriting metadata on bounds if different
self.reconcile_attr(our_coord, bounds, 'units',
comparison_func=units.units_equal,
fill_ours=False, fill_ds=True, overwrite_ours=False
)
if our_coord.name != bounds.name:
self.log.debug("Updating %s for '%s' to value '%s' from dataset.",
'bounds', our_coord.name, bounds.name)
our_coord.bounds_var = bounds
if bounds is not None:
self.reconcile_attr(our_coord, bounds, 'standard_name',
fill_ours=False, fill_ds=True, overwrite_ours=False
)
# Inherit units from our_coord if not present (regardless of skip_units),
# overwriting metadata on bounds if different
self.reconcile_attr(our_coord, bounds, 'units',
comparison_func=units.units_equal,
fill_ours=False, fill_ds=True, overwrite_ours=False
)
if our_coord.name != bounds.name:
self.log.debug("Updating %s for '%s' to value '%s' from dataset.",
'bounds', our_coord.name, bounds.name)
our_coord.bounds_var = bounds

def reconcile_dimension_coords(self, our_var, ds):
"""Reconcile name, standard_name and units attributes between the
Expand Down

0 comments on commit e1a590e

Please sign in to comment.