From e1a590eec70a169df335ba2d78ba43d035b3aa70 Mon Sep 17 00:00:00 2001 From: Jess <20195932+wrongkindofdoctor@users.noreply.github.com> Date: Mon, 30 Sep 2024 16:51:20 -0400 Subject: [PATCH] Fix issues in pp query (#692) * fix hr -> 1hr freq conversion in pp query try using regex string contains standard_name in query * add check for parameter type to xr_parser approximate_attribute_value * remove regex from pp query standard_name * add check that bounds is populated in cf accessor, then check coord attrs and only run coord bounds check if bounds are not None in xr_parser --- src/preprocessor.py | 25 ++++++++++----------- src/xr_parser.py | 54 ++++++++++++++++++++++++++++++--------------- 2 files changed, 48 insertions(+), 31 deletions(-) diff --git a/src/preprocessor.py b/src/preprocessor.py index 188c90faf..388667ca4 100644 --- a/src/preprocessor.py +++ b/src/preprocessor.py @@ -12,7 +12,6 @@ from src.util import datelabel as dl import cftime import intake -import math import numpy as np import xarray as xr import collections @@ -922,31 +921,31 @@ def query_catalog(self, for case_name, case_d in case_dict.items(): # path_regex = re.compile(r'(?i)(? 
bool: """Determine if the dataset attribute value is an approximate match to the expected attribute value""" exclude = ["with", "on", "in", "of", "at", "near"] our_name_split = [i for i in our_name.split('_') if i not in exclude] - ds_name_split = [i for i in ds_name.split('_') if i not in exclude] + if isinstance(ds_name, str): + ds_name_split = [i for i in ds_name.split('_') if i not in exclude] + elif isinstance(ds_name, list): + for n in ds_name: + print(n) + ds_name_split = [i for i in ds_name[0].split('_')] + isect = set(our_name_split).intersection(ds_name_split) if len(isect) >= len(our_name_split) - 2 and len(isect) > 0: @@ -868,6 +874,8 @@ def reconcile_attr(self, our_var, ds_var, our_attr_name, ds_attr_name=None, """Compare attribute of a :class:`~src.data_model.DMVariable` (*our_var*) with what's set in the xarray.Dataset (*ds_var*). """ + if ds_var is None: + return if ds_attr_name is None: ds_attr_name = our_attr_name our_attr = getattr(our_var, our_attr_name) @@ -1063,28 +1071,38 @@ def reconcile_coord_bounds(self, our_coord, ds, ds_coord_name): expectations based on the model's convention (*our_var*), for the bounds on the dimension coordinate *our_coord*. 
""" - try: + if len(ds.cf.bounds) > 0: bounds = ds.cf.get_bounds(ds_coord_name) - except KeyError: - # cf accessor could't find associated bounds variable + elif hasattr(ds[ds_coord_name], 'attrs'): + if ds[ds_coord_name].attrs.get('bounds', None): + bounds = ds[ds_coord_name].bounds + if isinstance(bounds, str): + our_coord.bounds_var = None + return + else: + our_coord.bounds_var = None + return + else: + # cf accessor couldn't find associated bounds variable + bounds = None our_coord.bounds_var = None return - # Inherit standard_name from our_coord if not present (regardless of # skip_std_name), overwriting metadata on bounds if different - self.reconcile_attr(our_coord, bounds, 'standard_name', - fill_ours=False, fill_ds=True, overwrite_ours=False - ) - # Inherit units from our_coord if not present (regardless of skip_units), - # overwriting metadata on bounds if different - self.reconcile_attr(our_coord, bounds, 'units', - comparison_func=units.units_equal, - fill_ours=False, fill_ds=True, overwrite_ours=False - ) - if our_coord.name != bounds.name: - self.log.debug("Updating %s for '%s' to value '%s' from dataset.", - 'bounds', our_coord.name, bounds.name) - our_coord.bounds_var = bounds + if bounds is not None: + self.reconcile_attr(our_coord, bounds, 'standard_name', + fill_ours=False, fill_ds=True, overwrite_ours=False + ) + # Inherit units from our_coord if not present (regardless of skip_units), + # overwriting metadata on bounds if different + self.reconcile_attr(our_coord, bounds, 'units', + comparison_func=units.units_equal, + fill_ours=False, fill_ds=True, overwrite_ours=False + ) + if our_coord.name != bounds.name: + self.log.debug("Updating %s for '%s' to value '%s' from dataset.", + 'bounds', our_coord.name, bounds.name) + our_coord.bounds_var = bounds def reconcile_dimension_coords(self, our_var, ds): """Reconcile name, standard_name and units attributes between the