Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dataclass fixes #317

Merged
merged 19 commits into from
Aug 7, 2024
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
210 changes: 123 additions & 87 deletions lib/adf_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,11 @@ def __init__(self, adfobj):
self.ref_nickname = self.base_nickname

# define reference data
self.set_reference() # specify "ref_labels" -> called "data_list" in zonal_mean (name of data source)
self.set_reference(init=True) # specify "ref_labels" -> called "data_list" in zonal_mean (name of data source)
justin-richling marked this conversation as resolved.
Show resolved Hide resolved

def set_reference(self):
# Reference case setup (baseline/obs)
#------------------------------------
def set_reference(self, init=False):
"""Set attributes for reference (aka baseline) data location, names, and variables."""
if self.adf.compare_obs:
self.ref_var_loc = {v: self.adf.var_obs_dict[v]['obs_file'] for v in self.adf.var_obs_dict}
Expand All @@ -76,87 +78,19 @@ def set_reference(self):
# when using a reference simulation, allow a "special" attribute with the case name:
self.ref_case_label = self.adf.get_baseline_info("cam_case_name", required=True)
for v in self.adf.diag_var_list:
self.ref_var_nam[v] = v
justin-richling marked this conversation as resolved.
Show resolved Hide resolved
self.ref_labels[v] = self.adf.get_baseline_info("cam_case_name", required=True)
f = self.get_reference_climo_file(v)
if f is None:
warnings.warn(f"\t WARNING: ADFData found no reference climo file for {v}")
if not init:
warnings.warn(f"\t WARNING: ADFData found no reference climo file for {v}")
continue
else:
self.ref_var_loc[v] = f
self.ref_var_nam[v] = v
self.ref_labels[v] = self.adf.get_baseline_info("cam_case_name", required=True)

def get_reference_climo_file(self, var):
    """Return a list of climo files to be used as reference (aka baseline) for variable ``var``.

    Returns ``None`` when no reference climo file can be located, so callers
    must check the result before use.
    """
    if self.adf.compare_obs:
        # Observation comparison: the file (if any) was registered in ref_var_loc.
        fils = self.ref_var_loc.get(var, None)
        return [fils] if fils is not None else None
    ref_loc = self.adf.get_baseline_info("cam_climo_loc")
    if ref_loc is None:
        # Guard: "cam_climo_loc" is fetched without required=True, so it can be
        # missing; Path(None) would raise TypeError. Treat as "no file found".
        return None
    # NOTE: originally had this looking for *_baseline.nc
    fils = sorted(Path(ref_loc).glob(f"{self.ref_case_label}_{var}_climo.nc"))
    return fils if fils else None

def load_reference_dataset(self, var):
    """Open and return the reference (baseline/obs) climo Dataset for ``var``, or ``None``."""
    ref_files = self.get_reference_climo_file(var)
    if ref_files:
        return self.load_dataset(ref_files)
    # Nothing found on disk: warn and signal the caller to skip this variable.
    warnings.warn(f"ERROR: Did not find any reference files for variable: {var}. Will try to skip.")
    return None

def load_reference_da(self, variablename):
    """Return the reference DataArray for ``variablename``, applying any
    scale factor / offset unit conversion from the variable defaults.

    Returns ``None`` when the reference dataset cannot be loaded.
    """
    ds = self.load_reference_dataset(variablename)
    if ds is None:
        # load_reference_dataset warns and returns None when no reference
        # files exist; indexing None would raise TypeError, so propagate None.
        return None
    da = ds[self.ref_var_nam[variablename]]
    if variablename in self.adf.variable_defaults:
        vres = self.adf.variable_defaults[variablename]
        # Observational data may need a different unit conversion than model data.
        if self.adf.compare_obs:
            scale_factor = vres.get("obs_scale_factor", 1)
            add_offset = vres.get("obs_add_offset", 0)
        else:
            scale_factor = vres.get("scale_factor", 1)
            add_offset = vres.get("add_offset", 0)
        da = da * scale_factor + add_offset
        da.attrs['units'] = vres.get("new_unit", da.attrs.get('units', 'none'))
    return da


def load_reference_regrid_dataset(self, case, field):
    """Open and return the regridded reference Dataset for ``case``/``field``, or ``None``."""
    regrid_files = self.get_ref_regrid_file(case, field)
    if regrid_files:
        return self.load_dataset(regrid_files)
    warnings.warn(f"ERROR: Did not find regrid file(s) for case: {case}, variable: {field}")
    return None


def load_reference_regrid_da(self, case, field):
    """Return the regridded reference DataArray for ``case``/``field``, or ``None``."""
    regrid_files = self.get_ref_regrid_file(case, field)
    if regrid_files:
        return self.load_da(regrid_files, field)
    warnings.warn(f"ERROR: Did not find regrid file(s) for case: {case}, variable: {field}")
    return None


def load_climo_da(self, case, variablename):
    """Return the DataArray for ``variablename`` read from the climo file of ``case``."""
    climo_files = self.get_climo_file(case, variablename)
    return self.load_da(climo_files, variablename)


def load_climo_file(self, case, variablename):
    """Return the climatology Dataset for ``variablename`` of ``case``, or ``None``."""
    climo_files = self.get_climo_file(case, variablename)
    if climo_files:
        return self.load_dataset(climo_files)
    warnings.warn(f"ERROR: Did not find climo file for variable: {variablename}. Will try to skip.")
    return None


def get_climo_file(self, case, variablename):
    """Retrieve the climo file path(s) for ``variablename`` for a specific ``case``."""
    # One climo location per case; pick the entry matching this case.
    climo_locs = self.adf.get_cam_info("cam_climo_loc", required=True)
    case_dir = Path(climo_locs[self.case_names.index(case)])
    # Sorted glob of "<case>_<var>_climo.nc"; may be empty.
    return sorted(case_dir.glob(f"{case}_{variablename}_climo.nc"))

# Time series files
#------------------
def get_timeseries_file(self, case, field):
ts_locs = self.adf.get_cam_info("cam_ts_loc", required=True) # list of paths (could be multiple cases)
caseindex = (self.case_names).index(case)
Expand Down Expand Up @@ -203,31 +137,115 @@ def load_timeseries_dataset(self, fils):
warnings.warn("Timeseries file does not have time bounds info.")
return xr.decode_cf(ds)

#----------------


# Climatology files
#------------------
def get_climo_file(self, case, variablename):
    """Retrieve the climo file path(s) for variablename for a specific case."""
    a = self.adf.get_cam_info("cam_climo_loc", required=True) # list of paths (could be multiple cases)
    caseindex = (self.case_names).index(case) # the entry for specified case
    model_cl_loc = Path(a[caseindex])
    # Sorted glob of "<case>_<var>_climo.nc"; returns an empty list when no file exists.
    return sorted(model_cl_loc.glob(f"{case}_{variablename}_climo.nc"))


def get_reference_climo_file(self, var):
    """Return a list of files to be used as reference (aka baseline) for variable var."""
    if self.adf.compare_obs:
        # Observation comparison: the file (if any) was registered in ref_var_loc.
        fils = self.ref_var_loc.get(var, None)
        return [fils] if fils is not None else None
    ref_loc = self.adf.get_baseline_info("cam_climo_loc")
    # NOTE: originally had this looking for *_baseline.nc
    # NOTE(review): ref_loc may be None since "cam_climo_loc" is fetched without
    # required=True; Path(None) would raise TypeError — confirm upstream guarantees it.
    fils = sorted(Path(ref_loc).glob(f"{self.ref_case_label}_{var}_climo.nc"))
    if fils:
        return fils
    return None


def load_climo_da(self, case, variablename):
    """Return the DataArray for ``variablename`` read from the climo file of ``case``."""
    climo_files = self.get_climo_file(case, variablename)
    return self.load_da(case, climo_files, variablename)


def load_climo_file(self, case, variablename):
    """Return Dataset for climo of variablename"""
    fils = self.get_climo_file(case, variablename)
    if not fils:
        # No climo file on disk: warn and return None so the caller can skip.
        warnings.warn(f"ERROR: Did not find climo file for variable: {variablename}. Will try to skip.")
        return None
    return self.load_dataset(fils)


def load_obs_climo_dataset(self, variablename):
    """Open and return the observational climo Dataset for ``variablename``, or ``None``."""
    obs_files = self.get_reference_climo_file(variablename)
    if obs_files:
        return self.load_dataset(obs_files)
    warnings.warn(f"ERROR: Did not find any reference climo files for variable: {variablename}. Will try to skip.")
    return None

#----------------


# Regridded files
#----------------
def load_reference_regrid_dataset(self, case, field):
    """Return the regridded reference Dataset for ``case``/``field``, or ``None`` when no files are found."""
    fils = self.get_ref_regrid_file(case, field)
    if not fils:
        warnings.warn(f"ERROR: Did not find regrid file(s) for case: {case}, variable: {field}")
        return None
    return self.load_dataset(fils)


def load_reference_regrid_da(self, case, field):
    """Return the regridded reference DataArray for ``case``/``field``, or ``None`` when no files are found."""
    fils = self.get_ref_regrid_file(case, field)
    if not fils:
        warnings.warn(f"ERROR: Did not find regrid file(s) for case: {case}, variable: {field}")
        return None
    return self.load_da(case, fils, field)


def get_ref_regrid_file(self, case, field):
model_rg_loc = Path(self.adf.get_basic_info("cam_regrid_loc", required=True))
return sorted(model_rg_loc.glob(f"{case}_{field}_*.nc"))
if self.adf.compare_obs:
obs_loc = self.ref_var_loc.get(field, None)
fils = [str(obs_loc)]
nusbaume marked this conversation as resolved.
Show resolved Hide resolved
else:
model_rg_loc = Path(self.adf.get_basic_info("cam_regrid_loc", required=True))
fils = sorted(model_rg_loc.glob(f"{case}_{field}_*.nc"))
return fils


def get_regrid_file(self, case, field):
model_rg_loc = Path(self.adf.get_basic_info("cam_regrid_loc", required=True))
rlbl = self.ref_labels[field] # rlbl = "reference label" = the name of the reference data that defines target grid
# rlbl = "reference label" = the name of the reference data that defines target grid
rlbl = self.ref_labels[field]
return sorted(model_rg_loc.glob(f"{rlbl}_{case}_{field}_*.nc"))


def load_regrid_dataset(self, case, field):
fils = self.get_regrid_file(case, field)
if not fils:
warnings.warn(f"ERROR: Did not find regrid file(s) for case: {case}, variable: {field}")
nusbaume marked this conversation as resolved.
Show resolved Hide resolved
return None
return self.load_dataset(fils)


def load_regrid_da(self, case, field):
fils = self.get_regrid_file(case, field)
if not fils:
warnings.warn(f"ERROR: Did not find regrid file(s) for case: {case}, variable: {field}")
nusbaume marked this conversation as resolved.
Show resolved Hide resolved
return None
return self.load_da(fils, field)
return self.load_da(case, fils, field)

#----------------


# DataSet and DataArray load
#---------------------------

# Load DataSet
def load_dataset(self, fils):
if (len(fils) == 0):
warnings.warn("Input file list is empty.")
Expand All @@ -245,14 +263,32 @@ def load_dataset(self, fils):
return ds


def load_da(self, fils, variablename):
ds = self.load_dataset(fils)
if ds is None:
warnings.warn(f"ERROR: Load failed for {variablename}")
return None
da = (ds[variablename]).squeeze()
# Load DataArray
def load_da(self, case, fils, variablename):
#Check if case is baseline and if it is, check if comparing against obs
if (case == self.ref_labels[variablename]) and (self.adf.compare_obs):
da = self.load_obs_climo_dataset(variablename)[self.ref_var_nam[variablename]]
#Else, its either a test case, or baseline and NOT comparing against obs
else:
ds = self.load_dataset(fils)
da = (ds[variablename]).squeeze()

if variablename in self.adf.variable_defaults:
vres = self.adf.variable_defaults[variablename]
da = da * vres.get("scale_factor",1) + vres.get("add_offset", 0)
if (case == self.ref_labels[variablename]) and (self.adf.compare_obs):
scale_factor = vres.get("obs_scale_factor",1)
add_offset = vres.get("obs_add_offset", 0)
else:
scale_factor = vres.get("scale_factor",1)
add_offset = vres.get("add_offset", 0)
da = da * scale_factor + add_offset
da.attrs['units'] = vres.get("new_unit", da.attrs.get('units', 'none'))
return da


#----------------

#End Script
##########


21 changes: 13 additions & 8 deletions lib/adf_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,9 @@ def __init__(self, config_file, debug=False):
#that check this variable won't crash:
self.__cam_bl_climo_info = None

# Set baseline hist string object to None
self.__base_hist_str = None

#Also set data name for use below:
data_name = "Obs"
base_nickname = "Obs"
Expand Down Expand Up @@ -773,16 +776,18 @@ def get_climo_yrs_from_ts(self, input_ts_loc, case_name):
errmsg = f"Time series directory '{input_ts_loc}' not found. Script is exiting."
raise AdfError(errmsg)

# Search for first variable in var_list to get a time series file to read
# Search for first available variable in var_list to get a time series file to read
# NOTE: it is assumed all the variables have the same dates!
# Also, it is assumed that only h0 files should be climo-ed.
ts_files = sorted(input_location.glob(f"{case_name}*h0*.{var_list[0]}.*nc"))

#Read hist_str (component.hist_num) from the yaml file, or set to default
hist_str = self.get_basic_info('hist_str')
#If hist_str is not present, then default to 'cam.h0':
if not hist_str:
hist_str = 'cam.h0'
for var in var_list:
ts_files = sorted(input_location.glob(f"{case_name}*h0*.{var}.*nc"))
if ts_files:
print(var)
break
else:
logmsg = "get years for time series:"
logmsg = f"\tVar '{var}' not in dataset, skip to next to try and find climo years..."
self.debug_log(logmsg)
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is in case the first variable in the yaml file doesn't have a history file, we need to keep looking for the next available one.


#Read in file(s)
if len(ts_files) == 1:
Expand Down
29 changes: 21 additions & 8 deletions scripts/plotting/global_latlon_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,12 +125,13 @@ def global_latlon_map(adfobj):
"MAM": [3, 4, 5],
"SON": [9, 10, 11]
}

# probably want to do this one variable at a time:
for var in var_list:
if var not in adfobj.data.ref_var_nam:
dmsg = f"No reference data found for variable `{var}`, zonal mean plotting skipped."
dmsg = f"No reference data found for variable `{var}`, global lat/lon mean plotting skipped."
adfobj.debug_log(dmsg)
print(dmsg)
continue

#Notify user of variable being plotted:
Expand All @@ -156,10 +157,19 @@ def global_latlon_map(adfobj):
vres['central_longitude'] = pf.get_central_longitude(adfobj)

# load reference data (observational or baseline)
# odata = adfobj.data.load_reference_da(var)
odata = adfobj.data.load_reference_regrid_da(adfobj.data.ref_case_label, var)
if not adfobj.compare_obs:
base_name = adfobj.data.ref_case_label
else:
base_name = adfobj.data.ref_labels[var]

# Gather reference variable data
odata = adfobj.data.load_reference_regrid_da(base_name, var)

if odata is None:
dmsg = f"No regridded test file for {base_name} for variable `{var}`, global lat/lon mean plotting skipped."
adfobj.debug_log(dmsg)
continue

o_has_dims = pf.validate_dims(odata, ["lat", "lon", "lev"]) # T iff dims are (lat,lon) -- can't plot unless we have both
if (not o_has_dims['has_lat']) or (not o_has_dims['has_lon']):
print(f"\t = skipping global map for {var} as REFERENCE does not have both lat and lon")
Expand All @@ -184,6 +194,8 @@ def global_latlon_map(adfobj):

#Skip this variable/case if the regridded climo file doesn't exist:
if mdata is None:
dmsg = f"No regridded test file for {case_name} for variable `{var}`, global lat/lon mean plotting skipped."
adfobj.debug_log(dmsg)
continue

#Determine dimensions of variable:
Expand All @@ -192,16 +204,17 @@ def global_latlon_map(adfobj):
print(f"\t = skipping global map for {var} for case {case_name} as it does not have both lat and lon")
continue
else: # i.e., has lat&lon
if pres_levs and (not has_dims['has_lev']):
print(f"\t - skipping global map for {var} as it has more than lat/lon dims, but no pressure levels were provided")
if (has_dims['has_lev']) and (not pres_levs):
print(f"\t - skipping global map for {var} as it has more than lev dimension, but no pressure levels were provided")
continue

# Check output file. If file does not exist, proceed.
# If file exists:
# if redo_plot is true: delete it now and make plot
# if redo_plot is false: add to website and move on
doplot = {}
if not pres_levs:

if not has_dims['has_lev']:
for s in seasons:
plot_name = plot_loc / f"{var}_{s}_LatLon_Mean.{plot_type}"
doplot[plot_name] = plot_file_op(adfobj, plot_name, var, case_name, s, web_category, redo_plot, "LatLon")
Expand Down Expand Up @@ -353,4 +366,4 @@ def plot_file_op(adfobj, plot_name, var, case_name, season, web_category, redo_p
return True

##############
#END OF SCRIPT
#END OF SCRIPT
Loading
Loading