diff --git a/lib/adf_dataset.py b/lib/adf_dataset.py
new file mode 100644
index 000000000..35d26a865
--- /dev/null
+++ b/lib/adf_dataset.py
@@ -0,0 +1,258 @@
+from pathlib import Path
+import xarray as xr
+
+import warnings # use to warn user about missing files
+
+def my_formatwarning(msg, *args, **kwargs):
+    # ignore everything except the message
+    return str(msg) + '\n'
+warnings.formatwarning = my_formatwarning
+
+# "reference data"
+# It is often just a "baseline case",
+# but could be some totally external data (reanalysis or observation or other model)
+# When it is another simulation, it gets treated like another "case"
+# When it is external data expect:
+# - "climo" files (12 monthly climos in the file)
+# - one variable per "climo"
+# - source can differ for each variable, requires label
+# - resolution can differ for each variable, requires regridded file(s)
+# - the variable name and units in the file may differ from CAM; use defaults.yaml to set conversion
+# - there could be multiple instances of a variable from different sources (e.g. different observations)
+
+# NOTE: the last item (multiple instances of a variable) is not allowed in AdfObs.var_obs_dict
+# Since ADF is not able to handle this case, for now it is excluded from the AdfData class.
+
+# NOTE: To make the "baseline case" vs "external data" cases as similar as possible,
+# the "baseline case" version is constructed below to mirror "external data".
+# - provide a dictionary of (variable: file-path)
+#   + For external data, that dictionary is from AdfObs.var_obs_dict,
+#     which provides a dict of all the available variables.
+#   + For a reference simulation, look for files that match the diag_var_list
+
+# NOTE: There is currently a "base_nickname" allowed from AdfInfo.
+# Set AdfData.ref_nickname to that.
+# Could be altered from "Obs" to be the data source label.
+
+class AdfData:
+    """A class instantiated with an AdfDiag object.
+    Methods provide means to load data.
+    This class does not interact with plotting,
+    just provides access to data locations and loading data.
+
+    A future need is to add some kind of frequency/sampling
+    parameters to allow for non-h0 files.
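+
+    A minimal usage sketch (illustrative only; assumes a configured AdfDiag
+    instance `adfobj`, a model-vs-baseline comparison, and regridded climo
+    files already on disk; "TS" is just an example variable name):
+
+        data = AdfData(adfobj)
+        mdata = data.load_regrid_da(data.case_names[0], "TS")
+        odata = data.load_reference_regrid_da(data.ref_case_label, "TS")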
+
+    """
+    def __init__(self, adfobj):
+        self.adf = adfobj # provides quick access to the AdfDiag object
+        # paths
+        self.model_rgrid_loc = adfobj.get_basic_info("cam_regrid_loc", required=True)
+
+        # variables (and info for unit transform)
+        # use self.adf.diag_var_list and self.adf.variable_defaults
+
+        # case names and nicknames
+        self.case_names = adfobj.get_cam_info("cam_case_name", required=True)
+        self.test_nicknames = adfobj.case_nicknames["test_nicknames"]
+        self.base_nickname = adfobj.case_nicknames["base_nickname"]
+        self.ref_nickname = self.base_nickname
+
+        # define reference data
+        self.set_reference() # specify "ref_labels" -> called "data_list" in zonal_mean (name of data source)
+
+    def set_reference(self):
+        """Set attributes for reference (aka baseline) data location, names, and variables."""
+        if self.adf.compare_obs:
+            self.ref_var_loc = {v: self.adf.var_obs_dict[v]['obs_file'] for v in self.adf.var_obs_dict}
+            self.ref_labels = {v: self.adf.var_obs_dict[v]['obs_name'] for v in self.adf.var_obs_dict}
+            self.ref_var_nam = {v: self.adf.var_obs_dict[v]['obs_var'] for v in self.adf.var_obs_dict}
+            if not self.adf.var_obs_dict:
+                warnings.warn("\t WARNING: reference is observations, but no observations found to plot against.")
+        else:
+            self.ref_var_loc = {}
+            self.ref_var_nam = {}
+            self.ref_labels = {}
+            # when using a reference simulation, allow a "special" attribute with the case name:
+            self.ref_case_label = self.adf.get_baseline_info("cam_case_name", required=True)
+            for v in self.adf.diag_var_list:
+                f = self.get_reference_climo_file(v)
+                if f is None:
+                    warnings.warn(f"\t WARNING: AdfData found no reference climo file for {v}")
+                    continue
+                else:
+                    self.ref_var_loc[v] = f
+                    self.ref_var_nam[v] = v
+                    self.ref_labels[v] = self.ref_case_label
+
+    def get_reference_climo_file(self, var):
+        """Return a list of files to be used as reference (aka baseline) for variable var."""
+        if self.adf.compare_obs:
+            fils = self.ref_var_loc.get(var, None)
+            return [fils] if fils is not None else None
+        ref_loc = self.adf.get_baseline_info("cam_climo_loc")
+        # NOTE: originally had this looking for *_baseline.nc
+        fils = sorted(Path(ref_loc).glob(f"{self.ref_case_label}_{var}_climo.nc"))
+        if fils:
+            return fils
+        return None
+
+    def load_reference_dataset(self, var):
+        fils = self.get_reference_climo_file(var)
+        if not fils:
+            warnings.warn(f"ERROR: Did not find any reference files for variable: {var}. Will try to skip.")
+            return None
+        return self.load_dataset(fils)
+
+    def load_reference_da(self, variablename):
+        ds = self.load_reference_dataset(variablename)
+        if ds is None:
+            return None
+        da = ds[self.ref_var_nam[variablename]]
+        if variablename in self.adf.variable_defaults:
+            vres = self.adf.variable_defaults[variablename]
+            if self.adf.compare_obs:
+                scale_factor = vres.get("obs_scale_factor",1)
+                add_offset = vres.get("obs_add_offset", 0)
+            else:
+                scale_factor = vres.get("scale_factor",1)
+                add_offset = vres.get("add_offset", 0)
+            da = da * scale_factor + add_offset
+            da.attrs['units'] = vres.get("new_unit", da.attrs.get('units', 'none'))
+        return da
+
+
+    def load_reference_regrid_dataset(self, case, field):
+        fils = self.get_ref_regrid_file(case, field)
+        if not fils:
+            warnings.warn(f"ERROR: Did not find regrid file(s) for case: {case}, variable: {field}")
+            return None
+        return self.load_dataset(fils)
+
+
+    def load_reference_regrid_da(self, case, field):
+        fils = self.get_ref_regrid_file(case, field)
+        if not fils:
+            warnings.warn(f"ERROR: Did not find regrid file(s) for case: {case}, variable: {field}")
+            return None
+        return self.load_da(fils, field)
+
+
+    def load_climo_da(self, case, variablename):
+        """Return DataArray from climo file"""
+        fils = self.get_climo_file(case, variablename)
+        return self.load_da(fils, variablename)
+
+
+    def load_climo_file(self, case, variablename):
+        """Return Dataset for climo of variablename"""
+        fils = self.get_climo_file(case, variablename)
+        if not fils:
+            warnings.warn(f"ERROR: Did not find climo file for variable: {variablename}. Will try to skip.")
+            return None
+        return self.load_dataset(fils)
+
+
+    def get_climo_file(self, case, variablename):
+        """Retrieve the climo file path(s) for variablename for a specific case."""
+        a = self.adf.get_cam_info("cam_climo_loc", required=True) # list of paths (could be multiple cases)
+        caseindex = (self.case_names).index(case) # the entry for specified case
+        model_cl_loc = Path(a[caseindex])
+        return sorted(model_cl_loc.glob(f"{case}_{variablename}_climo.nc"))
+
+    def get_timeseries_file(self, case, field):
+        ts_locs = self.adf.get_cam_info("cam_ts_loc", required=True) # list of paths (could be multiple cases)
+        caseindex = (self.case_names).index(case)
+        ts_loc = Path(ts_locs[caseindex])
+        ts_filenames = f'{case}.*.{field}.*nc'
+        ts_files = sorted(ts_loc.glob(ts_filenames))
+        return ts_files
+
+
+    def get_ref_timeseries_file(self, field):
+        if self.adf.compare_obs:
+            return None
+        else:
+            ts_loc = Path(self.adf.get_baseline_info("cam_ts_loc", required=True))
+            ts_filenames = f'{self.ref_case_label}.*.{field}.*nc'
+            ts_files = sorted(ts_loc.glob(ts_filenames))
+            return ts_files
+
+
+    def load_timeseries_dataset(self, fils):
+        if (len(fils) == 0):
+            warnings.warn("Input file list is empty.")
+            return None
+        elif (len(fils) > 1):
+            ds = xr.open_mfdataset(fils, decode_times=False)
+        else:
+            sfil = str(fils[0])
+            if not Path(sfil).is_file():
+                warnings.warn(f"Expecting to find file: {sfil}")
+                return None
+            ds = xr.open_dataset(sfil, decode_times=False)
+        if ds is None:
+            warnings.warn("invalid data on load_timeseries_dataset")
+        # assign time to midpoint of interval (even if it is already)
+        if 'time_bnds' in ds:
+            t = ds['time_bnds'].mean(dim='nbnd')
+            t.attrs = ds['time'].attrs
+            ds = ds.assign_coords({'time':t})
+        elif 'time_bounds' in ds:
+            t = ds['time_bounds'].mean(dim='nbnd')
+            t.attrs = ds['time'].attrs
+            ds = ds.assign_coords({'time':t})
+        else:
+            warnings.warn("Timeseries file does not have time bounds info.")
+        return xr.decode_cf(ds)
+
+    def get_ref_regrid_file(self, case, field):
+        model_rg_loc = Path(self.adf.get_basic_info("cam_regrid_loc", required=True))
+        return sorted(model_rg_loc.glob(f"{case}_{field}_*.nc"))
+
+
+    def get_regrid_file(self, case, field):
+        model_rg_loc = Path(self.adf.get_basic_info("cam_regrid_loc", required=True))
+        rlbl = self.ref_labels[field] # rlbl = "reference label" = the name of the reference data that defines target grid
+        return sorted(model_rg_loc.glob(f"{rlbl}_{case}_{field}_*.nc"))
+
+    def load_regrid_dataset(self, case, field):
+        fils = self.get_regrid_file(case, field)
+        if not fils:
+            warnings.warn(f"ERROR: Did not find regrid file(s) for case: {case}, variable: {field}")
+            return None
+        return self.load_dataset(fils)
+
+    def load_regrid_da(self, case, field):
+        fils = self.get_regrid_file(case, field)
+        if not fils:
+            warnings.warn(f"ERROR: Did not find regrid file(s) for case: {case}, variable: {field}")
+            return None
+        return self.load_da(fils, field)
+
+
+    def load_dataset(self, fils):
+        if (len(fils) == 0):
+            warnings.warn("Input file list is empty.")
+            return None
+        elif (len(fils) > 1):
+            ds = xr.open_mfdataset(fils, combine='by_coords')
+        else:
+            sfil = str(fils[0])
+            if not Path(sfil).is_file():
+                warnings.warn(f"Expecting to find file: {sfil}")
+                return None
+            ds = xr.open_dataset(sfil)
+        if ds is None:
+            warnings.warn("invalid data on load_dataset")
+        return ds
+
+
+    def load_da(self, fils, variablename):
+        ds = self.load_dataset(fils)
+        if ds is None:
+            warnings.warn(f"ERROR: Load failed for {variablename}")
+            return None
+        da = (ds[variablename]).squeeze()
+        if variablename in self.adf.variable_defaults:
+            vres = self.adf.variable_defaults[variablename]
+            da = da * vres.get("scale_factor",1) + vres.get("add_offset", 0)
+            da.attrs['units'] = vres.get("new_unit", da.attrs.get('units', 'none'))
+        return da
diff --git a/lib/adf_diag.py b/lib/adf_diag.py
index aa6f1ef08..aa3c3954c 100644
--- a/lib/adf_diag.py
+++ b/lib/adf_diag.py
@@ -1,4 +1,3 @@
-
 """
 Location of the "AdfDiag" object,
 which is used to store all relevant data and
@@ -95,9 +94,9 @@
 
 # +++++++++++++++++++++++++++++
 
-# Finally, import needed ADF module:
+# Finally, import needed ADF modules:
 from adf_web import AdfWeb
-
+from adf_dataset import AdfData
 
 #################
 # Helper functions
@@ -177,6 +176,9 @@ def __init__(self, config_file, debug=False):
         # Add plotting script names:
         self.__plotting_scripts = self.read_config_var("plotting_scripts")
 
+        # Provide convenience functions for data handling:
+        self.data = AdfData(self)
+
     # Create property needed to return "plotting_scripts" variable to user:
     @property
     def plotting_scripts(self):
@@ -351,6 +353,7 @@ def call_ncrcat(cmd):
             start_years = [self.climo_yrs["syear_baseline"]]
             end_years = [self.climo_yrs["eyear_baseline"]]
             case_type_string = "baseline"
+            hist_str_list = [self.hist_string["base_hist_str"]]
 
         else:
             # Use test case settings, which are already lists:
@@ -362,13 +365,13 @@ def call_ncrcat(cmd):
             start_years = self.climo_yrs["syears"]
             end_years = self.climo_yrs["eyears"]
             case_type_string="case"
+            hist_str_list = self.hist_string["test_hist_str"]
 
         # Notify user that script has started:
 
         # End if
 
         # Read hist_str (component.hist_num) from the yaml file, or set to default
-        hist_str_list = self.get_cam_info("hist_str")
         dmsg = f"reading from {hist_str_list} files"
         self.debug_log(dmsg)
@@ -438,7 +441,6 @@ def call_ncrcat(cmd):
 
             # Note: could use `open_mfdataset`, but that can become very slow;
             #       This approach effectively assumes that all files contain the same variables.
-
             # Check what kind of vertical coordinate (if any) is being used for this model run:
             # ------------------------
             if "lev" in hist_file_ds:
@@ -485,8 +487,8 @@ def call_ncrcat(cmd):
                         print(wmsg)
                         vert_coord_type = None
 
-                # End if (long name)
-            # End if (vert_coord)
+                    # End if (long name)
+                # End if (vert_coord)
             else:
                 # No level dimension found, so assume there is no vertical coordinate:
                 vert_coord_type = None
@@ -517,9 +519,10 @@ def call_ncrcat(cmd):
         diag_var_list = self.diag_var_list
 
         # Aerosol Calcs
-        #--------------
-        #Always make sure PMID is made if aerosols are desired in config file
-        # Since there's no requirement for `aerosol_zonal_list`, allow it to be absent:
+        # --------------
+        # Always make sure PMID is made if aerosols are desired in config file
+        # Since there's no requirement for `aerosol_zonal_list` to be included, allow it to be absent:
+        azl = res.get("aerosol_zonal_list", [])
         if "PMID" not in diag_var_list:
             if any(item in azl for item in diag_var_list):
@@ -527,9 +530,9 @@ def call_ncrcat(cmd):
         if "T" not in diag_var_list:
             if any(item in azl for item in diag_var_list):
                 diag_var_list += ["T"]
-        #End aerosol calcs
+        # End aerosol calcs
 
-        #Initialize dictionary for derived variable with needed list of constituents
+        # Initialize dictionary for derived variable with needed list of constituents
         constit_dict = {}
 
         for var in diag_var_list:
@@ -544,30 +547,27 @@ def call_ncrcat(cmd):
             constit_errmsg += "\n\tPlease add list of constituents to 'derivable_from' "
             constit_errmsg += f"for {var} in variable defaults yaml file."
 
-            #Check if current variable is a derived quantity
+            # Check if current variable is a derived quantity
             if var not in hist_file_var_list:
-
-                #Try to get varible defaults dictionary for
-                #non-present quantity
                 vres = res.get(var, {})
 
-                #Initialiaze list for constituents
-                #NOTE: This is if the variable is NOT derivable but needs
+                # Initialize list for constituents
+                # NOTE: This is if the variable is NOT derivable but needs
                 #      an empty list as a check later
                 constit_list = []
 
-                #intialize boolean to check if variable is derivable
+                # initialize boolean to check if variable is derivable
                 derive = False # assume it can't be derived and update if it can
 
-                #intialize boolean for regular CAM variable constituents
+                # initialize boolean for regular CAM variable constituents
                 try_cam_constits = True
 
-                #Check first if variable is potentially part of a CAM-CHEM run
+                # Check first if variable is potentially part of a CAM-CHEM run
                 if "derivable_from_cam_chem" in vres:
                     constit_list = vres["derivable_from_cam_chem"]
                     if constit_list:
                         if all(item in hist_file_ds.data_vars for item in constit_list):
-                            #Set check to look for regular CAM constituents in variable defaults
+                            # Set check to look for regular CAM constituents in variable defaults
                             try_cam_constits = False
                             derive = True
                             msg = f"create time series for {case_name}:"
@@ -576,10 +576,10 @@ def call_ncrcat(cmd):
                             self.debug_log(msg)
                         else:
                             self.debug_log(constit_errmsg)
-                        #End if
-                    #End if
+                        # End if
+                    # End if
 
-                #If not CAM-CHEM, check regular CAM runs
+                # If not CAM-CHEM, check regular CAM runs
                 if try_cam_constits:
                     if "derivable_from" in vres:
                         derive = True
@@ -594,32 +594,32 @@ def call_ncrcat(cmd):
                         der_from_msg += "or set appropriate argument in variable "
                         der_from_msg += "defaults yaml file."
                        self.debug_log(der_from_msg)
-                #End if
+                # End if
 
-                #Check if this variable can be derived
+                # Check if this variable can be derived
                 if (derive) and (constit_list):
                     for constit in constit_list:
                         if constit not in diag_var_list:
                             diag_var_list.append(constit)
 
-                    #Add variable to list to derive
+                    # Add variable to list to derive
                     vars_to_derive.append(var)
 
-                    #Add constituent list to variable key in dictionary
+                    # Add constituent list to variable key in dictionary
                     constit_dict[var] = constit_list
                     continue
-                #Log if this variable can be derived but is missing list of constituents
+                # Log if this variable can be derived but is missing list of constituents
                 elif (derive) and (not constit_list):
                     self.debug_log(constit_errmsg)
                     continue
-                #Lastly, raise error if the variable is not a derived quanitity but is also not
-                #in the history file(s)
+                # Lastly, warn if the variable is not a derived quantity but is also not
+                # in the history file(s)
                 else:
                     msg = f"WARNING: {var} is not in the file {hist_files[0]} "
                     msg += "nor can it be derived.\n"
                     msg += "\t ** No time series will be generated."
                     print(msg)
                     continue
-                #End if
-            #End if (var in var_diag_list)
+                # End if
+            # End if (var in var_diag_list)
 
             # Check if variable has a "lev" dimension according to first file:
             has_lev = bool("lev" in hist_file_ds[var].dims)
@@ -953,7 +953,7 @@ def setup_run_cvdp(self):
             )
         # End if
 
-        #intialize objects that might not be declared later
+        # initialize objects that might not be declared later
         case_name_baseline = None
         baseline_ts_loc = None
         syears_baseline = None
@@ -1098,33 +1098,33 @@ def derive_variables(self, res=None, hist_str=None, vars_to_derive=None, ts_dir=
         """
 
-        #Loop through derived variables
+        # Loop through derived variables
        for var in vars_to_derive:
            print(f"\t - deriving time series for {var}")

-            #Grab list of constituents for this variable
+            # Grab list of constituents for this variable
            constit_list = constit_dict[var]

-            #Grab all required time series files for derived variable
+            # Grab all required time series files for derived variable
            constit_files = []
            for constit in constit_list:
-                #Check if the constituent file is present, if so add it to list
+                # Check if the constituent file is present, if so add it to list
                if hist_str:
                    const_glob_str = f"*{hist_str}*.{constit}.*.nc"
                else:
                    const_glob_str = f"*.{constit}.*.nc"
-                #end if
+                # end if
                if glob.glob(os.path.join(ts_dir, const_glob_str)):
                    constit_files.append(glob.glob(os.path.join(ts_dir, const_glob_str ))[0])

-            #Check if all the necessary constituent files were found
+            # Check if all the necessary constituent files were found
            if len(constit_files) != len(constit_list):
                ermsg = f"\t ** Not all constituent files present; {var} cannot be calculated."
ermsg += f" Please remove {var} from 'diag_var_list' or find the " ermsg += "relevant CAM files.\n" print(ermsg) if constit_files: - #Add what's missing to debug log + # Add what's missing to debug log dmsg = "create time series:" dmsg += "\n\tneeded constituents for derivation of " dmsg += f"{var}:\n\t\t- {constit_list}\n\tfound constituent file(s) in " @@ -1139,13 +1139,13 @@ def derive_variables(self, res=None, hist_str=None, vars_to_derive=None, ts_dir= self.debug_log(dmsg) else: - #Open a new dataset with all the constituent files/variables - ds = xr.open_mfdataset(constit_files) + # Open a new dataset with all the constituent files/variables + ds = xr.open_mfdataset(constit_files).compute() # create new file name for derived variable derived_file = constit_files[0].replace(constit_list[0], var) - #Check if clobber is true for file + # Check if clobber is true for file if Path(derived_file).is_file(): if overwrite: Path(derived_file).unlink() @@ -1155,30 +1155,29 @@ def derive_variables(self, res=None, hist_str=None, vars_to_derive=None, ts_dir= print(msg) continue - #NOTE: this will need to be changed when derived equations are more complex! - JR + # NOTE: this will need to be changed when derived equations are more complex! - JR if var == "RESTOM": der_val = ds["FSNT"]-ds["FLNT"] else: - #Loop through all constituents and sum + # Loop through all constituents and sum der_val = 0 for v in constit_list: der_val += ds[v] - #Set derived variable name and add to dataset + # Set derived variable name and add to dataset der_val.name = var ds[var] = der_val - #Aerosol Calculations - #---------------------------------------------------------------------------------- - #These will be multiplied by rho (density of dry air) + # Aerosol Calculations + # ---------------------------------------------------------------------------------- + # These will be multiplied by rho (density of dry air) ds_pmid_done = False ds_t_done = False # User-defined defaults might not include aerosol zonal list azl = res.get("aerosol_zonal_list", []) if var in azl: - - #Only calculate once for all aerosol vars + # Only calculate once for all aerosol vars if not ds_pmid_done: ds_pmid = _load_dataset(glob.glob(os.path.join(ts_dir, "*.PMID.*"))[0]) ds_pmid_done = True @@ -1200,16 +1199,16 @@ def derive_variables(self, res=None, hist_str=None, vars_to_derive=None, ts_dir= print(errmsg) continue - #Multiply aerosol by dry air density (rho): (P/Rd*T) + # Multiply aerosol by dry air density (rho): (P/Rd*T) ds[var] = ds[var]*(ds_pmid["PMID"]/(res["Rgas"]*ds_t["T"])) - #Sulfate conversion factor + # Sulfate conversion factor if var == "SO4": ds[var] = ds[var]*(96./115.) - #---------------------------------------------------------------------------------- + # ---------------------------------------------------------------------------------- - #Drop all constituents from final saved dataset - #These are not necessary because they have their own time series files + # Drop all constituents from final saved dataset + # These are not necessary because they have their own time series files ds_final = ds.drop_vars(constit_list) ds_final.to_netcdf(derived_file, unlimited_dims='time', mode='w') @@ -1221,7 +1220,6 @@ def setup_run_mdtf(self): """ - copy_files_only = False # True (copy files but don't run), False (copy files and run MDTF) # Note that the MDTF variable test_mode (set in the mdtf_info of the yaml file) # has a different meaning: Data is fetched but PODs are not run. 
@@ -1266,7 +1264,6 @@ def setup_run_mdtf(self): if mdtf_info[var] == "default": mdtf_info[var] = plot_path - # # Write the input settings json file # @@ -1435,10 +1432,9 @@ def move_tsfiles_for_mdtf(self, verbose): # end for case - ######## -#Helper Function(s) +# Helper Function(s) def _load_dataset(fils): @@ -1475,5 +1471,5 @@ def my_formatwarning(msg, *args, **kwargs): else: return xr.open_dataset(fils[0]) #End if -#End def +# End def ######## diff --git a/lib/adf_info.py b/lib/adf_info.py index 5b5ffc438..7f10a64cb 100644 --- a/lib/adf_info.py +++ b/lib/adf_info.py @@ -124,13 +124,6 @@ def __init__(self, config_file, debug=False): self.__cam_climo_info[conf_var] = [conf_val] #End if #End for - - #If hist_str (component.hist_num) was not in yaml file, set to default - hist_str = self.__cam_climo_info['hist_str'] - if not hist_str: - hist_str = [['cam.h0a']]*self.__num_cases - #End if - #------------------------------------------- #Initialize ADF variable list: @@ -209,7 +202,8 @@ def __init__(self, config_file, debug=False): input_ts_loc = Path(input_ts_baseline) #Get years from pre-made timeseries file(s) - found_syear_baseline, found_eyear_baseline = self.get_climo_yrs_from_ts(input_ts_loc, data_name) + found_syear_baseline, found_eyear_baseline = self.get_climo_yrs_from_ts( + input_ts_loc, data_name) found_yr_range = np.arange(found_syear_baseline,found_eyear_baseline,1) #History file path isn't needed if user is running ADF directly on time series. @@ -239,11 +233,20 @@ def __init__(self, config_file, debug=False): # Check if history file path exists: if any(baseline_hist_locs): - if not isinstance(baseline_hist_str, list): - baseline_hist_str = [baseline_hist_str] - hist_str = baseline_hist_str[0] + #Check if user provided + if not baseline_hist_str: + baseline_hist_str = ['cam.h0a'] + else: + #Make list if not already + if not isinstance(baseline_hist_str, list): + baseline_hist_str = [baseline_hist_str] + #Initialize baseline history string list + self.__base_hist_str = baseline_hist_str + + #Grab first possible hist string, just looking for years of run + base_hist_str = baseline_hist_str[0] starting_location = Path(baseline_hist_locs) - file_list = sorted(starting_location.glob("*" + hist_str + ".*.nc")) + file_list = sorted(starting_location.glob("*" + base_hist_str + ".*.nc")) # Partition string to find exactly where h-number is # This cuts the string before and after the `{hist_str}.` sub-string # so there will always be three parts: @@ -251,7 +254,7 @@ def __init__(self, config_file, debug=False): #Since the last part always includes the time range, grab that with last index (2) #NOTE: this is based off the current CAM file name structure in the form: # $CASE.cam.h#.YYYY.nc - base_climo_yrs = [int(str(i).partition(f"{hist_str}.")[2][0:4]) for i in file_list] + base_climo_yrs = [int(str(i).partition(f"{base_hist_str}.")[2][0:4]) for i in file_list] base_climo_yrs = sorted(np.unique(base_climo_yrs)) base_found_syr = int(base_climo_yrs[0]) @@ -336,8 +339,17 @@ def __init__(self, config_file, debug=False): #Extract cam history files location: cam_hist_locs = self.get_cam_info('cam_hist_loc') - # Read hist_str (component.hist_num, eg cam.h0) from the yaml file - cam_hist_str = self.get_cam_info('hist_str') + #Get cleaned nested list of hist_str for test case(s) (component.hist_num, eg cam.h0) + cam_hist_str = self.__cam_climo_info.get('hist_str', None) + + if not cam_hist_str: + hist_str = [['cam.h0a']]*self.__num_cases + else: + hist_str = cam_hist_str + #End if + + #Initialize 
CAM history string nested list + self.__hist_str = hist_str #Check if using pre-made ts files cam_ts_done = self.get_cam_info("cam_ts_done") @@ -391,8 +403,9 @@ def __init__(self, config_file, debug=False): #End if #Check if history file path exists: - hist_str_case = cam_hist_str[case_idx] + hist_str_case = hist_str[case_idx] if any(cam_hist_locs): + #Grab first possible hist string, just looking for years of run hist_str = hist_str_case[0] #Get climo years for verification or assignment if missing @@ -632,8 +645,11 @@ def case_nicknames(self): @property def hist_string(self): - """ Return the history string name to the user if requested.""" - return self.__hist_str + """ Return the CAM history string list to the user if requested.""" + cam_hist_strs = copy.copy(self.__hist_str) + base_hist_strs = copy.copy(self.__base_hist_str) + hist_strs = {"test_hist_str":cam_hist_strs, "base_hist_str":base_hist_strs} + return hist_strs ######### @@ -762,6 +778,12 @@ def get_climo_yrs_from_ts(self, input_ts_loc, case_name): # Also, it is assumed that only h0 files should be climo-ed. ts_files = sorted(input_location.glob(f"{case_name}*h0*.{var_list[0]}.*nc")) + #Read hist_str (component.hist_num) from the yaml file, or set to default + hist_str = self.get_basic_info('hist_str') + #If hist_str is not present, then default to 'cam.h0': + if not hist_str: + hist_str = 'cam.h0' + #Read in file(s) if len(ts_files) == 1: cam_ts_data = xr.open_dataset(ts_files[0], decode_times=True) @@ -770,10 +792,18 @@ def get_climo_yrs_from_ts(self, input_ts_loc, case_name): #Average time dimension over time bounds, if bounds exist: if 'time_bnds' in cam_ts_data: + time_bounds_name = 'time_bnds' + elif 'time_bounds' in cam_ts_data: + time_bounds_name = 'time_bounds' + else: + time_bounds_name = None + + if time_bounds_name: time = cam_ts_data['time'] #NOTE: force `load` here b/c if dask & time is cftime, #throws a NotImplementedError: - time = xr.DataArray(cam_ts_data['time_bnds'].load().mean(dim='nbnd').values, + + time = xr.DataArray(cam_ts_data[time_bounds_name].load().mean(dim='nbnd').values, dims=time.dims, attrs=time.attrs) cam_ts_data['time'] = time cam_ts_data.assign_coords(time=time) @@ -792,4 +822,4 @@ def get_climo_yrs_from_ts(self, input_ts_loc, case_name): #++++++++++++++++++++ #End Class definition -#++++++++++++++++++++ +#++++++++++++++++++++ \ No newline at end of file diff --git a/lib/plotting_functions.py b/lib/plotting_functions.py index 03f7b6ae1..05a7d32b3 100644 --- a/lib/plotting_functions.py +++ b/lib/plotting_functions.py @@ -2317,9 +2317,7 @@ def square_contour_difference(fld1, fld2, **kwargs): mnorm = mpl.colors.Normalize(mn, mx) coord1, coord2 = fld1.coords # ASSUMES xarray WITH coords AND 2-dimensions - print(f"{coord1}, {coord2}") xx, yy = np.meshgrid(fld1[coord2], fld1[coord1]) - print(f"shape of meshgrid: {xx.shape}") img1 = ax1.contourf(xx, yy, fld1.transpose()) if (coord1 == 'month') and (fld1.shape[0] ==12): diff --git a/scripts/averaging/create_climo_files.py b/scripts/averaging/create_climo_files.py index 62b5ae340..11844e189 100644 --- a/scripts/averaging/create_climo_files.py +++ b/scripts/averaging/create_climo_files.py @@ -8,11 +8,20 @@ def my_formatwarning(msg, *args, **kwargs): return str(msg) + '\n' warnings.formatwarning = my_formatwarning - +import numpy as np import xarray as xr # module-level import so all functions can get to it. 
 import multiprocessing as mp
 
+def get_time_slice_by_year(time, startyear, endyear):
+    """Return an integer-index slice of `time` covering startyear through endyear."""
+    if not hasattr(time, 'dt'):
+        print("Warning: get_time_slice_by_year requires the `time` parameter to be an xarray time coordinate with a dt accessor. Returning generic slice (which will probably fail).")
+        return slice(startyear, endyear)
+    # positional indices of the first and last samples inside the year bounds
+    start_time_index = np.argwhere((time.dt.year >= startyear).values).flatten().min()
+    end_time_index = np.argwhere((time.dt.year <= endyear).values).flatten().max()
+    # add one because the end of an integer slice is exclusive
+    return slice(start_time_index, end_time_index+1)
+
+
 ##############
 #Main function
 
@@ -207,7 +216,8 @@ def process_variable(ts_files, syr, eyr, output_file):
         cam_ts_data.assign_coords(time=time)
         cam_ts_data = xr.decode_cf(cam_ts_data)
     #Extract data subset using provided year bounds:
-    cam_ts_data = cam_ts_data.sel(time=slice(syr, eyr))
+    tslice = get_time_slice_by_year(cam_ts_data.time, int(syr), int(eyr))
+    cam_ts_data = cam_ts_data.isel(time=tslice)
     #Group time series values by month, and average those months together:
     cam_climo_data = cam_ts_data.groupby('time.month').mean(dim='time')
     #Rename "months" to "time":
diff --git a/scripts/plotting/global_latlon_map.py b/scripts/plotting/global_latlon_map.py
index e04e1b91e..6939d98e7 100644
--- a/scripts/plotting/global_latlon_map.py
+++ b/scripts/plotting/global_latlon_map.py
@@ -8,6 +8,8 @@
 my_formatwarning(msg, *args, **kwargs)
     format warning messages
     (private method)
+plot_file_op
+    Check on status of output plot file.
 """
 #Import standard modules:
 from pathlib import Path
@@ -15,13 +17,15 @@
 import xarray as xr
 import warnings # use to warn user about missing files.
 
+import plotting_functions as pf
+
 #Format warning messages:
 def my_formatwarning(msg, *args, **kwargs):
     """Issue `msg` as warning."""
     return str(msg) + '\n'
 warnings.formatwarning = my_formatwarning
 
-def global_latlon_map(adfobj):
+def global_latlon_map_B(adfobj):
     """
     This script/function is designed to generate global
     2-D lat/lon maps of model fields with continental overlays.
@@ -37,36 +41,28 @@ def global_latlon_map(adfobj):
 
     Notes
     -----
-    This function imports `pandas` and `plotting_functions`
     It uses the AdfDiag object's methods to get necessary information.
-    Specificially:
+    Makes use of AdfDiag's data sub-class.
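+    The data access pattern used below is, in sketch form (`case_name` and
+    `var` are loop variables defined later in this function):
+
+        odata = adfobj.data.load_reference_regrid_da(adfobj.data.ref_case_label, var)
+        mdata = adfobj.data.load_regrid_da(case_name, var)
+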
+ Explicitly accesses: adfobj.diag_var_list List of variables - adfobj.get_basic_info - Regrid data path, checks `compare_obs`, checks `redo_plot`, checks `plot_press_levels` adfobj.plot_location output plot path - adfobj.get_cam_info - Get `cam_case_name` and `case_nickname` adfobj.climo_yrs start and end climo years of the case(s), `syears` & `eyears` start and end climo years of the reference, `syear_baseline` & `eyear_baseline` - adfobj.var_obs_dict - reference data (conditional) - adfobj.get_baseline_info - get reference case, `cam_case_name` adfobj.variable_defaults dict of variable-specific plot preferences adfobj.read_config_var dict of basic info, `diag_basic_info` Then use to check `plot_type` - adfobj.compare_obs - Used to set data path adfobj.debug_log Issues debug message adfobj.add_website_data Communicates information to the website generator + adfobj.compare_obs + Logical to determine if comparing to observations The `plotting_functions` module is needed for: @@ -82,20 +78,6 @@ def global_latlon_map(adfobj): Checks on pressure level dimension """ - #Import necessary modules: - #------------------------ - import pandas as pd - - #CAM diagnostic plotting functions: - import plotting_functions as pf - #------------------------- - - # Steps: - # - load regridded climo files for model and obs - # - calculate all-time and seasonal fields (from individual months) - # - Take difference, calculate statistics - # - make plot - #Notify user that script has started: print("\n Generating lat/lon maps...") @@ -103,50 +85,18 @@ def global_latlon_map(adfobj): # Use ADF api to get all necessary information # var_list = adfobj.diag_var_list - model_rgrid_loc = adfobj.get_basic_info("cam_regrid_loc", required=True) - #Special ADF variable which contains the output paths for #all generated plots and tables for each case: plot_locations = adfobj.plot_location - #CAM simulation variables (this is always assumed to be a list): - case_names = adfobj.get_cam_info("cam_case_name", required=True) - #Grab case years syear_cases = adfobj.climo_yrs["syears"] eyear_cases = adfobj.climo_yrs["eyears"] - # CAUTION: - # "data" here refers to either obs or a baseline simulation, - # Until those are both treated the same (via intake-esm or similar) - # we will do a simple check and switch options as needed: - if adfobj.get_basic_info("compare_obs"): - #Set obs call for observation details for plot titles - obs = True - - #Extract variable-obs dictionary: - var_obs_dict = adfobj.var_obs_dict - - #If dictionary is empty, then there are no observations to regrid to, - #so quit here: - if not var_obs_dict: - print("No observations found to plot against, so no lat/lon maps will be generated.") - return - else: - obs = False - data_name = adfobj.get_baseline_info("cam_case_name", required=True) # does not get used, is just here as a placemarker - data_list = [data_name] # gets used as just the name to search for climo files HAS TO BE LIST - data_loc = model_rgrid_loc #Just use the re-gridded model data path - #End if - #Grab baseline years (which may be empty strings if using Obs): syear_baseline = adfobj.climo_yrs["syear_baseline"] eyear_baseline = adfobj.climo_yrs["eyear_baseline"] - #Grab all case nickname(s) - test_nicknames = adfobj.case_nicknames["test_nicknames"] - base_nickname = adfobj.case_nicknames["base_nickname"] - res = adfobj.variable_defaults # will be dict of variable-specific plot preferences # or an empty dictionary if use_defaults was not specified in YAML. 
@@ -162,19 +112,11 @@ def global_latlon_map(adfobj):
     print(f"\t NOTE: redo_plot is set to {redo_plot}")
     #-----------------------------------------
 
-    #Set data path variables:
-    #-----------------------
-    mclimo_rg_loc = Path(model_rgrid_loc)
-    if not adfobj.compare_obs:
-        dclimo_loc = Path(data_loc)
-    #-----------------------
-
     #Determine if user wants to plot 3-D variables on
     #pressure levels:
     pres_levs = adfobj.get_basic_info("plot_press_levels")
 
-    #For now, let's always do seasonal weighting:
-    weight_season = True
+    weight_season = True # always do seasonal weighting
 
     #Set seasonal ranges:
     seasons = {"ANN": np.arange(1,13,1),
@@ -183,29 +125,13 @@ def global_latlon_map(adfobj):
                "MAM": [3, 4, 5],
                "SON": [9, 10, 11]
                }
-
+
     # probably want to do this one variable at a time:
     for var in var_list:
-
-        if adfobj.compare_obs:
-            #Check if obs exist for the variable:
-            if var in var_obs_dict:
-                #Note: In the future these may all be lists, but for
-                #now just convert the target_list.
-                #Extract target file:
-                dclimo_loc = var_obs_dict[var]["obs_file"]
-                #Extract target list (eventually will be a list, for now need to convert):
-                data_list = [var_obs_dict[var]["obs_name"]]
-                #Extract target variable name:
-                data_var = var_obs_dict[var]["obs_var"]
-            else:
-                dmsg = f"No obs found for variable `{var}`, lat/lon map plotting skipped."
-                adfobj.debug_log(dmsg)
-                continue
-        else:
-            #Set "data_var" for consistent use below:
-            data_var = var
-        #End if
+        if var not in adfobj.data.ref_var_nam:
+            dmsg = f"No reference data found for variable `{var}`, lat/lon map plotting skipped."
+            adfobj.debug_log(dmsg)
+            continue
 
         #Notify user of variable being plotted:
         print("\t - lat/lon maps for {}".format(var))
@@ -229,258 +155,202 @@ def global_latlon_map(adfobj):
             # otherwise defaults to 180
             vres['central_longitude'] = pf.get_central_longitude(adfobj)
 
-        #loop over different data sets to plot model against:
-        for data_src in data_list:
+        # load reference data (observational or baseline)
+        # odata = adfobj.data.load_reference_da(var)
+        odata = adfobj.data.load_reference_regrid_da(adfobj.data.ref_case_label, var)
+        if odata is None:
+            continue
+        o_has_dims = pf.validate_dims(odata, ["lat", "lon", "lev"]) # True iff dims are (lat,lon) -- can't plot unless we have both
+        if (not o_has_dims['has_lat']) or (not o_has_dims['has_lon']):
+            print(f"\t - skipping global map for {var} as REFERENCE does not have both lat and lon")
+            continue
 
-            # load data (observational) commparison files (we should explore intake as an alternative to having this kind of repeated code):
-            if adfobj.compare_obs:
-                #For now, only grab one file (but convert to list for use below)
-                oclim_fils = [dclimo_loc]
-            else:
-                oclim_fils = sorted(dclimo_loc.glob(f"{data_src}_{var}_baseline.nc"))
+        #Loop over model cases:
+        for case_idx, case_name in enumerate(adfobj.data.case_names):
+
+            #Set case nickname:
+            case_nickname = adfobj.data.test_nicknames[case_idx]
 
-            oclim_ds = pf.load_dataset(oclim_fils)
-            if oclim_ds is None:
-                print("WARNING: Did not find any oclim_fils. Will try to skip.")
-                print(f"INFO: Data Location, dclimo_loc is {dclimo_loc}")
-                print(f"INFO: The glob is: {data_src}_{var}_*.nc")
+            #Set output plot location:
+            plot_loc = Path(plot_locations[case_idx])
+
+            #Check if plot output directory exists, and if not, then create it:
+            if not plot_loc.is_dir():
+                print(" {} not found, making new directory".format(plot_loc))
+                plot_loc.mkdir(parents=True)
+
+            #Load re-gridded model files:
+            mdata = adfobj.data.load_regrid_da(case_name, var)
+
+            #Skip this variable/case if the regridded climo file doesn't exist:
+            if mdata is None:
                 continue
-            #End if
 
-            #Loop over model cases:
-            for case_idx, case_name in enumerate(case_names):
+            #Determine dimensions of variable:
+            has_dims = pf.validate_dims(mdata, ["lat", "lon", "lev"])
+            if (not has_dims['has_lat']) or (not has_dims['has_lon']):
+                print(f"\t - skipping global map for {var} for case {case_name} as it does not have both lat and lon")
+                continue
+            else: # i.e., has lat&lon
+                if pres_levs and (not has_dims['has_lev']):
+                    print(f"\t - skipping global map for {var} as it has more than lat/lon dims, but no pressure levels were provided")
+                    continue
 
-                #Set case nickname:
-                case_nickname = test_nicknames[case_idx]
+            # Check output file. If file does not exist, proceed.
+            # If file exists:
+            #   if redo_plot is true: delete it now and make plot
+            #   if redo_plot is false: add to website and move on
+            doplot = {}
+            if not pres_levs:
+                for s in seasons:
+                    plot_name = plot_loc / f"{var}_{s}_LatLon_Mean.{plot_type}"
+                    doplot[plot_name] = plot_file_op(adfobj, plot_name, var, case_name, s, web_category, redo_plot, "LatLon")
+            else:
+                for pres in pres_levs:
+                    for s in seasons:
+                        plot_name = plot_loc / f"{var}_{pres}hpa_{s}_LatLon_Mean.{plot_type}"
+                        doplot[plot_name] = plot_file_op(adfobj, plot_name, f"{var}_{pres}hpa", case_name, s, web_category, redo_plot, "LatLon")
+            if all(value is None for value in doplot.values()):
+                print(f"All plots exist for {var}. Redo is {redo_plot}. Existing plots added to website data. Continue.")
+                continue
 
+            #Create new dictionaries:
+            mseasons = {}
+            oseasons = {}
+            dseasons = {} # hold the differences
+
+            if not has_dims['has_lev']: # strictly 2-d data
+
+                #Loop over season dictionary:
+                for s in seasons:
+                    plot_name = plot_loc / f"{var}_{s}_LatLon_Mean.{plot_type}"
+                    if doplot[plot_name] is None:
+                        continue
+
+                    if weight_season:
+                        mseasons[s] = pf.seasonal_mean(mdata, season=s, is_climo=True)
+                        oseasons[s] = pf.seasonal_mean(odata, season=s, is_climo=True)
+                    else:
+                        #Just average months as-is:
+                        mseasons[s] = mdata.sel(time=seasons[s]).mean(dim='time')
+                        oseasons[s] = odata.sel(time=seasons[s]).mean(dim='time')
+                    #End if
+
+                    # difference: each entry should be (lat, lon)
+                    dseasons[s] = mseasons[s] - oseasons[s]
+
+                    pf.plot_map_and_save(plot_name, case_nickname, adfobj.data.ref_nickname,
+                                         [syear_cases[case_idx],eyear_cases[case_idx]],
+                                         [syear_baseline,eyear_baseline],
+                                         mseasons[s], oseasons[s], dseasons[s],
+                                         obs=adfobj.compare_obs, **vres)
+
+                    #Add plot to website (if enabled):
+                    adfobj.add_website_data(plot_name, var, case_name, category=web_category,
+                                            season=s, plot_type="LatLon")
+
+            else: # => pres_levs has values, & we already checked that lev is in mdata (has_lev)
+
+                for pres in pres_levs:
+
+                    #Check that the user-requested pressure level
+                    #exists in the model data, which should already
+                    #have been interpolated to the standard reference
+                    #pressure levels:
+                    if (not (pres in mdata['lev'])) or (not (pres in odata['lev'])):
+                        print(f"plot_press_levels value '{pres}' not present in {var} [test: {(pres in mdata['lev'])}, ref: {pres in odata['lev']}], so skipping.")
+                        continue
+
+                    #Loop over seasons:
+                    for s in seasons:
+                        plot_name = plot_loc / f"{var}_{pres}hpa_{s}_LatLon_Mean.{plot_type}"
+                        if doplot[plot_name] is None:
+                            continue
 
-                #Skip this variable/case if the regridded climo file doesn't exist:
-                if mclim_ds is None:
-                    print("WARNING: Did not find any regridded climo files. Will try to skip.")
-                    print(f"INFO: Data Location, mclimo_rg_loc, is {mclimo_rg_loc}")
-                    print(f"INFO: The glob is: {data_src}_{case_name}_{var}_*.nc")
-                    continue
-                #End if
-
-                #Extract variable of interest
-                odata = oclim_ds[data_var].squeeze() # squeeze in case of degenerate dimensions
-                mdata = mclim_ds[var].squeeze()
-
-                # APPLY UNITS TRANSFORMATION IF SPECIFIED:
-                # NOTE: looks like our climo files don't have all their metadata
-                mdata = mdata * vres.get("scale_factor",1) + vres.get("add_offset", 0)
-                # update units
-                mdata.attrs['units'] = vres.get("new_unit", mdata.attrs.get('units', 'none'))
-
-                # Do the same for the baseline case if need be:
-                if not adfobj.compare_obs:
-                    odata = odata * vres.get("scale_factor",1) + vres.get("add_offset", 0)
-                    # update units
-                    odata.attrs['units'] = vres.get("new_unit", odata.attrs.get('units', 'none'))
-                # Or for observations:
-                else:
-                    odata = odata * vres.get("obs_scale_factor",1) + vres.get("obs_add_offset", 0)
-                    # Note: we are going to assume that the specification ensures the conversion makes the units the same. Doesn't make sense to add a different unit.
- - #Determine dimensions of variable: - has_dims = pf.lat_lon_validate_dims(odata) - if has_dims: - #If observations/baseline CAM have the correct - #dimensions, does the input CAM run have correct - #dimensions as well? - has_dims_cam = pf.lat_lon_validate_dims(mdata) - - #If both fields have the required dimensions, then - #proceed with plotting: - if has_dims_cam: - - # - # Seasonal Averages - # Note: xarray can do seasonal averaging, - # but depends on having time accessor, - # which these prototype climo files do not have. - # - - #Create new dictionaries: - mseasons = {} - oseasons = {} - dseasons = {} # hold the differences - - #Loop over season dictionary: - for s in seasons: - # time to make plot; here we'd probably loop over whatever plots we want for this variable - # I'll just call this one "LatLon_Mean" ... would this work as a pattern [operation]_[AxesDescription] ? - plot_name = plot_loc / f"{var}_{s}_LatLon_Mean.{plot_type}" - - # Check redo_plot. If set to True: remove old plot, if it already exists: - if (not redo_plot) and plot_name.is_file(): - #Add already-existing plot to website (if enabled): - adfobj.debug_log(f"'{plot_name}' exists and clobber is false.") - adfobj.add_website_data(plot_name, var, case_name, category=web_category, - season=s, plot_type="LatLon") - - #Continue to next iteration: - continue - elif (redo_plot) and plot_name.is_file(): - plot_name.unlink() - - - if weight_season: - mseasons[s] = pf.seasonal_mean(mdata, season=s, is_climo=True) - oseasons[s] = pf.seasonal_mean(odata, season=s, is_climo=True) - else: - #Just average months as-is: - mseasons[s] = mdata.sel(time=seasons[s]).mean(dim='time') - oseasons[s] = odata.sel(time=seasons[s]).mean(dim='time') - #End if - - # difference: each entry should be (lat, lon) - dseasons[s] = mseasons[s] - oseasons[s] - - - #Create new plot: - # NOTE: send vres as kwarg dictionary. --> ONLY vres, not the full res - # This relies on `plot_map_and_save` knowing how to deal with the options - # currently knows how to handle: - # colormap, contour_levels, diff_colormap, diff_contour_levels, tiString, tiFontSize, mpl - # *Any other entries will be ignored. - # NOTE: If we were doing all the plotting here, we could use whatever we want from the provided YAML file. - - pf.plot_map_and_save(plot_name, case_nickname, base_nickname, - [syear_cases[case_idx],eyear_cases[case_idx]], - [syear_baseline,eyear_baseline], - mseasons[s], oseasons[s], dseasons[s], - obs, **vres) - - #Add plot to website (if enabled): - adfobj.add_website_data(plot_name, var, case_name, category=web_category, - season=s, plot_type="LatLon") - - else: #mdata dimensions check - print(f"\t - skipping lat/lon map for {var} as it doesn't have only lat/lon dims.") - #End if (dimensions check) - - elif pres_levs: #Is the user wanting to interpolate to a specific pressure level? - - #Check that case inputs have the correct dimensions (including "lev"): - _, has_lev = pf.zm_validate_dims(mdata) - - if has_lev: - - #Calculate monthly weights (if applicable): if weight_season: - #Add date-stamp to time dimension: - #Note: For now using made-up dates, but in the future - #it might be good to extract this info from the files - #themselves. 
- timefix = pd.date_range(start='1/1/1980', end='12/1/1980', freq='MS') - mdata['time']=timefix - odata['time']=timefix - - #Calculate monthly weights based on number of days: - month_length = mdata.time.dt.days_in_month - weights = (month_length.groupby("time.season") / month_length.groupby("time.season").sum()) + mseasons[s] = pf.seasonal_mean(mdata, season=s, is_climo=True) + oseasons[s] = pf.seasonal_mean(odata, season=s, is_climo=True) + else: + #Just average months as-is: + mseasons[s] = mdata.sel(time=seasons[s]).mean(dim='time') + oseasons[s] = odata.sel(time=seasons[s]).mean(dim='time') #End if - #Loop over pressure levels: - for pres in pres_levs: - - #Check that the user-requested pressure level - #exists in the model data, which should already - #have been interpolated to the standard reference - #pressure levels: - if not (pres in mclim_ds['lev']): - #Move on to the next pressure level: - print(f"plot_press_levels value '{pres}' not a standard reference pressure, so skipping.") - continue - #End if - - #Create new dictionaries: - mseasons = {} - oseasons = {} - dseasons = {} - - #Loop over seasons: - for s in seasons: - plot_name = plot_loc / f"{var}_{pres}hpa_{s}_LatLon_Mean.{plot_type}" - - # Check redo_plot. If set to True: remove old plot, if it already exists: - redo_plot = adfobj.get_basic_info('redo_plot') - if (not redo_plot) and plot_name.is_file(): - #Add already-existing plot to website (if enabled): - adfobj.debug_log(f"'{plot_name}' exists and clobber is false.") - adfobj.add_website_data(plot_name, f"{var}_{pres}hpa", case_name, category=web_category, - season=s, plot_type="LatLon") - - #Continue to next iteration: - continue - elif (redo_plot) and plot_name.is_file(): - plot_name.unlink() - - #If requested, then calculate the monthly-weighted seasonal averages: - if weight_season: - mseasons[s] = (pf.seasonal_mean(mdata, season=s, is_climo=True)).sel(lev=pres) - oseasons[s] = (pf.seasonal_mean(odata, season=s, is_climo=True)).sel(lev=pres) - else: - #Just average months as-is: - mseasons[s] = mdata.sel(time=seasons[s], lev=pres).mean(dim='time') - oseasons[s] = odata.sel(time=seasons[s], lev=pres).mean(dim='time') - #End if - - # difference: each entry should be (lat, lon) - dseasons[s] = mseasons[s] - oseasons[s] - - # time to make plot; here we'd probably loop over whatever plots we want for this variable - # I'll just call this one "LatLon_Mean" ... would this work as a pattern [operation]_[AxesDescription] ? - - #Create new plot: - # NOTE: send vres as kwarg dictionary. --> ONLY vres, not the full res - # This relies on `plot_map_and_save` knowing how to deal with the options - # currently knows how to handle: - # colormap, contour_levels, diff_colormap, diff_contour_levels, tiString, tiFontSize, mpl - # *Any other entries will be ignored. - # NOTE: If we were doing all the plotting here, we could use whatever we want from the provided YAML file. 
-                            pf.plot_map_and_save(plot_name, case_nickname, base_nickname,
-                                                 [syear_cases[case_idx],eyear_cases[case_idx]],
-                                                 [syear_baseline,eyear_baseline],
-                                                 mseasons[s], oseasons[s], dseasons[s],
-                                                 obs, **vres)
-
-                            #Add plot to website (if enabled):
-                            adfobj.add_website_data(plot_name, f"{var}_{pres}hpa", case_name, category=web_category,
-                                                    season=s, plot_type="LatLon")
-
-                        #End for (seasons)
-                    #End for (pressure levels)
-
-                    else:
-                        print(f"\t - variable '{var}' has no vertical dimension but is not just time/lat/lon, so skipping.")
-                    #End if (has_lev)
-                else:
-                    print(f"\t - skipping polar map for {var} as it has more than lat/lon dims, but no pressure levels were provided")
-                #End if (dimensions check and plotting pressure levels)
-            #End for (case loop)
-        #End for (obs/baseline loop)
+                        # difference: each entry should be (lat, lon)
+                        dseasons[s] = mseasons[s] - oseasons[s]
+
+                        pf.plot_map_and_save(plot_name, case_nickname, adfobj.data.ref_nickname,
+                                             [syear_cases[case_idx],eyear_cases[case_idx]],
+                                             [syear_baseline,eyear_baseline],
+                                             mseasons[s].sel(lev=pres), oseasons[s].sel(lev=pres), dseasons[s].sel(lev=pres),
+                                             obs=adfobj.compare_obs, **vres)
+
+                        #Add plot to website (if enabled):
+                        adfobj.add_website_data(plot_name, f"{var}_{pres}hpa", case_name, category=web_category,
+                                                season=s, plot_type="LatLon")
+                    #End for (seasons)
+                #End for (pressure levels)
+            #End if (plotting pressure levels)
+        #End for (case loop)
     #End for (variable loop)
 
     #Notify user that script has ended:
     print("  ...lat/lon maps have been generated successfully.")
 
-#########
-# Helpers
-#########
+def plot_file_op(adfobj, plot_name, var, case_name, season, web_category, redo_plot, plot_type):
+    """Check if output plot needs to be made or remade.
+
+    Parameters
+    ----------
+    adfobj : AdfDiag
+        The diagnostics object that contains all the configuration information
+
+    plot_name : Path
+        path of the output plot
+
+    var : str
+        name of variable
+
+    case_name : str
+        case name
+
+    season : str
+        season being plotted
+
+    web_category : str
+        the category for this variable
+
+    redo_plot : bool
+        whether to overwrite existing plot with this file name
+
+    plot_type : str
+        the file type for the output plot
+
+    Returns
+    -------
+    bool or None
+        Returns True if the existing file is removed or no file exists.
+        Returns None if the file exists and redo_plot is False.
+
+    Notes
+    -----
+    The long list of parameters is because add_website_data is called
+    when the file exists and will not be overwritten.
+
+    """
+    # Check redo_plot. If set to True: remove old plot, if it already exists:
+    if plot_name.is_file():
+        if redo_plot:
+            plot_name.unlink()
+            return True
+        else:
+            #Add already-existing plot to website (if enabled):
+            adfobj.add_website_data(plot_name, var, case_name, category=web_category,
+                                    season=season, plot_type=plot_type)
+            return None # None tells caller that file exists and not to overwrite
+    else:
+        return True
 
##############
-#END OF SCRIPT
\ No newline at end of file
+#END OF SCRIPT
diff --git a/scripts/plotting/tape_recorder.py b/scripts/plotting/tape_recorder.py
index 03cd9ef45..6b527c5b0 100644
--- a/scripts/plotting/tape_recorder.py
+++ b/scripts/plotting/tape_recorder.py
@@ -36,15 +36,26 @@ def tape_recorder(adfobj):
     plot_location = adfobj.plot_location
     plot_loc = Path(plot_location[0])
 
-    #Grab history string:
-    hist_str = adfobj.hist_string
-
     #Grab test case name(s)
     case_names = adfobj.get_cam_info('cam_case_name', required=True)
 
     #Grab test case time series locs(s)
     case_ts_locs = adfobj.get_cam_info("cam_ts_loc", required=True)
 
+    #Grab history strings:
+    cam_hist_strs = adfobj.hist_string["test_hist_str"]
+
+    # Filter the list to include only strings that are exactly in the possible h0 strings
+    # - Search for either h0 or h0a
+    substrings = {"cam.h0","cam.h0a"}
+    case_hist_strs = []
+    for cam_case_str in cam_hist_strs:
+        # Check each possible h0 string
+        for string in cam_case_str:
+            if string in substrings:
+                case_hist_strs.append(string)
+                break
+
     #Grab test case climo years
     start_years = adfobj.climo_yrs["syears"]
     end_years = adfobj.climo_yrs["eyears"]
@@ -72,8 +83,25 @@ def tape_recorder(adfobj):
         data_end_year = adfobj.climo_yrs["eyear_baseline"]
         start_years = start_years+[data_start_year]
         end_years = end_years+[data_end_year]
+
+        #Grab baseline history strings:
+        baseline_hist_strs = adfobj.hist_string["base_hist_str"]
+        # Filter the list to include only strings that are exactly in the substrings list
+        base_hist_strs = [string for string in baseline_hist_strs if string in substrings]
+        hist_strs = case_hist_strs + base_hist_strs
     #End if
 
+    if not case_ts_locs:
+        exitmsg = "WARNING: No time series files in any case directory."
+        exitmsg += " No tape recorder plots will be made."
+        print(exitmsg)
+        logmsg = "create tape recorder:"
+        logmsg += "\n Tape recorder plots require monthly mean h0 time series files."
+        logmsg += "\n None were found for any case. Please check the time series paths."
+        adfobj.debug_log(logmsg)
+        #End tape recorder plotting script:
+        return
+
     # Default colormap
     cmap='precip_nowhite'
 
@@ -110,10 +138,10 @@ def tape_recorder(adfobj):
     elif (redo_plot) and plot_name.is_file():
         plot_name.unlink()
 
-    #Make dictionary for case names and associated timeseries file locations
+    #Make dictionary for case names and associated timeseries file locations and hist strings
    runs_LT2={}
    for i,val in enumerate(test_nicknames):
-        runs_LT2[val] = case_ts_locs[i]
+        runs_LT2[val] = [case_ts_locs[i], hist_strs[i]]
 
     # MLS data
     mls = xr.open_dataset(obs_loc / "mls_h2o_latNpressNtime_3d_monthly_v5.nc")
@@ -133,7 +161,10 @@ def tape_recorder(adfobj):
     alldat=[]
     runname_LT=[]
     for idx,key in enumerate(runs_LT2):
-        fils= sorted(Path(runs_LT2[key]).glob(f'*{hist_str}.{var}.*.nc'))
+        # Search for files
+        ts_loc = Path(runs_LT2[key][0])
+        hist_str = runs_LT2[key][1]
+        fils= sorted(ts_loc.glob(f'*{hist_str}.{var}.*.nc'))
         dat = pf.load_dataset(fils)
         if not dat:
             dmsg = f"\t No data for `{var}` found in {fils}, case will be skipped in tape recorder plot."
@@ -152,7 +183,7 @@ def tape_recorder(adfobj): runname_LT=xr.DataArray(runname_LT, dims='run', coords=[np.arange(0,len(runname_LT),1)], name='run') alldat_concat_LT = xr.concat(alldat, dim=runname_LT) else: - msg = f"WARNING: No cases seem to be available, please check history files for {var}." + msg = f"WARNING: No cases seem to be available, please check time series files for {var}." msg += "\n\tNo tape recorder plots will be made." print(msg) #End tape recorder plotting script: diff --git a/scripts/plotting/zonal_mean.py b/scripts/plotting/zonal_mean.py index 944683752..a150009f1 100644 --- a/scripts/plotting/zonal_mean.py +++ b/scripts/plotting/zonal_mean.py @@ -2,6 +2,7 @@ import numpy as np import xarray as xr import plotting_functions as pf + import warnings # use to warn user about missing files. def my_formatwarning(msg, *args, **kwargs): @@ -10,91 +11,53 @@ def my_formatwarning(msg, *args, **kwargs): warnings.formatwarning = my_formatwarning -def zonal_mean(adfobj): +def zonal_mean_B(adfobj): """ - This script plots zonal averages. + Plots zonal average from climatological files (annual and seasonal). Compare CAM climatologies against other climatological data (observations or baseline runs). - Description of needed inputs from ADF: - case_name -> Name of CAM case provided by "cam_case_name". - model_rgrid_loc -> Location of re-gridded CAM climo files provided by "cam_regrid_loc". - data_name -> Name of data set CAM case is being compared against, - which is always either "obs" or the baseline CAM case name, - depending on whether "compare_obs" is true or false. - data_loc -> Location of comparison data, which is either "obs_climo_loc" - or "cam_baseline_climo_loc", depending on whether - "compare_obs" is true or false. - var_list -> List of CAM output variables provided by "diag_var_list" - data_list -> List of data sets CAM will be compared against, which - is simply the baseline case name in situations when - "compare_obs" is false. - climo_yrs -> Dictionary containing the start and end years of the test - and baseline model data (if applicable). - plot_location -> Location where plot files will be written to, which is - specified by "cam_diag_plot_loc". - variable_defaults -> optional, - Dict that has keys that are variable names and values that are plotting preferences/defaults. - Notes: - The script produces plots of 2-D and 3-D variables, - but needs to determine which type along the way. - For 3-D variables, the default behavior is to interpolate - climo files to pressure levels, which requires the hybrid-sigma - coefficients and surface pressure. That ASSUMES that the climo - files are using native hybrid-sigma levels rather than being - transformed to pressure levels. + + Parameters + ---------- + adfobj : AdfDiag + The diagnostics object that contains all the configuration information + + Returns + ------- + None + Does not return value, produces files. + + Notes + ----- + Uses AdfData for loading data described by adfobj. + + Directly uses adfobj for the following: + diag_var_list, climo_yrs, variable_defaults, read_config_var, + get_basic_info, add_website_data, debug_log + + Determines whether `lev` dimension is present. If not, makes + a line plot, but if so it makes a contour plot. + TODO: There's a flag to plot linear vs log pressure, but no + method to infer what the user wants. 
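+
+    A sketch of the dimension check used below to pick the plot type
+    (illustration only; `pf.zm_validate_dims` is the helper actually called):
+
+        has_lat, has_lev = pf.zm_validate_dims(data)
+        # has_lev False -> line plot; has_lev True -> lat/pressure contour plot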
""" - #Notify user that script has started: print("\n Generating zonal mean plots...") - #Extract needed quantities from ADF object: - #----------------------------------------- var_list = adfobj.diag_var_list - model_rgrid_loc = adfobj.get_basic_info("cam_regrid_loc", required=True) #Special ADF variable which contains the output paths for #all generated plots and tables: plot_locations = adfobj.plot_location - #CAM simulation variables (this is always assumed to be a list): - case_names = adfobj.get_cam_info("cam_case_name", required=True) - #Grab case years syear_cases = adfobj.climo_yrs["syears"] eyear_cases = adfobj.climo_yrs["eyears"] - # CAUTION: - # "data" here refers to either obs or a baseline simulation, - # Until those are both treated the same (via intake-esm or similar) - # we will do a simple check and switch options as needed: - if adfobj.get_basic_info("compare_obs"): - #Set obs call for observation details for plot titles - obs = True - - #Extract variable-obs dictionary: - var_obs_dict = adfobj.var_obs_dict - - #If dictionary is empty, then there are no observations to regrid to, - #so quit here: - if not var_obs_dict: - print("\t No observations found to plot against, so no zonal-mean maps will be generated.") - return - else: - obs = False - data_name = adfobj.get_baseline_info("cam_case_name", required=True) # does not get used, is just here as a placemarker - data_list = [data_name] # gets used as just the name to search for climo files HAS TO BE LIST - data_loc = model_rgrid_loc #Just use the re-gridded model data path - #End if - #Grab baseline years (which may be empty strings if using Obs): syear_baseline = adfobj.climo_yrs["syear_baseline"] eyear_baseline = adfobj.climo_yrs["eyear_baseline"] - #Grab all case nickname(s) - test_nicknames = adfobj.case_nicknames["test_nicknames"] - base_nickname = adfobj.case_nicknames["base_nickname"] - res = adfobj.variable_defaults # will be dict of variable-specific plot preferences # or an empty dictionary if use_defaults was not specified in YAML. @@ -110,12 +73,6 @@ def zonal_mean(adfobj): print(f"\t NOTE: redo_plot is set to {redo_plot}") #----------------------------------------- - #Set data path variables: - #----------------------- - mclimo_rg_loc = Path(model_rgrid_loc) - if not adfobj.compare_obs: - dclimo_loc = Path(data_loc) - #----------------------- #Set seasonal ranges: seasons = {"ANN": np.arange(1,13,1), @@ -131,7 +88,7 @@ def zonal_mean(adfobj): logp_zonal_skip = [] #Loop over model cases: - for case_idx, case_name in enumerate(case_names): + for case_idx, case_name in enumerate(adfobj.data.case_names): #Set output plot location: plot_loc = Path(plot_locations[case_idx]) @@ -145,7 +102,7 @@ def zonal_mean(adfobj): for var in var_list: for s in seasons: #Check zonal log-p: - plot_name_log = plot_loc / f"{var}_logp_{s}_Zonal_Mean.{plot_type}" + plot_name_log = plot_loc / f"{var}_{s}_Zonal_logp_Mean.{plot_type}" # Check redo_plot. 
@@ -145,7 +102,7 @@ def zonal_mean(adfobj):
     for var in var_list:
         for s in seasons:
             #Check zonal log-p:
-            plot_name_log = plot_loc / f"{var}_logp_{s}_Zonal_Mean.{plot_type}"
+            plot_name_log = plot_loc / f"{var}_{s}_Zonal_logp_Mean.{plot_type}"

             # Check redo_plot. If set to True: remove old plot, if it already exists:
             if (not redo_plot) and plot_name_log.is_file():
@@ -165,7 +122,6 @@ def zonal_mean(adfobj):
             if (not redo_plot) and plot_name.is_file():
                 zonal_skip.append(plot_name)
                 #Add already-existing plot to website (if enabled):
-                adfobj.debug_log(f"'{plot_name}' exists and clobber is false.")
                 adfobj.add_website_data(plot_name, var, case_name,
                                         season=s, plot_type="Zonal")
@@ -185,27 +141,10 @@ def zonal_mean(adfobj):
     #
     #Loop over variables:
     for var in var_list:
-
-        if adfobj.compare_obs:
-            #Check if obs exist for the variable:
-            if var in var_obs_dict:
-                #Note: In the future these may all be lists, but for
-                #now just convert the target_list.
-                #Extract target file:
-                dclimo_loc = var_obs_dict[var]["obs_file"]
-                #Extract target list (eventually will be a list, for now need to convert):
-                data_list = [var_obs_dict[var]["obs_name"]]
-                #Extract target variable name:
-                data_var = var_obs_dict[var]["obs_var"]
-            else:
-                dmsg = f"No obs found for variable `{var}`, zonal mean plotting skipped."
-                adfobj.debug_log(dmsg)
-                continue
-            #End if
-        else:
-            #Set "data_var" for consistent use below:
-            data_var = var
-        #End if
+        if var not in adfobj.data.ref_var_nam:
+            dmsg = f"No obs found for variable `{var}`, zonal mean plotting skipped."
+            adfobj.debug_log(dmsg)
+            continue

         #Notify user of variable being plotted:
         print(f"\t - zonal mean plots for {var}")
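# ---- Editor's sketch (illustration only, not part of the patch) ----
# The hunk below removes the in-script units transform; the convention it
# used (per-variable "scale_factor"/"add_offset"/"new_unit" entries from the
# variable defaults) behaves like this sketch, with hypothetical values:
import xarray as xr

vres = {"scale_factor": 86400000.0, "new_unit": "mm/day"}  # hypothetical m/s -> mm/day
mdata = xr.DataArray([1.0e-8, 2.0e-8], dims="lat", attrs={"units": "m/s"})

mdata = mdata * vres.get("scale_factor", 1) + vres.get("add_offset", 0)
mdata.attrs["units"] = vres.get("new_unit", mdata.attrs.get("units", "none"))
print(mdata.values, mdata.attrs["units"])
# ---------------------------------------------------------------------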
@@ -220,81 +159,48 @@ def zonal_mean(adfobj):
             vres = {}
         #End if

-        #loop over different data sets to plot model against:
-        for data_src in data_list:
-            # load data (observational) comparison files
-            # (we should explore intake as an alternative to having this kind of repeated code):
-            if adfobj.compare_obs:
-                #For now, only grab one file (but convert to list for use below)
-                oclim_fils = [dclimo_loc]
-            else:
-                oclim_fils = sorted(dclimo_loc.glob(f"{data_src}_{var}_baseline.nc"))
-            #End if
-            oclim_ds = pf.load_dataset(oclim_fils)
-
-            #Loop over model cases:
-            for case_idx, case_name in enumerate(case_names):
-
-                #Set case nickname:
-                case_nickname = test_nicknames[case_idx]
-
-                #Set output plot location:
-                plot_loc = Path(plot_locations[case_idx])
-
-                # load re-gridded model files:
-                mclim_fils = sorted(mclimo_rg_loc.glob(f"{data_src}_{case_name}_{var}_*.nc"))
-                mclim_ds = pf.load_dataset(mclim_fils)
-
-                # stop if data is invalid:
-                if (oclim_ds is None) or (mclim_ds is None):
-                    warnings.warn(f"invalid data, skipping zonal mean plot of {var}")
-                    continue
+        # load reference data (observational or baseline)
+        odata = adfobj.data.load_reference_regrid_da(adfobj.data.ref_case_label, var)
+        has_lat_ref, has_lev_ref = pf.zm_validate_dims(odata)

-                #Extract variable of interest
-                odata = oclim_ds[data_var].squeeze()  # squeeze in case of degenerate dimensions
-                mdata = mclim_ds[var].squeeze()
-
-                # APPLY UNITS TRANSFORMATION IF SPECIFIED:
-                # NOTE: looks like our climo files don't have all their metadata
-                mdata = mdata * vres.get("scale_factor",1) + vres.get("add_offset", 0)
-                # update units
-                mdata.attrs['units'] = vres.get("new_unit", mdata.attrs.get('units', 'none'))
-
-                # Do the same for the baseline case if need be:
-                if not adfobj.compare_obs:
-                    odata = odata * vres.get("scale_factor",1) + vres.get("add_offset", 0)
-                    # update units
-                    odata.attrs['units'] = vres.get("new_unit", odata.attrs.get('units', 'none'))
-                # Or for observations
-                else:
-                    odata = odata * vres.get("obs_scale_factor",1) + vres.get("obs_add_offset", 0)
-                    # Note: we are going to assume that the specification ensures the conversion makes the units the same. Doesn't make sense to add a different unit.
+        #Loop over model cases:
+        for case_idx, case_name in enumerate(adfobj.data.case_names):

-                # determine whether it's 2D or 3D
-                # 3D triggers search for surface pressure
-                has_lat, has_lev = pf.zm_validate_dims(mdata)  # assumes will work for both mdata & odata
+            #Set case nickname:
+            case_nickname = adfobj.data.test_nicknames[case_idx]

-                #Notify user of level dimension:
-                if has_lev:
-                    print(f"\t {var} has lev dimension.")
+            #Set output plot location:
+            plot_loc = Path(plot_locations[case_idx])

-                #
-                # Seasonal Averages
-                #
+            # load re-gridded model files:
+            mdata = adfobj.data.load_regrid_da(case_name, var)

-                #Create new dictionaries:
-                mseasons = {}
-                oseasons = {}
-
-                #Loop over season dictionary:
-                for s in seasons:
-
-                    # time to make plot; here we'd probably loop over whatever plots we want for this variable
-                    # I'll just call this one "Zonal_Mean" ... would this work as a pattern [operation]_[AxesDescription] ?
-                    # NOTE: Up to this point, nothing really differs from global_latlon_map,
-                    # so we could have made one script instead of two.
-                    # Merging would make overall timing better because looping twice will double I/O steps.
-                    #
+            # determine whether it's 2D or 3D
+            # 3D triggers search for surface pressure
+            has_lat, has_lev = pf.zm_validate_dims(mdata)  # assumes will work for both mdata & odata
+
+            #Notify user of level dimension:
+            if has_lev:
+                print(f"\t {var} has lev dimension.")
+
+            #
+            # Seasonal Averages
+            #
+
+            #Create new dictionaries:
+            mseasons = {}
+            oseasons = {}
+
+            #Loop over season dictionary:
+            for s in seasons:
+
+                # time to make plot; here we'd probably loop over whatever plots we want for this variable
+                # I'll just call this one "Zonal_Mean" ... would this work as a pattern [operation]_[AxesDescription] ?
+                # NOTE: Up to this point, nothing really differs from global_latlon_map,
+                # so we could have made one script instead of two.
+                # Merging would make overall timing better because looping twice will double I/O steps.
+                #
+                if not has_lev:
                     plot_name = plot_loc / f"{var}_{s}_Zonal_Mean.{plot_type}"
                     if plot_name not in zonal_skip:
@@ -302,7 +208,7 @@ def zonal_mean(adfobj):
                         #Seasonal Averages
                         mseasons[s] = pf.seasonal_mean(mdata, season=s, is_climo=True)
                         oseasons[s] = pf.seasonal_mean(odata, season=s, is_climo=True)
-
+
                         # difference: each entry should be (lat, lon) or (plev, lat, lon)
                         # dseasons[s] = mseasons[s] - oseasons[s]
                         # difference will be calculated in plot_zonal_mean_and_save;
@@ -310,40 +216,45 @@ def zonal_mean(adfobj):
                         # This could be re-visited for efficiency or improved code structure.
                         #Create new plot:
-                        pf.plot_zonal_mean_and_save(plot_name, case_nickname, base_nickname,
+                        pf.plot_zonal_mean_and_save(plot_name, case_nickname, adfobj.data.ref_nickname,
                                                     [syear_cases[case_idx],eyear_cases[case_idx]],
                                                     [syear_baseline,eyear_baseline],
-                                                    mseasons[s], oseasons[s], has_lev, log_p=False, obs=obs, **vres)
+                                                    mseasons[s], oseasons[s], has_lev, log_p=False, obs=adfobj.compare_obs, **vres)

                         #Add plot to website (if enabled):
                         adfobj.add_website_data(plot_name, var, case_name, season=s, plot_type="Zonal")
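# ---- Editor's sketch (illustration only, not part of the patch) ----
# In the line-plot branch above, the plotted quantity is a zonal (longitude)
# average, and the case-minus-reference difference is computed inside
# pf.plot_zonal_mean_and_save (per the comment in the hunk). A minimal
# stand-in for both steps:
import numpy as np
import xarray as xr

mseason = xr.DataArray(np.random.rand(4, 8), dims=("lat", "lon"))
oseason = xr.DataArray(np.random.rand(4, 8), dims=("lat", "lon"))

mzonal = mseason.mean(dim="lon")           # model zonal mean per latitude
dzonal = mzonal - oseason.mean(dim="lon")  # difference panel data
print(mzonal.sizes, dzonal.sizes)  # both reduce to {'lat': 4}
# ---------------------------------------------------------------------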
-                    #Create new plot with log-p:
-                    if has_lev:
-                        plot_name_log = plot_loc / f"{var}_logp_{s}_Zonal_Mean.{plot_type}"
+                #Create new plot with log-p:
+                # NOTE: The log-p should be an option here.
+                else:
+                    if (not has_lev_ref) or (not has_lev):
+                        print(f"Error: expecting lev for both case: {has_lev} and ref: {has_lev_ref}")
+                        continue
+                    if len(mdata['lev']) != len(odata['lev']):
+                        print(f"Error: zonal mean contour expects `lev` dim to have same size, got {len(mdata['lev'])} and {len(odata['lev'])}")
+                        continue
+                    plot_name_log = plot_loc / f"{var}_{s}_Zonal_logp_Mean.{plot_type}"
+                    if plot_name_log not in logp_zonal_skip:
+                        #Seasonal Averages
+                        mseasons[s] = pf.seasonal_mean(mdata, season=s, is_climo=True)
+                        oseasons[s] = pf.seasonal_mean(odata, season=s, is_climo=True)

-                        if plot_name_log not in logp_zonal_skip:
-                            pf.plot_zonal_mean_and_save(plot_name_log, case_nickname, base_nickname,
-                                                        [syear_cases[case_idx],eyear_cases[case_idx]],
-                                                        [syear_baseline,eyear_baseline],
-                                                        mseasons[s], oseasons[s], has_lev, log_p=True, obs=obs, **vres)
+                        pf.plot_zonal_mean_and_save(plot_name_log, case_nickname, adfobj.data.ref_nickname,
+                                                    [syear_cases[case_idx],eyear_cases[case_idx]],
+                                                    [syear_baseline,eyear_baseline],
+                                                    mseasons[s], oseasons[s], has_lev, log_p=True, obs=adfobj.compare_obs, **vres)

-                            #Add plot to website (if enabled):
-                            adfobj.add_website_data(plot_name_log, f"{var}_logp", case_name, season=s, plot_type="Zonal", category="Log-P")
+                        #Add plot to website (if enabled):
+                        adfobj.add_website_data(plot_name_log, f"{var}_logp", case_name, season=s, plot_type="Zonal", category="Log-P")

-    #End for (seasons loop)
-    #End for (case names loop)
-    #End for (obs/baseline loop)
+            #End for (seasons loop)
+        #End for (case names loop)
     #End for (variables loop)

     #Notify user that script has ended:
     print("  ...Zonal mean plots have been generated successfully.")

-#########
-# Helpers
-#########
-
-
 ##############
-#END OF SCRIPT
\ No newline at end of file
+#END OF SCRIPT
+
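# ---- Editor's sketch (illustration only, not part of the patch) ----
# The contour branch above guards against missing or mismatched vertical
# grids before differencing. A standalone sketch of that check:
import numpy as np
import xarray as xr

levs = [850.0, 500.0, 250.0]
mdata = xr.DataArray(np.random.rand(3, 4), dims=("lev", "lat"), coords={"lev": levs})
odata = xr.DataArray(np.random.rand(3, 4), dims=("lev", "lat"), coords={"lev": levs})

if ("lev" in mdata.dims) and ("lev" in odata.dims) and len(mdata["lev"]) == len(odata["lev"]):
    print("lev dimensions compatible; contour plot can proceed")
else:
    print("mismatched or missing lev dimension; variable would be skipped")
# ---------------------------------------------------------------------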