From 5b756aa22a20a64a7291681390bd7a3cd537f4b2 Mon Sep 17 00:00:00 2001 From: Leila Belabbassi Date: Fri, 5 Jan 2024 11:19:55 -0600 Subject: [PATCH] Update build_erddap_catalog.py (#290) * Update build_erddap_catalog.py * Use ioos-qc library in glider_qc.py * Update requirements.txt added an extension in the requirement file to pull in the ioos-qc library package * Update qc_config.yml * Update glider_qc.py * Update build_erddap_catalog.py Fixed the flag name attribute to match ioos-qc definitions Fixed the manual link Added the min and max values for a flag * Update glider_qc.py Modified the script to create qartod variables and add the qartod test results to each qartod variable. * Update build_erddap_catalog.py - Changed the required variables to a list of qartod variables. - Made changes to few functions to work accordingly. * Update glider_qc.py fixed spelling detected by the code checks * Update build_erddap_catalog.py rectified the omitted quotes in line 611 * Update glider_qc.py added 'nc_path' to the run_qc function * Update glider_qc.py commented out an old version of the get_unmasked function * Update glider_qc.py fixed syntax errors added qartod_status attribute * Update glider_qc.py fixed log.info() and log.exception() syntax errors * Update glider_qc.py it did not like the addition log.exception() added to the "except OSError:" when checking os.getxattr() in the previous commit. * Update glider_qc.py extended qartod variables' attributes to include more verbose explanation * Update glider_qc.py added more to log.info for debugging purposes * Update requirements.txt commented out the ioos_qartod git repo that has been deprecated * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- data/qc_config.yml | 198 +++++++---- glider_qc/glider_qc.py | 575 +++++++++++++++++++++----------- requirements.txt | 4 +- scripts/build_erddap_catalog.py | 59 ++-- 4 files changed, 535 insertions(+), 301 deletions(-) diff --git a/data/qc_config.yml b/data/qc_config.yml index 66274874..6cd85d18 100644 --- a/data/qc_config.yml +++ b/data/qc_config.yml @@ -1,67 +1,131 @@ -sea_water_temperature: # deg_C - flat_line: - low_reps: 4 - high_reps: 8 - eps: 1.1920929e-07 - gross_range: - sensor_span: - - -5. - - 45. - spike: {} - rate_of_change: {} -sea_water_electrical_conductivity: # S m-1 - flat_line: - low_reps: 4 - high_reps: 8 - eps: 1.1920929e-07 - gross_range: - sensor_span: - - 0 - - 7 - spike: {} - rate_of_change: {} -sea_water_practical_salinity: # unitless - flat_line: - low_reps: 4 - high_reps: 8 - eps: 1.1920929e-07 - gross_range: - sensor_span: - - 0 - - 50 - spike: {} - rate_of_change: {} -sea_water_salinity: # unitless - flat_line: - low_reps: 4 - high_reps: 8 - eps: 1.1920929e-07 - gross_range: - sensor_span: - - 0 - - 50 - spike: {} - rate_of_change: {} -sea_water_density: # kg m-3 - flat_line: - low_reps: 4 - high_reps: 8 - eps: 1.1920929e-07 - gross_range: - sensor_span: - - 900 - - 1050 - spike: {} - rate_of_change: {} -sea_water_pressure: # ` - flat_line: - low_reps: 4 - high_reps: 8 - eps: 1.1920929e-07 - gross_range: - sensor_span: - - 0 - - 11000 - pressure: {} - rate_of_change: {} - spike: {} +contexts: + - streams: + conductivity: + qartod: + gross_range_test: + suspect_span: [0, 6] + fail_span: [0, 9] + spike_test: + suspect_threshold: + fail_threshold: + rate_of_change_test: + threshold: 0.1 + temperature: + qartod: + gross_range_test: + suspect_span: [0, 35] + fail_span: [-2, 40] + spike_test: + suspect_threshold: + fail_threshold: + rate_of_change_test: + threshold: 0.1 +# flat_line_test: +# tolerance: 1 +# suspect_threshold: 3600 +# fail_threshold: 9000 + pressure: + qartod: + gross_range_test: + suspect_span: [0, 1000] + fail_span: [0, 6000] + spike_test: + suspect_threshold: + fail_threshold: + rate_of_change_test: + threshold: 0.1 +# flat_line_test: +# tolerance: 1 +# suspect_threshold: 3600 +# fail_threshold: 9000 + salinity: + qartod: + gross_range_test: + fail_span: [0, 42] + suspect_span: [10, 38] + spike_test: + suspect_threshold: + fail_threshold: + rate_of_change_test: + threshold: 0.1 + density: + qartod: + gross_range_test: + fail_span: [1000, 1100] + spike_test: + suspect_threshold: + fail_threshold: + rate_of_change_test: + threshold: 0.1 +# flat_line_test: +# tolerance: 1 +# suspect_threshold: 3600 +# fail_threshold: 9000 + +# sea_water_temperature: # deg_C +# flat_line: +# low_reps: 4 +# high_reps: 8 +# eps: 1.1920929e-07 +# gross_range: +# sensor_span: +# - -5. +# - 45. +# spike: {} +# rate_of_change: {} +# sea_water_electrical_conductivity: # S m-1 +# flat_line: +# low_reps: 4 +# high_reps: 8 +# eps: 1.1920929e-07 +# gross_range: +# sensor_span: +# - 0 +# - 7 +# spike: {} +# rate_of_change: {} +# sea_water_practical_salinity: # unitless +# flat_line: +# low_reps: 4 +# high_reps: 8 +# eps: 1.1920929e-07 +# gross_range: +# sensor_span: +# - 0 +# - 50 +# spike: {} +# rate_of_change: {} +# sea_water_salinity: # unitless +# flat_line: +# low_reps: 4 +# high_reps: 8 +# eps: 1.1920929e-07 +# gross_range: +# sensor_span: +# - 0 +# - 50 +# spike: {} +# rate_of_change: {} +# sea_water_density: # kg m-3 +# flat_line: +# low_reps: 4 +# high_reps: 8 +# eps: 1.1920929e-07 +# gross_range: +# sensor_span: +# - 900 +# - 1050 +# spike: {} +# rate_of_change: {} +# sea_water_pressure: # ` +# flat_line: +# low_reps: 4 +# high_reps: 8 +# eps: 1.1920929e-07 +# gross_range: +# sensor_span: +# - 0 +# - 11000 +# pressure: {} +# rate_of_change: {} +# spike: {} diff --git a/glider_qc/glider_qc.py b/glider_qc/glider_qc.py index c743dcf3..0c2a74a4 100644 --- a/glider_qc/glider_qc.py +++ b/glider_qc/glider_qc.py @@ -4,8 +4,12 @@ ''' from cf_units import Unit from netCDF4 import num2date, Dataset -from ioos_qartod.qc_tests import qc -from ioos_qartod.qc_tests import gliders as gliders_qc +# from ioos_qartod.qc_tests import qc +# from ioos_qartod.qc_tests import gliders as gliders_qc +from ioos_qc.qartod import aggregate +from ioos_qc.streams import XarrayStream +from ioos_qc.results import collect_results, CollectedResult +from ioos_qc.config import Config import numpy as np import numpy.ma as ma import quantities as pq @@ -65,12 +69,14 @@ def find_qc_flags(self, ncvariable): if varname not in self.ncfile.variables: log.warning("%s defined as ancillary variable but doesn't exist", varname) continue - anc_standard_name = getattr(self.ncfile.variables[varname], - 'standard_name', '') - if varname.endswith('_qc'): - valid_variables.append(varname) - elif ("status_flag" in anc_standard_name or - anc_standard_name.endswith("quality_flag")): + # anc_standard_name = getattr(self.ncfile.variables[varname], + # 'standard_name', '') + # if varname.endswith('_qc'): + # valid_variables.append(varname) + # elif ("status_flag" in anc_standard_name or + # anc_standard_name.endswith("quality_flag")): + # valid_variables.append(varname) + if varname.startswith("qartod"): valid_variables.append(varname) return valid_variables @@ -101,102 +107,102 @@ def needs_qc(self, ncvariable): ancillary_variables = self.find_qc_flags(ncvariable) return len(ancillary_variables) == 0 - def create_qc_variables(self, ncvariable): - ''' - Returns a list of variable names for the newly created variables for QC flags - ''' - name = ncvariable.name - standard_name = ncvariable.standard_name - dims = ncvariable.dimensions - log.info("Creating QARTOD variables for %s", name) - - templates = { - 'flat_line': { - 'name': 'qartod_%(name)s_flat_line_flag', - 'long_name': 'QARTOD Flat Line Test for %(standard_name)s', - 'standard_name': 'flat_line_test_quality_flag', - 'flag_values': np.array([1, 2, 3, 4, 9], dtype=np.int8), - 'flag_meanings': 'GOOD NOT_EVALUATED SUSPECT BAD MISSING', - 'references': 'http://gliders.ioos.us/static/pdf/Manual-for-QC-of-Glider-Data_05_09_16.pdf', - 'qartod_test': 'flat_line', - 'dac_comment': 'ioos_qartod' - }, - 'gross_range': { - 'name': 'qartod_%(name)s_gross_range_flag', - 'long_name': 'QARTOD Gross Range Test for %(standard_name)s', - 'standard_name': 'gross_range_test_quality_flag', - 'flag_values': np.array([1, 2, 3, 4, 9], dtype=np.int8), - 'flag_meanings': 'GOOD NOT_EVALUATED SUSPECT BAD MISSING', - 'references': 'http://gliders.ioos.us/static/pdf/Manual-for-QC-of-Glider-Data_05_09_16.pdf', - 'qartod_test': 'gross_range', - 'dac_comment': 'ioos_qartod' - }, - 'rate_of_change': { - 'name': 'qartod_%(name)s_rate_of_change_flag', - 'long_name': 'QARTOD Rate of Change Test for %(standard_name)s', - 'standard_name': 'rate_of_change_test_quality_flag', - 'flag_values': np.array([1, 2, 3, 4, 9], dtype=np.int8), - 'flag_meanings': 'GOOD NOT_EVALUATED SUSPECT BAD MISSING', - 'references': 'http://gliders.ioos.us/static/pdf/Manual-for-QC-of-Glider-Data_05_09_16.pdf', - 'qartod_test': 'rate_of_change', - 'dac_comment': 'ioos_qartod' - }, - 'spike': { - 'name': 'qartod_%(name)s_spike_flag', - 'long_name': 'QARTOD Spike Test for %(standard_name)s', - 'standard_name': "spike_test_quality_flag", - 'flag_values': np.array([1, 2, 3, 4, 9], dtype=np.int8), - 'flag_meanings': 'GOOD NOT_EVALUATED SUSPECT BAD MISSING', - 'references': 'http://gliders.ioos.us/static/pdf/Manual-for-QC-of-Glider-Data_05_09_16.pdf', - 'qartod_test': 'spike', - 'dac_comment': 'ioos_qartod' - }, - 'pressure': { - 'name': 'qartod_monotonic_pressure_flag', - 'long_name': 'QARTOD Pressure Test for %(standard_name)s', - 'standard_name': 'quality_flag', - 'flag_values': np.array([1, 2, 3, 4, 9], dtype=np.int8), - 'flag_meanings': 'GOOD NOT_EVALUATED SUSPECT BAD MISSING', - 'references': 'http://gliders.ioos.us/static/pdf/Manual-for-QC-of-Glider-Data_05_09_16.pdf', - 'qartod_test': 'pressure', - 'dac_comment': 'ioos_qartod' - }, - 'primary': { - 'name': 'qartod_%(name)s_primary_flag', - 'long_name': 'QARTOD Primary Flag for %(standard_name)s', - 'standard_name': 'aggregate_quality_flag', - 'flag_values': np.array([1, 2, 3, 4, 9], dtype=np.int8), - 'flag_meanings': 'GOOD NOT_EVALUATED SUSPECT BAD MISSING', - 'references': 'http://gliders.ioos.us/static/pdf/Manual-for-QC-of-Glider-Data_05_09_16.pdf', - 'dac_comment': 'ioos_qartod_primary' - } - } - - qcvariables = [] - - for tname, template in list(templates.items()): - if tname == 'pressure' and standard_name != 'sea_water_pressure': - continue - variable_name = template['name'] % {'name': name} - - if variable_name not in self.ncfile.variables: - ncvar = self.ncfile.createVariable(variable_name, np.int8, dims, fill_value=np.int8(9)) - else: - ncvar = self.ncfile.variables[variable_name] - - ncvar.units = '1' - ncvar.standard_name = template['standard_name'] % {'standard_name': standard_name} - ncvar.long_name = template['long_name'] % {'standard_name': standard_name} - ncvar.flag_values = template['flag_values'] - ncvar.flag_meanings = template['flag_meanings'] - ncvar.references = template['references'] - ncvar.dac_comment = template['dac_comment'] - if 'qartod_test' in template: - ncvar.qartod_test = template['qartod_test'] - qcvariables.append(variable_name) - self.append_ancillary_variable(ncvariable, ncvar) - - return qcvariables + # def create_qc_variables(self, ncvariable): + # ''' + # Returns a list of variable names for the newly created variables for QC flags + # ''' + # name = ncvariable.name + # standard_name = ncvariable.standard_name + # dims = ncvariable.dimensions + # log.info("Creating QARTOD variables for %s", name) + # + # templates = { + # 'flat_line': { + # 'name': 'qartod_%(name)s_flat_line_flag', + # 'long_name': 'QARTOD Flat Line Test for %(standard_name)s', + # 'standard_name': 'flat_line_test_quality_flag', + # 'flag_values': np.array([1, 2, 3, 4, 9], dtype=np.int8), + # 'flag_meanings': 'GOOD NOT_EVALUATED SUSPECT BAD MISSING', + # 'references': 'http://gliders.ioos.us/static/pdf/Manual-for-QC-of-Glider-Data_05_09_16.pdf', + # 'qartod_test': 'flat_line', + # 'dac_comment': 'ioos_qartod' + # }, + # 'gross_range': { + # 'name': 'qartod_%(name)s_gross_range_flag', + # 'long_name': 'QARTOD Gross Range Test for %(standard_name)s', + # 'standard_name': 'gross_range_test_quality_flag', + # 'flag_values': np.array([1, 2, 3, 4, 9], dtype=np.int8), + # 'flag_meanings': 'GOOD NOT_EVALUATED SUSPECT BAD MISSING', + # 'references': 'http://gliders.ioos.us/static/pdf/Manual-for-QC-of-Glider-Data_05_09_16.pdf', + # 'qartod_test': 'gross_range', + # 'dac_comment': 'ioos_qartod' + # }, + # 'rate_of_change': { + # 'name': 'qartod_%(name)s_rate_of_change_flag', + # 'long_name': 'QARTOD Rate of Change Test for %(standard_name)s', + # 'standard_name': 'rate_of_change_test_quality_flag', + # 'flag_values': np.array([1, 2, 3, 4, 9], dtype=np.int8), + # 'flag_meanings': 'GOOD NOT_EVALUATED SUSPECT BAD MISSING', + # 'references': 'http://gliders.ioos.us/static/pdf/Manual-for-QC-of-Glider-Data_05_09_16.pdf', + # 'qartod_test': 'rate_of_change', + # 'dac_comment': 'ioos_qartod' + # }, + # 'spike': { + # 'name': 'qartod_%(name)s_spike_flag', + # 'long_name': 'QARTOD Spike Test for %(standard_name)s', + # 'standard_name': "spike_test_quality_flag", + # 'flag_values': np.array([1, 2, 3, 4, 9], dtype=np.int8), + # 'flag_meanings': 'GOOD NOT_EVALUATED SUSPECT BAD MISSING', + # 'references': 'http://gliders.ioos.us/static/pdf/Manual-for-QC-of-Glider-Data_05_09_16.pdf', + # 'qartod_test': 'spike', + # 'dac_comment': 'ioos_qartod' + # }, + # 'pressure': { + # 'name': 'qartod_monotonic_pressure_flag', + # 'long_name': 'QARTOD Pressure Test for %(standard_name)s', + # 'standard_name': 'quality_flag', + # 'flag_values': np.array([1, 2, 3, 4, 9], dtype=np.int8), + # 'flag_meanings': 'GOOD NOT_EVALUATED SUSPECT BAD MISSING', + # 'references': 'http://gliders.ioos.us/static/pdf/Manual-for-QC-of-Glider-Data_05_09_16.pdf', + # 'qartod_test': 'pressure', + # 'dac_comment': 'ioos_qartod' + # }, + # 'primary': { + # 'name': 'qartod_%(name)s_primary_flag', + # 'long_name': 'QARTOD Primary Flag for %(standard_name)s', + # 'standard_name': 'aggregate_quality_flag', + # 'flag_values': np.array([1, 2, 3, 4, 9], dtype=np.int8), + # 'flag_meanings': 'GOOD NOT_EVALUATED SUSPECT BAD MISSING', + # 'references': 'http://gliders.ioos.us/static/pdf/Manual-for-QC-of-Glider-Data_05_09_16.pdf', + # 'dac_comment': 'ioos_qartod_primary' + # } + # } + # + # qcvariables = [] + # + # for tname, template in list(templates.items()): + # if tname == 'pressure' and standard_name != 'sea_water_pressure': + # continue + # variable_name = template['name'] % {'name': name} + # + # if variable_name not in self.ncfile.variables: + # ncvar = self.ncfile.createVariable(variable_name, np.int8, dims, fill_value=np.int8(9)) + # else: + # ncvar = self.ncfile.variables[variable_name] + # + # ncvar.units = '1' + # ncvar.standard_name = template['standard_name'] % {'standard_name': standard_name} + # ncvar.long_name = template['long_name'] % {'standard_name': standard_name} + # ncvar.flag_values = template['flag_values'] + # ncvar.flag_meanings = template['flag_meanings'] + # ncvar.references = template['references'] + # ncvar.dac_comment = template['dac_comment'] + # if 'qartod_test' in template: + # ncvar.qartod_test = template['qartod_test'] + # qcvariables.append(variable_name) + # self.append_ancillary_variable(ncvariable, ncvar) + # + # return qcvariables def load_config(self, path='data/qc_config.yml'): ''' @@ -229,60 +235,97 @@ def normalize_variable(cls, values, units, standard_name): raise return converted - def apply_qc(self, ncvariable, parent): + def apply_qc(self, ncpath, varname, configset): ''' - Applies QC to a qartod variable + Pass configuration and netcdf file to ioos_qc to generate + qc test results for a variable - :param netCDF4.Variable ncvariable: A QARTOD Variable + :param ncpath: string defining path to the netcdf file + :param varname: string defining the variable name + :param configset: dictionary with variable config specs for each QARTOD tests ''' - qc_tests = { - 'flat_line': qc.flat_line_check, - 'gross_range': qc.range_check, - 'rate_of_change': qc.rate_of_change_check, - 'spike': qc.spike_check, - 'pressure': gliders_qc.pressure_check - } - qartod_test = getattr(ncvariable, 'qartod_test', None) - if not qartod_test: - return - standard_name = parent.standard_name - - times, values, mask = self.get_unmasked(parent) - # There's no data to QC - if len(values) == 0: - return - - # If the config isn't set for this test, don't run it. - if qartod_test not in self.config[standard_name]: - return - - test_params = {} - if qartod_test in 'rate_of_change': - times = ma.getdata(times[~mask]) - time_units = self.ncfile.variables['time'].units - dates = np.array(num2date(times, time_units), dtype='datetime64[ms]') - test_params['times'] = dates - # Calculate the threshold value - test_params['thresh_val'] = self.get_rate_of_change_threshold(values, times, time_units) - elif qartod_test == 'spike': - test_params['times'] = ma.getdata(times[~mask]) - test_params['low_thresh'], test_params['high_thresh'] = self.get_spike_thresholds(values) - else: - test_params = self.config[standard_name][qartod_test] - - if qartod_test == 'pressure': - test_params['pressure'] = values - else: - test_params['arr'] = values - - qc_flags = qc_tests[qartod_test](**test_params) - ncvariable[~mask] = qc_flags - - for test_param in test_params: - if test_param in ('arr', 'times', 'pressure'): - continue - ncvariable.setncattr(test_param, test_params[test_param]) + # Read the variable configuration specifications + c = Config(configset) + + # Setup the stream + # Use XarrayStream to extract numpy arrays from variables within a netCDF file + # and passes them through as arrays to XarrayStream. + qc = XarrayStream(ncpath) + + # Pass the run method and the config specs + # Store as a list + runner = list(qc.run(c)) + + results = collect_results(runner, how='list') + + agg = CollectedResult( + stream_id=varname, + package='qartod', + test='qc_rollup', + function=aggregate, + results=aggregate(results), + tinp=qc.time(), + data=qc.data(varname) + ) + results.append(agg) + + return results + + # def apply_qc(self, ncvariable, parent): + # ''' + # Applies QC to a qartod variable + # + # :param netCDF4.Variable ncvariable: A QARTOD Variable + # ''' + # qc_tests = { + # 'flat_line': qc.flat_line_check, + # 'gross_range': qc.range_check, + # 'rate_of_change': qc.rate_of_change_check, + # 'spike': qc.spike_check, + # 'pressure': gliders_qc.pressure_check + # } + # + # qartod_test = getattr(ncvariable, 'qartod_test', None) + # if not qartod_test: + # return + # standard_name = parent.standard_name + # + # times, values, mask = self.get_unmasked(parent) + # # There's no data to QC + # if len(values) == 0: + # return + # + # # If the config isn't set for this test, don't run it. + # if qartod_test not in self.config[standard_name]: + # return + # + # test_params = {} + # if qartod_test in 'rate_of_change': + # times = ma.getdata(times[~mask]) + # time_units = self.ncfile.variables['time'].units + # dates = np.array(num2date(times, time_units), dtype='datetime64[ms]') + # test_params['times'] = dates + # # Calculate the threshold value + # test_params['thresh_val'] = self.get_rate_of_change_threshold(values, times, time_units) + # elif qartod_test == 'spike': + # test_params['times'] = ma.getdata(times[~mask]) + # test_params['low_thresh'], test_params['high_thresh'] = self.get_spike_thresholds(values) + # else: + # test_params = self.config[standard_name][qartod_test] + # + # if qartod_test == 'pressure': + # test_params['pressure'] = values + # else: + # test_params['arr'] = values + # + # qc_flags = qc_tests[qartod_test](**test_params) + # ncvariable[~mask] = qc_flags + # + # for test_param in test_params: + # if test_param in ('arr', 'times', 'pressure'): + # continue + # ncvariable.setncattr(test_param, test_params[test_param]) def get_rate_of_change_threshold(self, values, times, time_units='seconds since 1970-01-01T00:00:00Z'): ''' @@ -319,9 +362,23 @@ def get_spike_thresholds(self, values): max_thresh = 2.0 * std return min_thresh, max_thresh - def get_unmasked(self, ncvariable): - times = self.ncfile.variables['time'][:] - values = ncvariable[:] + # def get_unmasked(self, ncvariable): + # times = self.ncfile.variables['time'][:] + # values = ncvariable[:] + + # mask = np.zeros(times.shape[0], dtype=bool) + + # if hasattr(values, 'mask'): + # mask |= values.mask + + # if hasattr(times, 'mask'): + # mask |= times.mask + + # values = ma.getdata(values[~mask]) + # values = self.normalize_variable(values, ncvariable.units, ncvariable.standard_name) + # return times, values, mask + + def get_unmasked(self, ncvariable, times, values): mask = np.zeros(times.shape[0], dtype=bool) @@ -332,39 +389,68 @@ def get_unmasked(self, ncvariable): mask |= times.mask values = ma.getdata(values[~mask]) - values = self.normalize_variable(values, ncvariable.units, ncvariable.standard_name) - return times, values, mask + times = ma.getdata(times[~mask]) - def apply_primary_qc(self, ncvariable): + values = self.normalize_variable(values, ncvariable.units, ncvariable.standard_name) + return times, values + + # def apply_primary_qc(self, ncvariable): + # ''' + # Applies the primary QC array which is an aggregate of all the other QC + # tests. + # + # :param netCDF4.Variable ncvariable: NCVariable + # ''' + # primary_qc_name = 'qartod_%s_primary_flag' % ncvariable.name + # if primary_qc_name not in self.ncfile.variables: + # return + # + # qcvar = self.ncfile.variables[primary_qc_name] + # # Only perform primary QC on variables created by DAC + # if getattr(qcvar, 'dac_comment', '') != 'ioos_qartod_primary': + # return + # + # qc_variables = self.find_qc_flags(ncvariable) + # vectors = [] + # + # for qc_variable in qc_variables: + # ncvar = self.ncfile.variables[qc_variable] + # if getattr(ncvar, 'dac_comment', '') != 'ioos_qartod': + # continue + # log.info("Using %s in primary QC for %s", qc_variable, primary_qc_name) + # vectors.append(ma.getdata(ncvar[:])) + # + # log.info("Applying QC for %s", primary_qc_name) + # flags = qc.qc_compare(vectors) + # qcvar[:] = flags + + def update_config(self, varspec, varname, times, values, time_units): ''' - Applies the primary QC array which is an aggregate of all the other QC - tests. + Update the config file with specs values for the spike + and the rate of change test methods - :param netCDF4.Variable ncvariable: NCVariable + :param varspec: Input dictionary with variable config specs for QARTOD tests + :param varname: string defining the variable name + :param times: numpy array of times + :param values: numpy array of values + :param time_units: string defining time units ''' - primary_qc_name = 'qartod_%s_primary_flag' % ncvariable.name - if primary_qc_name not in self.ncfile.variables: - return - qcvar = self.ncfile.variables[primary_qc_name] - # Only perform primary QC on variables created by DAC - if getattr(qcvar, 'dac_comment', '') != 'ioos_qartod_primary': - return + # Calculate the spike test threshold + suspect_threshold, fail_threshold = self.get_spike_thresholds(values) + # replace the threshold values in the config file + varspec['spike_test']['suspect_threshold'] = suspect_threshold + varspec['spike_test']['fail_threshold'] = fail_threshold - qc_variables = self.find_qc_flags(ncvariable) - vectors = [] + # Calculate the rate of change test threshold + threshold = self.get_rate_of_change_threshold(values, times, time_units) + # replace the threshold values in the config file + varspec['rate_of_change_test']['threshold'] = threshold - for qc_variable in qc_variables: - ncvar = self.ncfile.variables[qc_variable] - if getattr(ncvar, 'dac_comment', '') != 'ioos_qartod': - continue - log.info("Using %s in primary QC for %s", qc_variable, primary_qc_name) - vectors.append(ma.getdata(ncvar[:])) - - log.info("Applying QC for %s", primary_qc_name) - flags = qc.qc_compare(vectors) - qcvar[:] = flags + # Update the variable config specs + configset = {'contexts': [{'streams': {varname: {'qartod': varspec}}}]} + return configset def qc_task(nc_path, config): ''' @@ -375,7 +461,7 @@ def qc_task(nc_path, config): raise ProcessError("File lock already acquired by another process") try: with Dataset(nc_path, 'r+') as nc: - run_qc(config, nc) + run_qc(config, nc, nc_path) os.setxattr(nc_path, "user.qc_run", b"true") except OSError: log.exception(f"Exception occurred trying to save QC to file on {nc_path}:") @@ -412,27 +498,120 @@ def get_redis_connection(): return __RCONN -def run_qc(config, ncfile): +# def run_qc(config, ncfile): +# ''' +# Runs QC on a netCDF file +# ''' +# qc = GliderQC(ncfile, config) +# for varname in qc.find_geophysical_variables(): +# log.info("Inspecting %s", varname) +# ncvar = ncfile.variables[varname] +# +# if not qc.needs_qc(ncvar): +# log.info("%s does not need QARTOD", varname) +# continue +# +# for qcvarname in qc.create_qc_variables(ncvar): +# log.info("Created QC Variable %s", qcvarname) +# qcvar = ncfile.variables[qcvarname] +# +# log.info("Applying QC for %s", qcvar.name) +# qc.apply_qc(qcvar, ncvar) +# +# qc.apply_primary_qc(ncvar) +# +# # maybe unnecessary with calling context handler, but had some files +# # which had xattr set, but not updated with QC +# ncfile.sync() + +def run_qc(config, ncfile, ncpath): ''' - Runs QC on a netCDF file + Runs IOOS QARTOD tests on a netCDF file ''' - qc = GliderQC(ncfile, config) - for varname in qc.find_geophysical_variables(): - log.info("Inspecting %s", varname) - ncvar = ncfile.variables[varname] + xyz = GliderQC(ncfile, config) - if not qc.needs_qc(ncvar): - log.info("%s does not need QARTOD", varname) - continue + for var_name in xyz.find_geophysical_variables(): + log.info("Inspecting %s", var_name) - for qcvarname in qc.create_qc_variables(ncvar): - log.info("Created QC Variable %s", qcvarname) - qcvar = ncfile.variables[qcvarname] + # Extract data from the netCDF dataset + var_data = ncfile.variables[var_name] + timedata = ncfile.variables['time'] + time_units = timedata.units - log.info("Applying QC for %s", qcvar.name) - qc.apply_qc(qcvar, ncvar) + t = timedata[:] + x = var_data[:] + times, values = xyz.get_unmasked(var_data, t, x) - qc.apply_primary_qc(ncvar) + if len(values) == 0: + log.info("%s is empty", var_name) + continue + + if not xyz.needs_qc(var_data): + log.info("%s does not need QARTOD", var_name) + continue + + # UPDATE VARIABLE CONFIG SET + log.info("Updating config file for %s", var_name) + var_spec = xyz.config['contexts'][0]['streams'][var_name]['qartod'] + config_set = xyz.update_config(var_spec, var_name, times, values, time_units) + + # Get the variable's qc results + log.info("Applying QC to %s", var_name) + results = xyz.apply_qc(ncpath, var_name, config_set) + + # Read the results and update the qartod variable + for testname in ['gross_range_test', + 'spike_test', + 'rate_of_change_test', + 'qc_rollup', + 'flat_line_test']: + log.info("Reading %s results for %s", testname, var_name) + try: + qc_test = next(r for r in results if r.stream_id == var_name and r.test == testname) + qartod_flag = qc_test.results + test_message = 'Could run qartod ' + except Exception as error: + # Handle the exception + # Mark all flags as Not Evaluated + log.exception(f"Exception occurred trying to apply {var_name, testname}:") + qartod_flag = np.full((values.size,), 2) + test_message = 'Could not run qartod ' + + # create the qartod variable + log.info("Creating %s QARTOD variable to log %s results", var_name, testname) + + if testname == 'qc_rollup': + # compose the qartod variable name + qartodname = 'qartod_'+ var_name + '_primary_flag' + # Pass the config specs to a variable + testconfig = config_set['contexts'][0]['streams'][var_name]['qartod'] + else: + # compose the qartod variable name + qartodname = 'qartod_'+ var_name + '_'+ testname.split('_test')[0]+'_flag' + # Pass the config specs to a variable + testconfig = config_set['contexts'][0]['streams'][var_name]['qartod'][testname] + + # Create the qartod netcdf variable + qartod_var = ncfile.createVariable(qartodname, np.int8, var_data.dimensions, fill_value=np.int8(9)) + + # Add the qartod variable attributes + qartod_var.units = '1' + qartod_var.standard_name = testname +'_quality_flag' + qartod_var.long_name = 'QARTOD ' + testname + ' for ' + var_data.standard_name + qartod_var.flag_values = np.array([1, 2, 3, 4, 9], dtype=np.int8) + qartod_var.flag_meanings = 'PASS NOT_EVALUATED SUSPECT FAIL MISSING' + qartod_var.references = 'https://gliders.ioos.us/files/Manual-for-QC-of-Glider-Data_05_09_16.pdf' + qartod_var.dac_comment = 'Using IOOS QC collection of utilities, scripts and tests' + qartod_var.ioos_category = 'Quality' + qartod_var.valid_min = 1 + qartod_var.valid_max = 9 + qartod_var.qartod_config = str(testconfig) + qartod_var.qartod_status = test_message + str(testname) + qartod_var[:] = np.array(qartod_flag) + + # Add the qartod variable to the geophysical variable's ancillary_variables attribute + log.info("Add %s to %s ancillary_variables", qartodname, var_name) + xyz.append_ancillary_variable(var_data, qartod_var) # maybe unnecessary with calling context handler, but had some files # which had xattr set, but not updated with QC diff --git a/requirements.txt b/requirements.txt index c5426715..e40ada41 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,7 +24,9 @@ thredds_crawler>=1.5.4 pyyaml>=5.2 rq==1.3.0 rq-dashboard==0.3.4 --e git+https://github.com/ioos/qartod@12301185bb874fb8193e54822b9ea7da4a34bc9a#egg=ioos_qartod +# ioos_qartod is no longer in use +# -e git+https://github.com/ioos/qartod@12301185bb874fb8193e54822b9ea7da4a34bc9a#egg=ioos_qartod +-e git+https://github.com/ioos/ioos_qc@0b1406b52872e564885ccff5808315cf88ae28ee#egg=ioos_qc netCDF4>=1.4.0 cf-units>=2 # cftime needs to be frozen for utime. Some scripts could be rewritten to use diff --git a/scripts/build_erddap_catalog.py b/scripts/build_erddap_catalog.py index 126d0018..b25b9b94 100755 --- a/scripts/build_erddap_catalog.py +++ b/scripts/build_erddap_catalog.py @@ -185,7 +185,6 @@ def build_datasets_xml(data_root, catalog_root, force): for inactive_deployment_name in inactive_deployment_names: sync_deployment(inactive_deployment_name) - def variable_sort_function(element): """ Sorts by ERDDAP variable destinationName, or by @@ -592,7 +591,7 @@ def build_erddap_catalog_chunk(data_root, deployment): gts_ingest = getattr(ds, 'gts_ingest', 'true') # Set default value to true - qartod_vars_snippet = qc_var_snippets(required_qartod_vars, qartod_var_type) + qartod_vars_snippet = qartod_var_snippets(required_qartod_vars, qartod_var_type) vars_sorted = sorted(common_variables + qartod_vars_snippet + all_other_vars, @@ -603,12 +602,13 @@ def build_erddap_catalog_chunk(data_root, deployment): # Add any of the extra variables and attributes reload_template = "{}" if completed or delayed_mode: - reload_settings = reload_template.format(720) + reload_settings = reload_template.format(720) else: reload_settings = reload_template.format(10) + try: tree = etree.fromstring(f""" - +