From 74f356bdaa34a3f863cb86019a409f6acf67eb49 Mon Sep 17 00:00:00 2001 From: Rob Cermak Date: Mon, 18 Mar 2024 23:26:03 -0700 Subject: [PATCH 1/2] pytest fixes * base.py: Sometimes cfutil is None * protocols/netcdf.py: - Return False right away if content_type is None - Support extended type strings: "application/x-netcdf;ver=4" * suite.py: If text for xml fails, look for netcdf one more time * Convert suite.run() calls to suite.run_all() calls * Update hyrax url * Update thredds url * Add some development notes for troubleshooting tests with pytest using the vcr feature. --- compliance_checker/base.py | 5 +++-- compliance_checker/protocols/netcdf.py | 6 +++++- compliance_checker/suite.py | 5 +++++ compliance_checker/tests/test_cf.py | 3 ++- .../tests/test_cf_integration.py | 12 +++++++---- compliance_checker/tests/test_protocols.py | 11 ++++++++-- compliance_checker/tests/test_suite.py | 21 ++++++++++++------- docs/source/development.md | 11 ++++++++++ docs/source/faq.md | 6 ++++-- docs/source/index.rst | 1 + 10 files changed, 62 insertions(+), 19 deletions(-) create mode 100644 docs/source/development.md diff --git a/compliance_checker/base.py b/compliance_checker/base.py index c2674e858..dbad85183 100644 --- a/compliance_checker/base.py +++ b/compliance_checker/base.py @@ -192,8 +192,9 @@ def __del__(self): inadvertently mutated by other functions. 
""" - cfutil.get_geophysical_variables.cache_clear() - cfutil.get_time_variables.cache_clear() + if cfutil is not None: + cfutil.get_geophysical_variables.cache_clear() + cfutil.get_time_variables.cache_clear() class BaseNCCheck: diff --git a/compliance_checker/protocols/netcdf.py b/compliance_checker/protocols/netcdf.py index 415a94ecf..9bcfa1dbc 100644 --- a/compliance_checker/protocols/netcdf.py +++ b/compliance_checker/protocols/netcdf.py @@ -90,6 +90,10 @@ def is_remote_netcdf(ds_str): else: content_type = head_req.headers.get("content-type") + if content_type is None: + return False + # if the Content-Type header returned was "application/x-netcdf", # or a netCDF file (not OPeNDAP) we can open this into a Dataset - return content_type == "application/x-netcdf" + # Add support for application/x-netcdf;ver=4 + return content_type.split(";")[0] == "application/x-netcdf" diff --git a/compliance_checker/suite.py b/compliance_checker/suite.py index d0e84769e..733328dff 100644 --- a/compliance_checker/suite.py +++ b/compliance_checker/suite.py @@ -871,6 +871,11 @@ def load_remote_dataset(self, ds_str): content_type = response.headers.get("content-type") if content_type.split(";")[0] == "text/xml": return self.process_doc(response.content) + elif content_type.split(";")[0] == "application/x-netcdf": + return Dataset( + urlparse(response.url).path, + memory=response.content, + ) else: raise ValueError( f"Unknown service with content-type: {content_type}", diff --git a/compliance_checker/tests/test_cf.py b/compliance_checker/tests/test_cf.py index 360683be3..6f79053d8 100644 --- a/compliance_checker/tests/test_cf.py +++ b/compliance_checker/tests/test_cf.py @@ -1794,7 +1794,8 @@ def test_64bit(self): dataset = self.load_dataset(STATIC_FILES["ints64"]) suite = CheckSuite() suite.checkers = {"cf": CF1_6Check} - suite.run(dataset, "cf") + #suite.run(dataset, "cf") + suite.run_all(dataset, ["cf"], skip_checks=["cf"]) def test_variable_feature_check(self): # non-compliant 
dataset -- 1/1 fail diff --git a/compliance_checker/tests/test_cf_integration.py b/compliance_checker/tests/test_cf_integration.py index 5d162672c..978c90474 100644 --- a/compliance_checker/tests/test_cf_integration.py +++ b/compliance_checker/tests/test_cf_integration.py @@ -247,7 +247,8 @@ def get_results(self, check_results, checksuite): ], # must be specified to load this param at runtime, instead of at collection ) def test_cf_integration(self, loaded_dataset, expected_messages, cs): - check_results = cs.run(loaded_dataset, [], "cf") + #check_results = cs.run(loaded_dataset, [], "cf") + check_results = cs.run_all(loaded_dataset, ["cf"], skip_checks=[]) scored, out_of, messages = self.get_results(check_results, cs) assert scored < out_of @@ -272,14 +273,16 @@ def test_cf_integration(self, loaded_dataset, expected_messages, cs): indirect=["loaded_dataset"], ) def test_no_incorrect_errors(self, cs, loaded_dataset, wrong_message): - check_results = cs.run(loaded_dataset, [], True, "cf") + #check_results = cs.run(loaded_dataset, [], True, "cf") + check_results = cs.run_all(loaded_dataset, ["cf"], skip_checks=[]) messages = self.get_results(check_results, cs)[-1] assert wrong_message not in "".join(messages) @pytest.mark.parametrize("loaded_dataset", ["fvcom"], indirect=True) def test_fvcom(self, cs, loaded_dataset): - check_results = cs.run(loaded_dataset, [], True, "cf") + #check_results = cs.run(loaded_dataset, [], True, "cf") + check_results = cs.run_all(loaded_dataset, ["cf"], skip_checks=[]) scored, out_of, messages = self.get_results(check_results, cs) assert scored < out_of @@ -307,6 +310,7 @@ def test_ncei_templates(self, cs, loaded_dataset): Tests some of the NCEI NetCDF templates, which usually should get a perfect score. 
""" - check_results = cs.run(loaded_dataset, [], "cf") + #check_results = cs.run(loaded_dataset, [], "cf") + check_results = cs.run_all(loaded_dataset, ["cf"], skip_checks=[]) scored, out_of, messages = self.get_results(check_results, cs) assert scored < out_of diff --git a/compliance_checker/tests/test_protocols.py b/compliance_checker/tests/test_protocols.py index f43bce686..faa68d419 100644 --- a/compliance_checker/tests/test_protocols.py +++ b/compliance_checker/tests/test_protocols.py @@ -38,7 +38,10 @@ def test_hyrax(): """ Tests that a connection can be made to Hyrax """ - url = "http://test.opendap.org:8080/opendap/ioos/mday_joinExist.ncml" + # Returns: error 405 + # url = "http://test.opendap.org:8080/opendap/ioos/mday_joinExist.ncml" + # More direct file + url = "http://test.opendap.org:8080/opendap/ioos/mday_joinExist.ncml.dap.nc4" cs = CheckSuite() ds = cs.load_dataset(url) assert ds is not None @@ -48,13 +51,17 @@ def test_thredds(): """ Tests that a connection can be made to a remote THREDDS endpoint """ - url = "http://thredds.ucar.edu/thredds/dodsC/grib/NCEP/GFS/Global_0p25deg_ana/TP" + # Returns: error 400 + #url = "http://thredds.ucar.edu/thredds/dodsC/grib/NCEP/GFS/Global_0p25deg_ana/TP" + # Use a smaller dataset + url = "https://thredds.ucar.edu/thredds/ncss/grid/grib/NCEP/GFS/Global_0p25deg_ana/TP?var=Temperature_altitude_above_msl&accept=netcdf3" cs = CheckSuite() ds = cs.load_dataset(url) assert ds is not None +@pytest.mark.skip(reason="The thredds endpoint is no longer serving SOS.") def test_sos(): """ Tests that a connection can be made to an SOS endpoint diff --git a/compliance_checker/tests/test_suite.py b/compliance_checker/tests/test_suite.py index 54f49a954..ad6d525cd 100644 --- a/compliance_checker/tests/test_suite.py +++ b/compliance_checker/tests/test_suite.py @@ -63,16 +63,19 @@ def test_suite(self): # BWA: what's the purpose of this test? Just to see if the suite # runs without errors? 
ds = self.cs.load_dataset(static_files["2dim"]) - self.cs.run(ds, [], "acdd") + #self.cs.run(ds, [], "acdd") + self.cs.run_all(ds, ["acdd"], skip_checks=[]) def test_suite_pathlib(self): path_obj = Path(static_files["2dim"]) ds = self.cs.load_dataset(path_obj) - self.cs.run(ds, [], "acdd") + #self.cs.run(ds, [], "acdd") + self.cs.run_all(ds, ["acdd"], skip_checks=[]) def test_unicode_formatting(self): ds = self.cs.load_dataset(static_files["bad_region"]) - score_groups = self.cs.run(ds, [], "cf") + #score_groups = self.cs.run(ds, [], "cf") + score_groups = self.cs.run_all(ds, ["cf"], skip_checks=[]) limit = 2 for checker, rpair in score_groups.items(): @@ -163,7 +166,8 @@ def test_group_func(self): # This is checking for issue #183, where group_func results in # IndexError: list index out of range ds = self.cs.load_dataset(static_files["bad_data_type"]) - score_groups = self.cs.run(ds, [], "cf") + #score_groups = self.cs.run(ds, [], "cf") + score_groups = self.cs.run_all(ds, ["cf"], skip_checks=[]) limit = 2 for checker, rpair in score_groups.items(): @@ -198,7 +202,8 @@ def test_cdl_file(self): # Testing whether you can run compliance checker on a .cdl file # Load the cdl file ds = self.cs.load_dataset(static_files["test_cdl"]) - vals = self.cs.run(ds, [], "cf") + #vals = self.cs.run(ds, [], "cf") + vals = self.cs.run_all(ds, ["cf"], skip_checks=[]) limit = 2 for checker, rpair in vals.items(): @@ -221,7 +226,8 @@ def test_cdl_file(self): # Ok now load the nc file that it came from ds = self.cs.load_dataset(static_files["test_cdl_nc"]) - vals = self.cs.run(ds, [], "cf") + #vals = self.cs.run(ds, [], "cf") + vals = self.cs.run_all(ds, ["cf"], skip_checks=[]) limit = 2 for checker, rpair in vals.items(): @@ -259,7 +265,8 @@ def test_standard_output_score_header(self): of potential issues, rather than the weighted score """ ds = self.cs.load_dataset(static_files["bad_region"]) - score_groups = self.cs.run(ds, [], "cf") + #score_groups = self.cs.run(ds, [], "cf") + 
score_groups = self.cs.run_all(ds, ["cf"], skip_checks=[]) limit = 2 groups, errors = score_groups["cf"] score_list, all_passed, out_of = self.cs.standard_output( diff --git a/docs/source/development.md b/docs/source/development.md new file mode 100644 index 000000000..67b35b67e --- /dev/null +++ b/docs/source/development.md @@ -0,0 +1,11 @@ +# Developer Notes + +## pytest + +When running the Python test suite, there may be test errors. Certain +tests `record` responses to remote queries for information. If tests +fail, they will appear to continue to fail as the queries are cached. + +To perform tests using fresh queries from remote services, use +`pytest --disable-vcr`. In certain cases, clearing the cache is +also advised; use `pytest --cache-clear`. diff --git a/docs/source/faq.md b/docs/source/faq.md index f461ce886..d95859bb1 100644 --- a/docs/source/faq.md +++ b/docs/source/faq.md @@ -45,6 +45,8 @@ The Compliance Checker is completely open-source and available on [GitHub](https ## Disclaimer -The objective of the IOOS Compliance Checker is to check your file against our interpretation of select dataset metadata standards to use as a guideline in generating compliant files. -The compliance checker should not be considered the authoritative source on whether your file is 100% "compliant". +The objective of the IOOS Compliance Checker is to check your file against +our interpretation of select dataset metadata standards to use as a +guideline in generating compliant files. The compliance checker should +not be considered the authoritative source on whether your file is 100% "compliant". Instead, we recommend that users use the results as a guide to work towards compliance. diff --git a/docs/source/index.rst b/docs/source/index.rst index 0f67a1527..bcf62b59f 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -10,6 +10,7 @@ Python tool to check your datasets against compliance standards. 
quickintro compliance_checker_api faq + development Indices and tables ================== From b06e777e4272ee0a657f8bd14bd5a61173569c83 Mon Sep 17 00:00:00 2001 From: Rob Cermak Date: Mon, 18 Mar 2024 23:49:36 -0700 Subject: [PATCH 2/2] Linter updates --- compliance_checker/tests/test_cf.py | 2 +- compliance_checker/tests/test_cf_integration.py | 8 ++++---- compliance_checker/tests/test_protocols.py | 2 +- compliance_checker/tests/test_suite.py | 14 +++++++------- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/compliance_checker/tests/test_cf.py b/compliance_checker/tests/test_cf.py index 6f79053d8..24a58c781 100644 --- a/compliance_checker/tests/test_cf.py +++ b/compliance_checker/tests/test_cf.py @@ -1794,7 +1794,7 @@ def test_64bit(self): dataset = self.load_dataset(STATIC_FILES["ints64"]) suite = CheckSuite() suite.checkers = {"cf": CF1_6Check} - #suite.run(dataset, "cf") + # suite.run(dataset, "cf") suite.run_all(dataset, ["cf"], skip_checks=["cf"]) def test_variable_feature_check(self): diff --git a/compliance_checker/tests/test_cf_integration.py b/compliance_checker/tests/test_cf_integration.py index 978c90474..06e8ddf15 100644 --- a/compliance_checker/tests/test_cf_integration.py +++ b/compliance_checker/tests/test_cf_integration.py @@ -247,7 +247,7 @@ def get_results(self, check_results, checksuite): ], # must be specified to load this param at runtime, instead of at collection ) def test_cf_integration(self, loaded_dataset, expected_messages, cs): - #check_results = cs.run(loaded_dataset, [], "cf") + # check_results = cs.run(loaded_dataset, [], "cf") check_results = cs.run_all(loaded_dataset, ["cf"], skip_checks=[]) scored, out_of, messages = self.get_results(check_results, cs) @@ -273,7 +273,7 @@ def test_cf_integration(self, loaded_dataset, expected_messages, cs): indirect=["loaded_dataset"], ) def test_no_incorrect_errors(self, cs, loaded_dataset, wrong_message): - #check_results = cs.run(loaded_dataset, [], True, "cf") + # 
check_results = cs.run(loaded_dataset, [], True, "cf") check_results = cs.run_all(loaded_dataset, ["cf"], skip_checks=[]) messages = self.get_results(check_results, cs)[-1] @@ -281,7 +281,7 @@ def test_no_incorrect_errors(self, cs, loaded_dataset, wrong_message): @pytest.mark.parametrize("loaded_dataset", ["fvcom"], indirect=True) def test_fvcom(self, cs, loaded_dataset): - #check_results = cs.run(loaded_dataset, [], True, "cf") + # check_results = cs.run(loaded_dataset, [], True, "cf") check_results = cs.run_all(loaded_dataset, ["cf"], skip_checks=[]) scored, out_of, messages = self.get_results(check_results, cs) assert scored < out_of @@ -310,7 +310,7 @@ def test_ncei_templates(self, cs, loaded_dataset): Tests some of the NCEI NetCDF templates, which usually should get a perfect score. """ - #check_results = cs.run(loaded_dataset, [], "cf") + # check_results = cs.run(loaded_dataset, [], "cf") check_results = cs.run_all(loaded_dataset, ["cf"], skip_checks=[]) scored, out_of, messages = self.get_results(check_results, cs) assert scored < out_of diff --git a/compliance_checker/tests/test_protocols.py b/compliance_checker/tests/test_protocols.py index faa68d419..509b055d9 100644 --- a/compliance_checker/tests/test_protocols.py +++ b/compliance_checker/tests/test_protocols.py @@ -52,7 +52,7 @@ def test_thredds(): Tests that a connection can be made to a remote THREDDS endpoint """ # Returns: error 400 - #url = "http://thredds.ucar.edu/thredds/dodsC/grib/NCEP/GFS/Global_0p25deg_ana/TP" + # url = "http://thredds.ucar.edu/thredds/dodsC/grib/NCEP/GFS/Global_0p25deg_ana/TP" # Use a smaller dataset url = "https://thredds.ucar.edu/thredds/ncss/grid/grib/NCEP/GFS/Global_0p25deg_ana/TP?var=Temperature_altitude_above_msl&accept=netcdf3" diff --git a/compliance_checker/tests/test_suite.py b/compliance_checker/tests/test_suite.py index ad6d525cd..f520f3993 100644 --- a/compliance_checker/tests/test_suite.py +++ b/compliance_checker/tests/test_suite.py @@ -63,18 +63,18 @@ def 
test_suite(self): # BWA: what's the purpose of this test? Just to see if the suite # runs without errors? ds = self.cs.load_dataset(static_files["2dim"]) - #self.cs.run(ds, [], "acdd") + # self.cs.run(ds, [], "acdd") self.cs.run_all(ds, ["acdd"], skip_checks=[]) def test_suite_pathlib(self): path_obj = Path(static_files["2dim"]) ds = self.cs.load_dataset(path_obj) - #self.cs.run(ds, [], "acdd") + # self.cs.run(ds, [], "acdd") self.cs.run_all(ds, ["acdd"], skip_checks=[]) def test_unicode_formatting(self): ds = self.cs.load_dataset(static_files["bad_region"]) - #score_groups = self.cs.run(ds, [], "cf") + # score_groups = self.cs.run(ds, [], "cf") score_groups = self.cs.run_all(ds, ["cf"], skip_checks=[]) limit = 2 @@ -166,7 +166,7 @@ def test_group_func(self): # This is checking for issue #183, where group_func results in # IndexError: list index out of range ds = self.cs.load_dataset(static_files["bad_data_type"]) - #score_groups = self.cs.run(ds, [], "cf") + # score_groups = self.cs.run(ds, [], "cf") score_groups = self.cs.run_all(ds, ["cf"], skip_checks=[]) limit = 2 @@ -202,7 +202,7 @@ def test_cdl_file(self): # Testing whether you can run compliance checker on a .cdl file # Load the cdl file ds = self.cs.load_dataset(static_files["test_cdl"]) - #vals = self.cs.run(ds, [], "cf") + # vals = self.cs.run(ds, [], "cf") vals = self.cs.run_all(ds, ["cf"], skip_checks=[]) limit = 2 @@ -226,7 +226,7 @@ def test_cdl_file(self): # Ok now load the nc file that it came from ds = self.cs.load_dataset(static_files["test_cdl_nc"]) - #vals = self.cs.run(ds, [], "cf") + # vals = self.cs.run(ds, [], "cf") vals = self.cs.run_all(ds, ["cf"], skip_checks=[]) limit = 2 @@ -265,7 +265,7 @@ def test_standard_output_score_header(self): of potential issues, rather than the weighted score """ ds = self.cs.load_dataset(static_files["bad_region"]) - #score_groups = self.cs.run(ds, [], "cf") + # score_groups = self.cs.run(ds, [], "cf") score_groups = self.cs.run_all(ds, ["cf"], 
skip_checks=[]) limit = 2 groups, errors = score_groups["cf"]