diff --git a/compliance_checker/cf/cf_1_7.py b/compliance_checker/cf/cf_1_7.py index 7e6cb03b..8a975642 100644 --- a/compliance_checker/cf/cf_1_7.py +++ b/compliance_checker/cf/cf_1_7.py @@ -6,6 +6,7 @@ import numpy as np import pyproj +import regex from compliance_checker import cfutil from compliance_checker.base import BaseCheck, Result, TestCtx from compliance_checker.cf.appendix_d import dimless_vertical_coordinates_1_7 @@ -231,9 +232,11 @@ def check_cell_boundaries(self, ds): # Note that test does not check monotonicity ret_val = [] reasoning = [] + for variable_name, boundary_variable_name in cfutil.get_cell_boundary_map( ds, ).items(): + variable = ds.variables[variable_name] valid = True reasoning = [] @@ -507,7 +510,7 @@ def _check_gmattr_existence_condition_geoid_name_geoptl_datum_name(self, var): def _check_gmattr_existence_condition_ell_pmerid_hdatum(self, var): """ If one of reference_ellipsoid_name, prime_meridian_name, or - horizontal_datum_name are defined as grid_mapping attributes, + horizontal_datum_name, geographic_crs_name are defined as grid_mapping attributes, they must all be defined. :param netCDF4.Variable var @@ -523,18 +526,24 @@ def _check_gmattr_existence_condition_ell_pmerid_hdatum(self, var): _ncattrs = set(var.ncattrs()) if any( - x in _ncattrs - for x in [ - "reference_ellipsoid_name", - "prime_meridian_name", - "horizontal_datum_name", + [ + x in _ncattrs + for x in [ + "reference_ellipsoid_name", + "prime_meridian_name", + "horizontal_datum_name", + "geographic_crs_name", + ] ] ) and ( - not { - "reference_ellipsoid_name", - "prime_meridian_name", - "horizontal_datum_name", - }.issubset(_ncattrs) + not set( + [ + "reference_ellipsoid_name", + "prime_meridian_name", + "horizontal_datum_name", + "geographic_crs_name", + ] + ).issubset(_ncattrs) ): return (False, msg) @@ -746,20 +755,106 @@ def _evaluate_towgs84(self, val): return (True, msg) def check_grid_mapping(self, ds): - prev_return = super().check_grid_mapping(ds) - grid_mapping_variables = cfutil.get_grid_mapping_variables(ds) - for var_name in sorted(grid_mapping_variables): + super(CF1_7Check, self).check_grid_mapping.__doc__ + prev_return = super(CF1_7Check, self).check_grid_mapping(ds) + test_ctx = self.get_test_ctx( + BaseCheck.HIGH, self.section_titles["5.6"]) + # Get the grid_mapping variable list for Requirements 5.6 [.../9] + grid_mapping_variable_list = [] + + for item in ds.get_variables_by_attributes(grid_mapping=lambda x: x is not None): + # [1/9] The type of the grid_mapping attribute is a string whose value + # is of the following form, in which brackets indicate optional + # text: grid_mapping_name[: coord_var [coord_var ...]][grid_mapping_name: [coord_var ... ]] + if not isinstance(item.grid_mapping, str): + test_ctx.messages.append("The grid_mapping attribute {} " + "is not a string".format(item.grid_mapping)) + test_ctx.out_of += 1 + else: + # [2/9] Note that in its simplest form the attribute comprises just + # a grid_mapping_name as a single word. + grid_mapping_variable_list.append(regex.findall(r"(\w+)",item.grid_mapping)) + + # + # Get the CF grid_mapping variables list (== only if the variable exist in the Variables list) + grid_mapping_variables = cfutil.get_grid_mapping_variables(ds) + grid_mapping_variables_coordinates = cfutil.get_grid_mapping_variables_coordinates(ds) + + #Check if all grid_mapping variables are listed in the file as expected + # [3/9] Each grid_mapping_name is the name of a variable (known as a + # grid mapping variable), which must exist in the file. + if len(grid_mapping_variable_list) != len(grid_mapping_variables) + len(grid_mapping_variables_coordinates): + test_ctx.messages.append("Not all of the grid_mapping variables exist in the file") + test_ctx.out_of += 1 + + test_ctx.score += 1 + + # Get the CF coordinate variables or auxiliary coordinate variables + auxiliary_coordinate_variables = cfutil.get_auxiliary_coordinate_variables(ds) + coordinate_variables = cfutil.get_coordinate_variables(ds) + # Check the grid_mapping variable coordinates if listed. + # [4/9] Each coord_var is the name of a coordinate variable or + # auxiliary coordinate variable, which must exist in the file. If it + # is an auxiliary coordinate variable, it must be listed in the + # coordinates attribute. + for coord_item in grid_mapping_variables_coordinates: + if coord_item not in coordinate_variables and coord_item not in auxiliary_coordinate_variables: + test_ctx.messages.append("{} is not listed as coordinate or auxiliary coord Variable.".format(coord_item)) + test_ctx.out_of += 1 + + test_ctx.score += 1 + + # Check the gid_mapping variable attributes + for var_name in sorted(grid_mapping_variables): var = ds.variables[var_name] test_ctx = self.get_test_ctx( - BaseCheck.HIGH, - self.section_titles["5.6"], - var.name, - ) - + BaseCheck.HIGH, self.section_titles["5.6"], var.name) + + # Recommendation: The grid mapping variables should have 0 dimensions. + if var.dimensions: + test_ctx.messages.append("The grid mapping variable {} should have 0 dimension".format(var.name)) + test_ctx.out_of += 1 + + test_ctx.score += 1 + + # [5/9] The grid mapping variables must have the grid_mapping_name attribute. + # The legal values for the grid_mapping_name attribute are contained in Appendix F. + if "grid_mapping_name" not in var.ncattrs(): + test_ctx.messages.append("The grid mapping variable " + "{} doesn't have the grid_mapping_name attribute".format(var.name)) + test_ctx.out_of += 1 + else: + if var.grid_mapping_name not in grid_mapping_dict17: + test_ctx.messages.append("The legal values for the grid_mapping_name attribute " + "{} is not contained in Appendix F".format(var.grid_mapping_name)) + test_ctx.out_of += 1 + + test_ctx.score += 1 + + # [6/9] The data types of the attributes of the + # grid mapping variable must be specified in Table 1 of Appendix F. + type_map = {"S": "str" , "N": "float64"} + for attrs_name in var.ncattrs(): + if attrs_name in grid_mapping_attr_types17: + attrs_type = np.dtype(type(getattr(var,attrs_name))) + attrs_type17 = type_map[grid_mapping_attr_types17[attrs_name]['type']] + if attrs_type17 != attrs_type: + test_ctx.messages.append("The data types of the attributes " + "{} and {} do not match".format(attrs_type17, attrs_type)) + test_ctx.out_of += 1 + else: + test_ctx.messages.append("The attribute {} " + "does not exist in Table 1 of Appendix F".format(attrs_name)) + test_ctx.out_of += 1 + test_ctx.score += 1 + # TODO: check cases where crs_wkt provides part of a necessary # grid_mapping attribute, or where a grid_mapping attribute # overrides what has been provided in crs_wkt. # attempt to parse crs_wkt if it is present + + # [7/9] If present, the crs_wkt attribute must be a text string conforming to + # the CRS WKT specification described in reference [OGC_CTS]. if "crs_wkt" in var.ncattrs(): crs_wkt = var.crs_wkt if not isinstance(crs_wkt, str): @@ -772,33 +867,38 @@ def check_grid_mapping(self, ds): test_ctx.messages.append( f"Cannot parse crs_wkt attribute to CRS using Proj4. Proj4 error: {str(crs_error)}", ) - else: - test_ctx.score += 1 - test_ctx.out_of += 1 - + test_ctx.out_of += 1 + test_ctx.score += 1 + # existence_conditions + # Both geoid_name and geopotential_datum_name cannot exist exist_cond_1 = ( self._check_gmattr_existence_condition_geoid_name_geoptl_datum_name(var) ) - test_ctx.assert_true(exist_cond_1[0], exist_cond_1[1]) + + test_ctx.out_of += 1 + if not exist_cond_1[0]: + test_ctx.messages.append(exist_cond_1[1]) + else: + test_ctx.score += 1 + + + # [8/9] reference_ellipsoid_name, prime_meridian_name, horizontal_datum_name and + # geographic_crs_name must be all defined if any one is defined. exist_cond_2 = self._check_gmattr_existence_condition_ell_pmerid_hdatum(var) - test_ctx.assert_true(exist_cond_2[0], exist_cond_2[1]) + + test_ctx.out_of += 1 + if not exist_cond_2[0]: + test_ctx.messages.append(exist_cond_2[1]) + else: + test_ctx.score += 1 # handle vertical datum related grid_mapping attributes vert_datum_attrs = {} possible_vert_datum_attrs = {"geoid_name", "geopotential_datum_name"} - vert_datum_attrs = possible_vert_datum_attrs.intersection(var.ncattrs()) - len_vdatum_name_attrs = len(vert_datum_attrs) - # check that geoid_name and geopotential_datum_name are not both - # present in the grid_mapping variable - if len_vdatum_name_attrs == 2: - test_ctx.out_of += 1 - test_ctx.messages.append( - "Cannot have both 'geoid_name' and " - "'geopotential_datum_name' attributes in " - f"grid mapping variable '{var.name}'", - ) - elif len_vdatum_name_attrs == 1: + vert_datum_attrs = possible_vert_datum_attrs.intersection(var.ncattrs()) + + if exist_cond_1[0] and vert_datum_attrs: # should be one or zero attrs proj_db_path = os.path.join(pyproj.datadir.get_data_dir(), "proj.db") try: @@ -823,8 +923,22 @@ def check_grid_mapping(self, ds): f"Proj4 SQLite database at {proj_db_path}: {str(e)}", stacklevel=2, ) - prev_return[var.name] = test_ctx.to_result() + # [9/9] Check If projected_crs_name is defined then geographic_crs_name must be also. + test_possible = {"projected_crs_name","geographic_crs_name"} + if "projected_crs_name" in var.ncattrs(): + test_attr = test_possible.intersection(var.ncattrs()) + len_test_attr = len(test_attr) + + if len_test_attr != 2: + test_ctx.messages.append("We do not have both projected_crs_name " + "and geographic_crs_name defined") + test_ctx.out_of += 1 + + test_ctx.score += 1 + + prev_return[var.name] = test_ctx.to_result() + return prev_return def check_standard_name_deprecated_modifiers(self, ds): diff --git a/compliance_checker/cfutil.py b/compliance_checker/cfutil.py index 8bcce331..48d9ebdc 100644 --- a/compliance_checker/cfutil.py +++ b/compliance_checker/cfutil.py @@ -355,6 +355,7 @@ def get_cell_boundary_map(nc): for variable in nc.get_variables_by_attributes(bounds=lambda x: x is not None): if variable.bounds in nc.variables: boundary_map[variable.name] = variable.bounds + return boundary_map @@ -765,18 +766,52 @@ def get_flag_variables(nc): return flag_variables -def get_grid_mapping_variables(nc): +def get_grid_mapping_variables(ds): """ Returns a list of grid mapping variables :param netCDF4.Dataset nc: An open netCDF4 Dataset """ grid_mapping_variables = set() - for ncvar in nc.get_variables_by_attributes(grid_mapping=lambda x: x is not None): - if ncvar.grid_mapping in nc.variables: + for ncvar in ds.get_variables_by_attributes(grid_mapping=lambda x: x is not None and isinstance(x, str)): + + if ' ' in ncvar.grid_mapping: + grid_mapping_list = [ + x.split(':')[0] for x in ncvar.grid_mapping.split(' ') if ':' in x + ] + if grid_mapping_list: + for item in grid_mapping_list: + if item in ds.variables: + grid_mapping_variables.add(item) + else: + if ncvar.grid_mapping in ds.variables: + grid_mapping_variables.add(ncvar.grid_mapping) + + for ncvar in ds.get_variables_by_attributes(grid_mapping=lambda x: x is not None): + if ncvar.grid_mapping in ds.variables: grid_mapping_variables.add(ncvar.grid_mapping) return grid_mapping_variables +def get_grid_mapping_variables_coordinates(ds): + """ + Returns a list of grid mapping variables' coordinates + + :param netCDF4.Dataset ds: An open netCDF4 Dataset + """ + + grid_mapping_variables_coordinates = set() + for ncvar in ds.get_variables_by_attributes(grid_mapping=lambda x: x is not None and isinstance(x, str)): + + if ' ' in ncvar.grid_mapping: + coord_mapping_list = [ + x.split(':')[0] for x in ncvar.grid_mapping.split(' ') if not ':' in x + ] + if coord_mapping_list: + for coord_item in coord_mapping_list: + if coord_item in ds.variables: + grid_mapping_variables_coordinates.add(coord_item) + + return grid_mapping_variables_coordinates def get_axis_map(nc, variable): """ diff --git a/compliance_checker/tests/test_cf.py b/compliance_checker/tests/test_cf.py index 5e13bb7b..edb43729 100644 --- a/compliance_checker/tests/test_cf.py +++ b/compliance_checker/tests/test_cf.py @@ -1497,17 +1497,6 @@ def test_check_reduced_horizontal_grid(self): assert len([r for r in results if r.value[0] < r.value[1]]) == 1 assert all(r.name == "§5.3 Reduced Horizontal Grid" for r in results) - def test_check_grid_mapping(self): - dataset = self.load_dataset(STATIC_FILES["mapping"]) - results = self.cf.check_grid_mapping(dataset) - - assert len(results) == 6 - assert len([r.value for r in results.values() if r.value[0] < r.value[1]]) == 0 - expected_name = ( - "§5.6 Horizontal Coordinate Reference Systems, Grid Mappings, Projections" - ) - assert all(r.name == expected_name for r in results.values()) - def test_is_geophysical(self): # check whether string type variable, which are not `cf_role`, are # properly processed @@ -1674,15 +1663,6 @@ def test_compress_packed(self): self.assertFalse(results[0].value) self.assertFalse(results[1].value) - # def test_check_all_features_are_same_type(self): - # dataset = self.load_dataset(STATIC_FILES["rutgers"]) - # result = self.cf.check_all_features_are_same_type(dataset) - # assert result - - # dataset = self.load_dataset(STATIC_FILES["featureType"]) - # result = self.cf.check_all_features_are_same_type(dataset) - # assert result - def test_featureType_is_case_insensitive(self): """ Tests that the featureType attribute is case insensitive @@ -2223,6 +2203,82 @@ def test_cell_measures(self): message = "Cell measure variable box_area referred to by PS is not present in dataset or external variables" assert message in messages + def test_check_grid_mapping(self): + # create cells with grid_apping variable + dataset = MockTimeSeries() + + # add variable to the file as auxilary coordinate + # NOTE: Dimension variable It is a one-dimensional variable with the same name as its dimension [e.g., + # time(time) ], and it is defined as a numeric data type with values that are + # ordered monotonically. Missing values are not allowed in coordinate variables. + # + dataset.createDimension("y", 500) + dataset.createDimension("x", 500) + dataset.createVariable("y", "d", ("y",)) + dataset.createVariable("x", "d", ("x",)) + y = dataset.variables["y"] + y.axis = "Y" + y.standard_name = "projection_y_coordinate" + x = dataset.variables["x"] + x.axis = "X" + x.standard_name = "projection_x_coordinate" + + # add a variable with "grid_mapping" as an attribute + dataset.createVariable("temp", "d", ("time", "x", "y")) + dataset.createVariable("crsOSGB", "i") + dataset.createVariable("crsWGS84", "i") + + temp = dataset.variables["temp"] + temp.standard_name = "air_temperature" + temp.units = "K" + temp.coordinates = "lat lon" + temp.grid_mapping = "crsOSGB: z y crsWGS84: lat lon" + + # create grid_mapping crsOSGB ; + crsOSGB = dataset.variables["crsOSGB"] + crsOSGB.grid_mapping_name = "transverse_mercator" + crsOSGB.semi_major_axis = "6377563.396" + crsOSGB.inverse_flattening = 299.3249646 + crsOSGB.longitude_of_prime_meridian = 0.0 + crsOSGB.latitude_of_projection_origin = 49.0 + crsOSGB.longitude_of_central_meridian = -2.0 + crsOSGB.scale_factor_at_central_meridian = "0.9996012717" + crsOSGB.false_easting = 400000.0 + crsOSGB.false_northing = -100000.0 + crsOSGB.unit = "metre" + + # create grid_mapping crsWGS84 + crsWGS84 = dataset.variables["crsWGS84"] + crsWGS84.grid_mapping_name = "latitude_longitude" + crsWGS84.longitude_of_prime_meridian = 0.0 + crsWGS84.semi_major_axis = 6378137.0 + crsWGS84.inverse_flattening = 298.257223563 + crsWGS84.reference_ellipsoid_name = "testing" + crsWGS84.prime_meridian_name = "testing" + crsWGS84.horizontal_datum_name = "testing" + crsWGS84.geographic_crs_name = "testing" + crsWGS84.projected_crs_name = "testing" + + results = self.cf.check_grid_mapping(dataset) + assert len(results) == 3 + assert len([r.value for r in results.values() if r.value[0] < r.value[1]]) == 1 + expected_name = ( + "§5.6 Horizontal Coordinate Reference Systems, Grid Mappings, Projections" + ) + assert all(r.name == expected_name for r in results.values()) + + # test a different datastet + dataset = self.load_dataset(STATIC_FILES["mapping"]) + # print(dataset.variables) + results = self.cf.check_grid_mapping(dataset) + assert len(results) == 6 + assert len([r.value for r in results.values() if r.value[0] < r.value[1]]) == 1 + expected_name = ( + "§5.6 Horizontal Coordinate Reference Systems, Grid Mappings, Projections" + ) + assert all(r.name == expected_name for r in results.values()) + + def test_variable_features(self): with MockTimeSeries() as dataset: # I hope to never see an attribute value like this, but since