From 62152d0c8aa7e077da8e48d60c6f141f59a03129 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Thu, 21 Mar 2019 16:35:18 -0400 Subject: [PATCH 01/32] Read and save `grid_mapping` and `bounds` as coordinates. This does not cooperate with slicing/pulling out individual variables. `grid_mapping` should only be associated with variables that have horizontal dimensions or coordinates. `bounds` should stay associated despite having more dimensions. An alternate approach to dealing with bounds is to use a `pandas.IntervalIndex` http://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.IntervalIndex.html#pandas.IntervalIndex and use where the coordinate is within its cell to determine on which side the intervals are closed (`x_dim == x_dim_bnds[:, 0]` corresponds to "left", `x_dim == x_dim_bnds[:, 1]` corresponds to "right", and anything else is "neither"). This would stay through slicing and might already be used for `.groupby_bins()`, but would not generalize to boundaries of multidimensional coordinates unless someone implements a multidimensional generalization of `pd.IntervalIndex`. I do not yet know where to put tests for this. This should probably also be mentioned in the documentation. --- xarray/conventions.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/xarray/conventions.py b/xarray/conventions.py index 5f41639e890..9a0d6377345 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -408,6 +408,18 @@ def stackable(dim): new_vars[k].encoding['coordinates'] = coord_str del var_attrs['coordinates'] coord_names.update(var_coord_names) + if 'bounds' in var_attrs: + bounds_str = var_attrs['bounds'] + var_bounds_names = [bounds_str] + if all(k in variables for k in var_bounds_names): + new_vars[k].encoding['bounds'] = bounds_str + coord_names.update(var_bounds_names) + if 'grid_mapping' in var_attrs: + proj_str = var_attrs['grid_mapping'] + var_proj_names = proj_str.split() + if all(k in variables for k in var_proj_names): + new_vars[k].encoding['grid_mapping'] = proj_str + coord_names.update(var_proj_names) if decode_coords and 'coordinates' in attributes: attributes = OrderedDict(attributes) @@ -535,6 +547,7 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): global_coordinates = non_dim_coord_names.copy() variable_coordinates = defaultdict(set) + not_technically_coordinates = set() for coord_name in non_dim_coord_names: target_dims = variables[coord_name].dims for k, v in variables.items(): @@ -543,6 +556,12 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): variable_coordinates[k].add(coord_name) global_coordinates.discard(coord_name) + att_val = v.attrs.get + if ((att_val("bounds", None) == coord_name or + att_val("grid_mapping", None) == coord_name)): + not_technically_coordinates.add(coord_name) + global_coordinates.discard(coord_name) + variables = OrderedDict((k, v.copy(deep=False)) for k, v in variables.items()) @@ -553,6 +572,12 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): raise ValueError('cannot serialize coordinates because variable ' "%s already has an attribute 'coordinates'" % var_name) + + # Only add actual coordinates to coordinates + # Exceptions are created using CF mechanisms + # Non-CF datasets work as previously + for not_coord in not_technically_coordinates: + coord_names.discard(not_coord) attrs['coordinates'] = ' '.join(map(str, coord_names)) # These coordinates are not associated with any particular variables, so we From 2ae8a7ee09d3779bca8edbff6cf3699296962cd9 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Thu, 21 Mar 2019 18:18:47 -0400 Subject: [PATCH 02/32] Add tests for (de)serialization of `grid_mapping` and `bounds`. --- xarray/tests/test_backends.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index a20ba2df229..ea64f056289 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -714,6 +714,40 @@ def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn): actual.variables[k].dtype) assert_allclose(decoded, actual, decode_bytes=False) + def test_grid_mapping_and_bounds_are_coordinates(self): + original = Dataset( + dict(variable=(('latitude', 'longitude'), + [[0, 1], [2, 3]], + {'grid_mapping': 'latlon'})), + dict( + latitude=('latitude', + [0, 1], + {'bounds': 'latitude_bnds', + 'grid_mapping': 'latlon'}), + longitude=('longitude', + [0, 1], + {'bounds': 'longitude_bnds', + 'grid_mapping': 'latlon'}), + latlon=((), -1, {'grid_mapping_name': 'latitude_longitude'}), + latitude_bnds=(('latitude', 'bnds2'), + [[0, 1], [1, 2]]), + longitude_bnds=(('longitude', 'bnds2'), + [[0, 1], [1, 2]]) + ) + ) + with self.roundtrip(original) as actual: + assert_identical(actual, original) + + with create_tmp_file() as tmp_file: + original.to_netcdf(tmp_file) + with open_dataset(tmp_file, decode_coords=False) as ds: + assert (ds.coords['latitude'].attrs['bounds'] == + 'latitude_bnds') + assert (ds.coords['longitude'].attrs['bounds'] == + 'longitude_bnds') + assert 'latlon' not in ds['variable'].attrs['coordinates'] + assert 'coordinates' not in ds.attrs + def test_coordinates_encoding(self): def equals_latlon(obj): return obj == 'lat lon' or obj == 'lon lat' From fff73c860255030c8c4531e64416f9a91673c50a Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Sun, 31 Mar 2019 07:11:20 -0400 Subject: [PATCH 03/32] BUG: Use only encoding for tracking bounds and grid_mapping. Discussion on GH2844 indicates that encoding is the preferred location for how things are stored on disk. --- xarray/conventions.py | 13 +++++++++--- xarray/tests/test_backends.py | 38 ++++++++++++++++++++--------------- 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index 9a0d6377345..a44158866d3 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -235,6 +235,11 @@ def encode_cf_variable(var, needs_copy=True, name=None): var = maybe_default_fill_value(var) var = maybe_encode_bools(var) var = ensure_dtype_not_object(var, name=name) + + if var.encoding.get('grid_mapping', None) is not None: + var.attrs['grid_mapping'] = var.encoding.pop('grid_mapping') + if var.encoding.get('bounds', None) is not None: + var.attrs['bounds'] = var.encoding.pop('bounds') return var @@ -414,12 +419,14 @@ def stackable(dim): if all(k in variables for k in var_bounds_names): new_vars[k].encoding['bounds'] = bounds_str coord_names.update(var_bounds_names) + del var_attrs['bounds'] if 'grid_mapping' in var_attrs: proj_str = var_attrs['grid_mapping'] var_proj_names = proj_str.split() if all(k in variables for k in var_proj_names): new_vars[k].encoding['grid_mapping'] = proj_str coord_names.update(var_proj_names) + del var_attrs['grid_mapping'] if decode_coords and 'coordinates' in attributes: attributes = OrderedDict(attributes) @@ -556,9 +563,9 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): variable_coordinates[k].add(coord_name) global_coordinates.discard(coord_name) - att_val = v.attrs.get - if ((att_val("bounds", None) == coord_name or - att_val("grid_mapping", None) == coord_name)): + encoding_val = v.encoding.get + if ((encoding_val('bounds', None) == coord_name or + encoding_val('grid_mapping', None) == coord_name)): not_technically_coordinates.add(coord_name) global_coordinates.discard(coord_name) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index ea64f056289..fc9fdd6ec2e 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -717,17 +717,14 @@ def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn): def test_grid_mapping_and_bounds_are_coordinates(self): original = Dataset( dict(variable=(('latitude', 'longitude'), - [[0, 1], [2, 3]], - {'grid_mapping': 'latlon'})), + [[0, 1], [2, 3]])), dict( latitude=('latitude', [0, 1], - {'bounds': 'latitude_bnds', - 'grid_mapping': 'latlon'}), + {'units': 'degrees_north'}), longitude=('longitude', [0, 1], - {'bounds': 'longitude_bnds', - 'grid_mapping': 'latlon'}), + {'units': 'degrees_east'}), latlon=((), -1, {'grid_mapping_name': 'latitude_longitude'}), latitude_bnds=(('latitude', 'bnds2'), [[0, 1], [1, 2]]), @@ -735,19 +732,28 @@ def test_grid_mapping_and_bounds_are_coordinates(self): [[0, 1], [1, 2]]) ) ) + original['variable'].encoding['grid_mapping'] = 'latlon' + original.coords['latitude'].encoding.update( + dict(grid_mapping='latlon', + bounds='latitude_bnds') + ) + original.coords['longitude'].encoding.update( + dict(grid_mapping='latlon', + bounds='longitude_bnds') + ) + with create_tmp_file() as tmp_file: + original.to_netcdf(tmp_file) + with open_dataset(tmp_file, decode_coords=False) as ds: + assert (ds.coords['latitude'].attrs['bounds'] == + 'latitude_bnds') + assert (ds.coords['longitude'].attrs['bounds'] == + 'longitude_bnds') + assert 'latlon' not in ds['variable'].attrs['coordinates'] + assert 'coordinates' not in ds.attrs + with self.roundtrip(original) as actual: assert_identical(actual, original) - with create_tmp_file() as tmp_file: - original.to_netcdf(tmp_file) - with open_dataset(tmp_file, decode_coords=False) as ds: - assert (ds.coords['latitude'].attrs['bounds'] == - 'latitude_bnds') - assert (ds.coords['longitude'].attrs['bounds'] == - 'longitude_bnds') - assert 'latlon' not in ds['variable'].attrs['coordinates'] - assert 'coordinates' not in ds.attrs - def test_coordinates_encoding(self): def equals_latlon(obj): return obj == 'lat lon' or obj == 'lon lat' From b3696d3a83038ac45f3a3b1f5f2e503bfb2893f0 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Thu, 30 May 2019 23:16:52 -0400 Subject: [PATCH 04/32] Address feedback on PR. --- doc/whats-new.rst | 5 +++++ xarray/conventions.py | 8 ++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 3de610b3046..4141ad5eda6 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -21,6 +21,11 @@ v0.12.1 (unreleased) Enhancements ~~~~~~~~~~~~ +- Decode the CF attributes 'grid_mapping' and 'bounds' on file read + (put the referenced variables into 'coords' instead of 'data_vars') + and encode on write (write attributes corresponding to encoding + values). + Bug fixes ~~~~~~~~~ diff --git a/xarray/conventions.py b/xarray/conventions.py index a44158866d3..ee7e874905c 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -236,9 +236,9 @@ def encode_cf_variable(var, needs_copy=True, name=None): var = maybe_encode_bools(var) var = ensure_dtype_not_object(var, name=name) - if var.encoding.get('grid_mapping', None) is not None: + if 'grid_mapping' in var.encoding: var.attrs['grid_mapping'] = var.encoding.pop('grid_mapping') - if var.encoding.get('bounds', None) is not None: + if 'bounds' in var.encoding: var.attrs['bounds'] = var.encoding.pop('bounds') return var @@ -564,8 +564,8 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): global_coordinates.discard(coord_name) encoding_val = v.encoding.get - if ((encoding_val('bounds', None) == coord_name or - encoding_val('grid_mapping', None) == coord_name)): + if ((encoding_val('bounds') == coord_name or + encoding_val('grid_mapping') == coord_name)): not_technically_coordinates.add(coord_name) global_coordinates.discard(coord_name) From 02aff730c03848c466b8dc60ee4b910e1013bde4 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Fri, 14 Feb 2020 08:09:35 -0500 Subject: [PATCH 05/32] Style fixes: newline before binary operator. --- xarray/conventions.py | 4 ++-- xarray/tests/test_backends.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index 39dd6492d75..fe7f132a056 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -682,8 +682,8 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): variable_coordinates[k].add(coord_name) encoding_val = v.encoding.get - if ((encoding_val('bounds') == coord_name or - encoding_val('grid_mapping') == coord_name)): + if ((encoding_val('bounds') == coord_name + or encoding_val('grid_mapping') == coord_name)): not_technically_coordinates.add(coord_name) global_coordinates.discard(coord_name) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 79abb185ac7..805d09fb9bf 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -837,10 +837,10 @@ def test_grid_mapping_and_bounds_are_coordinates(self): with create_tmp_file() as tmp_file: original.to_netcdf(tmp_file) with open_dataset(tmp_file, decode_coords=False) as ds: - assert (ds.coords['latitude'].attrs['bounds'] == - 'latitude_bnds') - assert (ds.coords['longitude'].attrs['bounds'] == - 'longitude_bnds') + assert (ds.coords['latitude'].attrs['bounds'] + == 'latitude_bnds') + assert (ds.coords['longitude'].attrs['bounds'] + == 'longitude_bnds') assert 'latlon' not in ds['variable'].attrs['coordinates'] assert 'coordinates' not in ds.attrs From 0721506a817b4f4e7dffd64eef3fa527764c0030 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Fri, 14 Feb 2020 09:57:52 -0500 Subject: [PATCH 06/32] Style fixes: double quotes for string literals, rewrap lines. --- xarray/conventions.py | 33 +++++++++++++++------------ xarray/tests/test_backends.py | 43 +++++++++++++---------------------- 2 files changed, 34 insertions(+), 42 deletions(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index fe7f132a056..c5d0fb21f3d 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -255,10 +255,10 @@ def encode_cf_variable(var, needs_copy=True, name=None): var = maybe_encode_bools(var) var = ensure_dtype_not_object(var, name=name) - if 'grid_mapping' in var.encoding: - var.attrs['grid_mapping'] = var.encoding.pop('grid_mapping') - if 'bounds' in var.encoding: - var.attrs['bounds'] = var.encoding.pop('bounds') + if "grid_mapping" in var.encoding: + var.attrs["grid_mapping"] = var.encoding.pop("grid_mapping") + if "bounds" in var.encoding: + var.attrs["bounds"] = var.encoding.pop("bounds") return var @@ -507,20 +507,20 @@ def stackable(dim): new_vars[k].encoding["coordinates"] = coord_str del var_attrs["coordinates"] coord_names.update(var_coord_names) - if 'bounds' in var_attrs: - bounds_str = var_attrs['bounds'] + if "bounds" in var_attrs: + bounds_str = var_attrs["bounds"] var_bounds_names = [bounds_str] if all(k in variables for k in var_bounds_names): - new_vars[k].encoding['bounds'] = bounds_str + new_vars[k].encoding["bounds"] = bounds_str coord_names.update(var_bounds_names) - del var_attrs['bounds'] - if 'grid_mapping' in var_attrs: - proj_str = var_attrs['grid_mapping'] + del var_attrs["bounds"] + if "grid_mapping" in var_attrs: + proj_str = var_attrs["grid_mapping"] var_proj_names = proj_str.split() if all(k in variables for k in var_proj_names): - new_vars[k].encoding['grid_mapping'] = proj_str + new_vars[k].encoding["grid_mapping"] = proj_str coord_names.update(var_proj_names) - del var_attrs['grid_mapping'] + del var_attrs["grid_mapping"] if decode_coords and "coordinates" in attributes: attributes = dict(attributes) @@ -682,8 +682,10 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): variable_coordinates[k].add(coord_name) encoding_val = v.encoding.get - if ((encoding_val('bounds') == coord_name - or encoding_val('grid_mapping') == coord_name)): + if ( + encoding_val("bounds") == coord_name + or encoding_val("grid_mapping") == coord_name + ): not_technically_coordinates.add(coord_name) global_coordinates.discard(coord_name) @@ -705,7 +707,8 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): coords_str = pop_to(encoding, attrs, "coordinates") if not coords_str and variable_coordinates[name]: attrs["coordinates"] = " ".join( - str(coord_name) for coord_name in variable_coordinates[name] + str(coord_name) + for coord_name in variable_coordinates[name] if coord_name not in not_technically_coordinates ) if "coordinates" in attrs: diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 805d09fb9bf..abed6193377 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -809,40 +809,29 @@ def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn): def test_grid_mapping_and_bounds_are_coordinates(self): original = Dataset( - dict(variable=(('latitude', 'longitude'), - [[0, 1], [2, 3]])), + dict(variable=(("latitude", "longitude"), [[0, 1], [2, 3]])), dict( - latitude=('latitude', - [0, 1], - {'units': 'degrees_north'}), - longitude=('longitude', - [0, 1], - {'units': 'degrees_east'}), - latlon=((), -1, {'grid_mapping_name': 'latitude_longitude'}), - latitude_bnds=(('latitude', 'bnds2'), - [[0, 1], [1, 2]]), - longitude_bnds=(('longitude', 'bnds2'), - [[0, 1], [1, 2]]) - ) + latitude=("latitude", [0, 1], {"units": "degrees_north"}), + longitude=("longitude", [0, 1], {"units": "degrees_east"}), + latlon=((), -1, {"grid_mapping_name": "latitude_longitude"}), + latitude_bnds=(("latitude", "bnds2"), [[0, 1], [1, 2]]), + longitude_bnds=(("longitude", "bnds2"), [[0, 1], [1, 2]]), + ), ) - original['variable'].encoding['grid_mapping'] = 'latlon' - original.coords['latitude'].encoding.update( - dict(grid_mapping='latlon', - bounds='latitude_bnds') + original["variable"].encoding["grid_mapping"] = "latlon" + original.coords["latitude"].encoding.update( + dict(grid_mapping="latlon", bounds="latitude_bnds") ) - original.coords['longitude'].encoding.update( - dict(grid_mapping='latlon', - bounds='longitude_bnds') + original.coords["longitude"].encoding.update( + dict(grid_mapping="latlon", bounds="longitude_bnds") ) with create_tmp_file() as tmp_file: original.to_netcdf(tmp_file) with open_dataset(tmp_file, decode_coords=False) as ds: - assert (ds.coords['latitude'].attrs['bounds'] - == 'latitude_bnds') - assert (ds.coords['longitude'].attrs['bounds'] - == 'longitude_bnds') - assert 'latlon' not in ds['variable'].attrs['coordinates'] - assert 'coordinates' not in ds.attrs + assert ds.coords["latitude"].attrs["bounds"] == "latitude_bnds" + assert ds.coords["longitude"].attrs["bounds"] == "longitude_bnds" + assert "latlon" not in ds["variable"].attrs["coordinates"] + assert "coordinates" not in ds.attrs with self.roundtrip(original) as actual: assert_identical(actual, original) From 239761e5a695c286663b107a2a3a5881e1f729e7 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Thu, 9 Jul 2020 09:38:13 -0400 Subject: [PATCH 07/32] Address comments from review. There are functions for a few things I do, there should be error reporting for some of the things, and some of the optimizations were unnecessary. --- xarray/conventions.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index c5d0fb21f3d..f055af46185 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -255,10 +255,8 @@ def encode_cf_variable(var, needs_copy=True, name=None): var = maybe_encode_bools(var) var = ensure_dtype_not_object(var, name=name) - if "grid_mapping" in var.encoding: - var.attrs["grid_mapping"] = var.encoding.pop("grid_mapping") - if "bounds" in var.encoding: - var.attrs["bounds"] = var.encoding.pop("bounds") + pop_to(var.encoding, var.attrs, "grid_mapping") + pop_to(var.encoding, var.attrs, "bounds") return var @@ -509,10 +507,14 @@ def stackable(dim): coord_names.update(var_coord_names) if "bounds" in var_attrs: bounds_str = var_attrs["bounds"] - var_bounds_names = [bounds_str] - if all(k in variables for k in var_bounds_names): + if bounds_str in variables: new_vars[k].encoding["bounds"] = bounds_str coord_names.update(var_bounds_names) + else: + warnings.warn( + "Bounds variable \"{0:s}\" not in variables" + .format(bounds_str) + ) del var_attrs["bounds"] if "grid_mapping" in var_attrs: proj_str = var_attrs["grid_mapping"] @@ -520,6 +522,12 @@ def stackable(dim): if all(k in variables for k in var_proj_names): new_vars[k].encoding["grid_mapping"] = proj_str coord_names.update(var_proj_names) + else: + warnings.warn( + "Grid mappings not in variables: {0:s}" + .format([proj_name for grid_map in var_proj_names + if proj_name not in variables]) + ) del var_attrs["grid_mapping"] if decode_coords and "coordinates" in attributes: @@ -681,10 +689,9 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): ): variable_coordinates[k].add(coord_name) - encoding_val = v.encoding.get if ( - encoding_val("bounds") == coord_name - or encoding_val("grid_mapping") == coord_name + v.encoding.get("bounds") == coord_name + or v.encoding.get("grid_mapping") == coord_name ): not_technically_coordinates.add(coord_name) global_coordinates.discard(coord_name) From e0b8e996d36d0efe607b48f913ac8d86c5fe121f Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Thu, 9 Jul 2020 09:51:07 -0400 Subject: [PATCH 08/32] Fix style issues and complete name changes. Linters are your friend. They can help you fix simple mistakes without needing to run expensive tests. --- xarray/conventions.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index f055af46185..ac0100f0948 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -509,11 +509,10 @@ def stackable(dim): bounds_str = var_attrs["bounds"] if bounds_str in variables: new_vars[k].encoding["bounds"] = bounds_str - coord_names.update(var_bounds_names) + coord_names.add(bounds_str) else: warnings.warn( - "Bounds variable \"{0:s}\" not in variables" - .format(bounds_str) + 'Bounds variable "{0:s}" not in variables'.format(bounds_str) ) del var_attrs["bounds"] if "grid_mapping" in var_attrs: @@ -524,9 +523,13 @@ def stackable(dim): coord_names.update(var_proj_names) else: warnings.warn( - "Grid mappings not in variables: {0:s}" - .format([proj_name for grid_map in var_proj_names - if proj_name not in variables]) + "Grid mappings not in variables: {0:s}".format( + [ + proj_name + for proj_name in var_proj_names + if proj_name not in variables + ] + ) ) del var_attrs["grid_mapping"] From 9ba7485ea5451de1d4caf206f78f82b7128ac913 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Sun, 2 Aug 2020 17:57:11 -0400 Subject: [PATCH 09/32] Add more attributes from the CF conventions. "grid_mapping" and "bounds" are not the only attributes in CF that contain names of variables that are not sensible to interpret as the primary variables of analysis (XArray data variables) and that should remain associated with the variable with the abstract. This change will keep most of the referenced variables associated with the referencing variables, but will also keep variables referenced by other variables. --- xarray/conventions.py | 78 +++++++++++++++++++++-------------- xarray/tests/test_backends.py | 38 ++++++++++++++++- 2 files changed, 83 insertions(+), 33 deletions(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index ac0100f0948..7b8642b1c73 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -11,6 +11,24 @@ from .core.pycompat import dask_array_type from .core.variable import IndexVariable, Variable, as_variable +CF_RELATED_DATA = ( + "bounds", + "grid_mapping", + "climatology", + "geometry", + "node_coordinates", + "node_count", + "part_node_count", + "interior_ring", + "ancillary_variables", + "cell_measures", + "formula_terms", +) +CF_RELATED_DATA_NEEDS_PARSING = ( + "cell_measures", + "formula_terms", +) + class NativeEndiannessArray(indexing.ExplicitlyIndexedNDArrayMixin): """Decode arrays on the fly from non-native to native endianness @@ -255,8 +273,8 @@ def encode_cf_variable(var, needs_copy=True, name=None): var = maybe_encode_bools(var) var = ensure_dtype_not_object(var, name=name) - pop_to(var.encoding, var.attrs, "grid_mapping") - pop_to(var.encoding, var.attrs, "bounds") + for attr_name in CF_RELATED_DATA: + pop_to(var.encoding, var.attrs, attr_name) return var @@ -505,33 +523,31 @@ def stackable(dim): new_vars[k].encoding["coordinates"] = coord_str del var_attrs["coordinates"] coord_names.update(var_coord_names) - if "bounds" in var_attrs: - bounds_str = var_attrs["bounds"] - if bounds_str in variables: - new_vars[k].encoding["bounds"] = bounds_str - coord_names.add(bounds_str) - else: - warnings.warn( - 'Bounds variable "{0:s}" not in variables'.format(bounds_str) - ) - del var_attrs["bounds"] - if "grid_mapping" in var_attrs: - proj_str = var_attrs["grid_mapping"] - var_proj_names = proj_str.split() - if all(k in variables for k in var_proj_names): - new_vars[k].encoding["grid_mapping"] = proj_str - coord_names.update(var_proj_names) - else: - warnings.warn( - "Grid mappings not in variables: {0:s}".format( - [ - proj_name - for proj_name in var_proj_names - if proj_name not in variables - ] + for attr_name in CF_RELATED_DATA: + if attr_name in var_attrs: + attr_val = var_attrs[attr_name] + var_names = attr_val.split() + if attr_name in CF_RELATED_DATA_NEEDS_PARSING: + var_names = [ + name + for name in var_names + if not name.endswith(":") and not name == k + ] + if all(k in variables for k in var_names): + new_vars[k].encoding[attr_name] = attr_val + coord_names.update(var_names) + else: + warnings.warn( + "Variable(s) referenced in {0:s} not in variables: {1!s}".format( + attr_name, + [ + proj_name + for proj_name in var_names + if proj_name not in variables + ], + ) ) - ) - del var_attrs["grid_mapping"] + del var_attrs[attr_name] if decode_coords and "coordinates" in attributes: attributes = dict(attributes) @@ -692,9 +708,9 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): ): variable_coordinates[k].add(coord_name) - if ( - v.encoding.get("bounds") == coord_name - or v.encoding.get("grid_mapping") == coord_name + if any( + attr_name in v.encoding and coord_name in v.encoding.get(attr_name) + for attr_name in CF_RELATED_DATA ): not_technically_coordinates.add(coord_name) global_coordinates.discard(coord_name) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index abed6193377..86a1cd65aa8 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -809,22 +809,56 @@ def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn): def test_grid_mapping_and_bounds_are_coordinates(self): original = Dataset( - dict(variable=(("latitude", "longitude"), [[0, 1], [2, 3]])), + dict( + variable=( + ("ln_p", "latitude", "longitude"), + np.arange(8).reshape(2, 2, 2), + ) + ), dict( latitude=("latitude", [0, 1], {"units": "degrees_north"}), longitude=("longitude", [0, 1], {"units": "degrees_east"}), latlon=((), -1, {"grid_mapping_name": "latitude_longitude"}), latitude_bnds=(("latitude", "bnds2"), [[0, 1], [1, 2]]), longitude_bnds=(("longitude", "bnds2"), [[0, 1], [1, 2]]), + areas=( + ("latitude", "longitude"), + [[1, 1], [1, 1]], + {"units": "degree^2"}, + ), + std_devs=( + ("ln_p", "latitude", "longitude"), + np.arange(0.1, 0.9, 0.1).reshape(2, 2, 2), + {"standard_name": "standard_error"}, + ), + det_lim=((), 0.1, {"standard_name": "detection_minimum"},), + ln_p=( + "ln_p", + [1.0, 0.5], + { + "standard_name": "atmosphere_natural_log_pressure_coordinate", + "computed_standard_name": "air_pressure", + }, + ), + P0=((), 1013.25, {"units": "hPa"}), ), ) - original["variable"].encoding["grid_mapping"] = "latlon" + original["variable"].encoding.update( + { + "cell_measures": "area: areas", + "grid_mapping": "latlon", + "ancillary_variables": "std_devs det_lim", + }, + ) original.coords["latitude"].encoding.update( dict(grid_mapping="latlon", bounds="latitude_bnds") ) original.coords["longitude"].encoding.update( dict(grid_mapping="latlon", bounds="longitude_bnds") ) + original.coords["ln_p"].encoding.update( + {"formula_terms": "p0: P0 lev: ln_p",} + ) with create_tmp_file() as tmp_file: original.to_netcdf(tmp_file) with open_dataset(tmp_file, decode_coords=False) as ds: From 427473035990fe7648fbd574d75848bacb4416f6 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Sun, 2 Aug 2020 18:56:11 -0400 Subject: [PATCH 10/32] Remove a trailing comma in a one-element dict literal. --- xarray/tests/test_backends.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index ba30c7f4e44..92d92d1c686 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -887,9 +887,7 @@ def test_grid_mapping_and_bounds_are_coordinates(self): original.coords["longitude"].encoding.update( dict(grid_mapping="latlon", bounds="longitude_bnds") ) - original.coords["ln_p"].encoding.update( - {"formula_terms": "p0: P0 lev: ln_p",} - ) + original.coords["ln_p"].encoding.update({"formula_terms": "p0: P0 lev: ln_p"}) with create_tmp_file() as tmp_file: original.to_netcdf(tmp_file) with open_dataset(tmp_file, decode_coords=False) as ds: From 702776725b0961ff6d4f2afc486cd668733378b0 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Sat, 8 Aug 2020 22:01:56 -0400 Subject: [PATCH 11/32] Stop moving ancillary_variables to coords Not everyone feels that these are "associated variables" in the sense of XArray `coords`, so leave it as a data variable so it is clear this is not a CF coordinate. --- xarray/conventions.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index 4aa0c7c46ae..5a3d28129f2 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -20,7 +20,6 @@ "node_count", "part_node_count", "interior_ring", - "ancillary_variables", "cell_measures", "formula_terms", ) From 8d96a664572a70978013f804473d8591353cad98 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Sun, 16 Aug 2020 15:08:08 -0400 Subject: [PATCH 12/32] Expand the list of attributes in the documentation. The PR grew rather beyond the original What's New entry, so it got moved into weather-climate.rst with a pointer in the What's New entry. At least, I think there's a pointer. The doc build requires pandoc, which is a bit tricky locally. --- doc/weather-climate.rst | 22 ++++++++++++++++++++++ doc/whats-new.rst | 8 ++++---- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/doc/weather-climate.rst b/doc/weather-climate.rst index f03dfd14c73..7d6b7dc02a3 100644 --- a/doc/weather-climate.rst +++ b/doc/weather-climate.rst @@ -10,6 +10,28 @@ Weather and climate data ``xarray`` can leverage metadata that follows the `Climate and Forecast (CF) conventions`_ if present. Examples include automatic labelling of plots with descriptive names and units if proper metadata is present (see :ref:`plotting`) and support for non-standard calendars used in climate science through the ``cftime`` module (see :ref:`CFTimeIndex`). There are also a number of geosciences-focused projects that build on xarray (see :ref:`related-projects`). +Several CF variable attributes contain lists of other variables +associated with the variable with the attribute. A few of these are +now parsed by XArray, with the attribute value popped to encoding on +read and the variables in that value interpreted as non-dimension +coordinates: + +- `coordinates` +- `bounds` +- `grid_mapping` +- `climatology` +- `geometry` +- `node_coordinates` +- `node_count` +- `part_node_count` +- `interior_ring` +- `cell_measures` +- `formula_terms` + +The CF attribute `ancillary_variables` was not included in the list +due to the variables listed there being associated primarily with the +variable with the attribute, rather than with the dimensions. + .. _Climate and Forecast (CF) conventions: http://cfconventions.org .. _metpy_accessor: diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 3fc573e7b67..f6fec5dc673 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -33,10 +33,10 @@ New Features of :py:class:`DataArray` and :py:class:`Dataset` objects and document the new method in :doc:`internals`. (:pull:`4248`). By `Justus Magin `_. -- Decode the CF attributes 'grid_mapping' and 'bounds' on file read - (put the referenced variables into 'coords' instead of 'data_vars') - and encode on write (write attributes corresponding to encoding - values). (:pull:`2844`, :issue:`3689`) +- Decode more CF attributes on file read (put the referenced variables + into 'coords' instead of 'data_vars') and encode on write (write + attributes corresponding to encoding values). The list of variables + is in :ref:`weather-climate` (:pull:`2844`, :issue:`3689`) Bug fixes From 1a5b35db818c8e45b6dd70259e22fb55297be039 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Sun, 16 Aug 2020 15:10:58 -0400 Subject: [PATCH 13/32] Make sure to run the pip associated with the running python. --- doc/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/conf.py b/doc/conf.py index d3d126cb33f..2691277ddfe 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -43,7 +43,7 @@ subprocess.run(["conda", "list"]) else: print("pip environment:") - subprocess.run(["pip", "list"]) + subprocess.run([sys.executable, "-m", "pip", "list"]) print("xarray: %s, %s" % (xarray.__version__, xarray.__file__)) From 9f53fbbca9689190a5b6f842d3bf7b517f588193 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Sun, 16 Aug 2020 15:23:36 -0400 Subject: [PATCH 14/32] Warn about new locations for some variables. --- xarray/conventions.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/xarray/conventions.py b/xarray/conventions.py index 5a3d28129f2..ef7ccb8107c 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -539,6 +539,12 @@ def stackable(dim): if all(k in variables for k in var_names): new_vars[k].encoding[attr_name] = attr_val coord_names.update(var_names) + # Warn that some things will be coords rather + # than data_vars. + warnings.warn( + "Variable(s) {0!s} moved from data_vars to coords\n" + "based on {1:s} attribute".format(var_names, attr_name) + ) else: warnings.warn( "Variable(s) referenced in {0:s} not in variables: {1!s}".format( From 546b43e1d1a73142a315913c0ccdf72d41985920 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Sun, 23 Aug 2020 12:25:46 -0400 Subject: [PATCH 15/32] Move ancillary variables back to data_vars in test. --- xarray/tests/test_backends.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 92d92d1c686..95cc6fa1058 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -843,8 +843,15 @@ def test_grid_mapping_and_bounds_are_coordinates(self): dict( variable=( ("ln_p", "latitude", "longitude"), - np.arange(8).reshape(2, 2, 2), - ) + np.arange(8, dtype="f4").reshape(2, 2, 2), + {"ancillary_variables": "std_devs det_lim"} + ), + std_devs=( + ("ln_p", "latitude", "longitude"), + np.arange(0.1, 0.9, 0.1).reshape(2, 2, 2), + {"standard_name": "standard_error"}, + ), + det_lim=((), 0.1, {"standard_name": "detection_minimum"},), ), dict( latitude=("latitude", [0, 1], {"units": "degrees_north"}), @@ -857,17 +864,11 @@ def test_grid_mapping_and_bounds_are_coordinates(self): [[1, 1], [1, 1]], {"units": "degree^2"}, ), - std_devs=( - ("ln_p", "latitude", "longitude"), - np.arange(0.1, 0.9, 0.1).reshape(2, 2, 2), - {"standard_name": "standard_error"}, - ), - det_lim=((), 0.1, {"standard_name": "detection_minimum"},), ln_p=( "ln_p", [1.0, 0.5], { - "standard_name": "atmosphere_natural_log_pressure_coordinate", + "standard_name": "atmosphere_ln_pressure_coordinate", "computed_standard_name": "air_pressure", }, ), @@ -878,7 +879,6 @@ def test_grid_mapping_and_bounds_are_coordinates(self): { "cell_measures": "area: areas", "grid_mapping": "latlon", - "ancillary_variables": "std_devs det_lim", }, ) original.coords["latitude"].encoding.update( From 8ec4af34428f3ac28bb9ed350da5b12a6626021f Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Sun, 23 Aug 2020 12:27:04 -0400 Subject: [PATCH 16/32] Update warnings to provide a more useful stack level. decode_cf_variables is calibrated to warn on the line where xarray.open_dataset is called. If called through a different path, it may point to a less useful part of the stack. --- xarray/conventions.py | 6 ++++-- xarray/tests/test_backends.py | 5 +++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index ef7ccb8107c..bf045005b9d 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -543,7 +543,8 @@ def stackable(dim): # than data_vars. warnings.warn( "Variable(s) {0!s} moved from data_vars to coords\n" - "based on {1:s} attribute".format(var_names, attr_name) + "based on {1:s} attribute".format(var_names, attr_name), + stacklevel=5, ) else: warnings.warn( @@ -554,7 +555,8 @@ def stackable(dim): for proj_name in var_names if proj_name not in variables ], - ) + ), + stacklevel=5, ) del var_attrs[attr_name] diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 95cc6fa1058..8f20505f100 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -896,8 +896,9 @@ def test_grid_mapping_and_bounds_are_coordinates(self): assert "latlon" not in ds["variable"].attrs["coordinates"] assert "coordinates" not in ds.attrs - with self.roundtrip(original) as actual: - assert_identical(actual, original) + with pytest.warns(UserWarning, match=" moved from data_vars to coords\nbased on "): + with self.roundtrip(original) as actual: + assert_identical(actual, original) def test_coordinates_encoding(self): def equals_latlon(obj): From bc0b1d155a593e4b0aa5a6a49065301529142f69 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Sun, 23 Aug 2020 13:38:20 -0400 Subject: [PATCH 17/32] Split the CF attribute test into multiple smaller tests. This gives a bit more detail about what works and what doesn't. --- xarray/tests/test_backends.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 8f20505f100..790fdc92b63 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -838,7 +838,8 @@ def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn): assert decoded.variables[k].dtype == actual.variables[k].dtype assert_allclose(decoded, actual, decode_bytes=False) - def test_grid_mapping_and_bounds_are_coordinates(self): + @staticmethod + def _create_cf_dataset(): original = Dataset( dict( variable=( @@ -888,6 +889,10 @@ def test_grid_mapping_and_bounds_are_coordinates(self): dict(grid_mapping="latlon", bounds="longitude_bnds") ) original.coords["ln_p"].encoding.update({"formula_terms": "p0: P0 lev: ln_p"}) + return original + + def test_grid_mapping_and_bounds_are_not_coordinates_in_file(self): + original = self._create_cf_dataset() with create_tmp_file() as tmp_file: original.to_netcdf(tmp_file) with open_dataset(tmp_file, decode_coords=False) as ds: @@ -896,6 +901,8 @@ def test_grid_mapping_and_bounds_are_coordinates(self): assert "latlon" not in ds["variable"].attrs["coordinates"] assert "coordinates" not in ds.attrs + def test_grid_mapping_and_bounds_are_coordinates_after_dataset_roundtrip(self): + original = self._create_cf_dataset() with pytest.warns(UserWarning, match=" moved from data_vars to coords\nbased on "): with self.roundtrip(original) as actual: assert_identical(actual, original) From 5c085e187214b840c1364759f52ddda21d100a9d Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Sun, 23 Aug 2020 13:40:13 -0400 Subject: [PATCH 18/32] Add a test of a roundtrip after dropping bounds. --- xarray/tests/test_backends.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 790fdc92b63..0862387bc11 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -907,6 +907,25 @@ def test_grid_mapping_and_bounds_are_coordinates_after_dataset_roundtrip(self): with self.roundtrip(original) as actual: assert_identical(actual, original) + def test_grid_mapping_and_bounds_are_coordinates_after_dataarray_roundtrip(self): + original = self._create_cf_dataset() + # The DataArray roundtrip should have the same warnings as the + # Dataset, but we already tested for those, so just go for the + # new warnings. It would appear that there is no way to tell + # pytest "This warning and also this warning should both be + # present". + # xarray/tests/test_conventions.py::TestCFEncodedDataStore + # needs the to_dataset. The other backends should be fine + # without it. + with pytest.warns( + UserWarning, match=( + r"Variable\(s\) referenced in bounds not in variables: " + r"\['l(at|ong)itude_bnds'\]" + ) + ): + with self.roundtrip(original["variable"].to_dataset()) as actual: + assert_identical(actual, original["variable"].to_dataset()) + def test_coordinates_encoding(self): def equals_latlon(obj): return obj == "lat lon" or obj == "lon lat" From a864b83a4692cb7ab7b90f7912372cc5db20af49 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Sun, 23 Aug 2020 14:22:10 -0400 Subject: [PATCH 19/32] Run black on changes. I really need to remember to do this before pushing. --- xarray/tests/test_backends.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 0862387bc11..59bba1d2331 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -845,7 +845,7 @@ def _create_cf_dataset(): variable=( ("ln_p", "latitude", "longitude"), np.arange(8, dtype="f4").reshape(2, 2, 2), - {"ancillary_variables": "std_devs det_lim"} + {"ancillary_variables": "std_devs det_lim"}, ), std_devs=( ("ln_p", "latitude", "longitude"), @@ -877,10 +877,7 @@ def _create_cf_dataset(): ), ) original["variable"].encoding.update( - { - "cell_measures": "area: areas", - "grid_mapping": "latlon", - }, + {"cell_measures": "area: areas", "grid_mapping": "latlon",}, ) original.coords["latitude"].encoding.update( dict(grid_mapping="latlon", bounds="latitude_bnds") @@ -903,7 +900,9 @@ def test_grid_mapping_and_bounds_are_not_coordinates_in_file(self): def test_grid_mapping_and_bounds_are_coordinates_after_dataset_roundtrip(self): original = self._create_cf_dataset() - with pytest.warns(UserWarning, match=" moved from data_vars to coords\nbased on "): + with pytest.warns( + UserWarning, match=" moved from data_vars to coords\nbased on " + ): with self.roundtrip(original) as actual: assert_identical(actual, original) @@ -918,10 +917,11 @@ def test_grid_mapping_and_bounds_are_coordinates_after_dataarray_roundtrip(self) # needs the to_dataset. The other backends should be fine # without it. with pytest.warns( - UserWarning, match=( + UserWarning, + match=( r"Variable\(s\) referenced in bounds not in variables: " r"\['l(at|ong)itude_bnds'\]" - ) + ), ): with self.roundtrip(original["variable"].to_dataset()) as actual: assert_identical(actual, original["variable"].to_dataset()) From c8d1bdcc09a5a877f3f4bf89efde74abdecaa50d Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Sun, 23 Aug 2020 14:32:16 -0400 Subject: [PATCH 20/32] Check whether round-trip to iris breaks things. This is probably mostly a reminder to do this when upgrading other things. --- xarray/tests/test_backends.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 59bba1d2331..3f9f7d7be39 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -55,6 +55,7 @@ requires_cftime, requires_dask, requires_h5netcdf, + requires_iris, requires_netCDF4, requires_pseudonetcdf, requires_pydap, @@ -926,6 +927,17 @@ def test_grid_mapping_and_bounds_are_coordinates_after_dataarray_roundtrip(self) with self.roundtrip(original["variable"].to_dataset()) as actual: assert_identical(actual, original["variable"].to_dataset()) + @requires_iris + def test_grid_mapping_and_bounds_are_coordinates_after_iris_roundtrip(self): + original = self._create_cf_dataset() + iris_cube = original["variable"].to_iris() + actual = DataArray.from_iris(iris_cube) + # Bounds will be missing (xfail) + del original.coords["latitude_bnds"], original.coords["longitude_bnds"] + # Ancillary vars will be missing + # Those are data_vars, and will be dropped when grabbing the variable + assert_identical(actual, original["variable"]) + def test_coordinates_encoding(self): def equals_latlon(obj): return obj == "lat lon" or obj == "lon lat" From 478be8a8c0148ca8efab757f8550397425ee9a94 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Sun, 23 Aug 2020 14:37:31 -0400 Subject: [PATCH 21/32] Remove trailing comma. --- xarray/tests/test_backends.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 3f9f7d7be39..b46d1c21945 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -878,7 +878,7 @@ def _create_cf_dataset(): ), ) original["variable"].encoding.update( - {"cell_measures": "area: areas", "grid_mapping": "latlon",}, + {"cell_measures": "area: areas", "grid_mapping": "latlon"}, ) original.coords["latitude"].encoding.update( dict(grid_mapping="latlon", bounds="latitude_bnds") From b0e7a859292e05292c3dbb230fc6d21748e80617 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Tue, 5 Jan 2021 15:32:13 -0500 Subject: [PATCH 22/32] Style fixes from black. New beta version of black required. --- xarray/tests/test_backends.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 44e1e85b1d2..76d08c7ba98 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -872,7 +872,11 @@ def _create_cf_dataset(): np.arange(0.1, 0.9, 0.1).reshape(2, 2, 2), {"standard_name": "standard_error"}, ), - det_lim=((), 0.1, {"standard_name": "detection_minimum"},), + det_lim=( + (), + 0.1, + {"standard_name": "detection_minimum"}, + ), ), dict( latitude=("latitude", [0, 1], {"units": "degrees_north"}), From 1a9b201cdda72f9430f4d609610e0cbd60b152b4 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Sat, 16 Jan 2021 11:55:17 -0500 Subject: [PATCH 23/32] Include suggestions from review. --- doc/weather-climate.rst | 3 +++ doc/whats-new.rst | 11 +++++---- xarray/conventions.py | 45 ++++++++++++++++------------------- xarray/tests/test_backends.py | 9 +++---- 4 files changed, 34 insertions(+), 34 deletions(-) diff --git a/doc/weather-climate.rst b/doc/weather-climate.rst index af990bc2fb6..5975d287189 100644 --- a/doc/weather-climate.rst +++ b/doc/weather-climate.rst @@ -28,6 +28,9 @@ coordinates: - `cell_measures` - `formula_terms` +This decoding is controlled by the `decode_coords` kwarg to +:py:func:`open_dataset` and :py:func:`open_mfdataset`. + The CF attribute `ancillary_variables` was not included in the list due to the variables listed there being associated primarily with the variable with the attribute, rather than with the dimensions. diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 28f28baae25..696cfc99bd7 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -29,16 +29,19 @@ Breaking changes values were required, which would lead to inaccurate I/O round-trips. - remove deprecated ``autoclose`` kwargs from :py:func:`open_dataset` (:pull: `4725`). By `Aureliana Barghini `_ +- Variables referred to in attributes like `bounds` and `grid_mapping` + are now automatically set as coordinate variables. These attributes + are moved to :py:attr:`DataArray.encoding` from + :py:attr:`DataArray.attrs`. This behaviour is controlled by the + `decode_coords` kwarg to :py:func:`open_dataset` and + :py:func:`open_mfdataset`. The precise list of variables is in + :ref:`weather-climate` (:pull:`2844`, :issue:`3689`) New Features ~~~~~~~~~~~~ - Performance improvement when constructing DataArrays. Significantly speeds up repr for Datasets with large number of variables. By `Deepak Cherian `_ -- Decode more CF attributes on file read (put the referenced variables - into 'coords' instead of 'data_vars') and encode on write (write - attributes corresponding to encoding values). The list of variables - is in :ref:`weather-climate` (:pull:`2844`, :issue:`3689`) Bug fixes ~~~~~~~~~ diff --git a/xarray/conventions.py b/xarray/conventions.py index cf21520ba91..cf914204624 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -531,33 +531,30 @@ def stackable(dim): for attr_name in CF_RELATED_DATA: if attr_name in var_attrs: attr_val = var_attrs[attr_name] - var_names = attr_val.split() - if attr_name in CF_RELATED_DATA_NEEDS_PARSING: - var_names = [ - name - for name in var_names - if not name.endswith(":") and not name == k + if attr_name not in CF_RELATED_DATA_NEEDS_PARSING: + var_names = attr_val.split() + else: + roles_and_names = [ + role_or_name + for part in attr_val.split(":") + for role_or_name in part.split() ] - if all(k in variables for k in var_names): + if len(roles_and_names) % 2 == 1: + warnings.warn( + f"Attribute {attr_name:s} malformed", stacklevel=5 + ) + var_names = roles_and_names[1::2] + if all(var_name in variables for var_name in var_names): new_vars[k].encoding[attr_name] = attr_val coord_names.update(var_names) - # Warn that some things will be coords rather - # than data_vars. - warnings.warn( - "Variable(s) {0!s} moved from data_vars to coords\n" - "based on {1:s} attribute".format(var_names, attr_name), - stacklevel=5, - ) else: + referenced_vars_not_in_variables = [ + proj_name + for proj_name in var_names + if proj_name not in variables + ] warnings.warn( - "Variable(s) referenced in {0:s} not in variables: {1!s}".format( - attr_name, - [ - proj_name - for proj_name in var_names - if proj_name not in variables - ], - ), + f"Variable(s) referenced in {attr_name:s} not in variables: {referenced_vars_not_in_variables!s}", stacklevel=5, ) del var_attrs[attr_name] @@ -596,8 +593,8 @@ def decode_cf( Decode cf times (e.g., integers since "hours since 2000-01-01") to np.datetime64. decode_coords : bool, optional - Use the 'coordinates' attribute on variable (or the dataset itself) to - identify coordinates. + Use the 'coordinates', 'grid_mapping', and 'bounds' attributes + on variables (or the dataset itself) to identify coordinates. drop_variables : str or iterable, optional A variable or list of variables to exclude from being parsed from the dataset. This may be useful to drop variables with problems or diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 76d08c7ba98..33207c925f3 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -909,7 +909,7 @@ def _create_cf_dataset(): original.coords["longitude"].encoding.update( dict(grid_mapping="latlon", bounds="longitude_bnds") ) - original.coords["ln_p"].encoding.update({"formula_terms": "p0: P0 lev: ln_p"}) + original.coords["ln_p"].encoding.update({"formula_terms": "p0: P0 lev : ln_p"}) return original def test_grid_mapping_and_bounds_are_not_coordinates_in_file(self): @@ -924,11 +924,8 @@ def test_grid_mapping_and_bounds_are_not_coordinates_in_file(self): def test_grid_mapping_and_bounds_are_coordinates_after_dataset_roundtrip(self): original = self._create_cf_dataset() - with pytest.warns( - UserWarning, match=" moved from data_vars to coords\nbased on " - ): - with self.roundtrip(original) as actual: - assert_identical(actual, original) + with self.roundtrip(original) as actual: + assert_identical(actual, original) def test_grid_mapping_and_bounds_are_coordinates_after_dataarray_roundtrip(self): original = self._create_cf_dataset() From 6f3d55ef0c9f4fac6cc4c57651c0867c5bbda6d7 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Sat, 16 Jan 2021 21:08:48 -0500 Subject: [PATCH 24/32] Update xarray/tests/test_backends.py Shorten a test name, making it more accurate in the process. Co-authored-by: Deepak Cherian --- xarray/tests/test_backends.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 33207c925f3..b8bd2c81d96 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -922,7 +922,7 @@ def test_grid_mapping_and_bounds_are_not_coordinates_in_file(self): assert "latlon" not in ds["variable"].attrs["coordinates"] assert "coordinates" not in ds.attrs - def test_grid_mapping_and_bounds_are_coordinates_after_dataset_roundtrip(self): + def test_coordinate_variables_after_dataset_roundtrip(self): original = self._create_cf_dataset() with self.roundtrip(original) as actual: assert_identical(actual, original) From 5268500943c0fbf498a9c0b56cb5b2ebae900f5f Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Sat, 16 Jan 2021 21:10:37 -0500 Subject: [PATCH 25/32] Update xarray/conventions.py Note that the identified coordinates are no longer primarily CF Auxiliary Coordinates. Co-authored-by: Deepak Cherian --- xarray/conventions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index cf914204624..58550289f7f 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -594,7 +594,7 @@ def decode_cf( np.datetime64. decode_coords : bool, optional Use the 'coordinates', 'grid_mapping', and 'bounds' attributes - on variables (or the dataset itself) to identify coordinates. + on variables (or the dataset itself) to identify coordinate variables. drop_variables : str or iterable, optional A variable or list of variables to exclude from being parsed from the dataset. This may be useful to drop variables with problems or From 2edd367a014d4ccd9c842fa7003f193087e0cf01 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Sat, 16 Jan 2021 21:12:11 -0500 Subject: [PATCH 26/32] Mention that there are other attributes not listed There's more than three attributes used to assign variables as coordinates. Co-authored-by: Deepak Cherian --- xarray/conventions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index 58550289f7f..e7fb915e604 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -593,7 +593,7 @@ def decode_cf( Decode cf times (e.g., integers since "hours since 2000-01-01") to np.datetime64. decode_coords : bool, optional - Use the 'coordinates', 'grid_mapping', and 'bounds' attributes + Use the 'coordinates', 'grid_mapping', 'bounds' and other attributes on variables (or the dataset itself) to identify coordinate variables. drop_variables : str or iterable, optional A variable or list of variables to exclude from being parsed from the From 948465c71419408e3c1b5613e6dd181b55a6ad3c Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Sat, 16 Jan 2021 21:13:30 -0500 Subject: [PATCH 27/32] Fix .rst syntax in whats-new Use two backticks for monospace font. Single backticks trop you into the default role. Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 696cfc99bd7..f41e294065d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -29,7 +29,7 @@ Breaking changes values were required, which would lead to inaccurate I/O round-trips. - remove deprecated ``autoclose`` kwargs from :py:func:`open_dataset` (:pull: `4725`). By `Aureliana Barghini `_ -- Variables referred to in attributes like `bounds` and `grid_mapping` +- Variables referred to in attributes like ``bounds`` and ``grid_mapping`` are now automatically set as coordinate variables. These attributes are moved to :py:attr:`DataArray.encoding` from :py:attr:`DataArray.attrs`. This behaviour is controlled by the From c68d3725dc1ed0479f90982f8f7c2cb400c1fcf5 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Sat, 16 Jan 2021 21:21:53 -0500 Subject: [PATCH 28/32] Shorten name of another test. I'm not just testing ``grid_mapping`` and ``bounds`` here, so I should have the name reflect that. --- xarray/tests/test_backends.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index b8bd2c81d96..10bc53ee6e7 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -948,7 +948,7 @@ def test_grid_mapping_and_bounds_are_coordinates_after_dataarray_roundtrip(self) assert_identical(actual, original["variable"].to_dataset()) @requires_iris - def test_grid_mapping_and_bounds_are_coordinates_after_iris_roundtrip(self): + def test_coordinate_variables_after_iris_roundtrip(self): original = self._create_cf_dataset() iris_cube = original["variable"].to_iris() actual = DataArray.from_iris(iris_cube) From 9ee7c3a7848bf6f2155f410c485c7a1708e28176 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 17 Jan 2021 09:28:17 -0700 Subject: [PATCH 29/32] Update docs. --- doc/weather-climate.rst | 37 +++++++++++++++++++++---------------- doc/whats-new.rst | 4 ++-- xarray/backends/api.py | 8 ++++---- 3 files changed, 27 insertions(+), 22 deletions(-) diff --git a/doc/weather-climate.rst b/doc/weather-climate.rst index 5975d287189..fefa748d678 100644 --- a/doc/weather-climate.rst +++ b/doc/weather-climate.rst @@ -10,33 +10,38 @@ Weather and climate data ``xarray`` can leverage metadata that follows the `Climate and Forecast (CF) conventions`_ if present. Examples include automatic labelling of plots with descriptive names and units if proper metadata is present (see :ref:`plotting`) and support for non-standard calendars used in climate science through the ``cftime`` module (see :ref:`CFTimeIndex`). There are also a number of geosciences-focused projects that build on xarray (see :ref:`related-projects`). +.. _Climate and Forecast (CF) conventions: http://cfconventions.org + +.. _cf_variables: + +Related Variables +----------------- + Several CF variable attributes contain lists of other variables associated with the variable with the attribute. A few of these are now parsed by XArray, with the attribute value popped to encoding on read and the variables in that value interpreted as non-dimension coordinates: -- `coordinates` -- `bounds` -- `grid_mapping` -- `climatology` -- `geometry` -- `node_coordinates` -- `node_count` -- `part_node_count` -- `interior_ring` -- `cell_measures` -- `formula_terms` - -This decoding is controlled by the `decode_coords` kwarg to +- ``coordinates`` +- ``bounds`` +- ``grid_mapping`` +- ``climatology`` +- ``geometry`` +- ``node_coordinates`` +- ``node_count`` +- ``part_node_count`` +- ``interior_ring`` +- ``cell_measures`` +- ``formula_terms`` + +This decoding is controlled by the ``decode_coords`` kwarg to :py:func:`open_dataset` and :py:func:`open_mfdataset`. -The CF attribute `ancillary_variables` was not included in the list +The CF attribute ``ancillary_variables`` was not included in the list due to the variables listed there being associated primarily with the variable with the attribute, rather than with the dimensions. -.. _Climate and Forecast (CF) conventions: http://cfconventions.org - .. _metpy_accessor: CF-compliant coordinate variables diff --git a/doc/whats-new.rst b/doc/whats-new.rst index f41e294065d..f893d01d7f1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -33,8 +33,8 @@ Breaking changes are now automatically set as coordinate variables. These attributes are moved to :py:attr:`DataArray.encoding` from :py:attr:`DataArray.attrs`. This behaviour is controlled by the - `decode_coords` kwarg to :py:func:`open_dataset` and - :py:func:`open_mfdataset`. The precise list of variables is in + ``decode_coords`` kwarg to :py:func:`open_dataset` and + :py:func:`open_mfdataset`. The full list of decoded attributes is in :ref:`weather-climate` (:pull:`2844`, :issue:`3689`) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index faa7e6cf3d3..c0e4faf4e5a 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -355,8 +355,8 @@ def open_dataset( removed) if they have no corresponding variable and if they are only used as the last dimension of character arrays. decode_coords : bool, optional - If True, decode the 'coordinates' attribute to identify coordinates in - the resulting dataset. + Use the 'coordinates', 'grid_mapping', 'bounds' and other attributes + on variables (or the dataset itself) to identify coordinate variables. engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", "cfgrib", \ "pseudonetcdf", "zarr"}, optional Engine to use when reading files. If not provided, the default engine @@ -614,8 +614,8 @@ def open_dataarray( removed) if they have no corresponding variable and if they are only used as the last dimension of character arrays. decode_coords : bool, optional - If True, decode the 'coordinates' attribute to identify coordinates in - the resulting dataset. + Use the 'coordinates', 'grid_mapping', 'bounds' and other attributes + on variables (or the dataset itself) to identify coordinate variables. engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", "cfgrib"}, \ optional Engine to use when reading files. If not provided, the default engine From 94b81537143c8f529ba778c1f4011cd6444dbcb9 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 11 Feb 2021 07:06:19 -0700 Subject: [PATCH 30/32] fix merge. --- doc/whats-new.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 68d87206830..5f65839cce6 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -39,10 +39,8 @@ Breaking changes always be set such that ``int64`` values can be used. In the past, no units finer than "seconds" were chosen, which would sometimes mean that ``float64`` values were required, which would lead to inaccurate I/O round-trips. -- remove deprecated ``autoclose`` kwargs from :py:func:`open_dataset` (:pull: `4725`). - By `Aureliana Barghini `_ - Variables referred to in attributes like ``bounds`` and ``grid_mapping`` - are now automatically set as coordinate variables. These attributes + are can be set as coordinate variables. These attributes are moved to :py:attr:`DataArray.encoding` from :py:attr:`DataArray.attrs`. This behaviour is controlled by the ``decode_coords`` kwarg to :py:func:`open_dataset` and From c8896f35530f542d7891429c1fedb1522aa01c86 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 11 Feb 2021 07:06:40 -0700 Subject: [PATCH 31/32] Activate new behaviour only with `decode_coords="all"` --- xarray/backends/api.py | 20 ++++++++++++++------ xarray/conventions.py | 14 ++++++++++---- xarray/tests/test_backends.py | 16 ++++++++++++++-- 3 files changed, 38 insertions(+), 12 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 5df3346b03a..289a7369275 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -354,9 +354,13 @@ def open_dataset( form string arrays. Dimensions will only be concatenated over (and removed) if they have no corresponding variable and if they are only used as the last dimension of character arrays. - decode_coords : bool, optional - Use the 'coordinates', 'grid_mapping', 'bounds' and other attributes - on variables (or the dataset itself) to identify coordinate variables. + decode_coords : bool or str, optional + Controls which variables are set as coordinate variables: + * ``"coordinates"`` or ``True``: Set variables reffered to in the + ``'coordinates'`` `attribute of the datasets or individual variables + as coordinate variables. + * ``"all"``: Set variables refferd to in 'grid_mapping', 'bounds' and + other attributes as coordinate variables. engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", "cfgrib", \ "pseudonetcdf", "zarr"}, optional Engine to use when reading files. If not provided, the default engine @@ -613,9 +617,13 @@ def open_dataarray( form string arrays. Dimensions will only be concatenated over (and removed) if they have no corresponding variable and if they are only used as the last dimension of character arrays. - decode_coords : bool, optional - Use the 'coordinates', 'grid_mapping', 'bounds' and other attributes - on variables (or the dataset itself) to identify coordinate variables. + decode_coords : bool or str, optional + Controls which variables are set as coordinate variables: + * ``"coordinates"`` or ``True``: Set variables reffered to in the + ``'coordinates'`` `attribute of the datasets or individual variables + as coordinate variables. + * ``"all"``: Set variables refferd to in 'grid_mapping', 'bounds' and + other attributes as coordinate variables. engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", "cfgrib"}, \ optional Engine to use when reading files. If not provided, the default engine diff --git a/xarray/conventions.py b/xarray/conventions.py index aa2c67dcbd8..b46202b75c6 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -519,7 +519,7 @@ def stackable(dim): use_cftime=use_cftime, decode_timedelta=decode_timedelta, ) - if decode_coords: + if decode_coords in [True, "coordinates", "all"]: var_attrs = new_vars[k].attrs if "coordinates" in var_attrs: coord_str = var_attrs["coordinates"] @@ -528,6 +528,8 @@ def stackable(dim): new_vars[k].encoding["coordinates"] = coord_str del var_attrs["coordinates"] coord_names.update(var_coord_names) + + if decode_coords == "all": for attr_name in CF_RELATED_DATA: if attr_name in var_attrs: attr_val = var_attrs[attr_name] @@ -592,9 +594,13 @@ def decode_cf( decode_times : bool, optional Decode cf times (e.g., integers since "hours since 2000-01-01") to np.datetime64. - decode_coords : bool, optional - Use the 'coordinates', 'grid_mapping', 'bounds' and other attributes - on variables (or the dataset itself) to identify coordinate variables. + decode_coords : bool or str, optional + Controls which variables are set as coordinate variables: + * ``"coordinates"`` or ``True``: Set variables reffered to in the + ``'coordinates'`` `attribute of the datasets or individual variables + as coordinate variables. + * ``"all"``: Set variables refferd to in 'grid_mapping', 'bounds' and + other attributes as coordinate variables. drop_variables : str or iterable, optional A variable or list of variables to exclude from being parsed from the dataset. This may be useful to drop variables with problems or diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index fec29db5e13..f2e00ad5622 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -924,9 +924,19 @@ def test_grid_mapping_and_bounds_are_not_coordinates_in_file(self): def test_coordinate_variables_after_dataset_roundtrip(self): original = self._create_cf_dataset() - with self.roundtrip(original) as actual: + with self.roundtrip(original, open_kwargs={"decode_coords": "all"}) as actual: assert_identical(actual, original) + with self.roundtrip(original) as actual: + expected = original.reset_coords( + ["latitude_bnds", "longitude_bnds", "areas", "P0", "latlon"] + ) + # equal checks that coords and data_vars are equal which + # should be enough + # identical would require resetting a number of attributes + # skip that. + assert_equal(actual, expected) + def test_grid_mapping_and_bounds_are_coordinates_after_dataarray_roundtrip(self): original = self._create_cf_dataset() # The DataArray roundtrip should have the same warnings as the @@ -944,7 +954,9 @@ def test_grid_mapping_and_bounds_are_coordinates_after_dataarray_roundtrip(self) r"\['l(at|ong)itude_bnds'\]" ), ): - with self.roundtrip(original["variable"].to_dataset()) as actual: + with self.roundtrip( + original["variable"].to_dataset(), open_kwargs={"decode_coords": "all"} + ) as actual: assert_identical(actual, original["variable"].to_dataset()) @requires_iris From d3ec7abca84e0cdbdfbd07b7465b208af6ccb262 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 11 Feb 2021 10:46:44 -0700 Subject: [PATCH 32/32] [skip-ci] fix docstrings --- xarray/backends/api.py | 18 ++++++++++-------- xarray/backends/apiv2.py | 12 ++++++++---- xarray/conventions.py | 9 +++++---- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 289a7369275..659ae12a2c9 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -354,12 +354,13 @@ def open_dataset( form string arrays. Dimensions will only be concatenated over (and removed) if they have no corresponding variable and if they are only used as the last dimension of character arrays. - decode_coords : bool or str, optional + decode_coords : bool or {"coordinates", "all"}, optional Controls which variables are set as coordinate variables: - * ``"coordinates"`` or ``True``: Set variables reffered to in the - ``'coordinates'`` `attribute of the datasets or individual variables + + - "coordinates" or True: Set variables referred to in the + ``'coordinates'`` attribute of the datasets or individual variables as coordinate variables. - * ``"all"``: Set variables refferd to in 'grid_mapping', 'bounds' and + - "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and other attributes as coordinate variables. engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", "cfgrib", \ "pseudonetcdf", "zarr"}, optional @@ -617,12 +618,13 @@ def open_dataarray( form string arrays. Dimensions will only be concatenated over (and removed) if they have no corresponding variable and if they are only used as the last dimension of character arrays. - decode_coords : bool or str, optional + decode_coords : bool or {"coordinates", "all"}, optional Controls which variables are set as coordinate variables: - * ``"coordinates"`` or ``True``: Set variables reffered to in the - ``'coordinates'`` `attribute of the datasets or individual variables + + - "coordinates" or True: Set variables referred to in the + ``'coordinates'`` attribute of the datasets or individual variables as coordinate variables. - * ``"all"``: Set variables refferd to in 'grid_mapping', 'bounds' and + - "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and other attributes as coordinate variables. engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", "cfgrib"}, \ optional diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index d31fc9ea773..de1b3e1bb29 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -195,10 +195,14 @@ def open_dataset( removed) if they have no corresponding variable and if they are only used as the last dimension of character arrays. This keyword may not be supported by all the backends. - decode_coords : bool, optional - If True, decode the 'coordinates' attribute to identify coordinates in - the resulting dataset. This keyword may not be supported by all the - backends. + decode_coords : bool or {"coordinates", "all"}, optional + Controls which variables are set as coordinate variables: + + - "coordinates" or True: Set variables referred to in the + ``'coordinates'`` attribute of the datasets or individual variables + as coordinate variables. + - "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and + other attributes as coordinate variables. drop_variables: str or iterable, optional A variable or list of variables to exclude from the dataset parsing. This may be useful to drop variables with problems or diff --git a/xarray/conventions.py b/xarray/conventions.py index b46202b75c6..bb0cc5cd338 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -594,12 +594,13 @@ def decode_cf( decode_times : bool, optional Decode cf times (e.g., integers since "hours since 2000-01-01") to np.datetime64. - decode_coords : bool or str, optional + decode_coords : bool or {"coordinates", "all"}, optional Controls which variables are set as coordinate variables: - * ``"coordinates"`` or ``True``: Set variables reffered to in the - ``'coordinates'`` `attribute of the datasets or individual variables + + - "coordinates" or True: Set variables referred to in the + ``'coordinates'`` attribute of the datasets or individual variables as coordinate variables. - * ``"all"``: Set variables refferd to in 'grid_mapping', 'bounds' and + - "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and other attributes as coordinate variables. drop_variables : str or iterable, optional A variable or list of variables to exclude from being parsed from the