From 99fa8cebda439a1c900627206e7977d536b5358e Mon Sep 17 00:00:00 2001 From: Manuel Schlund <32543114+schlunma@users.noreply.github.com> Date: Fri, 16 Jun 2023 11:18:56 +0200 Subject: [PATCH] Fix concatenation of cubes with aux factories (#5340) * Fix concatenation of cubes with aux factories * Added What's new entry * Added comment why all aux factories need to be updated during concatenation --- docs/src/whatsnew/3.6.rst | 5 + lib/iris/_concatenate.py | 95 ++++++++++++------- .../concatenate/test_concatenate.py | 11 +++ 3 files changed, 79 insertions(+), 32 deletions(-) diff --git a/docs/src/whatsnew/3.6.rst b/docs/src/whatsnew/3.6.rst index 543ce7372e..61bf75f15c 100644 --- a/docs/src/whatsnew/3.6.rst +++ b/docs/src/whatsnew/3.6.rst @@ -71,6 +71,11 @@ This document explains the changes made to Iris for this release conversion of lazy data when using a `Distributed`_ scheduler. (:issue:`5347`, :pull:`5349`) + #. `@schlunma`_ fixed a bug in the concatenation of cubes with aux factories + which could lead to a `KeyError` due to dependencies that have not been + properly updated. + (:issue:`5339`, :pull:`5340`) + 📢 Announcements ================ diff --git a/lib/iris/_concatenate.py b/lib/iris/_concatenate.py index 10a31eafc1..d98e63da5a 100644 --- a/lib/iris/_concatenate.py +++ b/lib/iris/_concatenate.py @@ -880,9 +880,13 @@ def concatenate(self): # Concatenate the new dimension coordinate. dim_coords_and_dims = self._build_dim_coordinates() - # Concatenate the new auxiliary coordinates. + # Concatenate the new auxiliary coordinates (does NOT include + # scalar coordinates!). aux_coords_and_dims = self._build_aux_coordinates() + # Concatenate the new scalar coordinates. + scalar_coords = self._build_scalar_coordinates() + # Concatenate the new cell measures cell_measures_and_dims = self._build_cell_measures() @@ -891,18 +895,21 @@ def concatenate(self): # Concatenate the new aux factories aux_factories = self._build_aux_factories( - dim_coords_and_dims, aux_coords_and_dims + dim_coords_and_dims, aux_coords_and_dims, scalar_coords ) # Concatenate the new data payload. data = self._build_data() # Build the new cube. + all_aux_coords_and_dims = aux_coords_and_dims + [ + (scalar_coord, ()) for scalar_coord in scalar_coords + ] kwargs = cube_signature.defn._asdict() cube = iris.cube.Cube( data, dim_coords_and_dims=dim_coords_and_dims, - aux_coords_and_dims=aux_coords_and_dims, + aux_coords_and_dims=all_aux_coords_and_dims, cell_measures_and_dims=cell_measures_and_dims, ancillary_variables_and_dims=ancillary_variables_and_dims, aux_factories=aux_factories, @@ -1163,12 +1170,22 @@ def _build_aux_coordinates(self): aux_coords_and_dims.append((coord.copy(), dims)) - # Generate all the scalar coordinates for the new concatenated cube. - for coord in cube_signature.scalar_coords: - aux_coords_and_dims.append((coord.copy(), ())) - return aux_coords_and_dims + def _build_scalar_coordinates(self): + """ + Generate the scalar coordinates for the new concatenated cube. + + Returns: + A list of scalar coordinates. + + """ + scalar_coords = [] + for coord in self._cube_signature.scalar_coords: + scalar_coords.append(coord.copy()) + + return scalar_coords + def _build_cell_measures(self): """ Generate the cell measures with associated dimension(s) @@ -1247,7 +1264,9 @@ def _build_ancillary_variables(self): return ancillary_variables_and_dims - def _build_aux_factories(self, dim_coords_and_dims, aux_coords_and_dims): + def _build_aux_factories( + self, dim_coords_and_dims, aux_coords_and_dims, scalar_coords + ): """ Generate the aux factories for the new concatenated cube. @@ -1261,6 +1280,9 @@ def _build_aux_factories(self, dim_coords_and_dims, aux_coords_and_dims): A list of auxiliary coordinates and dimension(s) tuple pairs from the concatenated cube. + * scalar_coords: + A list of scalar coordinates from the concatenated cube. + Returns: A list of :class:`iris.aux_factory.AuxCoordFactory`. @@ -1271,35 +1293,44 @@ def _build_aux_factories(self, dim_coords_and_dims, aux_coords_and_dims): old_aux_coords = [a[0] for a in cube_signature.aux_coords_and_dims] new_dim_coords = [d[0] for d in dim_coords_and_dims] new_aux_coords = [a[0] for a in aux_coords_and_dims] - scalar_coords = cube_signature.scalar_coords + old_scalar_coords = cube_signature.scalar_coords + new_scalar_coords = scalar_coords aux_factories = [] # Generate all the factories for the new concatenated cube. - for i, (coord, dims, factory) in enumerate( - cube_signature.derived_coords_and_dims - ): - # Check whether the derived coordinate of the factory spans the - # nominated dimension of concatenation. - if self.axis in dims: - # Update the dependencies of the factory with coordinates of - # the concatenated cube. We need to check all coordinate types - # here (dim coords, aux coords, and scalar coords). - new_dependencies = {} - for old_dependency in factory.dependencies.values(): - if old_dependency in old_dim_coords: - dep_idx = old_dim_coords.index(old_dependency) - new_dependency = new_dim_coords[dep_idx] - elif old_dependency in old_aux_coords: - dep_idx = old_aux_coords.index(old_dependency) - new_dependency = new_aux_coords[dep_idx] - else: - dep_idx = scalar_coords.index(old_dependency) - new_dependency = scalar_coords[dep_idx] - new_dependencies[id(old_dependency)] = new_dependency + for _, _, factory in cube_signature.derived_coords_and_dims: + # Update the dependencies of the factory with coordinates of + # the concatenated cube. We need to check all coordinate types + # here (dim coords, aux coords, and scalar coords). + + # Note: in contrast to other _build_... methods of this class, we + # do NOT need to distinguish between aux factories that span the + # nominated concatenation axis and aux factories that do not. The + # reason is that ALL aux factories need to be updated with the new + # coordinates of the concatenated cube (passed to this function via + # dim_coords_and_dims, aux_coords_and_dims, scalar_coords [these + # contain ALL new coordinates, not only the ones spanning the + # concatenation dimension]), so no special treatment for the aux + # factories that span the concatenation dimension is necessary. If + # not all aux factories are properly updated with references to the + # new coordinates, this may lead to KeyErrors (see + # https://github.com/SciTools/iris/issues/5339). + new_dependencies = {} + for old_dependency in factory.dependencies.values(): + if old_dependency in old_dim_coords: + dep_idx = old_dim_coords.index(old_dependency) + new_dependency = new_dim_coords[dep_idx] + elif old_dependency in old_aux_coords: + dep_idx = old_aux_coords.index(old_dependency) + new_dependency = new_aux_coords[dep_idx] + else: + dep_idx = old_scalar_coords.index(old_dependency) + new_dependency = new_scalar_coords[dep_idx] + new_dependencies[id(old_dependency)] = new_dependency - # Create new factory with the updated dependencies. - factory = factory.updated(new_dependencies) + # Create new factory with the updated dependencies. + factory = factory.updated(new_dependencies) aux_factories.append(factory) diff --git a/lib/iris/tests/integration/concatenate/test_concatenate.py b/lib/iris/tests/integration/concatenate/test_concatenate.py index 1f39b2589d..2543e2931b 100644 --- a/lib/iris/tests/integration/concatenate/test_concatenate.py +++ b/lib/iris/tests/integration/concatenate/test_concatenate.py @@ -278,6 +278,17 @@ def test_equal_derived_coords(self): [[10.0, 20.0], [10.0, 40.0], [10.0, 20.0], [10.0, 40.0]], ) + # Make sure indexing the resulting cube works correctly + # (see https://github.com/SciTools/iris/issues/5339) + self.assertEqual(result[0][0].shape, (2,)) + + # Make sure ALL aux factory dependencies of the resulting cube were + # properly updated (i.e., they are different from the original cubes). + for aux_factory in result[0].aux_factories: + for coord in aux_factory.dependencies.values(): + self.assertNotEqual(id(coord), id(cube_a.coord(coord.name()))) + self.assertNotEqual(id(coord), id(cube_b.coord(coord.name()))) + def test_equal_derived_coords_with_bounds(self): cube_a = self.create_cube() cube_a.coord("sigma").bounds = [[0.0, 5.0], [5.0, 20.0]]