Merge branch 'main' into bugfix/cube_attr_compare

SciTools · Oct 25, 2024 · 50b624d · 50b624d
2 parents 12ab648 + 8c213f0
commit 50b624d
Show file tree

Hide file tree

Showing 41 changed files with 586 additions and 494 deletions.
diff --git a/.github/workflows/ci-manifest.yml b/.github/workflows/ci-manifest.yml
@@ -23,4 +23,4 @@ concurrency:
 jobs:
   manifest:
     name: "check-manifest"
-    uses: scitools/workflows/.github/workflows/[email protected].1
+    uses: scitools/workflows/.github/workflows/[email protected].2
diff --git a/.github/workflows/refresh-lockfiles.yml b/.github/workflows/refresh-lockfiles.yml
@@ -14,5 +14,5 @@ on:
 
 jobs:
   refresh_lockfiles:
-    uses: scitools/workflows/.github/workflows/[email protected].1
+    uses: scitools/workflows/.github/workflows/[email protected].2
     secrets: inherit
diff --git a/docs/src/further_topics/metadata.rst b/docs/src/further_topics/metadata.rst
@@ -403,10 +403,10 @@ instances. Normally, this would cause issues. For example,
 
     >>> simply = {"one": np.int32(1), "two": np.array([1.0, 2.0])}
     >>> simply
-    {'one': 1, 'two': array([1., 2.])}
+    {'one': np.int32(1), 'two': array([1., 2.])}
     >>> fruity = {"one": np.int32(1), "two": np.array([1.0, 2.0])}
     >>> fruity
-    {'one': 1, 'two': array([1., 2.])}
+    {'one': np.int32(1), 'two': array([1., 2.])}
     >>> simply == fruity
     Traceback (most recent call last):
     ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
@@ -418,9 +418,9 @@ However, metadata class equality is rich enough to handle this eventuality,
     >>> metadata1 = cube.metadata._replace(attributes=simply)
     >>> metadata2 = cube.metadata._replace(attributes=fruity)
     >>> metadata1
-    CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'one': 1, 'two': array([1., 2.])}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))
+    CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'one': np.int32(1), 'two': array([1., 2.])}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))
     >>> metadata2
-    CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'one': 1, 'two': array([1., 2.])}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))
+    CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'one': np.int32(1), 'two': array([1., 2.])}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))
 
 .. doctest:: richer-metadata
 
@@ -430,10 +430,10 @@ However, metadata class equality is rich enough to handle this eventuality,
 .. doctest:: richer-metadata
 
     >>> metadata1
-    CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'one': 1, 'two': array([1., 2.])}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))
+    CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'one': np.int32(1), 'two': array([1., 2.])}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))
     >>> metadata2 = cube.metadata._replace(attributes={"one": np.int32(1), "two": np.array([1000.0, 2000.0])})
     >>> metadata2
-    CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'one': 1, 'two': array([1000., 2000.])}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))
+    CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'one': np.int32(1), 'two': array([1000., 2000.])}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))
     >>> metadata1 == metadata2
     False
 

diff --git a/docs/src/further_topics/ugrid/operations.rst b/docs/src/further_topics/ugrid/operations.rst
@@ -500,7 +500,7 @@ GeoVista :external+geovista:doc:`generated/gallery/index`.
             Attributes:
                 NCO                         'netCDF Operators version 4.7.5 (Homepage = http://nco.sf.net, Code = h ...'
                 history                     'Mon Apr 12 01:44:41 2021: ncap2 -s synthetic=float(synthetic) mesh_C4_synthetic.nc ...'
-                nco_openmp_thread_number    1
+                nco_openmp_thread_number    np.int32(1)
 
         # Convert our mesh+data to a PolyData object.
         >>> face_polydata = cube_to_polydata(sample_mesh_cube)
@@ -600,7 +600,7 @@ below:
             Attributes:
                 NCO                         'netCDF Operators version 4.7.5 (Homepage = http://nco.sf.net, Code = h ...'
                 history                     'Mon Apr 12 01:44:41 2021: ncap2 -s synthetic=float(synthetic) mesh_C4_synthetic.nc ...'
-                nco_openmp_thread_number    1
+                nco_openmp_thread_number    np.int32(1)
 
         >>> regional_cube = extract_unstructured_region(
         ...     cube=sample_mesh_cube,
@@ -619,7 +619,7 @@ below:
             Attributes:
                 NCO                         'netCDF Operators version 4.7.5 (Homepage = http://nco.sf.net, Code = h ...'
                 history                     'Mon Apr 12 01:44:41 2021: ncap2 -s synthetic=float(synthetic) mesh_C4_synthetic.nc ...'
-                nco_openmp_thread_number    1
+                nco_openmp_thread_number    np.int32(1)
 
 
 Regridding

diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst
@@ -24,7 +24,8 @@ This document explains the changes made to Iris for this release
 📢 Announcements
 ================
 
-#. N/A
+#. Iris is now compliant with NumPy v2. This may affect your scripts.
+   :ref:`See the full What's New entry for more details <numpy2>`.
 
 
 ✨ Features
@@ -84,7 +85,7 @@ This document explains the changes made to Iris for this release
    may happen when there are very many or large auxiliary coordinates, derived
    coordinates, cell measures, or ancillary variables to be checked that span
    the concatenation axis. This issue can be avoided by disabling the
-   problematic check. (:pull:`5926`)
+   problematic check. (:pull:`5926` and :pull:`6187`)
 
 🔥 Deprecations
 ===============
@@ -95,7 +96,15 @@ This document explains the changes made to Iris for this release
 🔗 Dependencies
 ===============
 
-#. N/A
+.. _numpy2:
+
+#. `@trexfeathers`_ adapted the Iris codebase to work with NumPy v2. The
+   `NumPy v2 full release notes`_ have the exhaustive details. Notable
+   changes that may affect your Iris scripts are below. (:pull:`6035`)
+
+   * `NumPy v2 changed data type promotion`_
+
+   * `NumPy v2 changed scalar printing`_
 
 
 📚 Documentation
@@ -125,3 +134,6 @@ This document explains the changes made to Iris for this release
     Whatsnew resources in alphabetical order:
 
 .. _cartopy#2390: https://github.com/SciTools/cartopy/issues/2390
+.. _NumPy v2 changed data type promotion: https://numpy.org/doc/stable/numpy_2_0_migration_guide.html#changes-to-numpy-data-type-promotion
+.. _NumPy v2 changed scalar printing: https://numpy.org/doc/stable/release/2.0.0-notes.html#representation-of-numpy-scalars-changed
+.. _NumPy v2 full release notes: https://numpy.org/doc/stable/release/2.0.0-notes.html
diff --git a/lib/iris/_concatenate.py b/lib/iris/_concatenate.py
@@ -310,7 +310,7 @@ def _hash_ndarray(a: np.ndarray) -> np.ndarray:
 
     # Hash the bytes representing the array data.
     hash.update(b"data=")
-    if isinstance(a, np.ma.MaskedArray):
+    if np.ma.is_masked(a):
         # Hash only the unmasked data
         hash.update(a.compressed().tobytes())
         # Hash the mask

diff --git a/lib/iris/_representation/cube_summary.py b/lib/iris/_representation/cube_summary.py
@@ -47,6 +47,8 @@ def __init__(self, cube, name_padding=35):
 
 def string_repr(text, quote_strings=False, clip_strings=False):
     """Produce a one-line printable form of a text string."""
+    # Convert any np.str_ instances to plain strings.
+    text = str(text)
     force_quoted = re.findall("[\n\t]", text) or quote_strings
     if force_quoted:
         # Replace the string with its repr (including quotes).

diff --git a/lib/iris/analysis/_regrid.py b/lib/iris/analysis/_regrid.py
@@ -156,22 +156,30 @@ def _src_align_and_flatten(coord):
     #
 
     # Wrap modular values (e.g. longitudes) if required.
-    modulus = sx.units.modulus
+    _modulus = sx.units.modulus
+    # Convert to NumPy scalar to enable cast checking.
+    modulus = np.min_scalar_type(_modulus).type(_modulus)
+
+    def _cast_sx_points(sx_points_: np.ndarray):
+        """Ensure modulus arithmetic will not raise a TypeError."""
+        if not np.can_cast(modulus, sx_points_.dtype):
+            new_type = np.promote_types(sx_points_.dtype, modulus.dtype)
+            result = sx_points_.astype(new_type, casting="safe")
+        else:
+            result = sx_points_
+        return result
+
     if modulus is not None:
         # Match the source cube x coordinate range to the target grid
         # cube x coordinate range.
         min_sx, min_tx = np.min(sx.points), np.min(tx.points)
         if min_sx < 0 and min_tx >= 0:
             indices = np.where(sx_points < 0)
-            # Ensure += doesn't raise a TypeError
-            if not np.can_cast(modulus, sx_points.dtype):
-                sx_points = sx_points.astype(type(modulus), casting="safe")
+            sx_points = _cast_sx_points(sx_points)
             sx_points[indices] += modulus
         elif min_sx >= 0 and min_tx < 0:
             indices = np.where(sx_points > (modulus / 2))
-            # Ensure -= doesn't raise a TypeError
-            if not np.can_cast(modulus, sx_points.dtype):
-                sx_points = sx_points.astype(type(modulus), casting="safe")
+            sx_points = _cast_sx_points(sx_points)
             sx_points[indices] -= modulus
 
     # Create target grid cube x and y cell boundaries.

diff --git a/lib/iris/analysis/maths.py b/lib/iris/analysis/maths.py
@@ -867,7 +867,7 @@ def _binary_op_common(
         if iris._lazy_data.is_lazy_data(other):
             rhs = other
         else:
-            rhs = np.asanyarray(other)
+            rhs = np.asanyarray(other, dtype=new_dtype)
 
     def unary_func(lhs):
         data = operation_function(lhs, rhs)

diff --git a/lib/iris/common/resolve.py b/lib/iris/common/resolve.py
@@ -2358,16 +2358,16 @@ def cube(self, data, in_place=False):
             >>> resolver.map_rhs_to_lhs
             True
             >>> cube1.data.sum()
-            124652160.0
+            np.float32(124652160.0)
             >>> zeros.shape
             (240, 37, 49)
             >>> zeros.sum()
-            0.0
+            np.float32(0.0)
             >>> result = resolver.cube(zeros, in_place=True)
             >>> result is cube1
             True
             >>> cube1.data.sum()
-            0.0
+            np.float32(0.0)
 
         """
         from iris.cube import Cube

diff --git a/lib/iris/coords.py b/lib/iris/coords.py
@@ -2431,9 +2431,9 @@ def nearest_neighbour_index(self, point):
 
         >>> cube = iris.load_cube(iris.sample_data_path('ostia_monthly.nc'))
         >>> cube.coord('latitude').nearest_neighbour_index(0)
-        9
+        np.int64(9)
         >>> cube.coord('longitude').nearest_neighbour_index(10)
-        12
+        np.int64(12)
 
         .. note:: If the coordinate contains bounds, these will be used to
             determine the nearest neighbour instead of the point values.

diff --git a/lib/iris/fileformats/_ff.py b/lib/iris/fileformats/_ff.py
@@ -5,6 +5,7 @@
 """Provides UK Met Office Fields File (FF) format specific capabilities."""
 
 import os
+from typing import Any
 import warnings
 
 import numpy as np
@@ -370,9 +371,22 @@ def __init__(self, filename, word_depth=DEFAULT_FF_WORD_DEPTH):
                     setattr(self, elem, res)
 
     def __str__(self):
+        def _str_tuple(to_print: Any):
+            """Print NumPy scalars within tuples as numbers, not np objects.
+
+            E.g. ``lookup_table`` is a tuple of NumPy scalars.
+            NumPy v2 by default prints ``np.int32(1)`` instead of ``1`` when
+            printing an iterable of scalars.
+            """
+            if isinstance(to_print, tuple):
+                result = "(" + ", ".join([str(i) for i in to_print]) + ")"
+            else:
+                result = str(to_print)
+            return result
+
         attributes = []
         for name, _ in FF_HEADER:
-            attributes.append("    {}: {}".format(name, getattr(self, name)))
+            attributes.append(f"    {name}: {_str_tuple(getattr(self, name))}")
         return "FF Header:\n" + "\n".join(attributes)
 
     def __repr__(self):

diff --git a/lib/iris/fileformats/netcdf/saver.py b/lib/iris/fileformats/netcdf/saver.py
@@ -1387,9 +1387,13 @@ def _ensure_valid_dtype(self, values, src_name, src_object):
             val_min, val_max = (values.min(), values.max())
             if is_lazy_data(values):
                 val_min, val_max = _co_realise_lazy_arrays([val_min, val_max])
+            # NumPy will inherit values.dtype even if the scalar numbers work
+            #  with a smaller type.
+            min_dtype = np.promote_types(
+                *[np.min_scalar_type(m) for m in (val_min, val_max)]
+            )
             # Cast to an integer type supported by netCDF3.
-            can_cast = all([np.can_cast(m, np.int32) for m in (val_min, val_max)])
-            if not can_cast:
+            if not np.can_cast(min_dtype, np.int32):
                 msg = (
                     "The data type of {} {!r} is not supported by {} and"
                     " its values cannot be safely cast to a supported"

diff --git a/lib/iris/fileformats/nimrod_load_rules.py b/lib/iris/fileformats/nimrod_load_rules.py
@@ -126,42 +126,41 @@ def units(cube, field):
         "n/a": "1",
     }
 
+    dtype_original = cube.dtype
     field_units = remove_unprintable_chars(field.units)
     if field_units == "m/2-25k":
         # Handle strange visibility units
-        cube.data = (cube.data.astype(np.float32) + 25000.0) * 2
+        cube.data = (cube.data + 25000.0) * 2
         field_units = "m"
     if "*" in field_units:
         # Split into unit string and integer
         unit_list = field_units.split("*")
         if "^" in unit_list[1]:
             # Split out magnitude
             unit_sublist = unit_list[1].split("^")
-            cube.data = cube.data.astype(np.float32) / float(unit_sublist[0]) ** float(
-                unit_sublist[1]
-            )
+            cube.data = cube.data / float(unit_sublist[0]) ** float(unit_sublist[1])
         else:
-            cube.data = cube.data.astype(np.float32) / float(unit_list[1])
+            cube.data = cube.data / float(unit_list[1])
         field_units = unit_list[0]
     if "ug/m3E1" in field_units:
         # Split into unit string and integer
         unit_list = field_units.split("E")
-        cube.data = cube.data.astype(np.float32) / 10.0
+        cube.data = cube.data / 10.0
         field_units = unit_list[0]
     if field_units == "%":
         # Convert any percentages into fraction
         field_units = "1"
-        cube.data = cube.data.astype(np.float32) / 100.0
+        cube.data = cube.data / 100.0
     if field_units == "oktas":
         field_units = "1"
-        cube.data = cube.data.astype(np.float32) / 8.0
+        cube.data = cube.data / 8.0
     if field_units == "dBZ":
         # cf_units doesn't recognise decibels (dBZ), but does know BZ
         field_units = "BZ"
-        cube.data = cube.data.astype(np.float32) / 10.0
+        cube.data = cube.data / 10.0
     if field_units == "g/Kg":
         field_units = "kg/kg"
-        cube.data = cube.data.astype(np.float32) / 1000.0
+        cube.data = cube.data / 1000.0
     if not field_units:
         if field.field_code == 8:
             # Relative Humidity data are unitless, but not "unknown"
@@ -175,6 +174,14 @@ def units(cube, field):
         # Deal with the case where the units are of the form '/unit' eg
         # '/second' in the Nimrod file. This converts to the form unit^-1
         field_units = field_units[1:] + "^-1"
+
+    if cube.dtype != dtype_original:
+        # Original development logic: if any arithmetic takes place, ensure
+        #  the data type is float32 (starts as an int). Unknown why.
+        #  Automatic casting is returning inconsistent types when masks are
+        #  involved, so the new logic is to do the casting as the final step.
+        cube.data = cube.data.astype(np.float32)
+
     try:
         cube.units = field_units
     except ValueError:

diff --git a/lib/iris/fileformats/pp.py b/lib/iris/fileformats/pp.py
@@ -11,6 +11,7 @@
 import os
 import re
 import struct
+from typing import Any
 import warnings
 
 import cf_units
@@ -950,6 +951,21 @@ def t2(self):
 
     def __repr__(self):
         """Return a string representation of the PP field."""
+
+        def _str_tuple(to_print: Any):
+            """Print NumPy scalars within tuples as numbers, not np objects.
+
+            E.g. ``lbuser`` is a tuple of NumPy scalars.
+
+            NumPy v2 by default prints ``np.int32(1)`` instead of ``1`` when
+            printing an iterable of scalars.
+            """
+            if isinstance(to_print, tuple):
+                result = "(" + ", ".join([str(i) for i in to_print]) + ")"
+            else:
+                result = str(to_print)
+            return result
+
         # Define an ordering on the basic header names
         attribute_priority_lookup = {name: loc[0] for name, loc in self.HEADER_DEFN}
 
@@ -975,9 +991,8 @@ def __repr__(self):
             ),
         )
 
-        return (
-            "PP Field" + "".join(["\n   %s: %s" % (k, v) for k, v in attributes]) + "\n"
-        )
+        contents = "".join([f"\n   {k}: {_str_tuple(v)}" for k, v in attributes])
+        return f"PP Field{contents}\n"
 
     @property
     def stash(self):
@@ -1178,7 +1193,7 @@ def save(self, file_handle):
             data.dtype = data.dtype.newbyteorder(">")
 
         # Create the arrays which will hold the header information
-        lb = np.empty(shape=NUM_LONG_HEADERS, dtype=np.dtype(">u%d" % PP_WORD_DEPTH))
+        lb = np.empty(shape=NUM_LONG_HEADERS, dtype=np.dtype(">i%d" % PP_WORD_DEPTH))
         b = np.empty(shape=NUM_FLOAT_HEADERS, dtype=np.dtype(">f%d" % PP_WORD_DEPTH))
 
         # Fill in the header elements from the PPField