Demo Numpy v2 (#6035)
* Demo Numpy v2.

* Force NumPy v2.

* Store PP LB headers as signed ints (as specified in UMDP F3).

* Correct PP tuple printing.

* NumPy 2 compliant dtype casting in regridding.

* Todo note in What's New.

* Accept NumPy 2 avoiding up-casting dtype in integration NetCDF _get_scale_factor_add_offset.

* Adapt NetCDF _ensure_valid_dtype() for NumPy v2.

* Adapt maths handling of rhs scalar dtype - for NumPy v2.

* Accept regrid KGO changes from changes to NumPy scalar arithmetic dtypes.

* Correct FF tuple printing.

* More changed NumPy scalar representations.

* Accept more changes for NumPy scalar arithmetic dtypes.

* Accept NIMROD changes from NumPy scalar arithmetic dtypes.

* Revert "Accept NIMROD changes from NumPy scalar arithmetic dtypes."

This reverts commit 664f523.

* More consistent dtype casting in NIMROD load, plus minor change to known-good-outputs.

* Accept more changes for NumPy scalar arithmetic dtypes.

* Accept more changes for NumPy scalar arithmetic dtypes.

* Demonstrate new mo_pack version.

* More thorough use of float32 for mask testing.

* Explicitly use the values of the pandas object in as_cube().

* Cube summary: convert any np.str_ instances to plain strings.

* Unpin NumPy.

* Fix doctests.


* Fix dtype handling in iris.util.regular_points for NumPy v2.

* Update lock files.

* Undo over-zealous doctest changes.

* Remove dask/dask#11433 workaround.

* What's New entry.

* Missed doctest changes from f547662.

* Revert "Undo over-zealous doctest changes." - valid since f547662.

This reverts commit 8207e05.
trexfeathers authored Oct 24, 2024
1 parent dd0432e commit b5bdaff
Showing 37 changed files with 565 additions and 490 deletions.
12 changes: 6 additions & 6 deletions docs/src/further_topics/metadata.rst
Original file line number Diff line number Diff line change
@@ -403,10 +403,10 @@ instances. Normally, this would cause issues. For example,

>>> simply = {"one": np.int32(1), "two": np.array([1.0, 2.0])}
>>> simply
{'one': 1, 'two': array([1., 2.])}
{'one': np.int32(1), 'two': array([1., 2.])}
>>> fruity = {"one": np.int32(1), "two": np.array([1.0, 2.0])}
>>> fruity
{'one': 1, 'two': array([1., 2.])}
{'one': np.int32(1), 'two': array([1., 2.])}
>>> simply == fruity
Traceback (most recent call last):
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
@@ -418,9 +418,9 @@ However, metadata class equality is rich enough to handle this eventuality,
>>> metadata1 = cube.metadata._replace(attributes=simply)
>>> metadata2 = cube.metadata._replace(attributes=fruity)
>>> metadata1
CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'one': 1, 'two': array([1., 2.])}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))
CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'one': np.int32(1), 'two': array([1., 2.])}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))
>>> metadata2
CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'one': 1, 'two': array([1., 2.])}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))
CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'one': np.int32(1), 'two': array([1., 2.])}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))

.. doctest:: richer-metadata

@@ -430,10 +430,10 @@
.. doctest:: richer-metadata

>>> metadata1
CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'one': 1, 'two': array([1., 2.])}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))
CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'one': np.int32(1), 'two': array([1., 2.])}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))
>>> metadata2 = cube.metadata._replace(attributes={"one": np.int32(1), "two": np.array([1000.0, 2000.0])})
>>> metadata2
CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'one': 1, 'two': array([1000., 2000.])}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))
CubeMetadata(standard_name='air_temperature', long_name=None, var_name='air_temperature', units=Unit('K'), attributes={'one': np.int32(1), 'two': array([1000., 2000.])}, cell_methods=(CellMethod(method='mean', coord_names=('time',), intervals=('6 hour',), comments=()),))
>>> metadata1 == metadata2
False

6 changes: 3 additions & 3 deletions docs/src/further_topics/ugrid/operations.rst
@@ -500,7 +500,7 @@ GeoVista :external+geovista:doc:`generated/gallery/index`.
Attributes:
NCO 'netCDF Operators version 4.7.5 (Homepage = http://nco.sf.net, Code = h ...'
history 'Mon Apr 12 01:44:41 2021: ncap2 -s synthetic=float(synthetic) mesh_C4_synthetic.nc ...'
nco_openmp_thread_number 1
nco_openmp_thread_number np.int32(1)
# Convert our mesh+data to a PolyData object.
>>> face_polydata = cube_to_polydata(sample_mesh_cube)
@@ -600,7 +600,7 @@ below:
Attributes:
NCO 'netCDF Operators version 4.7.5 (Homepage = http://nco.sf.net, Code = h ...'
history 'Mon Apr 12 01:44:41 2021: ncap2 -s synthetic=float(synthetic) mesh_C4_synthetic.nc ...'
nco_openmp_thread_number 1
nco_openmp_thread_number np.int32(1)
>>> regional_cube = extract_unstructured_region(
... cube=sample_mesh_cube,
@@ -619,7 +619,7 @@ below:
Attributes:
NCO 'netCDF Operators version 4.7.5 (Homepage = http://nco.sf.net, Code = h ...'
history 'Mon Apr 12 01:44:41 2021: ncap2 -s synthetic=float(synthetic) mesh_C4_synthetic.nc ...'
nco_openmp_thread_number 1
nco_openmp_thread_number np.int32(1)
Regridding
16 changes: 14 additions & 2 deletions docs/src/whatsnew/latest.rst
@@ -24,7 +24,8 @@ This document explains the changes made to Iris for this release
📢 Announcements
================

#. N/A
#. Iris is now compliant with NumPy v2. This may affect your scripts.
:ref:`See the full What's New entry for more details <numpy2>`.


✨ Features
@@ -92,7 +93,15 @@ This document explains the changes made to Iris for this release
🔗 Dependencies
===============

#. N/A
.. _numpy2:

#. `@trexfeathers`_ adapted the Iris codebase to work with NumPy v2. The
`NumPy v2 full release notes`_ have the exhaustive details. Notable
changes that may affect your Iris scripts are below. (:pull:`6035`)

* `NumPy v2 changed data type promotion`_

* `NumPy v2 changed scalar printing`_
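A minimal sketch of the two behaviour changes listed above; the version-dependent outcomes are noted in the comments (the dtype result assumes NumPy v2's NEP 50 promotion rules):

```python
import numpy as np

# 1. Scalar printing: repr() now names the type; str() is unchanged.
x = np.int32(1)
print(repr(x))  # NumPy v2: np.int32(1); NumPy v1: 1
print(str(x))   # 1 (both versions)

# 2. Data type promotion (NEP 50): Python scalars are "weak", so they no
# longer up-cast a NumPy scalar operand to float64.
r = np.float32(1) + 1.0
print(r.dtype)  # NumPy v2: float32; NumPy v1: float64
```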


📚 Documentation
@@ -122,3 +131,6 @@ This document explains the changes made to Iris for this release
Whatsnew resources in alphabetical order:
.. _cartopy#2390: https://github.com/SciTools/cartopy/issues/2390
.. _NumPy v2 changed data type promotion: https://numpy.org/doc/stable/numpy_2_0_migration_guide.html#changes-to-numpy-data-type-promotion
.. _NumPy v2 changed scalar printing: https://numpy.org/doc/stable/release/2.0.0-notes.html#representation-of-numpy-scalars-changed
.. _NumPy v2 full release notes: https://numpy.org/doc/stable/release/2.0.0-notes.html
2 changes: 2 additions & 0 deletions lib/iris/_representation/cube_summary.py
@@ -47,6 +47,8 @@ def __init__(self, cube, name_padding=35):

def string_repr(text, quote_strings=False, clip_strings=False):
"""Produce a one-line printable form of a text string."""
# Convert any np.str_ instances to plain strings.
text = str(text)
force_quoted = re.findall("[\n\t]", text) or quote_strings
if force_quoted:
# Replace the string with its repr (including quotes).
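The one-line `str(text)` conversion in the diff above is sufficient because `np.str_` subclasses `str`, and `str()` on a `str` subclass returns an exact built-in string; a small sketch of the helper's behaviour:

```python
import numpy as np

def string_repr(text, quote_strings=False):
    """Sketch of the patched helper: normalise np.str_ to a plain string
    before any repr-based quoting."""
    text = str(text)  # np.str_ subclasses str; str() yields an exact str
    if quote_strings:
        # Replace the string with its repr (including quotes).
        text = repr(text)
    return text

value = np.str_("air_temperature")
print(string_repr(value))                      # air_temperature
print(string_repr(value, quote_strings=True))  # 'air_temperature'
```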
22 changes: 15 additions & 7 deletions lib/iris/analysis/_regrid.py
@@ -156,22 +156,30 @@ def _src_align_and_flatten(coord):
#

# Wrap modular values (e.g. longitudes) if required.
modulus = sx.units.modulus
_modulus = sx.units.modulus
# Convert to NumPy scalar to enable cast checking.
modulus = np.min_scalar_type(_modulus).type(_modulus)

def _cast_sx_points(sx_points_: np.ndarray):
"""Ensure modulus arithmetic will not raise a TypeError."""
if not np.can_cast(modulus, sx_points_.dtype):
new_type = np.promote_types(sx_points_.dtype, modulus.dtype)
result = sx_points_.astype(new_type, casting="safe")
else:
result = sx_points_
return result

if modulus is not None:
# Match the source cube x coordinate range to the target grid
# cube x coordinate range.
min_sx, min_tx = np.min(sx.points), np.min(tx.points)
if min_sx < 0 and min_tx >= 0:
indices = np.where(sx_points < 0)
# Ensure += doesn't raise a TypeError
if not np.can_cast(modulus, sx_points.dtype):
sx_points = sx_points.astype(type(modulus), casting="safe")
sx_points = _cast_sx_points(sx_points)
sx_points[indices] += modulus
elif min_sx >= 0 and min_tx < 0:
indices = np.where(sx_points > (modulus / 2))
# Ensure -= doesn't raise a TypeError
if not np.can_cast(modulus, sx_points.dtype):
sx_points = sx_points.astype(type(modulus), casting="safe")
sx_points = _cast_sx_points(sx_points)
sx_points[indices] -= modulus

# Create target grid cube x and y cell boundaries.
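The `_cast_sx_points` refactor above is needed because NumPy v2's `np.can_cast` no longer performs value-based checks on Python scalars, so the modulus is first converted to a NumPy scalar via `np.min_scalar_type`. A sketch of the pattern with illustrative values (the function name and sample dtypes are not from the Iris source):

```python
import numpy as np

def cast_for_modulus(points: np.ndarray, modulus):
    """Promote the points dtype, if needed, so that += / -= with the
    modulus cannot raise a TypeError (mirrors the regrid fix above)."""
    # Convert to a NumPy scalar so can_cast checks a dtype, not a value.
    modulus = np.min_scalar_type(modulus).type(modulus)
    if not np.can_cast(modulus, points.dtype):
        new_type = np.promote_types(points.dtype, modulus.dtype)
        points = points.astype(new_type, casting="safe")
    return points, modulus

points = np.arange(4, dtype=np.int16)
points, modulus = cast_for_modulus(points, 360.0)
points[points < 2] += modulus  # no TypeError: dtype was promoted
```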
2 changes: 1 addition & 1 deletion lib/iris/analysis/maths.py
@@ -867,7 +867,7 @@ def _binary_op_common(
if iris._lazy_data.is_lazy_data(other):
rhs = other
else:
rhs = np.asanyarray(other)
rhs = np.asanyarray(other, dtype=new_dtype)

def unary_func(lhs):
data = operation_function(lhs, rhs)
6 changes: 3 additions & 3 deletions lib/iris/common/resolve.py
@@ -2358,16 +2358,16 @@ def cube(self, data, in_place=False):
>>> resolver.map_rhs_to_lhs
True
>>> cube1.data.sum()
124652160.0
np.float32(124652160.0)
>>> zeros.shape
(240, 37, 49)
>>> zeros.sum()
0.0
np.float32(0.0)
>>> result = resolver.cube(zeros, in_place=True)
>>> result is cube1
True
>>> cube1.data.sum()
0.0
np.float32(0.0)
"""
from iris.cube import Cube
4 changes: 2 additions & 2 deletions lib/iris/coords.py
@@ -2431,9 +2431,9 @@ def nearest_neighbour_index(self, point):
>>> cube = iris.load_cube(iris.sample_data_path('ostia_monthly.nc'))
>>> cube.coord('latitude').nearest_neighbour_index(0)
9
np.int64(9)
>>> cube.coord('longitude').nearest_neighbour_index(10)
12
np.int64(12)
.. note:: If the coordinate contains bounds, these will be used to
determine the nearest neighbour instead of the point values.
16 changes: 15 additions & 1 deletion lib/iris/fileformats/_ff.py
@@ -5,6 +5,7 @@
"""Provides UK Met Office Fields File (FF) format specific capabilities."""

import os
from typing import Any
import warnings

import numpy as np
@@ -370,9 +371,22 @@ def __init__(self, filename, word_depth=DEFAULT_FF_WORD_DEPTH):
setattr(self, elem, res)

def __str__(self):
def _str_tuple(to_print: Any):
"""Print NumPy scalars within tuples as numbers, not np objects.
E.g. ``lookup_table`` is a tuple of NumPy scalars.
NumPy v2 by default prints ``np.int32(1)`` instead of ``1`` when
printing an iterable of scalars.
"""
if isinstance(to_print, tuple):
result = "(" + ", ".join([str(i) for i in to_print]) + ")"
else:
result = str(to_print)
return result

attributes = []
for name, _ in FF_HEADER:
attributes.append(" {}: {}".format(name, getattr(self, name)))
attributes.append(f" {name}: {_str_tuple(getattr(self, name))}")
return "FF Header:\n" + "\n".join(attributes)

def __repr__(self):
8 changes: 6 additions & 2 deletions lib/iris/fileformats/netcdf/saver.py
@@ -1387,9 +1387,13 @@ def _ensure_valid_dtype(self, values, src_name, src_object):
val_min, val_max = (values.min(), values.max())
if is_lazy_data(values):
val_min, val_max = _co_realise_lazy_arrays([val_min, val_max])
# NumPy will inherit values.dtype even if the scalar numbers work
# with a smaller type.
min_dtype = np.promote_types(
*[np.min_scalar_type(m) for m in (val_min, val_max)]
)
# Cast to an integer type supported by netCDF3.
can_cast = all([np.can_cast(m, np.int32) for m in (val_min, val_max)])
if not can_cast:
if not np.can_cast(min_dtype, np.int32):
msg = (
"The data type of {} {!r} is not supported by {} and"
" its values cannot be safely cast to a supported"
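The saver change above replaces a per-value `can_cast` check with a smallest-dtype computation: `np.min_scalar_type` is still value-based for scalars, so the min/max of the data can identify a type smaller than `values.dtype`. A sketch (the function name is illustrative, not the Iris API):

```python
import numpy as np

def fits_netcdf3_int(values: np.ndarray) -> bool:
    """Sketch of the patched check: find the smallest dtype holding the
    data's min and max, then ask whether it can be cast to int32."""
    val_min, val_max = values.min(), values.max()
    min_dtype = np.promote_types(
        *[np.min_scalar_type(m) for m in (val_min, val_max)]
    )
    return np.can_cast(min_dtype, np.int32)

small = np.array([0, 300], dtype=np.int64)
big = np.array([0, 2**40], dtype=np.int64)
print(fits_netcdf3_int(small))  # True: values fit int32 despite int64 dtype
print(fits_netcdf3_int(big))    # False
```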
27 changes: 17 additions & 10 deletions lib/iris/fileformats/nimrod_load_rules.py
@@ -126,42 +126,41 @@ def units(cube, field):
"n/a": "1",
}

dtype_original = cube.dtype
field_units = remove_unprintable_chars(field.units)
if field_units == "m/2-25k":
# Handle strange visibility units
cube.data = (cube.data.astype(np.float32) + 25000.0) * 2
cube.data = (cube.data + 25000.0) * 2
field_units = "m"
if "*" in field_units:
# Split into unit string and integer
unit_list = field_units.split("*")
if "^" in unit_list[1]:
# Split out magnitude
unit_sublist = unit_list[1].split("^")
cube.data = cube.data.astype(np.float32) / float(unit_sublist[0]) ** float(
unit_sublist[1]
)
cube.data = cube.data / float(unit_sublist[0]) ** float(unit_sublist[1])
else:
cube.data = cube.data.astype(np.float32) / float(unit_list[1])
cube.data = cube.data / float(unit_list[1])
field_units = unit_list[0]
if "ug/m3E1" in field_units:
# Split into unit string and integer
unit_list = field_units.split("E")
cube.data = cube.data.astype(np.float32) / 10.0
cube.data = cube.data / 10.0
field_units = unit_list[0]
if field_units == "%":
# Convert any percentages into fraction
field_units = "1"
cube.data = cube.data.astype(np.float32) / 100.0
cube.data = cube.data / 100.0
if field_units == "oktas":
field_units = "1"
cube.data = cube.data.astype(np.float32) / 8.0
cube.data = cube.data / 8.0
if field_units == "dBZ":
# cf_units doesn't recognise decibels (dBZ), but does know BZ
field_units = "BZ"
cube.data = cube.data.astype(np.float32) / 10.0
cube.data = cube.data / 10.0
if field_units == "g/Kg":
field_units = "kg/kg"
cube.data = cube.data.astype(np.float32) / 1000.0
cube.data = cube.data / 1000.0
if not field_units:
if field.field_code == 8:
# Relative Humidity data are unitless, but not "unknown"
@@ -175,6 +174,14 @@ def units(cube, field):
# Deal with the case where the units are of the form '/unit' eg
# '/second' in the Nimrod file. This converts to the form unit^-1
field_units = field_units[1:] + "^-1"

if cube.dtype != dtype_original:
# Original development logic: if any arithmetic takes place, ensure
# the data type is float32 (starts as an int). Unknown why.
# Automatic casting is returning inconsistent types when masks are
# involved, so the new logic is to do the casting as the final step.
cube.data = cube.data.astype(np.float32)

try:
cube.units = field_units
except ValueError:
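The NIMROD rework above drops the per-step `astype(np.float32)` calls and instead casts once at the end, only if the arithmetic changed the dtype. A minimal sketch of that pattern, using one of the unit conversions (oktas to fraction) as an assumed example:

```python
import numpy as np

def scale_units_data(data: np.ndarray) -> np.ndarray:
    """Sketch of the reworked rule: do the unit arithmetic first, then
    cast once at the end if the dtype changed (mirrors the diff above)."""
    dtype_original = data.dtype
    result = data / 8.0  # e.g. convert oktas to a fraction
    if result.dtype != dtype_original:
        # Arithmetic promoted the (integer) input; normalise to float32.
        result = result.astype(np.float32)
    return result

out = scale_units_data(np.array([0, 4, 8], dtype=np.int16))
print(out.dtype)  # float32, regardless of NumPy version
```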
23 changes: 19 additions & 4 deletions lib/iris/fileformats/pp.py
@@ -11,6 +11,7 @@
import os
import re
import struct
from typing import Any
import warnings

import cf_units
@@ -950,6 +951,21 @@ def t2(self):

def __repr__(self):
"""Return a string representation of the PP field."""

def _str_tuple(to_print: Any):
"""Print NumPy scalars within tuples as numbers, not np objects.
E.g. ``lbuser`` is a tuple of NumPy scalars.
NumPy v2 by default prints ``np.int32(1)`` instead of ``1`` when
printing an iterable of scalars.
"""
if isinstance(to_print, tuple):
result = "(" + ", ".join([str(i) for i in to_print]) + ")"
else:
result = str(to_print)
return result

# Define an ordering on the basic header names
attribute_priority_lookup = {name: loc[0] for name, loc in self.HEADER_DEFN}

@@ -975,9 +991,8 @@ def __repr__(self):
),
)

return (
"PP Field" + "".join(["\n %s: %s" % (k, v) for k, v in attributes]) + "\n"
)
contents = "".join([f"\n {k}: {_str_tuple(v)}" for k, v in attributes])
return f"PP Field{contents}\n"

@property
def stash(self):
@@ -1178,7 +1193,7 @@ def save(self, file_handle):
data.dtype = data.dtype.newbyteorder(">")

# Create the arrays which will hold the header information
lb = np.empty(shape=NUM_LONG_HEADERS, dtype=np.dtype(">u%d" % PP_WORD_DEPTH))
lb = np.empty(shape=NUM_LONG_HEADERS, dtype=np.dtype(">i%d" % PP_WORD_DEPTH))
b = np.empty(shape=NUM_FLOAT_HEADERS, dtype=np.dtype(">f%d" % PP_WORD_DEPTH))

# Fill in the header elements from the PPField
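The one-character dtype change above (`>u%d` to `>i%d`) matters because UMDP F3 specifies signed header words, and PP missing-data indicators are negative; an unsigned buffer cannot represent them faithfully. A sketch, with the constant values assumed for illustration (the real ones live in `iris.fileformats.pp`):

```python
import numpy as np

# Assumed values for illustration only.
NUM_LONG_HEADERS = 45
PP_WORD_DEPTH = 4  # bytes per header word

# Big-endian *signed* integers, as specified in UMDP F3.
lb = np.empty(shape=NUM_LONG_HEADERS, dtype=np.dtype(">i%d" % PP_WORD_DEPTH))
lb[:] = -32768  # e.g. a negative missing-data indicator

print(lb.dtype.kind)  # 'i' (signed); an unsigned header would corrupt this
print(int(lb[0]))     # -32768
```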
4 changes: 2 additions & 2 deletions lib/iris/pandas.py
@@ -173,7 +173,7 @@ def as_cube(
# 1.6 doesn't. Since we don't care about preserving the order we can
# just force it back to C-order.)
order = "C" if copy else "A"
data = np.array(pandas_array, copy=copy, order=order)
data = np.array(pandas_array.values, copy=copy, order=order)
cube = Cube(np.ma.masked_invalid(data, copy=False))
_add_iris_coord(cube, "index", pandas_array.index, 0, calendars.get(0, None))
if pandas_array.ndim == 2:
@@ -308,7 +308,7 @@ def as_cubes(
Pandas uses ``NaN`` rather than masking data. Converted
:class:`~iris.cube.Cube` can be masked in downstream user code :
>>> my_series = Series([300, np.NaN, 302], name="air_temperature")
>>> my_series = Series([300, np.nan, 302], name="air_temperature")
>>> converted_cube = as_cubes(my_series)[0]
>>> print(converted_cube.data)
[300. nan 302.]
@@ -319,7 +319,7 @@ def test_regrid_reorder_axis(self):
dest = _resampled_grid(self.realistic_cube[0, 0, :3, :2], 3, 3)
res = regrid_area_weighted(src, dest)
self.assertArrayShapeStats(src, (4, 3, 2), 288.08868, 0.008262919)
self.assertArrayShapeStats(res, (4, 9, 6), 288.08865, 0.00826281)
self.assertArrayShapeStats(res, (4, 9, 6), 288.0886, 0.008271061)
# Reshape src so that the coords are ordered [x, z, y],
# the mean and std statistics should be the same
data = np.moveaxis(src.data.copy(), 2, 0)
@@ -329,7 +329,7 @@ def test_regrid_reorder_axis(self):
src.add_dim_coord(lon, 0)
res = regrid_area_weighted(src, dest)
self.assertArrayShapeStats(src, (2, 4, 3), 288.08868, 0.008262919)
self.assertArrayShapeStats(res, (6, 4, 9), 288.08865, 0.00826281)
self.assertArrayShapeStats(res, (6, 4, 9), 288.0886, 0.008271061)
# Reshape src so that the coords are ordered [y, x, z],
# the mean and std statistics should be the same
data = np.moveaxis(src.data.copy(), 2, 0)
@@ -340,7 +340,7 @@ def test_regrid_reorder_axis(self):
dest = _resampled_grid(self.realistic_cube[0, 0, :3, :2], 3, 3)
res = regrid_area_weighted(src, dest)
self.assertArrayShapeStats(src, (3, 2, 4), 288.08868, 0.008262919)
self.assertArrayShapeStats(res, (9, 6, 4), 288.08865, 0.00826281)
self.assertArrayShapeStats(res, (9, 6, 4), 288.0886, 0.008271061)

def test_regrid_lon_to_half_res(self):
src = self.simple_cube