Skip to content

Commit 17571b5

Browse files
Fix pandas datetime decoding with NumPy >= 2.0 for small integer dtypes (#9518)
* Fix pandas datetime decoding with np.int32 values and NumPy >= 2. Thanks @langmore for noting this issue and suggesting this workaround. * Refine what's new entry
1 parent e313853 commit 17571b5

File tree

3 files changed

+33
-8
lines changed

3 files changed

+33
-8
lines changed

doc/whats-new.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,11 @@ Bug fixes
4747
- Make illegal path-like variable names when constructing a DataTree from a Dataset
4848
(:issue:`9339`, :pull:`9378`)
4949
By `Etienne Schalk <https://github.com/etienneschalk>`_.
50+
- Work around `upstream pandas issue
51+
<https://github.com/pandas-dev/pandas/issues/56996>`_ to ensure that we can
52+
decode times encoded with small integer dtype values (e.g. ``np.int32``) in
53+
environments with NumPy 2.0 or greater without needing to fall back to cftime
54+
(:pull:`9518`). By `Spencer Clark <https://github.com/spencerkclark>`_.
5055
- Fix bug when encoding times with missing values as floats in the case when
5156
the non-missing times could in theory be encoded with integers
5257
(:issue:`9488`, :pull:`9497`). By `Spencer Clark

xarray/coding/times.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,15 @@ def _decode_datetime_with_pandas(
254254
"pandas."
255255
)
256256

257+
# Work around pandas.to_timedelta issue with dtypes smaller than int64 and
258+
# NumPy 2.0 by casting all int and uint data to int64 and uint64,
259+
# respectively. See https://github.com/pandas-dev/pandas/issues/56996 for
260+
# more details.
261+
if flat_num_dates.dtype.kind == "i":
262+
flat_num_dates = flat_num_dates.astype(np.int64)
263+
elif flat_num_dates.dtype.kind == "u":
264+
flat_num_dates = flat_num_dates.astype(np.uint64)
265+
257266
time_units, ref_date_str = _unpack_netcdf_time_units(units)
258267
time_units = _netcdf_to_numpy_timeunit(time_units)
259268
try:

xarray/tests/test_coding_times.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import numpy as np
99
import pandas as pd
1010
import pytest
11-
from pandas.errors import OutOfBoundsDatetime
11+
from pandas.errors import OutOfBoundsDatetime, OutOfBoundsTimedelta
1212

1313
from xarray import (
1414
DataArray,
@@ -1136,11 +1136,16 @@ def test_should_cftime_be_used_target_not_npable():
11361136
_should_cftime_be_used(src, "noleap", False)
11371137

11381138

1139-
@pytest.mark.parametrize("dtype", [np.uint8, np.uint16, np.uint32, np.uint64])
1140-
def test_decode_cf_datetime_uint(dtype):
1139+
@pytest.mark.parametrize(
1140+
"dtype",
1141+
[np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64],
1142+
)
1143+
def test_decode_cf_datetime_varied_integer_dtypes(dtype):
11411144
units = "seconds since 2018-08-22T03:23:03Z"
11421145
num_dates = dtype(50)
1143-
result = decode_cf_datetime(num_dates, units)
1146+
# Set use_cftime=False to ensure we cannot mask a failure by falling back
1147+
# to cftime.
1148+
result = decode_cf_datetime(num_dates, units, use_cftime=False)
11441149
expected = np.asarray(np.datetime64("2018-08-22T03:23:53", "ns"))
11451150
np.testing.assert_equal(result, expected)
11461151

@@ -1154,6 +1159,14 @@ def test_decode_cf_datetime_uint64_with_cftime():
11541159
np.testing.assert_equal(result, expected)
11551160

11561161

1162+
def test_decode_cf_datetime_uint64_with_pandas_overflow_error():
1163+
units = "nanoseconds since 1970-01-01"
1164+
calendar = "standard"
1165+
num_dates = np.uint64(1_000_000 * 86_400 * 360 * 500_000)
1166+
with pytest.raises(OutOfBoundsTimedelta):
1167+
decode_cf_datetime(num_dates, units, calendar, use_cftime=False)
1168+
1169+
11571170
@requires_cftime
11581171
def test_decode_cf_datetime_uint64_with_cftime_overflow_error():
11591172
units = "microseconds since 1700-01-01"
@@ -1438,10 +1451,8 @@ def test_roundtrip_float_times(fill_value, times, units, encoded_values) -> None
14381451
"days since 1700-01-01",
14391452
np.dtype("int32"),
14401453
),
1441-
"mixed-cftime-pandas-encoding-with-prescribed-units-and-dtype": (
1442-
"250YS",
1443-
"days since 1700-01-01",
1444-
np.dtype("int32"),
1454+
"mixed-cftime-pandas-encoding-with-prescribed-units-and-dtype": pytest.param(
1455+
"250YS", "days since 1700-01-01", np.dtype("int32"), marks=requires_cftime
14451456
),
14461457
"pandas-encoding-with-default-units-and-dtype": ("250YS", None, None),
14471458
}

0 commit comments

Comments
 (0)