From 1cfe72d694b790072e3845462433b23ddf4d67d0 Mon Sep 17 00:00:00 2001 From: David Huard Date: Wed, 18 Dec 2019 12:28:55 -0500 Subject: [PATCH 1/2] datetime_to_numeric: convert timedelta objects using np.timedelta64 type conversion. add overflow tests --- xarray/core/duck_array_ops.py | 43 +++++++++++++++++++++-------- xarray/tests/test_duck_array_ops.py | 22 ++++++++++++++- 2 files changed, 52 insertions(+), 13 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index cf616acb485..7d84bc3cc03 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -372,43 +372,62 @@ def _datetime_nanmin(array): def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): - """Convert an array containing datetime-like data to an array of floats. + """Convert an array containing datetime-like data to numerical values. + + Convert the datetime array to a timedelta relative to an offset. Parameters ---------- - da : np.array - Input data - offset: Scalar with the same type of array or None - If None, subtract minimum values to reduce round off error - datetime_unit: None or any of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', - 'us', 'ns', 'ps', 'fs', 'as'} - dtype: target dtype + array : array-like + Input data + offset: None, datetime or cftime.datetime + Datetime offset. If None, this is set by default to the array's minimum + value to reduce round off errors. + datetime_unit: {None, Y, M, W, D, h, m, s, ms, us, ns, ps, fs, as} + If not None, convert output to a given datetime unit. Note that some + conversions are not allowed due to non-linear relationships between units. + dtype: dtype + Output dtype. Returns ------- array + Numerical representation of datetime object relative to an offset. + + Notes + ----- + Some datetime unit conversions won't work, for example from days to years, even + though some calendars would allow for them (e.g. no_leap). This is because there + is no `cftime.timedelta` object. 
""" # TODO: make this function dask-compatible? + # Set offset to minimum if not given if offset is None: if array.dtype.kind in "Mm": offset = _datetime_nanmin(array) else: offset = min(array) + + # Compute timedelta object. + # For np.datetime64, this can silently yield garbage due to overflow. array = array - offset - if not hasattr(array, "dtype"): # scalar is converted to 0d-array + # Scalar is converted to 0d-array + if not hasattr(array, "dtype"): array = np.array(array) + # Convert timedelta objects to timedelta64[ms] dtype. if array.dtype.kind in "O": - # possibly convert object array containing datetime.timedelta - array = np.asarray(pd.Series(array.ravel())).reshape(array.shape) + array = array.astype("timedelta64[ms]") + # Convert to specified timedelta units. if datetime_unit: array = array / np.timedelta64(1, datetime_unit) - # convert np.NaT to np.nan + # Convert np.NaT to np.nan if array.dtype.kind in "mM": return np.where(isnull(array), np.nan, array.astype(dtype)) + return array.astype(dtype) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index aee7bbd6b11..3e8f533af7b 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -669,10 +669,22 @@ def test_datetime_to_numeric_datetime64(): expected = 24 * np.arange(0, 35, 7).astype(dtype) np.testing.assert_array_equal(result, expected) + # Test overflow. Need to convert to ms first otherwise we get garbage + # TODO: make datetime_to_numeric more robust to overflow errors. 
+ offset = np.datetime64("0001-01-01") + ms = times.astype("datetime64[ms]") + result = duck_array_ops.datetime_to_numeric( + ms, offset=offset, datetime_unit="D", dtype=int + ) + expected = 730119 + np.arange(0, 35, 7) + np.testing.assert_array_equal(result, expected) + @requires_cftime def test_datetime_to_numeric_cftime(): - times = cftime_range("2000", periods=5, freq="7D").values + import cftime + + times = cftime_range("2000", periods=5, freq="7D", calendar="standard").values result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h") expected = 24 * np.arange(0, 35, 7) np.testing.assert_array_equal(result, expected) @@ -686,3 +698,11 @@ def test_datetime_to_numeric_cftime(): result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h", dtype=dtype) expected = 24 * np.arange(0, 35, 7).astype(dtype) np.testing.assert_array_equal(result, expected) + + # Test potential overflow (outputs are different from timedelta64. Expected ?) + offset = cftime.DatetimeGregorian(1, 1, 1) + result = duck_array_ops.datetime_to_numeric( + times, offset=offset, datetime_unit="D", dtype=int + ) + expected = 730121 + np.arange(0, 35, 7) + np.testing.assert_array_equal(result, expected) From 49641632ac4c13f53ff5499d0bc583690ad70f4d Mon Sep 17 00:00:00 2001 From: David Huard Date: Wed, 18 Dec 2019 12:33:00 -0500 Subject: [PATCH 2/2] added interp test --- xarray/tests/test_interp.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index b93325d7eab..e3af8b5873a 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -662,3 +662,10 @@ def test_datetime_interp_noerror(): coords={"time": pd.date_range("01-01-2001", periods=50, freq="H")}, ) a.interp(x=xi, time=xi.time) # should not raise an error + + +@requires_cftime +def test_3641(): + times = xr.cftime_range("0001", periods=3, freq="500Y") + da = xr.DataArray(range(3), dims=["time"], coords=[times]) + da.interp(time=["0002-05-01"])