From 06f317e4c1e806ce2bbe5c56b2c7c832e9c2aac4 Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Tue, 22 Oct 2024 11:50:00 +0800 Subject: [PATCH 1/8] BUG: fix DataFrame(data=[None, 1], dtype='timedelta64[ns]') raising ValueError --- pandas/core/arrays/timedeltas.py | 2 +- pandas/tests/frame/test_constructors.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index a8a0037d0bbb9..3c9f015dfc788 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -1111,7 +1111,7 @@ def sequence_to_td64ns( else: mask = np.isnan(data) - data = cast_from_unit_vectorized(data, unit or "ns") + data = cast_from_unit_vectorized(data.ravel(), unit or "ns").reshape(data.shape) data[mask] = iNaT data = data.view("m8[ns]") copy = False diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 0a924aa393be5..3d8213cb3d11a 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2772,6 +2772,14 @@ def test_construction_datetime_resolution_inference(self, cons): res_dtype2 = tm.get_dtype(obj2) assert res_dtype2 == "M8[us, US/Pacific]", res_dtype2 + def test_construction_nan_value_timedelta64_dtype(self): + # GH#60064 + result = DataFrame([None, 1], dtype="timedelta64[ns]") + expected = DataFrame( + ["NaT", "0 days 00:00:00.000000001"], dtype="timedelta64[ns]" + ) + tm.assert_frame_equal(result, expected) + class TestDataFrameConstructorIndexInference: def test_frame_from_dict_of_series_overlapping_monthly_period_indexes(self): From b5bec39808cc509d2f538869a04157097e96e450 Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Sat, 26 Oct 2024 10:44:51 +0800 Subject: [PATCH 2/8] ci From 98dd50f9341cb694f855524793473c84fb5d53a8 Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Sun, 27 Oct 2024 12:06:45 +0800 Subject: [PATCH 3/8] Add whatsnew --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index e5376177d3381..187354c3de798 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -758,6 +758,7 @@ Styler Other ^^^^^ +- Bug in :class:`DataFrame` raising ``ValueError`` when ``dtype`` is ``timedelta64`` and ``data`` is a list containing ``None`` (:issue:`60064`) - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`) - Bug in :func:`eval` on :class:`ExtensionArray` on including division ``/`` failed with a ``TypeError``. (:issue:`58748`) - Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`) From 9c34762c518ee3403023e2f27bd4cbf9f21d2596 Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Sun, 27 Oct 2024 22:58:09 +0800 Subject: [PATCH 4/8] Move to maybe_downcast_to_dtype --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/core/arrays/timedeltas.py | 2 +- pandas/core/dtypes/cast.py | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 187354c3de798..042daf5baea4d 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -611,6 +611,7 @@ Categorical Datetimelike ^^^^^^^^^^^^ - Bug in :attr:`is_year_start` where a DateTimeIndex constructed via a date_range with frequency 'MS' wouldn't have the correct year or quarter start attributes (:issue:`57377`) +- Bug in :class:`DataFrame` raising ``ValueError`` when ``dtype`` is ``timedelta64`` and ``data`` is a list containing ``None`` (:issue:`60064`) - Bug in :class:`Timestamp` constructor failing to raise when ``tz=None`` is explicitly specified in conjunction with timezone-aware ``tzinfo`` or data (:issue:`48688`) - Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`) - Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56147`) @@ -758,7 +759,6 @@ Styler Other ^^^^^ -- Bug in :class:`DataFrame` raising ``ValueError`` when ``dtype`` is ``timedelta64`` and ``data`` is a list containing ``None`` (:issue:`60064`) - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`) - Bug in :func:`eval` on :class:`ExtensionArray` on including division ``/`` failed with a ``TypeError``. (:issue:`58748`) - Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 3c9f015dfc788..a8a0037d0bbb9 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -1111,7 +1111,7 @@ def sequence_to_td64ns( else: mask = np.isnan(data) - data = cast_from_unit_vectorized(data.ravel(), unit or "ns").reshape(data.shape) + data = cast_from_unit_vectorized(data, unit or "ns") data[mask] = iNaT data = data.view("m8[ns]") copy = False diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 6ba07b1761557..fde3ce702e1d1 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1225,6 +1225,9 @@ def maybe_cast_to_datetime( _ensure_nanosecond_dtype(dtype) if lib.is_np_dtype(dtype, "m"): + if getattr(value, "ndim", 1) == 2 and value.shape[1] == 1: + res = TimedeltaArray._from_sequence(value.ravel(), dtype=dtype) + return res.reshape(value.shape) res = TimedeltaArray._from_sequence(value, dtype=dtype) return res else: From 0627217160e0be3de2fda80f6eecf7e04ea7bbb3 Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Mon, 28 Oct 2024 10:38:13 +0800 Subject: [PATCH 5/8] Fix mypy failure --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index fde3ce702e1d1..0c5e6c5522f9f 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1225,7 +1225,7 @@ def maybe_cast_to_datetime( _ensure_nanosecond_dtype(dtype) if lib.is_np_dtype(dtype, "m"): - if getattr(value, "ndim", 1) == 2 and value.shape[1] == 1: + if isinstance(value, np.ndarray) and value.ndim == 2 and value.shape[1] == 1: res = TimedeltaArray._from_sequence(value.ravel(), dtype=dtype) return res.reshape(value.shape) res = TimedeltaArray._from_sequence(value, dtype=dtype) From d93d5b20c019b4bca68f05058880326cdf5f42b3 Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Thu, 31 Oct 2024 21:26:23 +0800 Subject: [PATCH 6/8] Move upper level --- pandas/core/construction.py | 2 ++ pandas/core/dtypes/cast.py | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 1e1292f8ef089..ef6ea13b0d036 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -807,6 +807,8 @@ def _try_cast( ) elif dtype.kind in "mM": + if is_ndarray and arr.ndim == 2 and arr.shape[1] == 1: + return maybe_cast_to_datetime(arr.ravel(), dtype).reshape(arr.shape) return maybe_cast_to_datetime(arr, dtype) # GH#15832: Check if we are requesting a numeric dtype and diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 0c5e6c5522f9f..6ba07b1761557 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1225,9 +1225,6 @@ def maybe_cast_to_datetime( _ensure_nanosecond_dtype(dtype) if lib.is_np_dtype(dtype, "m"): - if isinstance(value, np.ndarray) and value.ndim == 2 and value.shape[1] == 1: - res = TimedeltaArray._from_sequence(value.ravel(), dtype=dtype) - return res.reshape(value.shape) res = TimedeltaArray._from_sequence(value, dtype=dtype) return res else: From 11a9a7e4059ea15bd618c5d2e37f4300b0a210c8 Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Sat, 2 Nov 2024 20:48:01 +0800 Subject: [PATCH 7/8] Update --- pandas/core/construction.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index ef6ea13b0d036..8df4f7e3e08f9 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -807,8 +807,12 @@ def _try_cast( ) elif dtype.kind in "mM": - if is_ndarray and arr.ndim == 2 and arr.shape[1] == 1: - return maybe_cast_to_datetime(arr.ravel(), dtype).reshape(arr.shape) + if is_ndarray: + arr = cast(np.ndarray, arr) + if arr.ndim == 2 and arr.shape[1] == 1: + # GH#60081: DataFrame Constructor converts 1D data to array of + # shape (N, 1), but maybe_cast_to_datetime assumes 1D input + return maybe_cast_to_datetime(arr[:, 0], dtype).reshape(arr.shape) return maybe_cast_to_datetime(arr, dtype) # GH#15832: Check if we are requesting a numeric dtype and From bf680ce7bd3d03f0a6511b3d0037df9c405049ae Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Sat, 2 Nov 2024 22:07:36 +0800 Subject: [PATCH 8/8] mypy --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 6ba07b1761557..8850b75323d68 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1205,7 +1205,7 @@ def maybe_infer_to_datetimelike( def maybe_cast_to_datetime( value: np.ndarray | list, dtype: np.dtype -) -> ExtensionArray | np.ndarray: +) -> DatetimeArray | TimedeltaArray | np.ndarray: """ try to cast the array/value to a datetimelike dtype, converting float nan to iNaT