Skip to content

Commit

Permalink
BUG: resolution inference with NaT ints/floats/strings (pandas-dev#55981)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Nov 16, 2023
1 parent 171cbcd commit 02e2bae
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 16 deletions.
1 change: 1 addition & 0 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2636,6 +2636,7 @@ def maybe_convert_objects(ndarray[object] objects,
tsobj = convert_to_tsobject(val, None, None, 0, 0)
tsobj.ensure_reso(NPY_FR_ns)
except OutOfBoundsDatetime:
# e.g. test_out_of_s_bounds_datetime64
seen.object_ = True
break
else:
Expand Down
34 changes: 19 additions & 15 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -450,12 +450,12 @@ cpdef array_to_datetime(
Returns
-------
np.ndarray
May be datetime64[ns] or object dtype
May be datetime64[creso_unit] or object dtype
tzinfo or None
"""
cdef:
Py_ssize_t i, n = values.size
object val, tz
object val
ndarray[int64_t] iresult
npy_datetimestruct dts
bint utc_convert = bool(utc)
Expand All @@ -467,7 +467,7 @@ cpdef array_to_datetime(
_TSObject _ts
float tz_offset
set out_tzoffset_vals = set()
tzinfo tz_out = None
tzinfo tz, tz_out = None
cnp.flatiter it = cnp.PyArray_IterNew(values)
NPY_DATETIMEUNIT item_reso
bint infer_reso = creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC
Expand Down Expand Up @@ -522,15 +522,14 @@ cpdef array_to_datetime(

elif is_integer_object(val) or is_float_object(val):
# these must be ns unit by-definition
item_reso = NPY_FR_ns
state.update_creso(item_reso)
if infer_reso:
creso = state.creso

if val != val or val == NPY_NAT:
iresult[i] = NPY_NAT
else:
# we now need to parse this as if unit='ns'
item_reso = NPY_FR_ns
state.update_creso(item_reso)
if infer_reso:
creso = state.creso
iresult[i] = cast_from_unit(val, "ns", out_reso=creso)
state.found_other = True

Expand All @@ -552,6 +551,16 @@ cpdef array_to_datetime(
_ts = convert_str_to_tsobject(
val, None, dayfirst=dayfirst, yearfirst=yearfirst
)

if _ts.value == NPY_NAT:
# e.g. "NaT" string or empty string, we do not consider
# this as either tzaware or tznaive. See
# test_to_datetime_with_empty_str_utc_false_format_mixed
# We also do not update resolution inference based on this,
# see test_infer_with_nat_int_float_str
iresult[i] = _ts.value
continue

item_reso = _ts.creso
state.update_creso(item_reso)
if infer_reso:
Expand All @@ -562,12 +571,7 @@ cpdef array_to_datetime(
iresult[i] = _ts.value

tz = _ts.tzinfo
if _ts.value == NPY_NAT:
# e.g. "NaT" string or empty string, we do not consider
# this as either tzaware or tznaive. See
# test_to_datetime_with_empty_str_utc_false_format_mixed
pass
elif tz is not None:
if tz is not None:
# dateutil timezone objects cannot be hashed, so
# store the UTC offsets in seconds instead
nsecs = tz.utcoffset(None).total_seconds()
Expand Down Expand Up @@ -640,7 +644,7 @@ cpdef array_to_datetime(
# Otherwise we can use the single reso that we encountered and avoid
# a second pass.
abbrev = npy_unit_to_abbrev(state.creso)
result = iresult.view(f"M8[{abbrev}]")
result = iresult.view(f"M8[{abbrev}]").reshape(result.shape)
return result, tz_out


Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ cdef _get_format_regex(str fmt):


cdef class DatetimeParseState:
def __cinit__(self, NPY_DATETIMEUNIT creso=NPY_DATETIMEUNIT.NPY_FR_ns):
def __cinit__(self, NPY_DATETIMEUNIT creso):
# found_tz and found_naive are specifically about datetime/Timestamp
# objects with and without tzinfos attached.
self.found_tz = False
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/tslibs/test_array_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,24 @@ def test_infer_heterogeneous(self):
assert tz is None
tm.assert_numpy_array_equal(result, expected[::-1])

@pytest.mark.parametrize(
"item", [float("nan"), NaT.value, float(NaT.value), "NaT", ""]
)
def test_infer_with_nat_int_float_str(self, item):
# Floats/ints are normally inferred as nanoseconds, *unless* they are
# NaN/iNaT. Similarly, a "NaT" (or empty) string is treated like a NaT
# scalar. In both cases the item is ignored for resolution inference,
# so the result keeps the unit inferred from the datetime ("us" here).
dt = datetime(2023, 11, 15, 15, 5, 6)

# NaT-like item placed after the datetime
arr = np.array([dt, item], dtype=object)
result, tz = tslib.array_to_datetime(arr, creso=creso_infer)
assert tz is None
expected = np.array([dt, np.datetime64("NaT")], dtype="M8[us]")
tm.assert_numpy_array_equal(result, expected)

# Same check with the order reversed (NaT-like item first)
result2, tz2 = tslib.array_to_datetime(arr[::-1], creso=creso_infer)
assert tz2 is None
tm.assert_numpy_array_equal(result2, expected[::-1])


class TestArrayToDatetimeWithTZResolutionInference:
def test_array_to_datetime_with_tz_resolution(self):
Expand Down

0 comments on commit 02e2bae

Please sign in to comment.