@@ -171,6 +171,20 @@ def _unpack_netcdf_time_units(units: str) -> tuple[str, str]:
171
171
return delta_units , ref_date
172
172
173
173
174
+ def _unpack_delta_ref_date (units ):
175
+ # same as _unpack_netcdf_time_units, but also finalizes ref_date for
176
+ # processing in encode_cf_datetime
177
+ delta , _ref_date = _unpack_netcdf_time_units (units )
178
+ # TODO: the strict enforcement of nanosecond precision Timestamps can be
179
+ # relaxed when addressing GitHub issue #7493.
180
+ ref_date = nanosecond_precision_timestamp (_ref_date )
181
+ # If the ref_date Timestamp is timezone-aware, convert to UTC and
182
+ # make it timezone-naive (GH 2649).
183
+ if ref_date .tz is not None :
184
+ ref_date = ref_date .tz_convert (None )
185
+ return delta , ref_date
186
+
187
+
174
188
def _decode_cf_datetime_dtype (
175
189
data , units : str , calendar : str , use_cftime : bool | None
176
190
) -> np .dtype :
@@ -251,9 +265,12 @@ def _decode_datetime_with_pandas(
251
265
252
266
# Cast input ordinals to integers of nanoseconds because pd.to_timedelta
253
267
# works much faster when dealing with integers (GH 1399).
254
- flat_num_dates_ns_int = (flat_num_dates * _NS_PER_TIME_DELTA [delta ]).astype (
255
- np .int64
256
- )
268
+ # properly handle NaN/NaT to prevent casting NaN to int
269
+ nan = np .isnan (flat_num_dates ) | (flat_num_dates == np .iinfo (np .int64 ).min )
270
+ flat_num_dates = flat_num_dates * _NS_PER_TIME_DELTA [delta ]
271
+ flat_num_dates_ns_int = np .zeros_like (flat_num_dates , dtype = np .int64 )
272
+ flat_num_dates_ns_int [nan ] = np .iinfo (np .int64 ).min
273
+ flat_num_dates_ns_int [~ nan ] = flat_num_dates [~ nan ].astype (np .int64 )
257
274
258
275
# Use pd.to_timedelta to safely cast integer values to timedeltas,
259
276
# and add those to a Timestamp to safely produce a DatetimeIndex. This
@@ -575,6 +592,9 @@ def _should_cftime_be_used(
575
592
576
593
def _cleanup_netcdf_time_units (units : str ) -> str :
577
594
delta , ref_date = _unpack_netcdf_time_units (units )
595
+ delta = delta .lower ()
596
+ if not delta .endswith ("s" ):
597
+ delta = f"{ delta } s"
578
598
try :
579
599
units = f"{ delta } since { format_timestamp (ref_date )} "
580
600
except (OutOfBoundsDatetime , ValueError ):
@@ -635,32 +655,41 @@ def encode_cf_datetime(
635
655
"""
636
656
dates = np .asarray (dates )
637
657
658
+ data_units = infer_datetime_units (dates )
659
+
638
660
if units is None :
639
- units = infer_datetime_units ( dates )
661
+ units = data_units
640
662
else :
641
663
units = _cleanup_netcdf_time_units (units )
642
664
643
665
if calendar is None :
644
666
calendar = infer_calendar_name (dates )
645
667
646
- delta , _ref_date = _unpack_netcdf_time_units (units )
647
668
try :
648
669
if not _is_standard_calendar (calendar ) or dates .dtype .kind == "O" :
649
670
# parse with cftime instead
650
671
raise OutOfBoundsDatetime
651
672
assert dates .dtype == "datetime64[ns]"
652
673
674
+ delta , ref_date = _unpack_delta_ref_date (units )
653
675
delta_units = _netcdf_to_numpy_timeunit (delta )
654
676
time_delta = np .timedelta64 (1 , delta_units ).astype ("timedelta64[ns]" )
655
677
656
- # TODO: the strict enforcement of nanosecond precision Timestamps can be
657
- # relaxed when addressing GitHub issue #7493.
658
- ref_date = nanosecond_precision_timestamp (_ref_date )
659
-
660
- # If the ref_date Timestamp is timezone-aware, convert to UTC and
661
- # make it timezone-naive (GH 2649).
662
- if ref_date .tz is not None :
663
- ref_date = ref_date .tz_convert (None )
678
+ # check if times can be represented with given units
679
+ if data_units != units :
680
+ data_delta , data_ref_date = _unpack_delta_ref_date (data_units )
681
+ needed_delta = _infer_time_units_from_diff (
682
+ (data_ref_date - ref_date ).to_timedelta64 ()
683
+ )
684
+ needed_time_delta = np .timedelta64 (
685
+ 1 , _netcdf_to_numpy_timeunit (needed_delta )
686
+ ).astype ("timedelta64[ns]" )
687
+ if needed_delta != delta and time_delta > needed_time_delta :
688
+ warnings .warn (
689
+ f"Times can't be serialized faithfully with requested units { units !r} . "
690
+ f"Resolution of { needed_delta !r} needed. "
691
+ f"Serializing timeseries to floating point."
692
+ )
664
693
665
694
# Wrap the dates in a DatetimeIndex to do the subtraction to ensure
666
695
# an OverflowError is raised if the ref_date is too far away from
@@ -670,8 +699,12 @@ def encode_cf_datetime(
670
699
671
700
# Use floor division if time_delta evenly divides all differences
672
701
# to preserve integer dtype if possible (GH 4045).
673
- if np .all (time_deltas % time_delta == np .timedelta64 (0 , "ns" )):
674
- num = time_deltas // time_delta
702
+ # NaT prevents us from using datetime64 directly, but we can safely coerce
703
+ # to int64 in presence of NaT, so we just dropna before check (GH 7817).
704
+ if np .all (time_deltas .dropna () % time_delta == np .timedelta64 (0 , "ns" )):
705
+ # calculate int64 floor division
706
+ num = time_deltas // time_delta .astype (np .int64 )
707
+ num = num .astype (np .int64 , copy = False )
675
708
else :
676
709
num = time_deltas / time_delta
677
710
num = num .values .reshape (dates .shape )
@@ -704,9 +737,10 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable:
704
737
) or contains_cftime_datetimes (variable ):
705
738
dims , data , attrs , encoding = unpack_for_encoding (variable )
706
739
707
- (data , units , calendar ) = encode_cf_datetime (
708
- data , encoding .pop ("units" , None ), encoding .pop ("calendar" , None )
709
- )
740
+ units = encoding .pop ("units" , None )
741
+ calendar = encoding .pop ("calendar" , None )
742
+ (data , units , calendar ) = encode_cf_datetime (data , units , calendar )
743
+
710
744
safe_setitem (attrs , "units" , units , name = name )
711
745
safe_setitem (attrs , "calendar" , calendar , name = name )
712
746
0 commit comments