23
23
from xarray .compat .pdcompat import default_precision_timestamp , timestamp_as_unit
24
24
from xarray .core import indexing
25
25
from xarray .core .common import contains_cftime_datetimes , is_np_datetime_like
26
- from xarray .core .duck_array_ops import array_all , array_any , asarray , ravel , reshape
26
+ from xarray .core .duck_array_ops import array_all , asarray , ravel , reshape
27
27
from xarray .core .formatting import first_n_items , format_timestamp , last_item
28
28
from xarray .core .utils import attempt_import , emit_user_level_warning
29
29
from xarray .core .variable import Variable
@@ -298,6 +298,19 @@ def _unpack_time_unit_and_ref_date(
298
298
return time_unit , ref_date
299
299
300
300
301
+ def _unpack_time_units_and_ref_date_cftime (units : str , calendar : str ):
302
+ # same as _unpack_netcdf_time_units but finalizes ref_date for
303
+ # processing in encode_cf_datetime
304
+ time_units , ref_date = _unpack_netcdf_time_units (units )
305
+ ref_date = cftime .num2date (
306
+ 0 ,
307
+ units = f"microseconds since { ref_date } " ,
308
+ calendar = calendar ,
309
+ only_use_cftime_datetimes = True ,
310
+ )
311
+ return time_units , ref_date
312
+
313
+
301
314
def _decode_cf_datetime_dtype (
302
315
data ,
303
316
units : str ,
@@ -686,6 +699,7 @@ def _infer_time_units_from_diff(unique_timedeltas) -> str:
686
699
# todo: check, if this function works correctly wrt np.timedelta64
687
700
unit_timedelta : Callable [[str ], timedelta ] | Callable [[str ], np .timedelta64 ]
688
701
zero_timedelta : timedelta | np .timedelta64
702
+ unique_timedeltas = asarray (unique_timedeltas )
689
703
if unique_timedeltas .dtype == np .dtype ("O" ):
690
704
time_units = _NETCDF_TIME_UNITS_CFTIME
691
705
unit_timedelta = _unit_timedelta_cftime
@@ -700,6 +714,10 @@ def _infer_time_units_from_diff(unique_timedeltas) -> str:
700
714
return "seconds"
701
715
702
716
717
+ def _time_units_to_timedelta (units : str ) -> timedelta :
718
+ return timedelta (microseconds = _US_PER_TIME_DELTA [units ])
719
+
720
+
703
721
def infer_calendar_name (dates ) -> CFCalendar :
704
722
"""Given an array of datetimes, infer the CF calendar name"""
705
723
if is_np_datetime_like (dates .dtype ):
@@ -974,42 +992,6 @@ def _division(deltas, delta, floor):
974
992
return num
975
993
976
994
977
- def _cast_to_dtype_if_safe (num : np .ndarray , dtype : np .dtype ) -> np .ndarray :
978
- with warnings .catch_warnings ():
979
- warnings .filterwarnings ("ignore" , message = "overflow" )
980
- cast_num = np .asarray (num , dtype = dtype )
981
-
982
- if np .issubdtype (dtype , np .integer ):
983
- if not array_all (num == cast_num ):
984
- if np .issubdtype (num .dtype , np .floating ):
985
- raise ValueError (
986
- f"Not possible to cast all encoded times from "
987
- f"{ num .dtype !r} to { dtype !r} without losing precision. "
988
- f"Consider modifying the units such that integer values "
989
- f"can be used, or removing the units and dtype encoding, "
990
- f"at which point xarray will make an appropriate choice."
991
- )
992
- else :
993
- raise OverflowError (
994
- f"Not possible to cast encoded times from "
995
- f"{ num .dtype !r} to { dtype !r} without overflow. Consider "
996
- f"removing the dtype encoding, at which point xarray will "
997
- f"make an appropriate choice, or explicitly switching to "
998
- "a larger integer dtype."
999
- )
1000
- else :
1001
- if array_any (np .isinf (cast_num )):
1002
- raise OverflowError (
1003
- f"Not possible to cast encoded times from { num .dtype !r} to "
1004
- f"{ dtype !r} without overflow. Consider removing the dtype "
1005
- f"encoding, at which point xarray will make an appropriate "
1006
- f"choice, or explicitly switching to a larger floating point "
1007
- f"dtype."
1008
- )
1009
-
1010
- return cast_num
1011
-
1012
-
1013
995
def encode_cf_datetime (
1014
996
dates : T_DuckArray , # type: ignore[misc]
1015
997
units : str | None = None ,
@@ -1032,6 +1014,27 @@ def encode_cf_datetime(
1032
1014
return _eagerly_encode_cf_datetime (dates , units , calendar , dtype )
1033
1015
1034
1016
1017
+ def _infer_needed_units_numpy (ref_date , data_units ):
1018
+ needed_units , data_ref_date = _unpack_time_unit_and_ref_date (data_units )
1019
+ needed_units = _numpy_to_netcdf_timeunit (needed_units )
1020
+ ref_delta = abs (data_ref_date - ref_date ).to_timedelta64 ()
1021
+ data_delta = _unit_timedelta_numpy (needed_units )
1022
+ if (ref_delta % data_delta ) > np .timedelta64 (0 , "ns" ):
1023
+ needed_units = _infer_time_units_from_diff (ref_delta )
1024
+ return needed_units
1025
+
1026
+
1027
+ def _infer_needed_units_cftime (ref_date , data_units , calendar ):
1028
+ needed_units , data_ref_date = _unpack_time_units_and_ref_date_cftime (
1029
+ data_units , calendar
1030
+ )
1031
+ ref_delta = abs (data_ref_date - ref_date )
1032
+ data_delta = _time_units_to_timedelta (needed_units )
1033
+ if (ref_delta % data_delta ) > timedelta (seconds = 0 ):
1034
+ needed_units = _infer_time_units_from_diff (ref_delta )
1035
+ return needed_units
1036
+
1037
+
1035
1038
def _eagerly_encode_cf_datetime (
1036
1039
dates : T_DuckArray , # type: ignore[misc]
1037
1040
units : str | None = None ,
@@ -1049,6 +1052,7 @@ def _eagerly_encode_cf_datetime(
1049
1052
if calendar is None :
1050
1053
calendar = infer_calendar_name (dates )
1051
1054
1055
+ raise_incompatible_units_error = False
1052
1056
try :
1053
1057
if not _is_standard_calendar (calendar ) or dates .dtype .kind == "O" :
1054
1058
# parse with cftime instead
@@ -1081,16 +1085,7 @@ def _eagerly_encode_cf_datetime(
1081
1085
time_deltas = dates_as_index - ref_date
1082
1086
1083
1087
# retrieve needed units to faithfully encode to int64
1084
- needed_unit , data_ref_date = _unpack_time_unit_and_ref_date (data_units )
1085
- needed_units = _numpy_to_netcdf_timeunit (needed_unit )
1086
- if data_units != units :
1087
- # this accounts for differences in the reference times
1088
- ref_delta = abs (data_ref_date - ref_date ).to_timedelta64 ()
1089
- data_delta = np .timedelta64 (1 , needed_unit )
1090
- if (ref_delta % data_delta ) > np .timedelta64 (0 , "ns" ):
1091
- needed_units = _infer_time_units_from_diff (ref_delta )
1092
-
1093
- # needed time delta to encode faithfully to int64
1088
+ needed_units = _infer_needed_units_numpy (ref_date , data_units )
1094
1089
needed_time_delta = _unit_timedelta_numpy (needed_units )
1095
1090
1096
1091
floor_division = np .issubdtype (dtype , np .integer ) or dtype is None
@@ -1104,7 +1099,6 @@ def _eagerly_encode_cf_datetime(
1104
1099
f"Set encoding['dtype'] to floating point dtype to silence this warning."
1105
1100
)
1106
1101
elif np .issubdtype (dtype , np .integer ) and allow_units_modification :
1107
- floor_division = True
1108
1102
new_units = f"{ needed_units } since { format_timestamp (ref_date )} "
1109
1103
emit_user_level_warning (
1110
1104
f"Times can't be serialized faithfully to int64 with requested units { units !r} . "
@@ -1114,6 +1108,10 @@ def _eagerly_encode_cf_datetime(
1114
1108
)
1115
1109
units = new_units
1116
1110
time_delta = needed_time_delta
1111
+ floor_division = True
1112
+ elif np .issubdtype (dtype , np .integer ) and not allow_units_modification :
1113
+ new_units = f"{ needed_units } since { format_timestamp (ref_date )} "
1114
+ raise_incompatible_units_error = True
1117
1115
1118
1116
# get resolution of TimedeltaIndex and align time_delta
1119
1117
# todo: check, if this works in any case
@@ -1123,14 +1121,39 @@ def _eagerly_encode_cf_datetime(
1123
1121
num = reshape (num .values , dates .shape )
1124
1122
1125
1123
except (OutOfBoundsDatetime , OverflowError , ValueError ):
1124
+ time_units , ref_date = _unpack_time_units_and_ref_date_cftime (units , calendar )
1125
+ time_delta_cftime = _time_units_to_timedelta (time_units )
1126
+ needed_units = _infer_needed_units_cftime (ref_date , data_units , calendar )
1127
+ needed_time_delta_cftime = _time_units_to_timedelta (needed_units )
1128
+
1129
+ if (
1130
+ np .issubdtype (dtype , np .integer )
1131
+ and time_delta_cftime > needed_time_delta_cftime
1132
+ ):
1133
+ new_units = f"{ needed_units } since { format_cftime_datetime (ref_date )} "
1134
+ if allow_units_modification :
1135
+ emit_user_level_warning (
1136
+ f"Times can't be serialized faithfully to int64 with requested units { units !r} . "
1137
+ f"Serializing with units { new_units !r} instead. "
1138
+ f"Set encoding['dtype'] to floating point dtype to serialize with units { units !r} . "
1139
+ f"Set encoding['units'] to { new_units !r} to silence this warning ."
1140
+ )
1141
+ units = new_units
1142
+ else :
1143
+ raise_incompatible_units_error = True
1144
+
1126
1145
num = _encode_datetime_with_cftime (dates , units , calendar )
1127
1146
# do it now only for cftime-based flow
1128
1147
# we already covered for this in pandas-based flow
1129
1148
num = cast_to_int_if_safe (num )
1130
1149
1131
- if dtype is not None :
1132
- # todo: check, if this is really needed for all dtypes
1133
- num = _cast_to_dtype_if_safe (num , dtype )
1150
+ if raise_incompatible_units_error :
1151
+ raise ValueError (
1152
+ f"Times can't be serialized faithfully to int64 with requested units { units !r} . "
1153
+ f"Consider setting encoding['dtype'] to a floating point dtype to serialize with "
1154
+ f"units { units !r} . Consider setting encoding['units'] to { new_units !r} to "
1155
+ f"serialize with an integer dtype."
1156
+ )
1134
1157
1135
1158
return num , units , calendar
1136
1159
@@ -1243,14 +1266,17 @@ def _eagerly_encode_cf_timedelta(
1243
1266
time_delta = needed_time_delta
1244
1267
time_delta = time_delta .astype (f"=m8[{ deltas_unit } ]" )
1245
1268
floor_division = True
1269
+ elif np .issubdtype (dtype , np .integer ) and not allow_units_modification :
1270
+ raise ValueError (
1271
+ f"Timedeltas can't be serialized faithfully to int64 with requested units { units !r} . "
1272
+ f"Consider setting encoding['dtype'] to a floating point dtype to serialize with "
1273
+ f"units { units !r} . Consider setting encoding['units'] to { needed_units !r} to "
1274
+ f"serialize with an integer dtype."
1275
+ )
1246
1276
1247
1277
num = _division (time_deltas , time_delta , floor_division )
1248
1278
num = reshape (num .values , timedeltas .shape )
1249
1279
1250
- if dtype is not None :
1251
- # todo: check, if this is needed for all dtypes
1252
- num = _cast_to_dtype_if_safe (num , dtype )
1253
-
1254
1280
return num , units
1255
1281
1256
1282
@@ -1328,20 +1354,15 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable:
1328
1354
1329
1355
units = encoding .pop ("units" , None )
1330
1356
calendar = encoding .pop ("calendar" , None )
1331
- dtype = encoding .pop ("dtype" , None )
1357
+ dtype = encoding .get ("dtype" , None )
1332
1358
1333
1359
# in the case of packed data we need to encode into
1334
1360
# float first, the correct dtype will be established
1335
1361
# via CFScaleOffsetCoder/CFMaskCoder
1336
- set_dtype_encoding = None
1337
1362
if "add_offset" in encoding or "scale_factor" in encoding :
1338
- set_dtype_encoding = dtype
1339
1363
dtype = data .dtype if data .dtype .kind == "f" else "float64"
1340
1364
(data , units , calendar ) = encode_cf_datetime (data , units , calendar , dtype )
1341
1365
1342
- # retain dtype for packed data
1343
- if set_dtype_encoding is not None :
1344
- safe_setitem (encoding , "dtype" , set_dtype_encoding , name = name )
1345
1366
safe_setitem (attrs , "units" , units , name = name )
1346
1367
safe_setitem (attrs , "calendar" , calendar , name = name )
1347
1368
@@ -1393,22 +1414,16 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable:
1393
1414
if np .issubdtype (variable .data .dtype , np .timedelta64 ):
1394
1415
dims , data , attrs , encoding = unpack_for_encoding (variable )
1395
1416
1396
- dtype = encoding .pop ("dtype" , None )
1417
+ dtype = encoding .get ("dtype" , None )
1397
1418
1398
1419
# in the case of packed data we need to encode into
1399
1420
# float first, the correct dtype will be established
1400
1421
# via CFScaleOffsetCoder/CFMaskCoder
1401
- set_dtype_encoding = None
1402
1422
if "add_offset" in encoding or "scale_factor" in encoding :
1403
- set_dtype_encoding = dtype
1404
1423
dtype = data .dtype if data .dtype .kind == "f" else "float64"
1405
1424
1406
1425
data , units = encode_cf_timedelta (data , encoding .pop ("units" , None ), dtype )
1407
1426
1408
- # retain dtype for packed data
1409
- if set_dtype_encoding is not None :
1410
- safe_setitem (encoding , "dtype" , set_dtype_encoding , name = name )
1411
-
1412
1427
safe_setitem (attrs , "units" , units , name = name )
1413
1428
1414
1429
return Variable (dims , data , attrs , encoding , fastpath = True )
0 commit comments