24
24
from xarray .core .common import contains_cftime_datetimes , is_np_datetime_like
25
25
from xarray .core .duck_array_ops import asarray , ravel , reshape
26
26
from xarray .core .formatting import first_n_items , format_timestamp , last_item
27
- from xarray .core .pdcompat import nanosecond_precision_timestamp
27
+ from xarray .core .options import _get_datetime_resolution
28
+ from xarray .core .pdcompat import default_precision_timestamp
28
29
from xarray .core .utils import emit_user_level_warning
29
30
from xarray .core .variable import Variable
30
31
from xarray .namedarray .parallelcompat import T_ChunkedArray , get_chunked_array_type
@@ -193,9 +194,7 @@ def _unpack_time_units_and_ref_date(units: str) -> tuple[str, pd.Timestamp]:
193
194
# same as _unpack_netcdf_time_units but finalizes ref_date for
194
195
# processing in encode_cf_datetime
195
196
time_units , _ref_date = _unpack_netcdf_time_units (units )
196
- # TODO: the strict enforcement of nanosecond precision Timestamps can be
197
- # relaxed when addressing GitHub issue #7493.
198
- ref_date = nanosecond_precision_timestamp (_ref_date )
197
+ ref_date = default_precision_timestamp (_ref_date )
199
198
# If the ref_date Timestamp is timezone-aware, convert to UTC and
200
199
# make it timezone-naive (GH 2649).
201
200
if ref_date .tz is not None :
@@ -266,20 +265,54 @@ def _decode_datetime_with_pandas(
266
265
time_units , ref_date_str = _unpack_netcdf_time_units (units )
267
266
time_units = _netcdf_to_numpy_timeunit (time_units )
268
267
try :
269
- # TODO: the strict enforcement of nanosecond precision Timestamps can be
270
- # relaxed when addressing GitHub issue #7493.
271
- ref_date = nanosecond_precision_timestamp (ref_date_str )
268
+ # relaxed to non-nanosecond resolution
269
+ ref_date = pd .Timestamp (ref_date_str )
270
+ # strip tz information
271
+ if ref_date .tz is not None :
272
+ ref_date = ref_date .tz_convert (None )
273
+ # get default unit and delta
274
+ default_unit = _get_datetime_resolution ()
275
+ default_delta = np .timedelta64 (1 , default_unit ).astype ("timedelta64[ns]" )
276
+ # get ref_date and time delta
277
+ ref_date_delta = np .timedelta64 (1 , ref_date .unit ).astype ("timedelta64[ns]" )
278
+ time_delta = np .timedelta64 (1 , time_units ).astype ("timedelta64[ns]" )
279
+ # choose the highest resolution
280
+ new_time_units = {
281
+ ref_date_delta : ref_date .unit ,
282
+ time_delta : time_units ,
283
+ default_delta : default_unit ,
284
+ }[min (default_delta , ref_date_delta , time_delta )]
285
+ # transform to the highest needed resolution
286
+ # this will raise accordingly
287
+ ref_date = ref_date .as_unit (new_time_units )
272
288
except ValueError as err :
273
289
# ValueError is raised by pd.Timestamp for non-ISO timestamp
274
290
# strings, in which case we fall back to using cftime
275
291
raise OutOfBoundsDatetime from err
276
292
293
+ dunit = ref_date .unit
294
+
277
295
with warnings .catch_warnings ():
278
296
warnings .filterwarnings ("ignore" , "invalid value encountered" , RuntimeWarning )
279
297
if flat_num_dates .size > 0 :
280
298
# avoid size 0 datetimes GH1329
281
- pd .to_timedelta (flat_num_dates .min (), time_units ) + ref_date
282
- pd .to_timedelta (flat_num_dates .max (), time_units ) + ref_date
299
+ fnd_min , fnd_max = flat_num_dates .min (), flat_num_dates .max ()
300
+ min_delta = fnd_min * np .timedelta64 (1 , time_units )
301
+ max_delta = fnd_max * np .timedelta64 (1 , time_units )
302
+ if not np .isnan (min_delta ):
303
+ # this will raise on overflow
304
+ (ref_date + min_delta ).as_unit (dunit )
305
+ # this will raise on dtype overflow
306
+ if not np .int64 (min_delta ) == fnd_min :
307
+ # todo: add error message
308
+ raise OutOfBoundsTimedelta
309
+ if not np .isnan (max_delta ):
310
+ # this will raise on overflow
311
+ (ref_date + max_delta ).as_unit (dunit )
312
+ # this will raise on dtype overflow
313
+ if not np .int64 (max_delta ) == fnd_max :
314
+ # todo: add error message
315
+ raise OutOfBoundsTimedelta
283
316
284
317
# To avoid integer overflow when converting to nanosecond units for integer
285
318
# dtypes smaller than np.int64 cast all integer and unsigned integer dtype
@@ -292,20 +325,25 @@ def _decode_datetime_with_pandas(
292
325
elif flat_num_dates .dtype .kind in "f" :
293
326
flat_num_dates = flat_num_dates .astype (np .float64 )
294
327
295
- # Cast input ordinals to integers of nanoseconds because pd.to_timedelta
296
- # works much faster when dealing with integers (GH 1399).
297
- # properly handle NaN/NaT to prevent casting NaN to int
328
+ # keep NaT/nan mask
298
329
nan = np .isnan (flat_num_dates ) | (flat_num_dates == np .iinfo (np .int64 ).min )
299
- flat_num_dates = flat_num_dates * _NS_PER_TIME_DELTA [time_units ]
300
- flat_num_dates_ns_int = np .zeros_like (flat_num_dates , dtype = np .int64 )
301
- flat_num_dates_ns_int [nan ] = np .iinfo (np .int64 ).min
302
- flat_num_dates_ns_int [~ nan ] = flat_num_dates [~ nan ].astype (np .int64 )
303
330
304
- # Use pd.to_timedelta to safely cast integer values to timedeltas,
305
- # and add those to a Timestamp to safely produce a DatetimeIndex. This
306
- # ensures that we do not encounter integer overflow at any point in the
307
- # process without raising OutOfBoundsDatetime.
308
- return (pd .to_timedelta (flat_num_dates_ns_int , "ns" ) + ref_date ).values
331
+ # in case we need to change the unit, we fix the numbers here
332
+ # this should be safe, as errors would have been raised above
333
+ ns_time_unit = _NS_PER_TIME_DELTA [time_units ]
334
+ ns_dunit = _NS_PER_TIME_DELTA [dunit ]
335
+ if flat_num_dates .dtype .kind in "iuf" and (ns_time_unit > ns_dunit ):
336
+ flat_num_dates *= np .int64 (ns_time_unit / ns_dunit )
337
+ time_units = dunit
338
+
339
+ # Cast input ordinals to integers and properly handle NaN/NaT
340
+ # to prevent casting NaN to int
341
+ flat_num_dates_int = np .zeros_like (flat_num_dates , dtype = np .int64 )
342
+ flat_num_dates_int [nan ] = np .iinfo (np .int64 ).min
343
+ flat_num_dates_int [~ nan ] = flat_num_dates [~ nan ].astype (np .int64 )
344
+
345
+ # cast to timedelta64[time_units] and add to ref_date
346
+ return ref_date + flat_num_dates_int .astype (f"timedelta64[{ time_units } ]" )
309
347
310
348
311
349
def decode_cf_datetime (
@@ -370,7 +408,7 @@ def to_timedelta_unboxed(value, **kwargs):
370
408
371
409
def to_datetime_unboxed(value, **kwargs):
    """Convert ``value`` with ``pd.to_datetime`` and return the NumPy result.

    Parameters
    ----------
    value
        Anything accepted by ``pd.to_datetime``.
    **kwargs
        Forwarded unchanged to ``pd.to_datetime``.

    Returns
    -------
    The result of calling ``.to_numpy()`` on the converted value; its dtype
    is a ``datetime64`` of some resolution.
    """
    result = pd.to_datetime(value, **kwargs).to_numpy()
    # Only require *a* datetime64 dtype. The resolution of the result is not
    # guaranteed to equal the configured default (for example when the input
    # already carries another datetime64 resolution), so pinning it to a
    # single resolution would reject valid input.
    assert np.issubdtype(result.dtype, np.datetime64)
    return result
375
413
376
414
@@ -390,7 +428,11 @@ def _unit_timedelta_cftime(units: str) -> timedelta:
390
428
391
429
def _unit_timedelta_numpy(units: str) -> np.timedelta64:
    """Return the length of one ``units`` step as a ``np.timedelta64``.

    Parameters
    ----------
    units : str
        Time unit name, translated to a NumPy unit code by
        ``_netcdf_to_numpy_timeunit``.

    Returns
    -------
    np.timedelta64
        One step of ``units``. It is expressed in the resolution returned by
        ``_get_datetime_resolution()`` whenever that conversion is exact;
        otherwise (the requested unit is finer than the default resolution)
        it is kept in its own unit so the value never truncates to zero.
    """
    numpy_units = _netcdf_to_numpy_timeunit(units)
    default_unit = _get_datetime_resolution()
    one_step = np.timedelta64(1, numpy_units)
    in_default = one_step.astype(f"timedelta64[{default_unit}]")
    # Casting to a coarser unit truncates (1 ms -> 0 s); comparing across
    # units detects that loss, in which case the native unit is returned.
    if in_default != one_step:
        return one_step
    return in_default
394
436
395
437
396
438
def _infer_time_units_from_diff (unique_timedeltas ) -> str :
@@ -411,7 +453,10 @@ def _infer_time_units_from_diff(unique_timedeltas) -> str:
411
453
412
454
413
455
def _time_units_to_timedelta64(units: str) -> np.timedelta64:
    """Return one ``units`` step as a ``np.timedelta64``.

    Parameters
    ----------
    units : str
        Time unit name, translated to a NumPy unit code by
        ``_netcdf_to_numpy_timeunit``.

    Returns
    -------
    np.timedelta64
        One step of ``units``, cast to the resolution returned by
        ``_get_datetime_resolution()`` when that cast is lossless. If the
        requested unit is finer than the default resolution the cast would
        truncate to zero, so the value is returned in its own unit instead.
    """
    default_unit = _get_datetime_resolution()
    one_step = np.timedelta64(1, _netcdf_to_numpy_timeunit(units))
    in_default = one_step.astype(f"timedelta64[{default_unit}]")
    # A truncated cast (e.g. 1 ms -> 0 s) compares unequal to the original.
    if in_default != one_step:
        return one_step
    return in_default
415
460
416
461
417
462
def infer_calendar_name (dates ) -> CFCalendar :
@@ -440,13 +485,11 @@ def infer_datetime_units(dates) -> str:
440
485
unique time deltas in `dates`)
441
486
"""
442
487
dates = ravel (np .asarray (dates ))
443
- if np .asarray (dates ).dtype == "datetime64[ns]" :
488
+ if np .issubdtype ( np . asarray (dates ).dtype , "datetime64" ) :
444
489
dates = to_datetime_unboxed (dates )
445
490
dates = dates [pd .notnull (dates )]
446
491
reference_date = dates [0 ] if len (dates ) > 0 else "1970-01-01"
447
- # TODO: the strict enforcement of nanosecond precision Timestamps can be
448
- # relaxed when addressing GitHub issue #7493.
449
- reference_date = nanosecond_precision_timestamp (reference_date )
492
+ reference_date = default_precision_timestamp (reference_date )
450
493
else :
451
494
reference_date = dates [0 ] if len (dates ) > 0 else "1970-01-01"
452
495
reference_date = format_cftime_datetime (reference_date )
@@ -479,17 +522,15 @@ def cftime_to_nptime(times, raise_on_invalid: bool = True) -> np.ndarray:
479
522
If raise_on_invalid is True (default), invalid dates trigger a ValueError.
480
523
Otherwise, the invalid element is replaced by np.NaT."""
481
524
times = np .asarray (times )
482
- # TODO: the strict enforcement of nanosecond precision datetime values can
483
- # be relaxed when addressing GitHub issue #7493.
484
- new = np .empty (times .shape , dtype = "M8[ns]" )
525
+ new = np .empty (times .shape , dtype = f"M8[{ _get_datetime_resolution ()} ]" )
485
526
dt : pd .Timestamp | Literal ["NaT" ]
486
527
for i , t in np .ndenumerate (times ):
487
528
try :
488
529
# Use pandas.Timestamp in place of datetime.datetime, because
489
530
# NumPy casts it safely to np.datetime64[ns] for dates outside
490
531
# 1678 to 2262 (this is not currently the case for
491
532
# datetime.datetime).
492
- dt = nanosecond_precision_timestamp (
533
+ dt = default_precision_timestamp (
493
534
t .year , t .month , t .day , t .hour , t .minute , t .second , t .microsecond
494
535
)
495
536
except ValueError as e :
@@ -546,10 +587,8 @@ def convert_time_or_go_back(date, date_type):
546
587
547
588
This is meant to convert end-of-month dates into a new calendar.
548
589
"""
549
- # TODO: the strict enforcement of nanosecond precision Timestamps can be
550
- # relaxed when addressing GitHub issue #7493.
551
590
if date_type == pd .Timestamp :
552
- date_type = nanosecond_precision_timestamp
591
+ date_type = default_precision_timestamp
553
592
try :
554
593
return date_type (
555
594
date .year ,
@@ -757,7 +796,7 @@ def _eagerly_encode_cf_datetime(
757
796
if not _is_standard_calendar (calendar ) or dates .dtype .kind == "O" :
758
797
# parse with cftime instead
759
798
raise OutOfBoundsDatetime
760
- assert dates .dtype == "datetime64[ns]"
799
+ assert np . issubdtype ( dates .dtype , "datetime64" )
761
800
762
801
time_units , ref_date = _unpack_time_units_and_ref_date (units )
763
802
time_delta = _time_units_to_timedelta64 (time_units )
0 commit comments