From 5d76e3dd497cdf6ef255147fb192dd9d112f58bc Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Tue, 10 Mar 2026 16:51:06 -0500 Subject: [PATCH 01/10] fix to buddy box pandas 3 compatibility --- tobac/segmentation/watershed_segmentation.py | 38 ++++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/tobac/segmentation/watershed_segmentation.py b/tobac/segmentation/watershed_segmentation.py index c1323652..7524ebe5 100644 --- a/tobac/segmentation/watershed_segmentation.py +++ b/tobac/segmentation/watershed_segmentation.py @@ -776,7 +776,7 @@ def segmentation_timestep( ) # Get features that are needed for the buddy box - buddy_features = deepcopy(features_in.iloc[feat_inds]) + buddy_features = features_in.iloc[feat_inds].copy() # create arrays to contain points of all buddies # and their transpositions/transformations @@ -824,15 +824,15 @@ def segmentation_timestep( ) # edit value in buddy_features dataframe - buddy_features.hdim_1.values[buddy_looper] = ( - pbc_utils.transfm_pbc_point( - float(buddy_feat.hdim_1), hdim1_min, hdim1_max - ) + buddy_features.iloc[ + buddy_looper, buddy_features.columns.get_loc("hdim_1") + ] = pbc_utils.transfm_pbc_point( + float(buddy_feat.hdim_1), hdim1_min, hdim1_max ) - buddy_features.hdim_2.values[buddy_looper] = ( - pbc_utils.transfm_pbc_point( - float(buddy_feat.hdim_2), hdim2_min, hdim2_max - ) + buddy_features.iloc[ + buddy_looper, buddy_features.columns.get_loc("hdim_2") + ] = pbc_utils.transfm_pbc_point( + float(buddy_feat.hdim_2), hdim2_min, hdim2_max ) buddy_looper = buddy_looper + 1 @@ -903,16 +903,16 @@ def segmentation_timestep( if "vdim" not in buddy_features: buddy_features["vdim"] = np.zeros(len(buddy_features), dtype=int) for buddy_looper in range(0, len(buddy_features)): - buddy_features.vdim.values[buddy_looper] = ( - buddy_features.vdim.values[buddy_looper] - bbox_zstart - ) - - buddy_features.hdim_1.values[buddy_looper] = ( - buddy_features.hdim_1.values[buddy_looper] - bbox_ystart - ) 
- buddy_features.hdim_2.values[buddy_looper] = ( - buddy_features.hdim_2.values[buddy_looper] - bbox_xstart - ) + buddy_features.iloc[ + buddy_looper, buddy_features.columns.get_loc("vdim") + ] = (buddy_features.vdim.values[buddy_looper] - bbox_zstart) + + buddy_features.iloc[ + buddy_looper, buddy_features.columns.get_loc("hdim_1") + ] = (buddy_features.hdim_1.values[buddy_looper] - bbox_ystart) + buddy_features.iloc[ + buddy_looper, buddy_features.columns.get_loc("hdim_2") + ] = (buddy_features.hdim_2.values[buddy_looper] - bbox_xstart) # Create dask array from input data: buddy_data = buddy_rgn From 859553220b5ac39fcb30bd7a1eac7e2235aeb274 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Tue, 10 Mar 2026 16:53:01 -0500 Subject: [PATCH 02/10] fix to_datestr test - pandas 3 now has us as default. Added that as a possible output --- tobac/tests/test_datetime.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tobac/tests/test_datetime.py b/tobac/tests/test_datetime.py index 82a9f7b7..20a3fd23 100644 --- a/tobac/tests/test_datetime.py +++ b/tobac/tests/test_datetime.py @@ -143,6 +143,7 @@ def test_to_datestr(): assert ( datetime_utils.to_datestr(date) == "2000-01-01T00:00:00.000000000" or datetime_utils.to_datestr(date) == "2000-01-01T00:00:00" + or datetime_utils.to_datestr(date) == "2000-01-01T00:00:00.000000" ) From 670a5591222b833d11c44fe539114ff1a28746c2 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Tue, 10 Mar 2026 21:21:35 -0500 Subject: [PATCH 03/10] fix feature detection for pandas 3 --- tobac/feature_detection.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index d5729ff2..494b3177 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -638,9 +638,9 @@ def feature_detection_threshold( # find the updated label, and overwrite all of label_ind indices with # updated label labels_2_alt = labels_2[label_z, y_val_alt, x_val_alt] - 
labels_2[label_locs_v, label_locs_h1, label_locs_h2] = ( - labels_2_alt - ) + labels_2[ + label_locs_v, label_locs_h1, label_locs_h2 + ] = labels_2_alt skip_list = np.append(skip_list, label_ind) break @@ -684,9 +684,9 @@ def feature_detection_threshold( # find the updated label, and overwrite all of label_ind indices with # updated label labels_2_alt = labels_2[label_z, y_val_alt, label_x] - labels_2[label_locs_v, label_locs_h1, label_locs_h2] = ( - labels_2_alt - ) + labels_2[ + label_locs_v, label_locs_h1, label_locs_h2 + ] = labels_2_alt new_label_ind = labels_2_alt skip_list = np.append(skip_list, label_ind) @@ -728,9 +728,9 @@ def feature_detection_threshold( # find the updated label, and overwrite all of label_ind indices with # updated label labels_2_alt = labels_2[label_z, label_y, x_val_alt] - labels_2[label_locs_v, label_locs_h1, label_locs_h2] = ( - labels_2_alt - ) + labels_2[ + label_locs_v, label_locs_h1, label_locs_h2 + ] = labels_2_alt new_label_ind = labels_2_alt skip_list = np.append(skip_list, label_ind) @@ -1539,7 +1539,6 @@ def feature_detection_multithreshold( # we map the feature index to the original index if return_labels: - for i, time_i, label_field_i, features_i in field_and_features_over_time( label_fields, features ): @@ -1684,9 +1683,11 @@ def filter_min_distance( # Calculate feature locations in cartesian coordinates if is_3D: - feature_locations = features[ - [z_coordinate_name, y_coordinate_name, x_coordinate_name] - ].to_numpy() + feature_locations = ( + features[[z_coordinate_name, y_coordinate_name, x_coordinate_name]] + .to_numpy() + .copy() + ) feature_locations[:, 0] *= dz feature_locations[:, 1:] *= dxy else: From b96908ae6806ed4a181411ff92594fa9d2960e33 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Tue, 10 Mar 2026 21:21:55 -0500 Subject: [PATCH 04/10] update tracking to allow for us output --- tobac/tests/test_tracking.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git 
a/tobac/tests/test_tracking.py b/tobac/tests/test_tracking.py index 70be714c..951d5f56 100644 --- a/tobac/tests/test_tracking.py +++ b/tobac/tests/test_tracking.py @@ -549,7 +549,10 @@ def test_untracked_nat(): assert np.all(pd.isnull(output["time_cell"])) # the exact data type depends on architecture, so # instead just check by name - assert output["time_cell"].dtype.name == "timedelta64[ns]" + assert ( + output["time_cell"].dtype.name == "timedelta64[ns]" + or output["time_cell"].dtype.name == "timedelta64[us]" + ) @pytest.mark.parametrize( From 175406fdb211d6e2eb5bf7af455dbc57eaef9d55 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Tue, 10 Mar 2026 21:36:51 -0500 Subject: [PATCH 05/10] update datetime functions to allow for a precision marker, and have matching maintain the precision. --- tobac/utils/datetime.py | 81 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 73 insertions(+), 8 deletions(-) diff --git a/tobac/utils/datetime.py b/tobac/utils/datetime.py index 611603cb..1e83b9fb 100644 --- a/tobac/utils/datetime.py +++ b/tobac/utils/datetime.py @@ -1,11 +1,12 @@ """Functions for converting between and working with different datetime formats""" -from typing import Union +from typing import Union, Optional, Literal import datetime import numpy as np import pandas as pd import xarray as xr import cftime +import re def to_cftime( @@ -51,6 +52,7 @@ def to_cftime( def to_timestamp( dates: Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime], + precision: Optional[Literal["ns", "us", "s", "ms"]] = None, ) -> pd.Timestamp: """Converts a provided datetime-like object to a pandas timestamp @@ -58,6 +60,13 @@ def to_timestamp( ---------- dates : Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime] A datetime-like object or array of datetime-like objects to be converted + precision : Optional[Literal["ns", "us", "s", "ms"]] + The precision of the timestamp. If None, the default precision is used. 
+ The default precision is ns for Pandas 2 and before; us for Pandas 3 + - "ns": nanoseconds + - "us": microseconds + - "ms": milliseconds + - "s": seconds Returns ------- @@ -74,6 +83,9 @@ def to_timestamp( else: pd_dates = pd.to_datetime(dates) + if precision is not None: + pd_dates = pd_dates.astype(f"datetime64[{precision}]") + if squeeze_output: return next(iter(pd_dates)) return pd_dates @@ -99,6 +111,7 @@ def to_datetime( def to_datetime64( dates: Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime], + precision: Optional[Literal["ns", "us", "s", "ms"]] = None, ) -> np.datetime64: """Converts a provided datetime-like object to numpy datetime64 objects @@ -106,17 +119,25 @@ def to_datetime64( ---------- dates : Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime] A datetime-like object or array of datetime-like objects to be converted + precision : Optional[Literal["ns", "us", "s", "ms"]] + The precision of the timestamp. If None, the default precision is used. + The default precision is ns for Pandas 2 and before; us for Pandas 3 + - "ns": nanoseconds + - "us": microseconds + - "ms": milliseconds + - "s": seconds Returns ------- np.datetime64 A numpy datetime64 or array of numpy datetime64s """ - return to_timestamp(dates).to_numpy() + return to_timestamp(dates, precision).to_numpy() def to_datestr( dates: Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime], + precision: Optional[Literal["ns", "us", "s", "ms"]] = None, ) -> str: """Converts a provided datetime-like object to ISO format date strings @@ -124,24 +145,64 @@ def to_datestr( ---------- dates : Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime] A datetime-like object or array of datetime-like objects to be converted + precision : Optional[Literal["ns", "us", "s", "ms"]] + The precision of the timestamp. If None, the default precision is used. 
+ The default precision is ns for Pandas 2 and before; us for Pandas 3 + - "ns": nanoseconds + - "us": microseconds + - "ms": milliseconds + - "s": seconds Returns ------- str A string or array of strings in ISO date format """ - dates = to_datetime64(dates) + dates = to_datetime64(dates, precision) if hasattr(dates, "__iter__"): return dates.astype(str) return str(dates) +def detect_str_precision(datestr: str) -> Literal["s", "ms", "us", "ns"]: + """Detects the precision of a datetime str by counting the number of digits after . + Parameters + ---------- + datestr : str + Input string + + Returns + ------- + Literal['s', 'ms', 'us', 'ns'] + The precision of the string based on the number of digits after . + + Raises + ------ + ValueError + Raises a ValueError if the input string has more than 9 digits after . + (i.e., it is finer than nanosecond precision) + """ + + digits_matching = re.search(r"\.(\d+)", datestr) + if not digits_matching: + return "s" + n = len(digits_matching.group(1)) + if n <= 3: + return "ms" + elif n <= 6: + return "us" + elif n <= 9: + return "ns" + else: + raise ValueError("Finer than ns precision.") + + def match_datetime_format( dates: Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime], target: Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime], ) -> Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime]: """Converts the provided datetime-like objects to the same datetime format - as the provided target + as the provided target, ensuring that the precisions match Parameters ---------- @@ -164,7 +225,8 @@ def match_datetime_format( of datetime-like objects """ if isinstance(target, str): - return to_datestr(dates) + precision = detect_str_precision(target) + return to_datestr(dates, precision) if isinstance(target, xr.DataArray): target = target.values if isinstance(target, pd.Series): @@ -172,13 +234,16 @@ def match_datetime_format( if hasattr(target,
"__iter__"): target = target[0] if isinstance(target, str): - return to_datestr(dates) + precision = detect_str_precision(target) + return to_datestr(dates, precision) if isinstance(target, cftime.datetime): return to_cftime(dates, target.calendar) if isinstance(target, pd.Timestamp): - return to_timestamp(dates) + precision = target.unit + return to_timestamp(dates, precision=precision) if isinstance(target, np.datetime64): - return to_datetime64(dates) + precision = np.datetime_data(target)[0] + return to_datetime64(dates, precision=precision) if isinstance(target, datetime.datetime): return to_datetime(dates) raise ValueError("Target is not a valid datetime format") From 1b967b2605b476a7d2d95b1e050acb858a4b46b4 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Tue, 10 Mar 2026 21:37:18 -0500 Subject: [PATCH 06/10] update the datetime utilities to allow for us precision on test_to_datestr_array --- tobac/tests/test_datetime.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tobac/tests/test_datetime.py b/tobac/tests/test_datetime.py index 20a3fd23..8625aa22 100644 --- a/tobac/tests/test_datetime.py +++ b/tobac/tests/test_datetime.py @@ -164,9 +164,11 @@ def test_to_datestr_array(): cftime.DatetimeNoLeap(2000, 1, 1), ] for date in test_dates: - assert datetime_utils.to_datestr([date]) == [ - "2000-01-01T00:00:00.000000000" - ] or datetime_utils.to_datestr([date]) == ["2000-01-01T00:00:00"] + assert ( + datetime_utils.to_datestr([date]) == ["2000-01-01T00:00:00.000000000"] + or datetime_utils.to_datestr([date]) == ["2000-01-01T00:00:00"] + or (datetime_utils.to_datestr([date]) == ["2000-01-01T00:00:00.000000"]) + ) def test_match_datetime_format(): From 154d63f671e4776c7ec8ab6c8a16b66222350970 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Tue, 10 Mar 2026 21:38:33 -0500 Subject: [PATCH 07/10] update environment files to allow for pandas 3 support --- environment-ci.yml | 2 +- environment-examples.yml | 2 +- environment.yml | 2 +- 
pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/environment-ci.yml b/environment-ci.yml index ee3f8646..c325cea6 100644 --- a/environment-ci.yml +++ b/environment-ci.yml @@ -7,7 +7,7 @@ dependencies: - scipy - scikit-image - scikit-learn - - pandas<3 + - pandas - matplotlib - iris - xarray diff --git a/environment-examples.yml b/environment-examples.yml index a49e4dbf..d64f7e17 100644 --- a/environment-examples.yml +++ b/environment-examples.yml @@ -6,7 +6,7 @@ dependencies: - scipy - scikit-image - scikit-learn - - pandas<3 + - pandas - matplotlib - iris - xarray<2024.10.0 diff --git a/environment.yml b/environment.yml index 2de0f196..1fa30644 100644 --- a/environment.yml +++ b/environment.yml @@ -7,7 +7,7 @@ dependencies: - scipy - scikit-image - scikit-learn - - pandas<3 + - pandas - matplotlib - iris - xarray diff --git a/pyproject.toml b/pyproject.toml index d424377a..916dcd14 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,7 +45,7 @@ dependencies = [ "scipy", "scikit-image", "scikit-learn", - "pandas<3", + "pandas", "matplotlib", "scitools-iris", "xarray", From c2939f526daf214e40b9b5dbf95b3dcfb7bd4b3a Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Tue, 10 Mar 2026 21:39:39 -0500 Subject: [PATCH 08/10] update changelog --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d8ea83be..ffe7ad6f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ ### Tobac Changelog +_**Unreleased Changes:**_ + +**Bug fixes** + +- Update internals to allow for pandas 3 compatibility [#568](https://github.com/tobac-project/tobac/pull/568) + _**Version 1.6.3:**_ **Enhancements for Users** From c3e1e7305b27242f55a508480e133641425f84cb Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Tue, 10 Mar 2026 21:52:12 -0500 Subject: [PATCH 09/10] add tests to detect str precision --- tobac/tests/test_datetime.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git 
a/tobac/tests/test_datetime.py b/tobac/tests/test_datetime.py index 8625aa22..a5c002e3 100644 --- a/tobac/tests/test_datetime.py +++ b/tobac/tests/test_datetime.py @@ -217,3 +217,18 @@ def test_match_datetime_format_error(): """ with pytest.raises(ValueError, match="Target is not a valid datetime*"): datetime_utils.match_datetime_format(datetime(2000, 1, 1), 1.5) + + +@pytest.mark.parametrize( + ["date_in", "precision"], + [ + ("2000-02-04T00:00:00", "s"), + ("2000-02-04T00:00:00.000", "ms"), + ("2000-02-04T00:00:00.000000", "us"), + ("2000-02-04T00:00:00.000000000", "ns"), + ("00:00:00", "s"), + ], +) +def test_detect_str_precision(date_in: str, precision: str): + """test that detect_str_precision returns the right precision""" + assert datetime_utils.detect_str_precision(date_in) == precision From b08cfbe1802b6d05e457d7597f46fbe770304b09 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Tue, 10 Mar 2026 21:59:51 -0500 Subject: [PATCH 10/10] black formatting --- tobac/feature_detection.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index 494b3177..b7d8e99a 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -638,9 +638,9 @@ def feature_detection_threshold( # find the updated label, and overwrite all of label_ind indices with # updated label labels_2_alt = labels_2[label_z, y_val_alt, x_val_alt] - labels_2[ - label_locs_v, label_locs_h1, label_locs_h2 - ] = labels_2_alt + labels_2[label_locs_v, label_locs_h1, label_locs_h2] = ( + labels_2_alt + ) skip_list = np.append(skip_list, label_ind) break @@ -684,9 +684,9 @@ def feature_detection_threshold( # find the updated label, and overwrite all of label_ind indices with # updated label labels_2_alt = labels_2[label_z, y_val_alt, label_x] - labels_2[ - label_locs_v, label_locs_h1, label_locs_h2 - ] = labels_2_alt + labels_2[label_locs_v, label_locs_h1, label_locs_h2] = ( + labels_2_alt + ) 
new_label_ind = labels_2_alt skip_list = np.append(skip_list, label_ind) @@ -728,9 +728,9 @@ def feature_detection_threshold( # find the updated label, and overwrite all of label_ind indices with # updated label labels_2_alt = labels_2[label_z, label_y, x_val_alt] - labels_2[ - label_locs_v, label_locs_h1, label_locs_h2 - ] = labels_2_alt + labels_2[label_locs_v, label_locs_h1, label_locs_h2] = ( + labels_2_alt + ) new_label_ind = labels_2_alt skip_list = np.append(skip_list, label_ind)