Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
### Tobac Changelog

_**Unreleased Changes:**_

**Bug fixes**

- Update internals to allow for pandas 3 compatibility [#568](https://github.com/tobac-project/tobac/pull/568)

_**Version 1.6.3:**_

**Enhancements for Users**
Expand Down
2 changes: 1 addition & 1 deletion environment-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ dependencies:
- scipy
- scikit-image
- scikit-learn
- pandas<3
- pandas
- matplotlib
- iris
- xarray
Expand Down
2 changes: 1 addition & 1 deletion environment-examples.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ dependencies:
- scipy
- scikit-image
- scikit-learn
- pandas<3
- pandas
- matplotlib
- iris
- xarray<2024.10.0
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ dependencies:
- scipy
- scikit-image
- scikit-learn
- pandas<3
- pandas
- matplotlib
- iris
- xarray
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ dependencies = [
"scipy",
"scikit-image",
"scikit-learn",
"pandas<3",
"pandas",
"matplotlib",
"scitools-iris",
"xarray",
Expand Down
9 changes: 5 additions & 4 deletions tobac/feature_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -1539,7 +1539,6 @@ def feature_detection_multithreshold(

# we map the feature index to the original index
if return_labels:

for i, time_i, label_field_i, features_i in field_and_features_over_time(
label_fields, features
):
Expand Down Expand Up @@ -1684,9 +1683,11 @@ def filter_min_distance(

# Calculate feature locations in cartesian coordinates
if is_3D:
feature_locations = features[
[z_coordinate_name, y_coordinate_name, x_coordinate_name]
].to_numpy()
feature_locations = (
features[[z_coordinate_name, y_coordinate_name, x_coordinate_name]]
.to_numpy()
.copy()
)
feature_locations[:, 0] *= dz
feature_locations[:, 1:] *= dxy
else:
Expand Down
38 changes: 19 additions & 19 deletions tobac/segmentation/watershed_segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -776,7 +776,7 @@ def segmentation_timestep(
)

# Get features that are needed for the buddy box
buddy_features = deepcopy(features_in.iloc[feat_inds])
buddy_features = features_in.iloc[feat_inds].copy()

# create arrays to contain points of all buddies
# and their transpositions/transformations
Expand Down Expand Up @@ -824,15 +824,15 @@ def segmentation_timestep(
)

# edit value in buddy_features dataframe
buddy_features.hdim_1.values[buddy_looper] = (
pbc_utils.transfm_pbc_point(
float(buddy_feat.hdim_1), hdim1_min, hdim1_max
)
buddy_features.iloc[
buddy_looper, buddy_features.columns.get_loc("hdim_1")
] = pbc_utils.transfm_pbc_point(
float(buddy_feat.hdim_1), hdim1_min, hdim1_max
)
buddy_features.hdim_2.values[buddy_looper] = (
pbc_utils.transfm_pbc_point(
float(buddy_feat.hdim_2), hdim2_min, hdim2_max
)
buddy_features.iloc[
buddy_looper, buddy_features.columns.get_loc("hdim_2")
] = pbc_utils.transfm_pbc_point(
float(buddy_feat.hdim_2), hdim2_min, hdim2_max
)

buddy_looper = buddy_looper + 1
Expand Down Expand Up @@ -903,16 +903,16 @@ def segmentation_timestep(
if "vdim" not in buddy_features:
buddy_features["vdim"] = np.zeros(len(buddy_features), dtype=int)
for buddy_looper in range(0, len(buddy_features)):
buddy_features.vdim.values[buddy_looper] = (
buddy_features.vdim.values[buddy_looper] - bbox_zstart
)

buddy_features.hdim_1.values[buddy_looper] = (
buddy_features.hdim_1.values[buddy_looper] - bbox_ystart
)
buddy_features.hdim_2.values[buddy_looper] = (
buddy_features.hdim_2.values[buddy_looper] - bbox_xstart
)
buddy_features.iloc[
buddy_looper, buddy_features.columns.get_loc("vdim")
] = (buddy_features.vdim.values[buddy_looper] - bbox_zstart)

buddy_features.iloc[
buddy_looper, buddy_features.columns.get_loc("hdim_1")
] = (buddy_features.hdim_1.values[buddy_looper] - bbox_ystart)
buddy_features.iloc[
buddy_looper, buddy_features.columns.get_loc("hdim_2")
] = (buddy_features.hdim_2.values[buddy_looper] - bbox_xstart)

# Create dask array from input data:
buddy_data = buddy_rgn
Expand Down
24 changes: 21 additions & 3 deletions tobac/tests/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ def test_to_datestr():
assert (
datetime_utils.to_datestr(date) == "2000-01-01T00:00:00.000000000"
or datetime_utils.to_datestr(date) == "2000-01-01T00:00:00"
or datetime_utils.to_datestr(date) == "2000-01-01T00:00:00.000000"
)


Expand All @@ -163,9 +164,11 @@ def test_to_datestr_array():
cftime.DatetimeNoLeap(2000, 1, 1),
]
for date in test_dates:
assert datetime_utils.to_datestr([date]) == [
"2000-01-01T00:00:00.000000000"
] or datetime_utils.to_datestr([date]) == ["2000-01-01T00:00:00"]
assert (
datetime_utils.to_datestr([date]) == ["2000-01-01T00:00:00.000000000"]
or datetime_utils.to_datestr([date]) == ["2000-01-01T00:00:00"]
or (datetime_utils.to_datestr([date]) == ["2000-01-01T00:00:00.000000"])
)


def test_match_datetime_format():
Expand Down Expand Up @@ -214,3 +217,18 @@ def test_match_datetime_format_error():
"""
with pytest.raises(ValueError, match="Target is not a valid datetime*"):
datetime_utils.match_datetime_format(datetime(2000, 1, 1), 1.5)


@pytest.mark.parametrize(
    "date_in,precision",
    [
        ("2000-02-04T00:00:00", "s"),
        ("2000-02-04T00:00:00.000", "ms"),
        ("2000-02-04T00:00:00.000000", "us"),
        ("2000-02-04T00:00:00.000000000", "ns"),
        ("00:00:00", "s"),
    ],
)
def test_detect_str_precision(date_in: str, precision: str):
    """Check that detect_str_precision maps each datetime string to the
    expected precision unit based on its fractional-seconds digits."""
    result = datetime_utils.detect_str_precision(date_in)
    assert result == precision
5 changes: 4 additions & 1 deletion tobac/tests/tracking_tests/test_tracking.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,10 @@ def test_untracked_nat():
assert np.all(pd.isnull(output["time_cell"]))
# the exact data type depends on architecture, so
# instead just check by name
assert output["time_cell"].dtype.name == "timedelta64[ns]"
assert (
output["time_cell"].dtype.name == "timedelta64[ns]"
or output["time_cell"].dtype.name == "timedelta64[us]"
)


@pytest.mark.parametrize(
Expand Down
81 changes: 73 additions & 8 deletions tobac/utils/datetime.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
"""Functions for converting between and working with different datetime formats"""

from typing import Union
from typing import Union, Optional, Literal
import datetime
import numpy as np
import pandas as pd
import xarray as xr
import cftime
import re


def to_cftime(
Expand Down Expand Up @@ -51,13 +52,21 @@ def to_cftime(

def to_timestamp(
dates: Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime],
precision: Optional[Literal["ns", "us", "s", "ms"]] = None,
) -> pd.Timestamp:
"""Converts a provided datetime-like object to a pandas timestamp

Parameters
----------
dates : Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime]
A datetime-like object or array of datetime-like objects to be converted
precision : Optional[Literal["ns", "us", "s", "ms"]]
The precision of the timestamp. If None, the default precision is used.
The default precision is ns for Pandas 2 and before; us for Pandas 3
- "ns": nanoseconds
- "us": microseconds
- "ms": milliseconds
- "s": seconds

Returns
-------
Expand All @@ -74,6 +83,9 @@ def to_timestamp(
else:
pd_dates = pd.to_datetime(dates)

if precision is not None:
pd_dates = pd_dates.astype(f"datetime64[{precision}]")

if squeeze_output:
return next(iter(pd_dates))
return pd_dates
Expand All @@ -99,49 +111,98 @@ def to_datetime(

def to_datetime64(
    dates: Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime],
    precision: Optional[Literal["ns", "us", "s", "ms"]] = None,
) -> np.datetime64:
    """Converts a provided datetime-like object to numpy datetime64 objects

    Parameters
    ----------
    dates : Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime]
        A datetime-like object or array of datetime-like objects to be converted
    precision : Optional[Literal["ns", "us", "s", "ms"]]
        The precision of the timestamp. If None, the default precision is used.
        The default precision is ns for Pandas 2 and before; us for Pandas 3
        - "ns": nanoseconds
        - "us": microseconds
        - "ms": milliseconds
        - "s": seconds

    Returns
    -------
    np.datetime64
        A numpy datetime64 or array of numpy datetime64s
    """
    # Delegate the conversion (and any precision cast) to to_timestamp,
    # then return the underlying numpy representation.
    return to_timestamp(dates, precision).to_numpy()


def to_datestr(
    dates: Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime],
    precision: Optional[Literal["ns", "us", "s", "ms"]] = None,
) -> str:
    """Converts a provided datetime-like object to ISO format date strings

    Parameters
    ----------
    dates : Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime]
        A datetime-like object or array of datetime-like objects to be converted
    precision : Optional[Literal["ns", "us", "s", "ms"]]
        The precision of the timestamp. If None, the default precision is used.
        The default precision is ns for Pandas 2 and before; us for Pandas 3
        - "ns": nanoseconds
        - "us": microseconds
        - "ms": milliseconds
        - "s": seconds

    Returns
    -------
    str
        A string or array of strings in ISO date format
    """
    # Convert once, honouring the requested precision; the string form of a
    # datetime64 is its ISO representation at that precision.
    dates = to_datetime64(dates, precision)
    if hasattr(dates, "__iter__"):
        # Array-like input: elementwise string conversion via numpy
        return dates.astype(str)
    return str(dates)


def detect_str_precision(datestr: str) -> Literal["s", "ms", "us", "ns"]:
    """Detects the precision of a datetime str by counting the number of
    digits in its fractional-seconds component (after the first ".").

    Parameters
    ----------
    datestr : str
        Input string

    Returns
    -------
    Literal['s', 'ms', 'us', 'ns']
        "s" if the string has no ".<digits>" component; otherwise "ms",
        "us" or "ns" for up to 3, 6 or 9 fractional digits respectively
        (digit counts are rounded up to the next supported unit, e.g. 4
        digits -> "us")

    Raises
    ------
    ValueError
        If the fractional component has more than 9 digits, i.e. finer
        than nanosecond precision
    """
    # Fractional seconds are the digits immediately following the first "."
    frac_match = re.search(r"\.(\d+)", datestr)
    if not frac_match:
        # No fractional component at all -> whole-second precision
        return "s"
    n_digits = len(frac_match.group(1))
    if n_digits <= 3:
        return "ms"
    if n_digits <= 6:
        return "us"
    if n_digits <= 9:
        return "ns"
    raise ValueError("Finer than ns precision.")


def match_datetime_format(
dates: Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime],
target: Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime],
) -> Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime]:
"""Converts the provided datetime-like objects to the same datetime format
as the provided target
as the provided target, ensuring that the precisions match

Parameters
----------
Expand All @@ -164,21 +225,25 @@ def match_datetime_format(
of datetime-like objects
"""
if isinstance(target, str):
return to_datestr(dates)
precision = detect_str_precision(target)
return to_datestr(dates, precision)
if isinstance(target, xr.DataArray):
target = target.values
if isinstance(target, pd.Series):
target = target.to_numpy()
if hasattr(target, "__iter__"):
target = target[0]
if isinstance(target, str):
return to_datestr(dates)
precision = detect_str_precision(target)
return to_datestr(dates, precision)
if isinstance(target, cftime.datetime):
return to_cftime(dates, target.calendar)
if isinstance(target, pd.Timestamp):
return to_timestamp(dates)
precision = target.unit
return to_timestamp(dates, precision=precision)
if isinstance(target, np.datetime64):
return to_datetime64(dates)
precision = np.datetime_data(target)[0]
return to_datetime64(dates, precision=precision)
if isinstance(target, datetime.datetime):
return to_datetime(dates)
raise ValueError("Target is not a valid datetime format")
Loading