Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
### Tobac Changelog

_**Unreleased Changes:**_

**Bug fixes**

- Update internals to allow for pandas 3 compatibility [#568](https://github.com/tobac-project/tobac/pull/568)

_**Version 1.6.3:**_

**Enhancements for Users**
Expand Down
2 changes: 1 addition & 1 deletion environment-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ dependencies:
- scipy
- scikit-image
- scikit-learn
- pandas<3
- pandas
- matplotlib
- iris
- xarray
Expand Down
2 changes: 1 addition & 1 deletion environment-examples.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ dependencies:
- scipy
- scikit-image
- scikit-learn
- pandas<3
- pandas
- matplotlib
- iris
- xarray<2024.10.0
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ dependencies:
- scipy
- scikit-image
- scikit-learn
- pandas<3
- pandas
- matplotlib
- iris
- xarray
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ dependencies = [
"scipy",
"scikit-image",
"scikit-learn",
"pandas<3",
"pandas",
"matplotlib",
"scitools-iris",
"xarray",
Expand Down
9 changes: 5 additions & 4 deletions tobac/feature_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -1539,7 +1539,6 @@ def feature_detection_multithreshold(

# we map the feature index to the original index
if return_labels:

for i, time_i, label_field_i, features_i in field_and_features_over_time(
label_fields, features
):
Expand Down Expand Up @@ -1684,9 +1683,11 @@ def filter_min_distance(

# Calculate feature locations in cartesian coordinates
if is_3D:
feature_locations = features[
[z_coordinate_name, y_coordinate_name, x_coordinate_name]
].to_numpy()
feature_locations = (
features[[z_coordinate_name, y_coordinate_name, x_coordinate_name]]
.to_numpy()
.copy()
)
feature_locations[:, 0] *= dz
feature_locations[:, 1:] *= dxy
else:
Expand Down
38 changes: 19 additions & 19 deletions tobac/segmentation/watershed_segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -776,7 +776,7 @@ def segmentation_timestep(
)

# Get features that are needed for the buddy box
buddy_features = deepcopy(features_in.iloc[feat_inds])
buddy_features = features_in.iloc[feat_inds].copy()

# create arrays to contain points of all buddies
# and their transpositions/transformations
Expand Down Expand Up @@ -824,15 +824,15 @@ def segmentation_timestep(
)

# edit value in buddy_features dataframe
buddy_features.hdim_1.values[buddy_looper] = (
pbc_utils.transfm_pbc_point(
float(buddy_feat.hdim_1), hdim1_min, hdim1_max
)
buddy_features.iloc[
buddy_looper, buddy_features.columns.get_loc("hdim_1")
] = pbc_utils.transfm_pbc_point(
float(buddy_feat.hdim_1), hdim1_min, hdim1_max
)
buddy_features.hdim_2.values[buddy_looper] = (
pbc_utils.transfm_pbc_point(
float(buddy_feat.hdim_2), hdim2_min, hdim2_max
)
buddy_features.iloc[
buddy_looper, buddy_features.columns.get_loc("hdim_2")
] = pbc_utils.transfm_pbc_point(
float(buddy_feat.hdim_2), hdim2_min, hdim2_max
)

buddy_looper = buddy_looper + 1
Expand Down Expand Up @@ -903,16 +903,16 @@ def segmentation_timestep(
if "vdim" not in buddy_features:
buddy_features["vdim"] = np.zeros(len(buddy_features), dtype=int)
for buddy_looper in range(0, len(buddy_features)):
buddy_features.vdim.values[buddy_looper] = (
buddy_features.vdim.values[buddy_looper] - bbox_zstart
)

buddy_features.hdim_1.values[buddy_looper] = (
buddy_features.hdim_1.values[buddy_looper] - bbox_ystart
)
buddy_features.hdim_2.values[buddy_looper] = (
buddy_features.hdim_2.values[buddy_looper] - bbox_xstart
)
buddy_features.iloc[
buddy_looper, buddy_features.columns.get_loc("vdim")
] = (buddy_features.vdim.values[buddy_looper] - bbox_zstart)

buddy_features.iloc[
buddy_looper, buddy_features.columns.get_loc("hdim_1")
] = (buddy_features.hdim_1.values[buddy_looper] - bbox_ystart)
buddy_features.iloc[
buddy_looper, buddy_features.columns.get_loc("hdim_2")
] = (buddy_features.hdim_2.values[buddy_looper] - bbox_xstart)

# Create dask array from input data:
buddy_data = buddy_rgn
Expand Down
24 changes: 21 additions & 3 deletions tobac/tests/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ def test_to_datestr():
assert (
datetime_utils.to_datestr(date) == "2000-01-01T00:00:00.000000000"
or datetime_utils.to_datestr(date) == "2000-01-01T00:00:00"
or datetime_utils.to_datestr(date) == "2000-01-01T00:00:00.000000"
)


Expand All @@ -163,9 +164,11 @@ def test_to_datestr_array():
cftime.DatetimeNoLeap(2000, 1, 1),
]
for date in test_dates:
assert datetime_utils.to_datestr([date]) == [
"2000-01-01T00:00:00.000000000"
] or datetime_utils.to_datestr([date]) == ["2000-01-01T00:00:00"]
assert (
datetime_utils.to_datestr([date]) == ["2000-01-01T00:00:00.000000000"]
or datetime_utils.to_datestr([date]) == ["2000-01-01T00:00:00"]
or (datetime_utils.to_datestr([date]) == ["2000-01-01T00:00:00.000000"])
)


def test_match_datetime_format():
Expand Down Expand Up @@ -214,3 +217,18 @@ def test_match_datetime_format_error():
"""
with pytest.raises(ValueError, match="Target is not a valid datetime*"):
datetime_utils.match_datetime_format(datetime(2000, 1, 1), 1.5)


@pytest.mark.parametrize(
    "date_in,precision",
    [
        ("2000-02-04T00:00:00", "s"),
        ("2000-02-04T00:00:00.000", "ms"),
        ("2000-02-04T00:00:00.000000", "us"),
        ("2000-02-04T00:00:00.000000000", "ns"),
        ("00:00:00", "s"),
    ],
)
def test_detect_str_precision(date_in: str, precision: str):
    """Check that detect_str_precision maps each datetime string to the
    expected precision unit based on its fractional-seconds digits."""
    result = datetime_utils.detect_str_precision(date_in)
    assert result == precision
5 changes: 4 additions & 1 deletion tobac/tests/tracking_tests/test_tracking.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,10 @@ def test_untracked_nat():
assert np.all(pd.isnull(output["time_cell"]))
# the exact data type depends on architecture, so
# instead just check by name
assert output["time_cell"].dtype.name == "timedelta64[ns]"
assert (
output["time_cell"].dtype.name == "timedelta64[ns]"
or output["time_cell"].dtype.name == "timedelta64[us]"
)


@pytest.mark.parametrize(
Expand Down
81 changes: 73 additions & 8 deletions tobac/utils/datetime.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
"""Functions for converting between and working with different datetime formats"""

from typing import Union
from typing import Union, Optional, Literal
import datetime
import numpy as np
import pandas as pd
import xarray as xr
import cftime
import re


def to_cftime(
Expand Down Expand Up @@ -51,13 +52,21 @@ def to_cftime(

def to_timestamp(
dates: Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime],
precision: Optional[Literal["ns", "us", "s", "ms"]] = None,
) -> pd.Timestamp:
"""Converts a provided datetime-like object to a pandas timestamp

Parameters
----------
dates : Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime]
A datetime-like object or array of datetime-like objects to be converted
precision : Optional[Literal["ns", "us", "s", "ms"]]
The precision of the timestamp. If None, the default precision is used.
The default precision is ns for Pandas 2 and before; us for Pandas 3
- "ns": nanoseconds
- "us": microseconds
- "ms": milliseconds
- "s": seconds

Returns
-------
Expand All @@ -74,6 +83,9 @@ def to_timestamp(
else:
pd_dates = pd.to_datetime(dates)

if precision is not None:
pd_dates = pd_dates.astype(f"datetime64[{precision}]")

if squeeze_output:
return next(iter(pd_dates))
return pd_dates
Expand All @@ -99,49 +111,98 @@ def to_datetime(

def to_datetime64(
    dates: Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime],
    precision: Optional[Literal["ns", "us", "s", "ms"]] = None,
) -> np.datetime64:
    """Converts a provided datetime-like object to numpy datetime64 objects

    Parameters
    ----------
    dates : Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime]
        A datetime-like object or array of datetime-like objects to be converted
    precision : Optional[Literal["ns", "us", "s", "ms"]]
        The precision of the timestamp. If None, the default precision is used.
        The default precision is ns for Pandas 2 and before; us for Pandas 3
        - "ns": nanoseconds
        - "us": microseconds
        - "ms": milliseconds
        - "s": seconds

    Returns
    -------
    np.datetime64
        A numpy datetime64 or array of numpy datetime64s
    """
    # Delegate the conversion (and any precision cast) to to_timestamp,
    # then return the underlying numpy representation.
    return to_timestamp(dates, precision).to_numpy()


def to_datestr(
    dates: Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime],
    precision: Optional[Literal["ns", "us", "s", "ms"]] = None,
) -> str:
    """Converts a provided datetime-like object to ISO format date strings

    Parameters
    ----------
    dates : Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime]
        A datetime-like object or array of datetime-like objects to be converted
    precision : Optional[Literal["ns", "us", "s", "ms"]]
        The precision of the timestamp. If None, the default precision is used.
        The default precision is ns for Pandas 2 and before; us for Pandas 3
        - "ns": nanoseconds
        - "us": microseconds
        - "ms": milliseconds
        - "s": seconds

    Returns
    -------
    str
        A string or array of strings in ISO date format
    """
    # Convert once, honouring the requested precision; the string form of a
    # datetime64 is its ISO representation at that precision.
    dates = to_datetime64(dates, precision)
    if hasattr(dates, "__iter__"):
        # Array-like input: elementwise string conversion via numpy
        return dates.astype(str)
    return str(dates)


def detect_str_precision(datestr: str) -> Literal["s", "ms", "us", "ns"]:
    """Detects the precision of a datetime str by counting the number of
    digits in its fractional-seconds component (after the first ".").

    Parameters
    ----------
    datestr : str
        Input string

    Returns
    -------
    Literal['s', 'ms', 'us', 'ns']
        "s" if the string has no ".<digits>" component; otherwise "ms",
        "us" or "ns" for up to 3, 6 or 9 fractional digits respectively
        (digit counts are rounded up to the next supported unit, e.g. 4
        digits -> "us")

    Raises
    ------
    ValueError
        If the fractional component has more than 9 digits, i.e. finer
        than nanosecond precision
    """
    # Fractional seconds are the digits immediately following the first "."
    frac_match = re.search(r"\.(\d+)", datestr)
    if not frac_match:
        # No fractional component at all -> whole-second precision
        return "s"
    n_digits = len(frac_match.group(1))
    if n_digits <= 3:
        return "ms"
    if n_digits <= 6:
        return "us"
    if n_digits <= 9:
        return "ns"
    raise ValueError("Finer than ns precision.")


def match_datetime_format(
dates: Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime],
target: Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime],
) -> Union[str, datetime.datetime, np.datetime64, pd.Timestamp, cftime.datetime]:
"""Converts the provided datetime-like objects to the same datetime format
as the provided target
as the provided target, ensuring that the precisions match

Parameters
----------
Expand All @@ -164,21 +225,25 @@ def match_datetime_format(
of datetime-like objects
"""
if isinstance(target, str):
return to_datestr(dates)
precision = detect_str_precision(target)
return to_datestr(dates, precision)
if isinstance(target, xr.DataArray):
target = target.values
if isinstance(target, pd.Series):
target = target.to_numpy()
if hasattr(target, "__iter__"):
target = target[0]
if isinstance(target, str):
return to_datestr(dates)
precision = detect_str_precision(target)
return to_datestr(dates, precision)
if isinstance(target, cftime.datetime):
return to_cftime(dates, target.calendar)
if isinstance(target, pd.Timestamp):
return to_timestamp(dates)
precision = target.unit
return to_timestamp(dates, precision=precision)
if isinstance(target, np.datetime64):
return to_datetime64(dates)
precision = np.datetime_data(target)[0]
return to_datetime64(dates, precision=precision)
if isinstance(target, datetime.datetime):
return to_datetime(dates)
raise ValueError("Target is not a valid datetime format")
Loading