From 913b231f5933fc13d43c0dc05e8a82f3307717c3 Mon Sep 17 00:00:00 2001 From: William Jones Date: Sat, 12 Jul 2025 14:18:58 +0100 Subject: [PATCH 01/16] Add convert_feature_mask_to_cells function and tests --- tobac/tests/test_utils_mask.py | 276 +++++++++++++++++++++++++++++++++ tobac/utils/mask.py | 36 +++++ 2 files changed, 312 insertions(+) create mode 100644 tobac/tests/test_utils_mask.py diff --git a/tobac/tests/test_utils_mask.py b/tobac/tests/test_utils_mask.py new file mode 100644 index 00000000..0dc27a79 --- /dev/null +++ b/tobac/tests/test_utils_mask.py @@ -0,0 +1,276 @@ +"""Tests for utils.mask""" + +from datetime import datetime +import warnings +import numpy as np +import pandas as pd +import pytest +import xarray as xr + +from tobac.utils.mask import convert_feature_mask_to_cells + + +def test_convert_feature_mask_to_cells_single_cell(): + """Test basic functionality of convert_feature_mask_to_cells with a single + tracked cell + """ + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 2 + test_data[2, 1:3, 1:4] = 3 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="feature"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 3], + "frame": [0, 1, 2], + "time": pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ), + "cell": [1, 1, 1], + } + ) + + cell_mask = convert_feature_mask_to_cells(test_features, test_mask) + + # Test all cell mask values are 0 or 1 + assert np.all(np.isin(cell_mask.values, [0, 1])) + + # Test all cell mask values where the feature mask is not zero are 1 + assert np.all(cell_mask.values[test_mask.values != 0] == 1) + + # Test all cell mask values where the feature mask is zero are 0 + assert np.all(cell_mask.values[test_mask.values == 0] == 0) + + # Test coords are the same + assert 
cell_mask.coords.keys() == test_mask.coords.keys() + + +def test_convert_feature_mask_to_cells_multiple_cells(): + """Test functionality of convert_feature_mask_to_cells with multiple cells + and non-consecutive feature and cell values + """ + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 2 + test_data[1, 3:, 3:] = 5 + test_data[2, 3:, 3:] = 6 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="feature"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 5, 6], + "frame": [0, 1, 1, 2], + "time": [ + datetime(2000, 1, 1, 0), + datetime(2000, 1, 1, 1), + datetime(2000, 1, 1, 1), + datetime(2000, 1, 1, 2), + ], + "cell": [1, 1, 3, 3], + } + ) + + cell_mask = convert_feature_mask_to_cells(test_features, test_mask) + + # Test all cell mask values are 0, 1, or 3 + assert np.all(np.isin(cell_mask.values, [0, 1, 3])) + + # Test all cell mask values where the feature mask is 1 or 2 are 1 + assert np.all(cell_mask.values[np.isin(test_mask.values, [1, 2])] == 1) + + # Test all cell mask values where the feature mask is 5 or 6 are 3 + assert np.all(cell_mask.values[np.isin(test_mask.values, [5, 6])] == 3) + + # Test all cell mask values where the feature mask is zero are 0 + assert np.all(cell_mask.values[test_mask.values == 0] == 0) + + # Test coords are the same + assert cell_mask.coords.keys() == test_mask.coords.keys() + + +def test_convert_feature_mask_to_cells_mismatched_mask(): + """ + Test a situation when the user provides a mask that does not correspond to + the given feature dataframe, and has additional values. This should raise a + ValueError and inform the user of the problem. 
+ """ + + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 2 + test_data[2, 1:3, 1:4] = 4 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="feature"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 3], + "frame": [0, 1, 2], + "time": pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ), + "cell": [1, 1, 1], + } + ) + + with pytest.raises( + ValueError, match="Values in feature_mask are not present in features*" + ): + cell_mask = convert_feature_mask_to_cells(test_features, test_mask) + + +def test_convert_feature_mask_to_cells_no_cell_column(): + """ + Test correct error handling when convert_feature_mask_to_cells is given a + features dataframe with no cell column + """ + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 2 + test_data[2, 1:3, 1:4] = 3 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="feature"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 3], + "frame": [0, 1, 2], + "time": pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ), + } + ) + + with pytest.raises(ValueError, match="`cell` column not found in features input*"): + cell_mask = convert_feature_mask_to_cells(test_features, test_mask) + + +def test_convert_feature_mask_to_cells_stub_value(): + """ + Test filtering of stub values from cell_mask + """ + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 2 + test_data[2, 1:3, 1:4] = 3 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), 
datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="feature"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 3], + "frame": [0, 1, 2], + "time": pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ), + "cell": [1, 1, -1], + } + ) + + cell_mask = convert_feature_mask_to_cells(test_features, test_mask) + + # Test that without providing a stub value the stub feature is relabelled to -1 + assert np.all(cell_mask.values[test_mask.values == 3] == -1) + + cell_mask = convert_feature_mask_to_cells(test_features, test_mask, stubs=-1) + + # Test that providing a stub value the stub feature is relabelled to 0 + assert np.all(cell_mask.values[test_mask.values == 3] == 0) + + cell_mask = convert_feature_mask_to_cells(test_features, test_mask, stubs=-999) + + # Test that providing a different stub value the stub feature is relabelled to -1 + assert np.all(cell_mask.values[test_mask.values == 3] == -1) + + +def test_convert_feature_mask_to_cells_no_input_mutation(): + """Test that convert_feature_mask_to_cells does not alter the input features + and mask + """ + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 2 + test_data[2, 1:3, 1:4] = 3 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="feature"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 3], + "frame": [0, 1, 2], + "time": pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ), + "cell": [1, 1, -1], + } + ) + + mask_copy = test_mask.copy(deep=True) + features_copy = test_features.copy(deep=True) + + cell_mask = convert_feature_mask_to_cells(test_features, test_mask, stubs=-1) + + # Test dataframe is the same + pd.testing.assert_frame_equal( + test_features, features_copy + ) + + # Test mask is the same + assert 
mask_copy.equals(test_mask) \ No newline at end of file diff --git a/tobac/utils/mask.py b/tobac/utils/mask.py index 2ed2d310..8f5a7391 100644 --- a/tobac/utils/mask.py +++ b/tobac/utils/mask.py @@ -1,5 +1,10 @@ """Provide essential methods for masking""" +from typing import Optional +import numpy as np +import pandas as pd +import xarray as xr + def column_mask_from2D(mask_2D, cube, z_coord="model_level_number"): """Turn 2D watershedding mask into a 3D mask of selected columns. @@ -362,3 +367,34 @@ def mask_all_surface(mask, masked=False, z_coord="model_level_number"): if masked: mask_i_surface.data = ma.masked_equal(mask_i_surface.core_data(), 0) return mask_i_surface + + +def convert_feature_mask_to_cells( + features: pd.DataFrame, feature_mask: xr.DataArray, stubs: Optional[int] = None +) -> xr.DataArray: + if "cell" not in features.columns: + raise ValueError( + "`cell` column not found in features input, please perform tracking on this data before converting features to cells" + ) + + cell_mask = feature_mask.copy() + + cell_mapper = xr.DataArray( + features.cell.copy(), dims=("feature",), coords=dict(feature=features.feature) + ) + + if stubs is not None: + cell_mapper.data[features.cell == stubs] = 0 + + wh_nonzero_label = np.flatnonzero(cell_mask) + + try: + cell_mask.data.ravel()[wh_nonzero_label] = cell_mapper.loc[ + feature_mask.values.ravel()[wh_nonzero_label] + ] + except KeyError: + raise ValueError( + "Values in feature_mask are not present in features, please ensure that you are using the correct feature_mask for the tracked features, and that any filtering has been applied to both the mask and features" + ) + + return cell_mask From 8e163ef612e0f8141778c9c78a51950945b5a70e Mon Sep 17 00:00:00 2001 From: William Jones Date: Sat, 12 Jul 2025 14:28:26 +0100 Subject: [PATCH 02/16] Add documentation to convert_feature_mask_to_cells --- tobac/tests/test_utils_mask.py | 10 ++++------ tobac/utils/mask.py | 33 +++++++++++++++++++++++++++++++++ 2 files 
changed, 37 insertions(+), 6 deletions(-) diff --git a/tobac/tests/test_utils_mask.py b/tobac/tests/test_utils_mask.py index 0dc27a79..31c3a356 100644 --- a/tobac/tests/test_utils_mask.py +++ b/tobac/tests/test_utils_mask.py @@ -232,7 +232,7 @@ def test_convert_feature_mask_to_cells_stub_value(): def test_convert_feature_mask_to_cells_no_input_mutation(): - """Test that convert_feature_mask_to_cells does not alter the input features + """Test that convert_feature_mask_to_cells does not alter the input features and mask """ test_data = np.zeros([3, 4, 5], dtype=int) @@ -268,9 +268,7 @@ def test_convert_feature_mask_to_cells_no_input_mutation(): cell_mask = convert_feature_mask_to_cells(test_features, test_mask, stubs=-1) # Test dataframe is the same - pd.testing.assert_frame_equal( - test_features, features_copy - ) - + pd.testing.assert_frame_equal(test_features, features_copy) + # Test mask is the same - assert mask_copy.equals(test_mask) \ No newline at end of file + assert mask_copy.equals(test_mask) diff --git a/tobac/utils/mask.py b/tobac/utils/mask.py index 8f5a7391..f6b62c7f 100644 --- a/tobac/utils/mask.py +++ b/tobac/utils/mask.py @@ -372,6 +372,37 @@ def mask_all_surface(mask, masked=False, z_coord="model_level_number"): def convert_feature_mask_to_cells( features: pd.DataFrame, feature_mask: xr.DataArray, stubs: Optional[int] = None ) -> xr.DataArray: + """Relabels a feature mask provided by tobac.segmentation with the cell + values provided by tobac.linking_trackpy + + Parameters + ---------- + features : pd.DataFrame + A feature dataframe with cell values provided by tobac.linking_trackpy + feature_mask : xr.DataArray + A feature mask from tobac.segmentation corresponding to the features in + the feature dataframe input + stubs : int, optional (default: None) + The stub values used for unlinked cells in tobac.linking_trackpy. If + None, the stub cells with be relabelled with the stub cell value in the + feature dataframe. 
If a value is provided, the masked regions + corresponding to stub cells with be removed from the output. WARNING: + using this input will make it impossible to perfectly reverse this + operation using convert_cell_mask_to_features. + + Returns + ------- + xr.DataArray + A mask of cell regions corresponding to the cells in the input dataframe + + Raises + ------ + ValueError + If the features input does not have a cell column + ValueError + If there are labels in the feature_mask that are not present in the + features dataframe + """ if "cell" not in features.columns: raise ValueError( "`cell` column not found in features input, please perform tracking on this data before converting features to cells" @@ -397,4 +428,6 @@ def convert_feature_mask_to_cells( "Values in feature_mask are not present in features, please ensure that you are using the correct feature_mask for the tracked features, and that any filtering has been applied to both the mask and features" ) + cell_mask = cell_mask.assign_attrs(dict(units="cell")) + return cell_mask From e41c4b65a75ffc5cd368a40cff9cb5da91a7aa94 Mon Sep 17 00:00:00 2001 From: William Jones Date: Sat, 12 Jul 2025 17:13:50 +0100 Subject: [PATCH 03/16] Add convert_cell_mask_to_features function and tests --- tobac/tests/test_utils_mask.py | 289 ++++++++++++++++++++++++++++++++- tobac/utils/mask.py | 105 +++++++++++- 2 files changed, 390 insertions(+), 4 deletions(-) diff --git a/tobac/tests/test_utils_mask.py b/tobac/tests/test_utils_mask.py index 31c3a356..c67a662b 100644 --- a/tobac/tests/test_utils_mask.py +++ b/tobac/tests/test_utils_mask.py @@ -7,7 +7,10 @@ import pytest import xarray as xr -from tobac.utils.mask import convert_feature_mask_to_cells +from tobac.utils.mask import ( + convert_cell_mask_to_features, + convert_feature_mask_to_cells, +) def test_convert_feature_mask_to_cells_single_cell(): @@ -272,3 +275,287 @@ def test_convert_feature_mask_to_cells_no_input_mutation(): # Test mask is the same assert 
mask_copy.equals(test_mask) + + +def test_convert_cell_mask_to_features_single_timestep(): + """Test basic functionality of convert_cell_mask_to_features with a single + tracked cell and timestep + """ + test_data = np.zeros([1, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict(time=[datetime(2000, 1, 1, 0)]), + attrs=dict(units="feature"), + ) + + test_features = pd.DataFrame( + { + "feature": [2], + "frame": [0], + "time": [datetime(2000, 1, 1, 0)], + "cell": [1], + } + ) + + feature_mask = convert_cell_mask_to_features(test_features, test_mask) + + # Test all feature mask values are 0 or 2 + assert np.all(np.isin(feature_mask.values, [0, 2])) + + # Test all feature mask values where the cell mask is not zero are 2 + assert np.all(feature_mask.values[test_mask.values != 0] == 2) + + # Test all cell mask values where the feature mask is zero are 0 + assert np.all(feature_mask.values[test_mask.values == 0] == 0) + + # Test coords are the same + assert feature_mask.coords.keys() == test_mask.coords.keys() + + +def test_convert_cell_mask_to_features_single_cell(): + """Test basic functionality of convert_cell_mask_to_features with a single + tracked cell + """ + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 1 + test_data[2, 1:3, 1:4] = 1 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="feature"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 3], + "frame": [0, 1, 2], + "time": pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ), + "cell": [1, 1, 1], + } + ) + + feature_mask = convert_cell_mask_to_features(test_features, test_mask) + + # Test all feature mask values where the cell mask is not zero are in test_features.feature + assert np.all( + 
np.isin(feature_mask.values[test_mask.values != 0], test_features.feature) + ) + + # Test all cell mask values where the feature mask is zero are 0 + assert np.all(feature_mask.values[test_mask.values == 0] == 0) + + # Test coords are the same + assert feature_mask.coords.keys() == test_mask.coords.keys() + + +def test_convert_cell_mask_to_features_multiple_cells(): + """Test functionality of convert_cell_mask_to_features with multiple cells + and non-consecutive feature and cell values + """ + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 1 + test_data[1, 3:, 3:] = 3 + test_data[2, 3:, 3:] = 3 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="feature"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 5, 6], + "frame": [0, 1, 1, 2], + "time": [ + datetime(2000, 1, 1, 0), + datetime(2000, 1, 1, 1), + datetime(2000, 1, 1, 1), + datetime(2000, 1, 1, 2), + ], + "cell": [1, 1, 3, 3], + } + ) + + feature_mask = convert_cell_mask_to_features(test_features, test_mask) + + # Test all feature mask values where the cell mask is not zero are in test_features.feature + assert np.all( + np.isin(feature_mask.values[test_mask.values != 0], test_features.feature) + ) + + # Test all cell mask values where the cell mask is 1 are 1 or 2 + assert np.all(np.isin(feature_mask.values[test_mask.values == 1], [1, 2])) + + # Test all cell mask values where the cell mask is 3 are 5 or 6 + assert np.all(np.isin(feature_mask.values[test_mask.values == 3], [5, 6])) + + # Test all cell mask values where the feature mask is zero are 0 + assert np.all(feature_mask.values[test_mask.values == 0] == 0) + + # Test coords are the same + assert feature_mask.coords.keys() == test_mask.coords.keys() + + +def test_convert_cell_mask_to_features_0_cell(): + """Test functionality of 
convert_feature_mask_to_cells when a cell has the + value 0 + """ + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 1 + test_data[2, 1:3, 1:4] = 0 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="feature"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 3], + "frame": [0, 1, 2], + "time": pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ), + "cell": [1, 1, -1], + } + ) + + feature_mask = convert_cell_mask_to_features(test_features, test_mask) + + # Test all feature mask values where the cell mask is not zero are in test_features.feature + assert np.all( + np.isin(feature_mask.values[test_mask.values != 0], test_features.feature) + ) + + # Test all cell mask values where the feature mask is zero are 0 + assert np.all(feature_mask.values[test_mask.values == 0] == 0) + + # Test coords are the same + assert feature_mask.coords.keys() == test_mask.coords.keys() + + +def test_convert_cell_mask_to_features_stub_cell(): + """Test functionality of convert_feature_mask_to_cells when a cell has a + stub value but cell mask is 0 + """ + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = -1 + test_data[1, 3:, 3:] = -1 + test_data[2, 3:, 3:] = 3 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="feature"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 5, 6], + "frame": [0, 1, 1, 2], + "time": [ + datetime(2000, 1, 1, 0), + datetime(2000, 1, 1, 1), + datetime(2000, 1, 1, 1), + datetime(2000, 1, 1, 2), + ], + "cell": [1, -1, -1, 3], + } + ) + + # Test without stub value provided the correct error is raised. 
+ with pytest.raises( + ValueError, + match="Duplicate cell values found for a single timestep in features. This may be because there are stub cells *", + ): + feature_mask = convert_cell_mask_to_features(test_features, test_mask) + + feature_mask = convert_cell_mask_to_features(test_features, test_mask, stubs=-1) + + # Test all feature mask values where the cell mask is not zero are in test_features.feature + assert np.all( + np.isin(feature_mask.values[test_mask.values > 0], test_features.feature) + ) + + # Test all cell mask values where the feature mask is zero or the stub value are 0 + assert np.all(feature_mask.values[np.isin(test_mask.values, [0, -1])] == 0) + + # Test coords are the same + assert feature_mask.coords.keys() == test_mask.coords.keys() + + with pytest.raises( + ValueError, + match="Duplicate cell values found for a single timestep in features that does not match the provided stub value*", + ): + feature_mask = convert_cell_mask_to_features( + test_features, test_mask, stubs=-999 + ) + + +def test_convert_cell_mask_to_features_mismatched_cell(): + """Test functionality of convert_feature_mask_to_cells when a cell exists in + the mask that does not occur in the features dataframe + """ + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 1 + test_data[2, 1:3, 1:4] = 3 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="feature"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 3], + "frame": [0, 1, 2], + "time": pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ), + "cell": [1, 1, 2], + } + ) + + with pytest.raises( + ValueError, match="Cell values in cell_mask are not present in features, *" + ): + feature_mask = convert_cell_mask_to_features(test_features, test_mask) diff --git a/tobac/utils/mask.py 
b/tobac/utils/mask.py index f6b62c7f..8a55ca9d 100644 --- a/tobac/utils/mask.py +++ b/tobac/utils/mask.py @@ -5,6 +5,8 @@ import pandas as pd import xarray as xr +from tobac.utils.generators import field_and_features_over_time + def column_mask_from2D(mask_2D, cube, z_coord="model_level_number"): """Turn 2D watershedding mask into a 3D mask of selected columns. @@ -386,9 +388,10 @@ def convert_feature_mask_to_cells( The stub values used for unlinked cells in tobac.linking_trackpy. If None, the stub cells with be relabelled with the stub cell value in the feature dataframe. If a value is provided, the masked regions - corresponding to stub cells with be removed from the output. WARNING: - using this input will make it impossible to perfectly reverse this - operation using convert_cell_mask_to_features. + corresponding to stub cells with be removed from the output. Warning: + the presence of stub cells may make it impossible to perfectly + reconstruct the feature mask afterwards as any stub features will be + removed. Returns ------- @@ -431,3 +434,99 @@ def convert_feature_mask_to_cells( cell_mask = cell_mask.assign_attrs(dict(units="cell")) return cell_mask + + +def convert_cell_mask_to_features( + features: pd.DataFrame, + cell_mask: xr.DataArray, + stubs: Optional[int] = None, +) -> xr.DataArray: + """Relabels a cell mask, such as that produced by + convert_feature_mask_to_cells, to the feature values provided by + tobac.linking_trackpy + + Parameters + ---------- + features : pd.DataFrame + A feature dataframe with cell values provided by tobac.linking_trackpy + cell_mask : xr.DataArray + A cell mask corresponding to the cells in the feature dataframe input + stubs : int, optional (default: None) + The stub values used for unlinked cells in tobac.linking_trackpy. If + None, the stub cells will be relabelled with the stub cell value in the + feature dataframe. 
If a value is provided, the masked regions + corresponding to stub cells will be removed from the output. Warning: + features with stub values will be set to zero in the output feature + mask + + Returns + ------- + xr.DataArray + A mask of feature regions corresponding to the features in the input + dataframe + + Raises + ------ + ValueError + If duplicate cell values are present at any timestep in the input + dataframe and the stubs parameter is not provided + ValueError + If duplicate cell values are present at any timestep in the input + dataframe that are not equal to the provided stubs value + ValueError + If cell_mask includes cell values not present in the input dataframe + """ + feature_mask = cell_mask.copy() + + for i, _, mask_slice, features_slice in field_and_features_over_time( + feature_mask, features + ): + + if stubs is None: + if np.any(features_slice.cell.duplicated(keep=False)): + raise ValueError( + "Duplicate cell values found for a single timestep in features. This may be because there are stub cells included in the dataframe. If so, please provide these using the stubs parameter" + ) + + cell_mapper = xr.DataArray( + features_slice.feature.copy(), + dims=("feature",), + coords=dict(feature=features_slice.cell.copy()), + ) + + else: + features_slice = features_slice.copy() + # Set feature label to 0 for stub cells + features_slice.loc[features_slice.cell == stubs, ["feature"]] = 0 + + if np.any( + features_slice.loc[features_slice.feature != 0, ["cell"]].duplicated( + keep=False + ) + ): + raise ValueError( + "Duplicate cell values found for a single timestep in features that does not match the provided stub value. This may be because the stub value provided is incorrect." 
+ ) + + features_slice = features_slice[~features_slice.duplicated()] + + cell_mapper = xr.DataArray( + features_slice.feature.copy(), + dims=("feature",), + coords=dict(feature=features_slice.cell.copy()), + ) + + wh_nonzero_label = np.flatnonzero(mask_slice) + + try: + feature_mask.data[i].ravel()[wh_nonzero_label] = cell_mapper.loc[ + mask_slice.values.ravel()[wh_nonzero_label] + ] + except KeyError: + raise ValueError( + "Cell values in cell_mask are not present in features, please ensure that you are using the correct cell_mask for the tracked features, and that any filtering has been applied to both the mask and features" + ) + + feature_mask = feature_mask.assign_attrs(dict(units="feature")) + + return feature_mask From c20b958c3e7203512d3144d61fa5d23ac7a95f0d Mon Sep 17 00:00:00 2001 From: William Jones Date: Sat, 12 Jul 2025 17:18:24 +0100 Subject: [PATCH 04/16] Add convert_cell_mask_to_features function and tests --- tobac/tests/test_utils_mask.py | 35 ++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tobac/tests/test_utils_mask.py b/tobac/tests/test_utils_mask.py index c67a662b..a1d4c377 100644 --- a/tobac/tests/test_utils_mask.py +++ b/tobac/tests/test_utils_mask.py @@ -559,3 +559,38 @@ def test_convert_cell_mask_to_features_mismatched_cell(): ValueError, match="Cell values in cell_mask are not present in features, *" ): feature_mask = convert_cell_mask_to_features(test_features, test_mask) + + +def test_convert_cell_mask_to_features_no_cell_column(): + """ + Test correct error handling when convert_cell_mask_to_features is given a + features dataframe with no cell column + """ + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 1 + test_data[2, 1:3, 1:4] = 1 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="feature"), + ) + + 
test_features = pd.DataFrame( + { + "feature": [1, 2, 3], + "frame": [0, 1, 2], + "time": pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ), + } + ) + + with pytest.raises(ValueError, match="`cell` column not found in features input*"): + cell_mask = convert_cell_mask_to_features(test_features, test_mask) \ No newline at end of file From a3d408f6bcbd6818c208b48d4ec8c8167bfca22d Mon Sep 17 00:00:00 2001 From: William Jones Date: Sat, 12 Jul 2025 17:23:45 +0100 Subject: [PATCH 05/16] Add tests for missing cell column and input mutation to convert_cell_mask_to_features --- tobac/tests/test_utils_mask.py | 35 ---------------------------------- 1 file changed, 35 deletions(-) diff --git a/tobac/tests/test_utils_mask.py b/tobac/tests/test_utils_mask.py index a1d4c377..c67a662b 100644 --- a/tobac/tests/test_utils_mask.py +++ b/tobac/tests/test_utils_mask.py @@ -559,38 +559,3 @@ def test_convert_cell_mask_to_features_mismatched_cell(): ValueError, match="Cell values in cell_mask are not present in features, *" ): feature_mask = convert_cell_mask_to_features(test_features, test_mask) - - -def test_convert_cell_mask_to_features_no_cell_column(): - """ - Test correct error handling when convert_cell_mask_to_features is given a - features dataframe with no cell column - """ - test_data = np.zeros([3, 4, 5], dtype=int) - test_data[0, 1:3, 1:4] = 1 - test_data[1, 1:3, 1:4] = 1 - test_data[2, 1:3, 1:4] = 1 - - test_mask = xr.DataArray( - test_data, - dims=("time", "y", "x"), - coords=dict( - time=pd.date_range( - datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 - ) - ), - attrs=dict(units="feature"), - ) - - test_features = pd.DataFrame( - { - "feature": [1, 2, 3], - "frame": [0, 1, 2], - "time": pd.date_range( - datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 - ), - } - ) - - with pytest.raises(ValueError, match="`cell` column not found in features input*"): - cell_mask = convert_cell_mask_to_features(test_features, 
test_mask) \ No newline at end of file From b12dc16cbc8801732f2f0e1ec1ff3bf94b900048 Mon Sep 17 00:00:00 2001 From: William Jones Date: Sat, 12 Jul 2025 17:23:55 +0100 Subject: [PATCH 06/16] Add tests for missing cell column and input mutation to convert_cell_mask_to_features --- tobac/tests/test_utils_mask.py | 78 ++++++++++++++++++++++++++++++++++ tobac/utils/mask.py | 7 +++ 2 files changed, 85 insertions(+) diff --git a/tobac/tests/test_utils_mask.py b/tobac/tests/test_utils_mask.py index c67a662b..5089fa68 100644 --- a/tobac/tests/test_utils_mask.py +++ b/tobac/tests/test_utils_mask.py @@ -559,3 +559,81 @@ def test_convert_cell_mask_to_features_mismatched_cell(): ValueError, match="Cell values in cell_mask are not present in features, *" ): feature_mask = convert_cell_mask_to_features(test_features, test_mask) + + +def test_convert_cell_mask_to_features_no_cell_column(): + """ + Test correct error handling when convert_cell_mask_to_features is given a + features dataframe with no cell column + """ + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 1 + test_data[2, 1:3, 1:4] = 1 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="feature"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 3], + "frame": [0, 1, 2], + "time": pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ), + } + ) + + with pytest.raises(ValueError, match="`cell` column not found in features input*"): + feature_mask = convert_cell_mask_to_features(test_features, test_mask) + + +def test_convert_cell_mask_to_features_no_input_mutation(): + """Test that convert_cell_mask_to_features does not alter the input features + and mask + """ + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 1 + test_data[2, 1:3, 1:4] = 
-1 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="feature"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 3], + "frame": [0, 1, 2], + "time": pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ), + "cell": [1, 1, -1], + } + ) + + mask_copy = test_mask.copy(deep=True) + features_copy = test_features.copy(deep=True) + + feature_mask = convert_cell_mask_to_features(test_features, test_mask, stubs=-1) + + # Test dataframe is the same + pd.testing.assert_frame_equal(test_features, features_copy) + + # Test mask is the same + assert mask_copy.equals(test_mask) diff --git a/tobac/utils/mask.py b/tobac/utils/mask.py index 8a55ca9d..da6207fc 100644 --- a/tobac/utils/mask.py +++ b/tobac/utils/mask.py @@ -467,6 +467,8 @@ def convert_cell_mask_to_features( Raises ------ + ValueError + If the features input does not have a cell column ValueError If duplicate cell values are present at any timestep in the input dataframe and the stubs parameter is not provided @@ -476,6 +478,11 @@ def convert_cell_mask_to_features( ValueError If cell_mask includes cell values not present in the input dataframe """ + if "cell" not in features.columns: + raise ValueError( + "`cell` column not found in features input, please perform tracking on this data before converting features to cells" + ) + feature_mask = cell_mask.copy() for i, _, mask_slice, features_slice in field_and_features_over_time( From 0bc2d05a9785fad64cfcd127587e3240c919c3cb Mon Sep 17 00:00:00 2001 From: William Jones Date: Sat, 12 Jul 2025 17:43:19 +0100 Subject: [PATCH 07/16] Add return_cells option to segmentation and tests --- tobac/segmentation/watershed_segmentation.py | 31 ++++ .../segmentation_tests/test_segmentation.py | 147 ++++++++++++++++-- 2 files changed, 168 insertions(+), 10 deletions(-) diff --git 
a/tobac/segmentation/watershed_segmentation.py b/tobac/segmentation/watershed_segmentation.py index 6e4b8716..c113f9e6 100644 --- a/tobac/segmentation/watershed_segmentation.py +++ b/tobac/segmentation/watershed_segmentation.py @@ -34,6 +34,7 @@ import copy import logging import datetime +from token import OP import warnings import iris.cube @@ -50,6 +51,7 @@ from tobac.utils import get_statistics from tobac.utils import decorators from tobac.utils.generators import field_and_features_over_time +from tobac.utils.mask import convert_feature_mask_to_cells def add_markers( @@ -1135,6 +1137,8 @@ def segmentation( segment_number_unassigned: int = 0, statistic: Union[dict[str, Union[Callable, tuple[Callable, dict]]], None] = None, time_padding: Optional[datetime.timedelta] = datetime.timedelta(seconds=0.5), + return_cells: bool = False, + stubs: Optional[int] = None, ) -> tuple[xr.DataArray, pd.DataFrame]: """Use watershedding to determine region above a threshold value around initial seeding position for all time steps of @@ -1212,6 +1216,18 @@ def segmentation( timestep that is time_padding off of the feature. Extremely useful when converting between micro- and nanoseconds, as is common when using Pandas dataframes. + return_cells: bool, optional (default: False) + If True, the segmentation mask returned will use the cell values of the + input dataframe, rather than the feature values. This requires the + features input to be the output from tobac.linking_trackpy. + stubs: int, optional (default: None) + The stub values used for unlinked cells in tobac.linking_trackpy, used + when return_cells=True. If None, the stub cells will be relabelled with + the stub cell value in the feature dataframe. If a value is provided, + the masked regions corresponding to stub cells will be removed from the + output. Warning: the presence of stub cells may make it impossible to + perfectly reconstruct the feature mask afterwards as any stub features + will be removed.
Returns ------- @@ -1250,6 +1266,12 @@ def segmentation( ) ) from exc + # Check features has cell column if return_cells is True: + if return_cells and "cell" not in features.columns: + raise ValueError( + "`cell` column not found in features input, please perform tracking on this data before performing segmentation with `return_cells=True`" + ) + # create our output dataarray segmentation_out_data = xr.DataArray( np.zeros(field.shape, dtype=int), @@ -1300,6 +1322,15 @@ def segmentation( # Merge output from individual timesteps: features_out = pd.concat(features_out_list) + + # Convert feature mask to cells if return_cells is True: + if return_cells: + segmentation_out_data = convert_feature_mask_to_cells( + features_out, + segmentation_out_data, + stubs=stubs, + ) + logging.debug("Finished segmentation") return segmentation_out_data, features_out diff --git a/tobac/tests/segmentation_tests/test_segmentation.py b/tobac/tests/segmentation_tests/test_segmentation.py index e7f0a97d..89be2b31 100644 --- a/tobac/tests/segmentation_tests/test_segmentation.py +++ b/tobac/tests/segmentation_tests/test_segmentation.py @@ -1,6 +1,9 @@ -import pytest -import tobac.segmentation as seg +from datetime import datetime +from os import WTERMSIG import numpy as np +import pandas as pd +import xarray as xr +import pytest from tobac import segmentation, feature_detection, testing from tobac.utils import periodic_boundaries as pbc_utils @@ -86,7 +89,7 @@ def test_segmentation_timestep_2D_feature_2D_seg(): ) for pbc_option in ["none", "hdim_1", "hdim_2", "both"]: - out_seg_mask, out_df = seg.segmentation_timestep( + out_seg_mask, out_df = segmentation.segmentation_timestep( field_in=test_data_iris, features_in=test_feature_ds, dxy=test_dxy, @@ -154,7 +157,7 @@ def test_segmentation_timestep_2D_feature_2D_seg(): ) for pbc_option in ["none", "hdim_1", "hdim_2", "both"]: - out_seg_mask, out_df = seg.segmentation_timestep( + out_seg_mask, out_df = segmentation.segmentation_timestep( 
field_in=test_data_iris, features_in=test_feature_ds, dxy=test_dxy, @@ -222,7 +225,7 @@ def test_segmentation_timestep_2D_feature_2D_seg(): ) for pbc_option in ["none", "hdim_1", "hdim_2", "both"]: - out_seg_mask, out_df = seg.segmentation_timestep( + out_seg_mask, out_df = segmentation.segmentation_timestep( field_in=test_data_iris, features_in=test_feature_ds, dxy=test_dxy, @@ -762,7 +765,7 @@ def test_segmentation_timestep_3d_buddy_box( common_seg_opts["seed_3D_flag"] = "box" common_seg_opts["seed_3D_size"] = seed_3D_size - out_seg_mask, out_df = seg.segmentation_timestep( + out_seg_mask, out_df = segmentation.segmentation_timestep( field_in=test_data_iris, features_in=test_feature_ds, **common_seg_opts ) @@ -790,7 +793,7 @@ def test_segmentation_timestep_3d_buddy_box( PBC_flag="both", ) test_feature_ds_shifted = pd.concat([test_feature_ds_1, test_feature_ds_2]) - out_seg_mask_shifted, out_df = seg.segmentation_timestep( + out_seg_mask_shifted, out_df = segmentation.segmentation_timestep( field_in=test_data_iris_shifted, features_in=test_feature_ds_shifted, **common_seg_opts, @@ -895,7 +898,7 @@ def test_add_markers_pbcs( common_marker_opts["seed_3D_flag"] = "box" common_marker_opts["seed_3D_size"] = seed_3D_size - marker_arr = seg.add_markers( + marker_arr = segmentation.add_markers( test_feature_ds, np.zeros(dset_size), **common_marker_opts ) @@ -931,7 +934,7 @@ def test_add_markers_pbcs( test_feature_ds_shifted = pd.concat([test_feature_ds_1, test_feature_ds_2]) - marker_arr_shifted = seg.add_markers( + marker_arr_shifted = segmentation.add_markers( test_feature_ds_shifted, np.zeros(dset_size), **common_marker_opts ) @@ -989,7 +992,7 @@ def test_empty_segmentation(PBC_flag): seg_arr, data_type="iris", z_dim_num=0, y_dim_num=1, x_dim_num=2 ) - out_seg_mask, out_df = seg.segmentation_timestep( + out_seg_mask, out_df = segmentation.segmentation_timestep( field_in=test_data_iris, features_in=test_feature, **seg_opts ) @@ -1181,3 +1184,127 @@ def 
test_seg_alt_unseed_num(below_thresh, above_thresh, error): seg_out_arr = seg_output.core_data() assert np.all(correct_seg_arr == seg_out_arr) + + +def test_segmentation_return_cells(): + """Test segmentation with the return_cells option""" + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[:, 1:3, 1:4] = 2 + + test_data = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="feature"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 3], + "frame": [0, 1, 2], + "time": pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ), + "hdim_1": [1.5, 1.5, 1.5], + "hdim_2": [2, 2, 2], + "cell": [1, 1, 1], + } + ) + + cell_mask, _ = segmentation.segmentation( + test_features, test_data, 1, threshold=1, return_cells=True + ) + + assert np.all(cell_mask.values[test_data.values == 2] == 1) + assert np.all(cell_mask.values[test_data.values == 0] == 0) + + +def test_segmentation_return_cells_stubs(): + """Test segmentation with the return_cells option and stubs option""" + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[:, 1:3, 1:4] = 2 + + test_data = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="feature"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 3], + "frame": [0, 1, 2], + "time": pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ), + "hdim_1": [1.5, 1.5, 1.5], + "hdim_2": [2, 2, 2], + "cell": [1, 1, -1], + } + ) + + # Without stubs + cell_mask, _ = segmentation.segmentation( + test_features, test_data, 1, threshold=1, return_cells=True + ) + + assert np.all(cell_mask[:-1].values[test_data[:-1].values == 2] == 1) + assert np.all(cell_mask[-1].values[test_data[-1].values == 2] == -1) + assert 
np.all(cell_mask.values[test_data.values == 0] == 0) + + # With stubs + cell_mask, _ = segmentation.segmentation( + test_features, test_data, 1, threshold=1, return_cells=True, stubs=-1 + ) + + assert np.all(cell_mask[:-1].values[test_data[:-1].values == 2] == 1) + assert np.all(cell_mask[-1].values[test_data[-1].values == 2] == 0) + assert np.all(cell_mask.values[test_data.values == 0] == 0) + + +def test_segmentation_return_cells_no_cell_column(): + """Test segmentation with the return_cells raise the correct error if the + input features has no cell column + """ + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[:, 1:3, 1:4] = 2 + + test_data = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="feature"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 3], + "frame": [0, 1, 2], + "time": pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ), + "hdim_1": [1.5, 1.5, 1.5], + "hdim_2": [2, 2, 2], + } + ) + + with pytest.raises( + ValueError, + match="`cell` column not found in features input, please perform tracking on this data before performing segmentation with *", + ): + cell_mask, _ = segmentation.segmentation( + test_features, test_data, 1, threshold=1, return_cells=True + ) From adcd141ec674751f0149b23d0a057e70f72b0960 Mon Sep 17 00:00:00 2001 From: William Jones Date: Sat, 12 Jul 2025 17:44:20 +0100 Subject: [PATCH 08/16] Add imports of convert_feature_mask_to_cells and convert_cell_mask_to_features to __init__ --- tobac/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tobac/__init__.py b/tobac/__init__.py index 646e620a..55560fc5 100644 --- a/tobac/__init__.py +++ b/tobac/__init__.py @@ -76,6 +76,7 @@ add_coordinates, get_spacings, ) +from utils.mask import convert_feature_mask_to_cells, convert_cell_mask_to_features from .feature_detection import 
feature_detection_multithreshold from .tracking import linking_trackpy from .wrapper import maketrack From 3191ac8c1b36011541b04d202e93b9523f716d76 Mon Sep 17 00:00:00 2001 From: William Jones Date: Sat, 12 Jul 2025 17:44:45 +0100 Subject: [PATCH 09/16] Add imports of convert_feature_mask_to_cells and convert_cell_mask_to_features to __init__ --- tobac/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tobac/__init__.py b/tobac/__init__.py index 55560fc5..ed2e2cf3 100644 --- a/tobac/__init__.py +++ b/tobac/__init__.py @@ -76,7 +76,7 @@ add_coordinates, get_spacings, ) -from utils.mask import convert_feature_mask_to_cells, convert_cell_mask_to_features +from .utils.mask import convert_feature_mask_to_cells, convert_cell_mask_to_features from .feature_detection import feature_detection_multithreshold from .tracking import linking_trackpy from .wrapper import maketrack From 7522b545f34a9c0634ba9831eb0e9c0f1f602d7f Mon Sep 17 00:00:00 2001 From: William Jones Date: Sat, 12 Jul 2025 17:46:42 +0100 Subject: [PATCH 10/16] Remove unnecessary import --- tobac/tests/segmentation_tests/test_segmentation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tobac/tests/segmentation_tests/test_segmentation.py b/tobac/tests/segmentation_tests/test_segmentation.py index 89be2b31..ede65e6a 100644 --- a/tobac/tests/segmentation_tests/test_segmentation.py +++ b/tobac/tests/segmentation_tests/test_segmentation.py @@ -1,5 +1,4 @@ from datetime import datetime -from os import WTERMSIG import numpy as np import pandas as pd import xarray as xr From bf12a882d5dee79b0261db416fcc076252dd2346 Mon Sep 17 00:00:00 2001 From: William Jones Date: Sat, 12 Jul 2025 17:55:02 +0100 Subject: [PATCH 11/16] Add iris_to_xarray decorators to convert_feature_mask_to_cells and convert_cell_mask_to_features --- tobac/utils/mask.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tobac/utils/mask.py b/tobac/utils/mask.py index da6207fc..cb12a2e1 100644 --- a/tobac/utils/mask.py 
+++ b/tobac/utils/mask.py @@ -5,6 +5,7 @@ import pandas as pd import xarray as xr +from tobac.utils.decorators import iris_to_xarray from tobac.utils.generators import field_and_features_over_time @@ -371,6 +372,7 @@ def mask_all_surface(mask, masked=False, z_coord="model_level_number"): return mask_i_surface +@iris_to_xarray() def convert_feature_mask_to_cells( features: pd.DataFrame, feature_mask: xr.DataArray, stubs: Optional[int] = None ) -> xr.DataArray: @@ -436,6 +438,7 @@ def convert_feature_mask_to_cells( return cell_mask +@iris_to_xarray() def convert_cell_mask_to_features( features: pd.DataFrame, cell_mask: xr.DataArray, From d71ed11d7b12c07f700eeb526d73ea61f66df99e Mon Sep 17 00:00:00 2001 From: William Jones Date: Sat, 12 Jul 2025 18:02:13 +0100 Subject: [PATCH 12/16] Remove unnecessary import and fix typo in docstring --- tobac/segmentation/watershed_segmentation.py | 1 - tobac/utils/mask.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tobac/segmentation/watershed_segmentation.py b/tobac/segmentation/watershed_segmentation.py index c113f9e6..2cef565b 100644 --- a/tobac/segmentation/watershed_segmentation.py +++ b/tobac/segmentation/watershed_segmentation.py @@ -34,7 +34,6 @@ import copy import logging import datetime -from token import OP import warnings import iris.cube diff --git a/tobac/utils/mask.py b/tobac/utils/mask.py index cb12a2e1..a0af5f83 100644 --- a/tobac/utils/mask.py +++ b/tobac/utils/mask.py @@ -453,7 +453,7 @@ def convert_cell_mask_to_features( features : pd.DataFrame A feature dataframe with cell values provided by tobac.linking_trackpy cell_mask : xr.DataArray - A cekk mask corresponding to the cells in the feature dataframe input + A cell mask corresponding to the cells in the feature dataframe input stubs : int, optional (default: None) The stub values used for unlinked cells in tobac.linking_trackpy. 
If None, the stub cells with be relabelled with the stub cell value in the From 7263ae7cbd020694a4799919ce22c278ad9201a1 Mon Sep 17 00:00:00 2001 From: William Jones Date: Wed, 10 Dec 2025 09:25:52 +0000 Subject: [PATCH 13/16] Add inplace keyword to feature/cell mask conversion functions and use in segmentation --- tobac/segmentation/watershed_segmentation.py | 1 + tobac/utils/mask.py | 17 ++++++++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/tobac/segmentation/watershed_segmentation.py b/tobac/segmentation/watershed_segmentation.py index 2cef565b..09742984 100644 --- a/tobac/segmentation/watershed_segmentation.py +++ b/tobac/segmentation/watershed_segmentation.py @@ -1328,6 +1328,7 @@ def segmentation( features_out, segmentation_out_data, stubs=stubs, + inplace=True, ) logging.debug("Finished segmentation") diff --git a/tobac/utils/mask.py b/tobac/utils/mask.py index a0af5f83..dd16fd9b 100644 --- a/tobac/utils/mask.py +++ b/tobac/utils/mask.py @@ -374,7 +374,7 @@ def mask_all_surface(mask, masked=False, z_coord="model_level_number"): @iris_to_xarray() def convert_feature_mask_to_cells( - features: pd.DataFrame, feature_mask: xr.DataArray, stubs: Optional[int] = None + features: pd.DataFrame, feature_mask: xr.DataArray, stubs: Optional[int] = None, inplace: bool = False ) -> xr.DataArray: """Relabels a feature mask provided by tobac.segmentation with the cell values provided by tobac.linking_trackpy @@ -394,6 +394,8 @@ def convert_feature_mask_to_cells( the presence of stub cells may make it impossible to perfectly reconstruct the feature mask afterwards as any stub features will be removed. 
+ inplace : bool, optional (default: False) + If True, relabel the input feature_mask in-place instead of a copy Returns ------- @@ -413,7 +415,10 @@ def convert_feature_mask_to_cells( "`cell` column not found in features input, please perform tracking on this data before converting features to cells" ) - cell_mask = feature_mask.copy() + if inplace: + cell_mask = feature_mask + else: + cell_mask = feature_mask.copy() cell_mapper = xr.DataArray( features.cell.copy(), dims=("feature",), coords=dict(feature=features.feature) @@ -443,6 +448,7 @@ def convert_cell_mask_to_features( features: pd.DataFrame, cell_mask: xr.DataArray, stubs: Optional[int] = None, + inplace: bool = False, ) -> xr.DataArray: """Relabels a cell mask, such as that produced by convert_feature_mask_to_cells, to the feature values provided by @@ -461,6 +467,8 @@ def convert_cell_mask_to_features( corresponding to stub cells with be removed from the output. Warning: features with stub values will be set to zero in the output feature mask + inplace : bool, optional (default: False) + If True, relabel the input cell_mask in-place instead of a copy Returns ------- @@ -486,7 +494,10 @@ def convert_cell_mask_to_features( "`cell` column not found in features input, please perform tracking on this data before converting features to cells" ) - feature_mask = cell_mask.copy() + if inplace: + feature_mask = cell_mask + else: + feature_mask = cell_mask.copy() for i, _, mask_slice, features_slice in field_and_features_over_time( feature_mask, features From a96321e50dd7b1cd77012a77b23d3b740e92d8fb Mon Sep 17 00:00:00 2001 From: William Jones Date: Wed, 10 Dec 2025 10:22:52 +0000 Subject: [PATCH 14/16] Add tests for inplace mask transforms --- tobac/tests/test_utils_mask.py | 89 ++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/tobac/tests/test_utils_mask.py b/tobac/tests/test_utils_mask.py index 5089fa68..24e8ac94 100644 --- a/tobac/tests/test_utils_mask.py +++ b/tobac/tests/test_utils_mask.py @@ -276,6 +276,49 @@ def
test_convert_feature_mask_to_cells_no_input_mutation(): # Test mask is the same assert mask_copy.equals(test_mask) +def test_convert_feature_mask_to_cells_inplace(): + """Test that convert_feature_mask_to_cells does alter the input mask when + the inplace keyword is used + """ + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 2 + test_data[2, 1:3, 1:4] = 3 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="feature"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 3], + "frame": [0, 1, 2], + "time": pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ), + "cell": [1, 1, -1], + } + ) + + mask_copy = test_mask.copy(deep=True) + features_copy = test_features.copy(deep=True) + + cell_mask = convert_feature_mask_to_cells(test_features, test_mask, stubs=-1, inplace=True) + + # Test dataframe is the same + pd.testing.assert_frame_equal(test_features, features_copy) + + # Test mask is the same + assert cell_mask.equals(test_mask) + assert not mask_copy.equals(test_mask) + def test_convert_cell_mask_to_features_single_timestep(): """Test basic functionality of convert_cell_mask_to_features with a single @@ -637,3 +680,49 @@ def test_convert_cell_mask_to_features_no_input_mutation(): # Test mask is the same assert mask_copy.equals(test_mask) + + +def test_convert_cell_mask_to_features_inplace(): + """Test that convert_cell_mask_to_features does alter the input mask when + the inplace keyword is used + """ + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 1 + test_data[2, 1:3, 1:4] = -1 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="feature"), 
+ ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 3], + "frame": [0, 1, 2], + "time": pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ), + "cell": [1, 1, -1], + } + ) + + mask_copy = test_mask.copy(deep=True) + features_copy = test_features.copy(deep=True) + + feature_mask = convert_cell_mask_to_features(test_features, test_mask, stubs=-1, inplace=True) + + # Test dataframe is the same + pd.testing.assert_frame_equal(test_features, features_copy) + + # Test mask is the same + assert feature_mask.equals(test_mask) + assert not mask_copy.equals(test_mask) + + From c6081b08c5c49acb7f9331b8d50ec690321dc047 Mon Sep 17 00:00:00 2001 From: William Jones Date: Fri, 12 Dec 2025 11:05:46 +0000 Subject: [PATCH 15/16] Add feature/cell to track mask conversion utils and tests --- tobac/merge_split.py | 2 +- tobac/tests/test_utils_mask.py | 640 +++++++++++++++++++++++++++++++++ tobac/utils/mask.py | 151 ++++++++ 3 files changed, 792 insertions(+), 1 deletion(-) diff --git a/tobac/merge_split.py b/tobac/merge_split.py index 58bdf20e..bd46d49d 100644 --- a/tobac/merge_split.py +++ b/tobac/merge_split.py @@ -65,7 +65,7 @@ def merge_split_MEST( cell and the start of a related cell, by default 5 frames. cell_number_unassigned: int, optional - Value given tp unassigned/non-tracked cells by tracking, by default -1. + Value given to unassigned/non-tracked cells by tracking, by default -1. vertical_coord: str, optional Name of the vertical coordinate, default None. 
The vertical coordinate diff --git a/tobac/tests/test_utils_mask.py b/tobac/tests/test_utils_mask.py index 24e8ac94..31e0b243 100644 --- a/tobac/tests/test_utils_mask.py +++ b/tobac/tests/test_utils_mask.py @@ -9,7 +9,9 @@ from tobac.utils.mask import ( convert_cell_mask_to_features, + convert_cell_mask_to_tracks, convert_feature_mask_to_cells, + convert_feature_mask_to_tracks, ) @@ -726,3 +728,641 @@ def test_convert_cell_mask_to_features_inplace(): assert not mask_copy.equals(test_mask) +def test_convert_feature_mask_to_tracks_single_track(): + """Test basic functionality of convert_feature_mask_to_tracks with a single + track + """ + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 2 + test_data[2, 1:3, 1:4] = 3 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="feature"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 3], + "frame": [0, 1, 2], + "time": pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ), + "cell": [1, 1, 1], + "track": [1, 1, 1], + } + ) + + track_mask = convert_feature_mask_to_tracks(test_features, test_mask) + + # Test all cell mask values are 0 or 1 + assert np.all(np.isin(track_mask.values, [0, 1])) + + # Test all cell mask values where the feature mask is not zero are 1 + assert np.all(track_mask.values[test_mask.values != 0] == 1) + + # Test all cell mask values where the feature mask is zero are 0 + assert np.all(track_mask.values[test_mask.values == 0] == 0) + + # Test coords are the same + assert track_mask.coords.keys() == test_mask.coords.keys() + +def test_convert_feature_mask_to_tracks_multiple_tracks(): + """Test functionality of convert_feature_mask_to_tracks with multiple tracks + and non-consecutive feature and track values + """ + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 
1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 2 + test_data[1, 3:, 3:] = 5 + test_data[2, 3:, 3:] = 6 + test_data[2, 0, 0] = 8 + test_data[2, 0, 1] = 9 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="feature"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 5, 6, 8, 9], + "frame": [0, 1, 1, 2, 2, 2], + "time": [ + datetime(2000, 1, 1, 0), + datetime(2000, 1, 1, 1), + datetime(2000, 1, 1, 1), + datetime(2000, 1, 1, 2), + datetime(2000, 1, 1, 2), + datetime(2000, 1, 1, 2), + ], + "cell": [1, 1, 3, 3, 4, 5], + "track": [1, 1, 2, 2, 4, 4] + } + ) + + track_mask = convert_feature_mask_to_tracks(test_features, test_mask) + + # Test all track mask values are 0, 1, 2 or 4 + assert np.all(np.isin(track_mask.values, [0, 1, 2, 4])) + + # Test all track mask values where the feature mask is 1 or 2 are 1 + assert np.all(track_mask.values[np.isin(test_mask.values, [1, 2])] == 1) + + # Test all track mask values where the feature mask is 5 or 6 are 2 + assert np.all(track_mask.values[np.isin(test_mask.values, [5, 6])] == 2) + + # Test all track mask values where the feature mask is 8 or 9 are 4 + assert np.all(track_mask.values[np.isin(test_mask.values, [8, 9])] == 4) + + # Test all track mask values where the feature mask is zero are 0 + assert np.all(track_mask.values[test_mask.values == 0] == 0) + + # Test coords are the same + assert track_mask.coords.keys() == test_mask.coords.keys() + + +def test_convert_feature_mask_to_tracks_mismatched_mask(): + """ + Test a situation when the user provides a mask that does not correspond to + the given feature dataframe, and has additional values. This should raise a + ValueError and inform the user of the problem. 
+ """ + + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 2 + test_data[2, 1:3, 1:4] = 4 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="feature"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 3], + "frame": [0, 1, 2], + "time": pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ), + "cell": [1, 1, 1], + "track": [1, 1, 1], + } + ) + + with pytest.raises( + ValueError, match="Values in feature_mask are not present in features*" + ): + track_mask = convert_feature_mask_to_tracks(test_features, test_mask) + + +def test_convert_feature_mask_to_tracks_no_track_column(): + """ + Test correct error handling when convert_feature_mask_to_tracks is given a + features dataframe with no track column + """ + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 2 + test_data[2, 1:3, 1:4] = 3 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="feature"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 3], + "frame": [0, 1, 2], + "time": pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ), + } + ) + + with pytest.raises(ValueError, match="`track` column not found in features input*"): + track_mask = convert_feature_mask_to_tracks(test_features, test_mask) + + +def test_convert_feature_mask_to_tracks_stub_value(): + """ + Test filtering of stub values from track_mask + """ + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 2 + test_data[2, 1:3, 1:4] = 3 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + 
datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="feature"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 3], + "frame": [0, 1, 2], + "time": pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ), + "cell": [1, 1, -1], + "track": [1, 1, -1], + } + ) + + track_mask = convert_feature_mask_to_tracks(test_features, test_mask) + + # Test that without providing a stub value the stub feature is relabelled to -1 + assert np.all(track_mask.values[test_mask.values == 3] == -1) + + track_mask = convert_feature_mask_to_tracks(test_features, test_mask, stubs=-1) + + # Test that providing a stub value the stub feature is relabelled to 0 + assert np.all(track_mask.values[test_mask.values == 3] == 0) + + track_mask = convert_feature_mask_to_tracks(test_features, test_mask, stubs=-999) + + # Test that providing a different stub value the stub feature is relabelled to -1 + assert np.all(track_mask.values[test_mask.values == 3] == -1) + + +def test_convert_feature_mask_to_tracks_no_input_mutation(): + """Test that convert_feature_mask_to_tracks does not alter the input features + and mask + """ + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 2 + test_data[2, 1:3, 1:4] = 3 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="feature"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 3], + "frame": [0, 1, 2], + "time": pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ), + "cell": [1, 1, -1], + "track": [1, 1, -1], + } + ) + + mask_copy = test_mask.copy(deep=True) + features_copy = test_features.copy(deep=True) + + track_mask = convert_feature_mask_to_tracks(test_features, test_mask, stubs=-1) + + # Test dataframe is the same + 
pd.testing.assert_frame_equal(test_features, features_copy) + + # Test mask is the same + assert mask_copy.equals(test_mask) + +def test_convert_feature_mask_to_tracks_inplace(): + """Test that convert_feature_mask_to_tracks does alter the input mask when + the inplace keyword is used + """ + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 2 + test_data[2, 1:3, 1:4] = 3 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="feature"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 3], + "frame": [0, 1, 2], + "time": pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ), + "cell": [1, 1, -1], + "track": [1, 1, -1], + } + ) + + mask_copy = test_mask.copy(deep=True) + features_copy = test_features.copy(deep=True) + + track_mask = convert_feature_mask_to_tracks(test_features, test_mask, stubs=-1, inplace=True) + + # Test dataframe is the same + pd.testing.assert_frame_equal(test_features, features_copy) + + # Test mask is the same + assert track_mask.equals(test_mask) + assert not mask_copy.equals(test_mask) + + +# Cell to track tests + +def test_convert_cell_mask_to_tracks_single_track(): + """Test basic functionality of convert_cell_mask_to_tracks with a single + track + """ + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 1 + test_data[2, 1:3, 1:4] = 1 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="cell"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 3], + "frame": [0, 1, 2], + "time": pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ), + "cell": [1, 1, 1], + "track": [1, 1, 1], + } + ) + 
+ track_mask = convert_cell_mask_to_tracks(test_features, test_mask) + + # Test all cell mask values are 0 or 1 + assert np.all(np.isin(track_mask.values, [0, 1])) + + # Test all cell mask values where the cell mask is not zero are 1 + assert np.all(track_mask.values[test_mask.values != 0] == 1) + + # Test all cell mask values where the cell mask is zero are 0 + assert np.all(track_mask.values[test_mask.values == 0] == 0) + + # Test coords are the same + assert track_mask.coords.keys() == test_mask.coords.keys() + +def test_convert_cell_mask_to_tracks_multiple_tracks(): + """Test functionality of convert_cell_mask_to_tracks with multiple tracks + and non-consecutive cell and track values + """ + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 1 + test_data[1, 3:, 3:] = 3 + test_data[2, 3:, 3:] = 3 + test_data[2, 0, 0] = 4 + test_data[2, 0, 1] = 5 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="cell"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 5, 6, 8, 9], + "frame": [0, 1, 1, 2, 2, 2], + "time": [ + datetime(2000, 1, 1, 0), + datetime(2000, 1, 1, 1), + datetime(2000, 1, 1, 1), + datetime(2000, 1, 1, 2), + datetime(2000, 1, 1, 2), + datetime(2000, 1, 1, 2), + ], + "cell": [1, 1, 3, 3, 4, 5], + "track": [1, 1, 2, 2, 4, 4] + } + ) + + track_mask = convert_cell_mask_to_tracks(test_features, test_mask) + + # Test all track mask values are 0, 1, 2 or 4 + assert np.all(np.isin(track_mask.values, [0, 1, 2, 4])) + + # Test all track mask values where the cell mask is 1 are 1 + assert np.all(track_mask.values[np.isin(test_mask.values, [1])] == 1) + + # Test all track mask values where the cell mask is 3 are 2 + assert np.all(track_mask.values[np.isin(test_mask.values, [3])] == 2) + + # Test all track mask values where the cell mask is 4 or 5 are 4 + assert 
np.all(track_mask.values[np.isin(test_mask.values, [4, 5])] == 4) + + # Test all track mask values where the cell mask is zero are 0 + assert np.all(track_mask.values[test_mask.values == 0] == 0) + + # Test coords are the same + assert track_mask.coords.keys() == test_mask.coords.keys() + + +def test_convert_cell_mask_to_tracks_mismatched_mask(): + """ + Test a situation when the user provides a mask that does not correspond to + the given cell dataframe, and has additional values. This should raise a + ValueError and inform the user of the problem. + """ + + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 2 + test_data[2, 1:3, 1:4] = 1 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="cell"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 3], + "frame": [0, 1, 2], + "time": pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ), + "cell": [1, 1, 1], + "track": [1, 1, 1], + } + ) + + with pytest.raises( + ValueError, match="Values in cell_mask are not present in features*" + ): + track_mask = convert_cell_mask_to_tracks(test_features, test_mask) + + +def test_convert_cell_mask_to_tracks_no_track_column(): + """ + Test correct error handling when convert_cell_mask_to_tracks is given a + features dataframe with no track column + """ + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 1 + test_data[2, 1:3, 1:4] = 1 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="cell"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 3], + "frame": [0, 1, 2], + "time": pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), 
periods=3 + ), + } + ) + + with pytest.raises(ValueError, match="`track` column not found in features input*"): + track_mask = convert_cell_mask_to_tracks(test_features, test_mask) + + +def test_convert_cell_mask_to_tracks_stub_value(): + """ + Test filtering of stub values from track_mask + """ + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 1 + test_data[2, 1:3, 1:4] = -1 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="cell"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 3], + "frame": [0, 1, 2], + "time": pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ), + "cell": [1, 1, -1], + "track": [1, 1, -1], + } + ) + + track_mask = convert_cell_mask_to_tracks(test_features, test_mask) + + # Test that without providing a stub value the stub cell is relabelled to -1 + assert np.all(track_mask.values[test_mask.values == -1] == -1) + + track_mask = convert_cell_mask_to_tracks(test_features, test_mask, stubs=-1) + + # Test that providing a stub value the stub cell is relabelled to 0 + assert np.all(track_mask.values[test_mask.values == -1] == 0) + + track_mask = convert_cell_mask_to_tracks(test_features, test_mask, stubs=-999) + + # Test that providing a different stub value the stub cell is relabelled to -1 + assert np.all(track_mask.values[test_mask.values == -1] == -1) + + +def test_convert_cell_mask_to_tracks_no_input_mutation(): + """Test that convert_cell_mask_to_tracks does not alter the input features + and mask + """ + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 1 + test_data[2, 1:3, 1:4] = -1 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + 
), + attrs=dict(units="cell"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 3], + "frame": [0, 1, 2], + "time": pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ), + "cell": [1, 1, -1], + "track": [1, 1, -1], + } + ) + + mask_copy = test_mask.copy(deep=True) + features_copy = test_features.copy(deep=True) + + track_mask = convert_cell_mask_to_tracks(test_features, test_mask, stubs=-1) + + # Test dataframe is the same + pd.testing.assert_frame_equal(test_features, features_copy) + + # Test mask is the same + assert mask_copy.equals(test_mask) + +def test_convert_cell_mask_to_tracks_inplace(): + """Test that convert_cell_mask_to_tracks does alter the input mask when + the inplace keyword is used + """ + test_data = np.zeros([3, 4, 5], dtype=int) + test_data[0, 1:3, 1:4] = 1 + test_data[1, 1:3, 1:4] = 1 + test_data[2, 1:3, 1:4] = -1 + + test_mask = xr.DataArray( + test_data, + dims=("time", "y", "x"), + coords=dict( + time=pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ) + ), + attrs=dict(units="cell"), + ) + + test_features = pd.DataFrame( + { + "feature": [1, 2, 3], + "frame": [0, 1, 2], + "time": pd.date_range( + datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 + ), + "cell": [1, 1, -1], + "track": [1, 1, -1], + } + ) + + mask_copy = test_mask.copy(deep=True) + features_copy = test_features.copy(deep=True) + + track_mask = convert_cell_mask_to_tracks(test_features, test_mask, stubs=-1, inplace=True) + + # Test dataframe is the same + pd.testing.assert_frame_equal(test_features, features_copy) + + # Test mask is the same + assert track_mask.equals(test_mask) + assert not mask_copy.equals(test_mask) diff --git a/tobac/utils/mask.py b/tobac/utils/mask.py index dd16fd9b..ded39cf3 100644 --- a/tobac/utils/mask.py +++ b/tobac/utils/mask.py @@ -551,3 +551,154 @@ def convert_cell_mask_to_features( feature_mask = feature_mask.assign_attrs(dict(units="feature")) return feature_mask + + 
+@iris_to_xarray() +def convert_feature_mask_to_tracks( + features: pd.DataFrame, feature_mask: xr.DataArray, stubs: Optional[int] = None, inplace: bool = False +) -> xr.DataArray: + """Relabels a feature mask provided by tobac.segmentation with the track + values provided by tobac.merge_split.merge_split_MEST + + WARNING: it is not possible to reconstruct the feature mask from the output + from this function. The inplace keyword, and overwriting the original mask, + should be used with care. + + Parameters + ---------- + features : pd.DataFrame + A feature dataframe with cell values provided by tobac.linking_trackpy + feature_mask : xr.DataArray + A feature mask from tobac.segmentation corresponding to the features in + the feature dataframe input + stubs : int, optional (default: None) + The stub values used for unlinked cells in tobac.linking_trackpy. If + None, the stub cells will be relabelled with the stub cell value in the + feature dataframe. If a value is provided, the masked regions + corresponding to stub cells will be removed from the output. Warning: + the presence of stub cells may make it impossible to perfectly + reconstruct the feature mask afterwards as any stub features will be + removed. 
+ inplace : bool, optional (default: False) + If True, update the feature mask in-place + + Returns + ------- + xr.DataArray + A mask of track regions corresponding to the tracks in the input dataframe + + Raises + ------ + ValueError + If the features input does not have a track column + ValueError + If there are labels in the feature_mask that are not present in the + features dataframe + """ + if "track" not in features.columns: + raise ValueError( + "`track` column not found in features input, please perform merge/split detection on this data before converting features to tracks" + ) + + if inplace: + track_mask = feature_mask + else: + track_mask = feature_mask.copy() + + track_mapper = xr.DataArray( + features.track.copy(), dims=("feature",), coords=dict(feature=features.feature) + ) + + if stubs is not None: + track_mapper.data[features.cell == stubs] = 0 + + wh_nonzero_label = np.flatnonzero(track_mask) + + try: + track_mask.data.ravel()[wh_nonzero_label] = track_mapper.loc[ + feature_mask.values.ravel()[wh_nonzero_label] + ] + except KeyError: + raise ValueError( + "Values in feature_mask are not present in features, please ensure that you are using the correct feature_mask for the tracked features, and that any filtering has been applied to both the mask and features" + ) + + track_mask = track_mask.assign_attrs(dict(units="track")) + + return track_mask + +@iris_to_xarray() +def convert_cell_mask_to_tracks( + features: pd.DataFrame, cell_mask: xr.DataArray, stubs: Optional[int] = None, inplace: bool = False +) -> xr.DataArray: + """Relabels a cell mask provided by tobac.segmentation with the track + values provided by tobac.merge_split.merge_split_MEST + + WARNING: it is not possible to reconstruct the cell mask from the output + from this function. The inplace keyword, and overwriting the original mask, + should be used with care. 
+ + Parameters + ---------- + features : pd.DataFrame + A feature dataframe with cell values provided by tobac.linking_trackpy + cell_mask : xr.DataArray + A cell mask corresponding to the cells in the + feature dataframe input + stubs : int, optional (default: None) + The stub values used for unlinked cells in tobac.linking_trackpy. If + None, the stub cells will be relabelled with the stub cell value in the + feature dataframe. If a value is provided, the masked regions + corresponding to stub cells will be removed from the output. Warning: + the presence of stub cells may make it impossible to perfectly + reconstruct the cell mask afterwards as any stub cells will be + removed. + inplace : bool, optional (default: False) + If True, update the cell mask in-place + + Returns + ------- + xr.DataArray + A mask of track regions corresponding to the tracks in the input dataframe + + Raises + ------ + ValueError + If the features input does not have a track column + ValueError + If there are labels in the cell_mask that are not present in the + features dataframe + """ + if "track" not in features.columns: + raise ValueError( + "`track` column not found in features input, please perform merge/split detection on this data before converting features to tracks" + ) + + if inplace: + track_mask = cell_mask + else: + track_mask = cell_mask.copy() + + feature_to_cell = features.track.groupby(features.cell).first() + + track_mapper = xr.DataArray( + feature_to_cell.copy(), dims=("cell",), coords=dict(cell=feature_to_cell.index) + ) + + if stubs is not None: + track_mapper.data[feature_to_cell.index == stubs] = 0 + + wh_nonzero_label = np.flatnonzero(track_mask) + + try: + track_mask.data.ravel()[wh_nonzero_label] = track_mapper.loc[ + cell_mask.values.ravel()[wh_nonzero_label] + ] + except KeyError: + raise ValueError( + "Values in cell_mask are not present in features, please ensure that you are using the correct cell_mask for the 
tracked features, and that any filtering has been applied to both the mask and features" + ) + + track_mask = track_mask.assign_attrs(dict(units="track")) + + return track_mask \ No newline at end of file From e3767f767e9712b9f35ec50084c189029caefb0c Mon Sep 17 00:00:00 2001 From: William Jones Date: Fri, 12 Dec 2025 11:07:50 +0000 Subject: [PATCH 16/16] Formatting --- tobac/segmentation/watershed_segmentation.py | 2 +- tobac/tests/test_utils_mask.py | 44 +++++++++++++------- tobac/utils/mask.py | 28 +++++++++---- 3 files changed, 49 insertions(+), 25 deletions(-) diff --git a/tobac/segmentation/watershed_segmentation.py b/tobac/segmentation/watershed_segmentation.py index 09742984..d46336d2 100644 --- a/tobac/segmentation/watershed_segmentation.py +++ b/tobac/segmentation/watershed_segmentation.py @@ -1328,7 +1328,7 @@ def segmentation( features_out, segmentation_out_data, stubs=stubs, - inplace=True, + inplace=True, ) logging.debug("Finished segmentation") diff --git a/tobac/tests/test_utils_mask.py b/tobac/tests/test_utils_mask.py index 31e0b243..df1c556e 100644 --- a/tobac/tests/test_utils_mask.py +++ b/tobac/tests/test_utils_mask.py @@ -11,7 +11,7 @@ convert_cell_mask_to_features, convert_cell_mask_to_tracks, convert_feature_mask_to_cells, - convert_feature_mask_to_tracks, + convert_feature_mask_to_tracks, ) @@ -278,8 +278,9 @@ def test_convert_feature_mask_to_cells_no_input_mutation(): # Test mask is the same assert mask_copy.equals(test_mask) + def test_convert_feature_mask_to_cells_inplace(): - """Test that convert_feature_mask_to_cells does alter the input mask when + """Test that convert_feature_mask_to_cells does alter the input mask when the inplace keyword is used """ test_data = np.zeros([3, 4, 5], dtype=int) @@ -312,7 +313,9 @@ def test_convert_feature_mask_to_cells_inplace(): mask_copy = test_mask.copy(deep=True) features_copy = test_features.copy(deep=True) - cell_mask = convert_feature_mask_to_cells(test_features, test_mask, stubs=-1, 
inplace=True) + cell_mask = convert_feature_mask_to_cells( + test_features, test_mask, stubs=-1, inplace=True + ) # Test dataframe is the same pd.testing.assert_frame_equal(test_features, features_copy) @@ -685,7 +688,7 @@ def test_convert_cell_mask_to_features_no_input_mutation(): def test_convert_cell_mask_to_features_inplace(): - """Test that convert_cell_mask_to_features does alter the input mask when + """Test that convert_cell_mask_to_features does alter the input mask when the inplace keyword is used """ test_data = np.zeros([3, 4, 5], dtype=int) @@ -718,7 +721,9 @@ def test_convert_cell_mask_to_features_inplace(): mask_copy = test_mask.copy(deep=True) features_copy = test_features.copy(deep=True) - feature_mask = convert_cell_mask_to_features(test_features, test_mask, stubs=-1, inplace=True) + feature_mask = convert_cell_mask_to_features( + test_features, test_mask, stubs=-1, inplace=True + ) # Test dataframe is the same pd.testing.assert_frame_equal(test_features, features_copy) @@ -756,7 +761,7 @@ def test_convert_feature_mask_to_tracks_single_track(): datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 ), "cell": [1, 1, 1], - "track": [1, 1, 1], + "track": [1, 1, 1], } ) @@ -774,6 +779,7 @@ def test_convert_feature_mask_to_tracks_single_track(): # Test coords are the same assert track_mask.coords.keys() == test_mask.coords.keys() + def test_convert_feature_mask_to_tracks_multiple_tracks(): """Test functionality of convert_feature_mask_to_tracks with multiple tracks and non-consecutive feature and track values @@ -810,7 +816,7 @@ def test_convert_feature_mask_to_tracks_multiple_tracks(): datetime(2000, 1, 1, 2), ], "cell": [1, 1, 3, 3, 4, 5], - "track": [1, 1, 2, 2, 4, 4] + "track": [1, 1, 2, 2, 4, 4], } ) @@ -866,7 +872,7 @@ def test_convert_feature_mask_to_tracks_mismatched_mask(): datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 ), "cell": [1, 1, 1], - "track": [1, 1, 1], + "track": [1, 1, 1], } ) @@ -1002,8 +1008,9 @@ def 
test_convert_feature_mask_to_tracks_no_input_mutation(): # Test mask is the same assert mask_copy.equals(test_mask) + def test_convert_feature_mask_to_tracks_inplace(): - """Test that convert_feature_mask_to_tracks does alter the input mask when + """Test that convert_feature_mask_to_tracks does alter the input mask when the inplace keyword is used """ test_data = np.zeros([3, 4, 5], dtype=int) @@ -1037,7 +1044,9 @@ def test_convert_feature_mask_to_tracks_inplace(): mask_copy = test_mask.copy(deep=True) features_copy = test_features.copy(deep=True) - track_mask = convert_feature_mask_to_tracks(test_features, test_mask, stubs=-1, inplace=True) + track_mask = convert_feature_mask_to_tracks( + test_features, test_mask, stubs=-1, inplace=True + ) # Test dataframe is the same pd.testing.assert_frame_equal(test_features, features_copy) @@ -1049,6 +1058,7 @@ def test_convert_feature_mask_to_tracks_inplace(): # Cell to track tests + def test_convert_cell_mask_to_tracks_single_track(): """Test basic functionality of convert_cell_mask_to_tracks with a single track @@ -1077,7 +1087,7 @@ def test_convert_cell_mask_to_tracks_single_track(): datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 ), "cell": [1, 1, 1], - "track": [1, 1, 1], + "track": [1, 1, 1], } ) @@ -1095,6 +1105,7 @@ def test_convert_cell_mask_to_tracks_single_track(): # Test coords are the same assert track_mask.coords.keys() == test_mask.coords.keys() + def test_convert_cell_mask_to_tracks_multiple_tracks(): """Test functionality of convert_cell_mask_to_tracks with multiple tracks and non-consecutive cell and track values @@ -1131,7 +1142,7 @@ def test_convert_cell_mask_to_tracks_multiple_tracks(): datetime(2000, 1, 1, 2), ], "cell": [1, 1, 3, 3, 4, 5], - "track": [1, 1, 2, 2, 4, 4] + "track": [1, 1, 2, 2, 4, 4], } ) @@ -1187,7 +1198,7 @@ def test_convert_cell_mask_to_tracks_mismatched_mask(): datetime(2000, 1, 1, 0), datetime(2000, 1, 1, 2), periods=3 ), "cell": [1, 1, 1], - "track": [1, 1, 1], + 
"track": [1, 1, 1], } ) @@ -1323,8 +1334,9 @@ def test_convert_cell_mask_to_tracks_no_input_mutation(): # Test mask is the same assert mask_copy.equals(test_mask) + def test_convert_cell_mask_to_tracks_inplace(): - """Test that convert_cell_mask_to_tracks does alter the input mask when + """Test that convert_cell_mask_to_tracks does alter the input mask when the inplace keyword is used """ test_data = np.zeros([3, 4, 5], dtype=int) @@ -1358,7 +1370,9 @@ def test_convert_cell_mask_to_tracks_inplace(): mask_copy = test_mask.copy(deep=True) features_copy = test_features.copy(deep=True) - track_mask = convert_cell_mask_to_tracks(test_features, test_mask, stubs=-1, inplace=True) + track_mask = convert_cell_mask_to_tracks( + test_features, test_mask, stubs=-1, inplace=True + ) # Test dataframe is the same pd.testing.assert_frame_equal(test_features, features_copy) diff --git a/tobac/utils/mask.py b/tobac/utils/mask.py index ded39cf3..994f4c95 100644 --- a/tobac/utils/mask.py +++ b/tobac/utils/mask.py @@ -374,7 +374,10 @@ def mask_all_surface(mask, masked=False, z_coord="model_level_number"): @iris_to_xarray() def convert_feature_mask_to_cells( - features: pd.DataFrame, feature_mask: xr.DataArray, stubs: Optional[int] = None, inplace: bool = False + features: pd.DataFrame, + feature_mask: xr.DataArray, + stubs: Optional[int] = None, + inplace: bool = False, ) -> xr.DataArray: """Relabels a feature mask provided by tobac.segmentation with the cell values provided by tobac.linking_trackpy @@ -448,7 +451,7 @@ def convert_cell_mask_to_features( features: pd.DataFrame, cell_mask: xr.DataArray, stubs: Optional[int] = None, - inplace: bool = False, + inplace: bool = False, ) -> xr.DataArray: """Relabels a cell mask, such as that produced by convert_feature_mask_to_cells, to the feature values provided by @@ -555,13 +558,16 @@ def convert_cell_mask_to_features( @iris_to_xarray() def convert_feature_mask_to_tracks( - features: pd.DataFrame, feature_mask: xr.DataArray, stubs: 
Optional[int] = None, inplace: bool = False + features: pd.DataFrame, + feature_mask: xr.DataArray, + stubs: Optional[int] = None, + inplace: bool = False, ) -> xr.DataArray: """Relabels a feature mask provided by tobac.segmentation with the track values provided by tobac.merge_split.merge_split_MEST - WARNING: it is not possible to reconstruct the feature mask from the output - from this function. The inplace keyword, and overwriting the original mask, + WARNING: it is not possible to reconstruct the feature mask from the output + from this function. The inplace keyword, and overwriting the original mask, should be used with care. Parameters @@ -627,15 +633,19 @@ def convert_feature_mask_to_tracks( return track_mask + @iris_to_xarray() def convert_cell_mask_to_tracks( - features: pd.DataFrame, cell_mask: xr.DataArray, stubs: Optional[int] = None, inplace: bool = False + features: pd.DataFrame, + cell_mask: xr.DataArray, + stubs: Optional[int] = None, + inplace: bool = False, ) -> xr.DataArray: """Relabels a cell mask provided by tobac.segmentation with the track values provided by tobac.merge_split.merge_split_MEST - WARNING: it is not possible to reconstruct the cell mask from the output - from this function. The inplace keyword, and overwriting the original mask, + WARNING: it is not possible to reconstruct the cell mask from the output + from this function. The inplace keyword, and overwriting the original mask, should be used with care. Parameters @@ -701,4 +711,4 @@ def convert_cell_mask_to_tracks( track_mask = track_mask.assign_attrs(dict(units="track")) - return track_mask \ No newline at end of file + return track_mask