diff --git a/doc/conf.py b/doc/conf.py index 330884cc..8e36e014 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -124,15 +124,15 @@ def setup(app): # This should include all modules used in tobac. These are dummy imports, # but should include both required and optional dependencies. +# NOTE: this should use the name of the package as it is imported into python, not the name on conda/pypi autodoc_mock_imports = [ # "numpy", "scipy", - "scikit-image", "pandas", "pytables", "matplotlib", "iris", - "cf-units", + "cf_units", "xarray", "cartopy", "trackpy", @@ -140,6 +140,8 @@ def setup(app): "skimage", "sklearn", "cftime", + "dask", + "dask_image", ] autodoc_default_options = { diff --git a/environment-ci.yml b/environment-ci.yml index ee3f8646..647bec7d 100644 --- a/environment-ci.yml +++ b/environment-ci.yml @@ -15,6 +15,8 @@ dependencies: - trackpy - pytest - typing_extensions + - dask + - dask-image - black - coverage - pytest-cov diff --git a/environment-examples.yml b/environment-examples.yml index a49e4dbf..cc70d652 100644 --- a/environment-examples.yml +++ b/environment-examples.yml @@ -14,6 +14,8 @@ dependencies: - trackpy>=0.6.1 - pytest - typing_extensions + - dask + - dask-image - black - jupyter - notebook diff --git a/environment.yml b/environment.yml index 2de0f196..6d8db37f 100644 --- a/environment.yml +++ b/environment.yml @@ -14,3 +14,5 @@ dependencies: - cartopy - trackpy - typing_extensions + - dask + - dask-image diff --git a/pyproject.toml b/pyproject.toml index d424377a..72d5157f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,8 @@ dependencies = [ "cartopy", "trackpy", "typing_extensions", + "dask", + "dask-image", ] [project.optional-dependencies] diff --git a/requirements.txt b/requirements.txt index 90018575..51f301e0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,5 @@ xarray cartopy trackpy typing_extensions +dask +dask-image diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index d5729ff2..3637c34f 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -27,6 +27,9 @@ import numpy as np import pandas as pd import xarray as xr +import dask +from dask_image.ndmeasure import label +from scipy.ndimage import generate_binary_structure from scipy.spatial import KDTree from sklearn.neighbors import BallTree @@ -474,7 +477,6 @@ def feature_detection_threshold( for each feature (feature ids as keys). """ - from skimage.measure import label from skimage.morphology import binary_erosion from copy import deepcopy @@ -504,15 +506,38 @@ def feature_detection_threshold( # only include values greater than threshold # erode selected regions by n pixels if n_erosion_threshold > 0: - if is_3D: - selem = np.ones( - (n_erosion_threshold, n_erosion_threshold, n_erosion_threshold) - ) - else: - selem = np.ones((n_erosion_threshold, n_erosion_threshold)) + selem = np.ones( + [ + n_erosion_threshold, + ] + * (3 if is_3D else 2) + ) mask = binary_erosion(mask, selem) # detect individual regions, label and count the number of pixels included: - labels, num_labels = label(mask, background=0, return_num=True) + # Check PBC options TODO: make a more generic handler for PBC options + if PBC_flag == "none": + wrap_axes = None + elif PBC_flag == "hdim_1": + wrap_axes = (1,) if is_3D else (0,) + elif PBC_flag == "hdim_2": + wrap_axes = (2,) if is_3D else (1,) + elif PBC_flag == "both": + wrap_axes = (1, 2) if is_3D else (0, 1) + else: + raise ValueError( + "Options for periodic are currently: none, " + ", ".join(pbc_options) + ) + labels, num_labels = dask.compute( + *label( + mask, + structure=( + generate_binary_structure(3, 3) + if is_3D + else generate_binary_structure(2, 2) + ), + wrap_axes=wrap_axes, + ) + ) if not is_3D: # let's transpose labels to a 1,y,x array to make calculations etc easier. labels = labels[np.newaxis, :, :] @@ -525,221 +550,6 @@ def feature_detection_threshold( x_min = 0 x_max = labels.shape[2] - 1 - # deal with PBCs - # all options that involve dealing with periodic boundaries - pbc_options = ["hdim_1", "hdim_2", "both"] - if PBC_flag not in pbc_options and PBC_flag != "none": - raise ValueError( - "Options for periodic are currently: none, " + ", ".join(pbc_options) - ) - - # we need to deal with PBCs in some way. - if PBC_flag in pbc_options and num_labels > 0: - # - # create our copy of `labels` to edit - labels_2 = deepcopy(labels) - # points we've already edited - skip_list = np.array([]) - # labels that touch the PBC walls - wall_labels = np.array([], dtype=np.int32) - - all_label_props = internal_utils.get_label_props_in_dict(labels) - [ - all_labels_max_size, - all_label_locs_v, - all_label_locs_h1, - all_label_locs_h2, - ] = internal_utils.get_indices_of_labels_from_reg_prop_dict(all_label_props) - - # find the points along the boundaries - - # along hdim_1 or both horizontal boundaries - if PBC_flag == "hdim_1" or PBC_flag == "both": - # north and south wall - ns_wall = np.unique(labels[:, (y_min, y_max), :]) - wall_labels = np.append(wall_labels, ns_wall) - - # along hdim_2 or both horizontal boundaries - if PBC_flag == "hdim_2" or PBC_flag == "both": - # east/west wall - ew_wall = np.unique(labels[:, :, (x_min, x_max)]) - wall_labels = np.append(wall_labels, ew_wall) - - wall_labels = np.unique(wall_labels) - - for label_ind in wall_labels: - new_label_ind = label_ind - # 0 isn't a real index - if label_ind == 0: - continue - # skip this label if we have already dealt with it. - if np.any(label_ind == skip_list): - continue - - # create list for skip labels for this wall label only - skip_list_thisind = list() - - # get all locations of this label. - # TODO: harmonize x/y/z vs hdim1/hdim2/vdim. - label_locs_v = all_label_locs_v[label_ind] - label_locs_h1 = all_label_locs_h1[label_ind] - label_locs_h2 = all_label_locs_h2[label_ind] - - # loop through every point in the label - for label_z, label_y, label_x in zip( - label_locs_v, label_locs_h1, label_locs_h2 - ): - # check if this is the special case of being a corner point. - # if it's doubly periodic AND on both x and y boundaries, it's a corner point - # and we have to look at the other corner. - # here, we will only look at the corner point and let the below deal with x/y only. - if PBC_flag == "both" and ( - np.any(label_y == [y_min, y_max]) - and np.any(label_x == [x_min, x_max]) - ): - # adjust x and y points to the other side - y_val_alt = pbc_utils.adjust_pbc_point(label_y, y_min, y_max) - x_val_alt = pbc_utils.adjust_pbc_point(label_x, x_min, x_max) - - label_on_corner = labels[label_z, y_val_alt, x_val_alt] - - if (label_on_corner != 0) and ( - ~np.any(label_on_corner == skip_list) - ): - # alt_inds = np.where(labels==alt_label_3) - # get a list of indices where the label on the corner is so we can switch - # them in the new list. - - labels_2[ - all_label_locs_v[label_on_corner], - all_label_locs_h1[label_on_corner], - all_label_locs_h2[label_on_corner], - ] = label_ind - skip_list = np.append(skip_list, label_on_corner) - skip_list_thisind = np.append( - skip_list_thisind, label_on_corner - ) - - # if it's labeled and has already been dealt with for this label - elif ( - (label_on_corner != 0) - and (np.any(label_on_corner == skip_list)) - and (np.any(label_on_corner == skip_list_thisind)) - ): - # print("skip_list_thisind label - has already been treated this index") - continue - - # if it's labeled and has already been dealt with via a previous label - elif ( - (label_on_corner != 0) - and (np.any(label_on_corner == skip_list)) - and (~np.any(label_on_corner == skip_list_thisind)) - ): - # find the updated label, and overwrite all of label_ind indices with - # updated label - labels_2_alt = labels_2[label_z, y_val_alt, x_val_alt] - labels_2[label_locs_v, label_locs_h1, label_locs_h2] = ( - labels_2_alt - ) - skip_list = np.append(skip_list, label_ind) - break - - # on the hdim1 boundary and periodic on hdim1 - if (PBC_flag == "hdim_1" or PBC_flag == "both") and np.any( - label_y == [y_min, y_max] - ): - y_val_alt = pbc_utils.adjust_pbc_point(label_y, y_min, y_max) - - # get the label value on the opposite side - label_alt = labels[label_z, y_val_alt, label_x] - - # if it's labeled and not already been dealt with - if (label_alt != 0) and (~np.any(label_alt == skip_list)): - # find the indices where it has the label value on opposite side and change - # their value to original side - # print(all_label_locs_v[label_alt], alt_inds[0]) - labels_2[ - all_label_locs_v[label_alt], - all_label_locs_h1[label_alt], - all_label_locs_h2[label_alt], - ] = new_label_ind - # we have already dealt with this label. - skip_list = np.append(skip_list, label_alt) - skip_list_thisind = np.append(skip_list_thisind, label_alt) - - # if it's labeled and has already been dealt with for this label - elif ( - (label_alt != 0) - and (np.any(label_alt == skip_list)) - and (np.any(label_alt == skip_list_thisind)) - ): - continue - - # if it's labeled and has already been dealt with - elif ( - (label_alt != 0) - and (np.any(label_alt == skip_list)) - and (~np.any(label_alt == skip_list_thisind)) - ): - # find the updated label, and overwrite all of label_ind indices with - # updated label - labels_2_alt = labels_2[label_z, y_val_alt, label_x] - labels_2[label_locs_v, label_locs_h1, label_locs_h2] = ( - labels_2_alt - ) - new_label_ind = labels_2_alt - skip_list = np.append(skip_list, label_ind) - - if (PBC_flag == "hdim_2" or PBC_flag == "both") and np.any( - label_x == [x_min, x_max] - ): - x_val_alt = pbc_utils.adjust_pbc_point(label_x, x_min, x_max) - - # get the label value on the opposite side - label_alt = labels[label_z, label_y, x_val_alt] - - # if it's labeled and not already been dealt with - if (label_alt != 0) and (~np.any(label_alt == skip_list)): - # find the indices where it has the label value on opposite side and change - # their value to original side - labels_2[ - all_label_locs_v[label_alt], - all_label_locs_h1[label_alt], - all_label_locs_h2[label_alt], - ] = new_label_ind - # we have already dealt with this label. - skip_list = np.append(skip_list, label_alt) - skip_list_thisind = np.append(skip_list_thisind, label_alt) - - # if it's labeled and has already been dealt with for this label - elif ( - (label_alt != 0) - and (np.any(label_alt == skip_list)) - and (np.any(label_alt == skip_list_thisind)) - ): - continue - - # if it's labeled and has already been dealt with - elif ( - (label_alt != 0) - and (np.any(label_alt == skip_list)) - and (~np.any(label_alt == skip_list_thisind)) - ): - # find the updated label, and overwrite all of label_ind indices with - # updated label - labels_2_alt = labels_2[label_z, label_y, x_val_alt] - labels_2[label_locs_v, label_locs_h1, label_locs_h2] = ( - labels_2_alt - ) - new_label_ind = labels_2_alt - skip_list = np.append(skip_list, label_ind) - - # copy over new labels after we have adjusted everything - labels = labels_2 - - # END PBC treatment - # we need to get label properties again after we handle PBCs. - label_props = internal_utils.get_label_props_in_dict(labels) if len(label_props) > 0: (