diff --git a/starfish/core/spots/FindSpots/blob.py b/starfish/core/spots/FindSpots/blob.py index 93d548dab..1e65b215b 100644 --- a/starfish/core/spots/FindSpots/blob.py +++ b/starfish/core/spots/FindSpots/blob.py @@ -125,8 +125,17 @@ def image_to_spots( if self.detector_method == blob_dog: del spot_finding_args['num_sigma'] + # Convert to numpy array and handle singleton z-dimension for consistency + # This ensures (1, y, x) produces same results as (y, x) + data_image = np.asarray(data_image) + if data_image.ndim == 3 and data_image.shape[0] == 1: + # Squeeze out the singleton z-dimension before blob detection + data_image_for_detection = np.squeeze(data_image, axis=0) + else: + data_image_for_detection = data_image + fitted_blobs_array: np.ndarray = self.detector_method( - data_image, + data_image_for_detection, **spot_finding_args ) # type: ignore # error: Cannot call function of unknown type [operator] @@ -136,19 +145,43 @@ def image_to_spots( return PerImageSliceSpotResults(spot_attrs=empty_spot_attrs, extras=None) # measure intensities - data_image = np.asarray(data_image) - if self.is_volume: + # Determine dimensionality from the data passed to blob detector + # blob_log returns: + # - Scalar sigma: (n_blobs, ndim + 1) where columns are [coords..., sigma] + # - Anisotropic sigma: (n_blobs, 2*ndim) where columns are [coords..., sigmas...] + # We use data_image_for_detection.ndim to know if we did 2D or 3D detection + if data_image_for_detection.ndim == 3: + # 3D blob detection result: [z, y, x, sigma] or [z, y, x, sigma_z, sigma_y, sigma_x] z_inds = fitted_blobs_array[:, 0].astype(int) y_inds = fitted_blobs_array[:, 1].astype(int) x_inds = fitted_blobs_array[:, 2].astype(int) - radius = np.round(fitted_blobs_array[:, 3] * np.sqrt(3)) + # For radius, use first sigma column (scalar sigma) + # or average of sigma columns (anisotropic) + if fitted_blobs_array.shape[1] == 4: + # Scalar sigma + radius = np.round(fitted_blobs_array[:, 3] * np.sqrt(3)) + else: + # Anisotropic sigma - average the three sigma values + radius = np.round(fitted_blobs_array[:, 3:6].mean(axis=1) * np.sqrt(3)) intensities = data_image[tuple([z_inds, y_inds, x_inds])] else: - z_inds = np.asarray([0 for x in range(len(fitted_blobs_array))]) + # 2D blob detection result: [y, x, sigma] or [y, x, sigma_y, sigma_x] y_inds = fitted_blobs_array[:, 0].astype(int) x_inds = fitted_blobs_array[:, 1].astype(int) - radius = np.round(fitted_blobs_array[:, 2] * np.sqrt(2)) - intensities = data_image[tuple([z_inds, y_inds, x_inds])] + # For radius, use first sigma column (scalar sigma) + # or average of sigma columns (anisotropic) + if fitted_blobs_array.shape[1] == 3: + # Scalar sigma + radius = np.round(fitted_blobs_array[:, 2] * np.sqrt(2)) + else: + # Anisotropic sigma - average the two sigma values + radius = np.round(fitted_blobs_array[:, 2:4].mean(axis=1) * np.sqrt(2)) + z_inds = np.zeros(len(fitted_blobs_array), dtype=int) + # For 2D results, handle both 2D and 3D data_image + if data_image.ndim == 3: + intensities = data_image[z_inds, y_inds, x_inds] + else: + intensities = data_image[y_inds, x_inds] # construct dataframe spot_data = pd.DataFrame( @@ -218,13 +251,15 @@ def run( merged_z_tables[(r, ch)] = pd.concat( [merged_z_tables[(r, ch)], spot_attributes_list[i][0].spot_attrs.data]) new = [] - r_chs = sorted([*merged_z_tables]) - selectors = list(image_stack._iter_axes({Axes.ROUND, Axes.CH})) - for i, (r, ch) in enumerate(r_chs): - merged_z_tables[(r, ch)]['spot_id'] = range(len(merged_z_tables[(r, ch)])) - spot_attrs = SpotAttributes(merged_z_tables[(r, ch)].reset_index(drop=True)) - new.append((PerImageSliceSpotResults(spot_attrs=spot_attrs, extras=None), - selectors[i])) + # Iterate through the merged tables in the order expected by _iter_axes + for selector in image_stack._iter_axes({Axes.ROUND, Axes.CH}): + r = selector[Axes.ROUND] + ch = selector[Axes.CH] + if (r, ch) in merged_z_tables: + merged_z_tables[(r, ch)]['spot_id'] = range(len(merged_z_tables[(r, ch)])) + spot_attrs = SpotAttributes(merged_z_tables[(r, ch)].reset_index(drop=True)) + new.append((PerImageSliceSpotResults(spot_attrs=spot_attrs, extras=None), + selector)) spot_attributes_list = new diff --git a/starfish/core/spots/FindSpots/test/test_spot_detection.py b/starfish/core/spots/FindSpots/test/test_spot_detection.py index bbaeac66f..766a4ca54 100644 --- a/starfish/core/spots/FindSpots/test/test_spot_detection.py +++ b/starfish/core/spots/FindSpots/test/test_spot_detection.py @@ -158,3 +158,198 @@ def test_spot_detection_with_image_with_labeled_axes(): data_stack = _make_labeled_image() spot_results = gaussian_spot_detector.run(image_stack=data_stack) return spot_results + + +def test_blob_detector_2d_spot_coordinates(): + """Test that BlobDetector with is_volume=False produces correct y, x coordinates and radius. + + This is a regression test for the bug where y-values were all 0 and radius was incorrect + due to incorrect indexing of 2D images. + """ + # Create a simple 2D image with a bright spot + image_2d = np.zeros((100, 100), dtype=np.float32) + # Add a 5x5 bright spot centered approximately at y=50, x=60 + image_2d[48:53, 58:63] = 1.0 + + # Create an ImageStack with this 2D image + # Shape: (rounds=1, channels=1, z-planes=1, height=100, width=100) + image_stack = ImageStack.from_numpy(image_2d.reshape(1, 1, 1, 100, 100)) + + # Create a BlobDetector with is_volume=False + detector_2d = BlobDetector( + min_sigma=1, + max_sigma=3, + num_sigma=5, + threshold=0.01, + is_volume=False, + measurement_type='max' + ) + + # Run detection + spot_results = detector_2d.run(image_stack=image_stack) + + # Get the spot attributes for round 0, channel 0 + spots = spot_results[{Axes.ROUND: 0, Axes.CH: 0}].spot_attrs + + # Verify we found at least one spot + assert len(spots.data) > 0, "No spots detected" + + # Check that y-values are not all 0 (the bug symptom) + y_values = spots.data['y'].values + assert not np.all(y_values == 0), "All y-values are 0, indicating the bug is present" + + # Check that the detected spot is near the expected location (y=50, x=60) + # Allow 5 pixel tolerance since blob detection may not find exact center + assert np.any(np.abs(y_values - 50) < 5), f"No spot found near y=50, found: {y_values}" + + x_values = spots.data['x'].values + assert np.any(np.abs(x_values - 60) < 5), f"No spot found near x=60, found: {x_values}" + + # Check that radius is reasonable (not extremely large like 843.0 in the bug) + radius_values = spots.data['radius'].values + assert np.all(radius_values < 100), f"Radius values too large: {radius_values}" + assert np.all(radius_values > 0), f"Radius values should be positive: {radius_values}" + + +def test_blob_detector_2d_with_reference_image(): + """Test BlobDetector with is_volume=False and a reference_image. + + This tests the case where reference_image has multiple z-planes, which results in + a 3D data_image after squeezing ROUND and CH dimensions. + """ + # Create a 3D reference image with multiple z-planes + reference_3d = np.zeros((3, 100, 100), dtype=np.float32) + # Add a bright spot at z=0, y=30, x=40 + reference_3d[0, 28:33, 38:43] = 1.0 + + # Create ImageStacks with the reference image (3 z-planes, 1 round, 1 channel) + reference_stack = ImageStack.from_numpy(reference_3d.reshape(1, 1, 3, 100, 100)) + # Create a primary image stack (same dimensions) + primary_stack = ImageStack.from_numpy(reference_3d.reshape(1, 1, 3, 100, 100)) + + # Create a BlobDetector with is_volume=False + detector_2d = BlobDetector( + min_sigma=1, + max_sigma=3, + num_sigma=5, + threshold=0.01, + is_volume=False, + measurement_type='max' + ) + + # Run detection with reference_image + spot_results = detector_2d.run(image_stack=primary_stack, reference_image=reference_stack) + + # The reference image approach produces spots for all (round, ch) combinations + # For our case: 1 round x 1 channel = 1 combination + total_spots = spot_results.count_total_spots() + assert total_spots > 0, "No spots detected with reference image" + + # Check a specific round/channel + spots = spot_results[{Axes.ROUND: 0, Axes.CH: 0}].spot_attrs + + # Verify spot coordinates and radius are correct + y_values = spots.data['y'].values + x_values = spots.data['x'].values + radius_values = spots.data['radius'].values + + # Check that y-values are not all 0 + assert not np.all(y_values == 0), "All y-values are 0, bug is present" + + # Check that the spot is near the expected location (y=30, x=40) + assert np.any(np.abs(y_values - 30) < 5), f"No spot found near y=30, found: {y_values}" + assert np.any(np.abs(x_values - 40) < 5), f"No spot found near x=40, found: {x_values}" + + # Check that radius is reasonable + assert np.all(radius_values < 100), f"Radius values too large: {radius_values}" + assert np.all(radius_values > 0), f"Radius values should be positive: {radius_values}" + + +def test_blob_detector_round_channel_assignment(): + """Test that BlobDetector with is_volume=False assigns spots to correct (round, channel) pairs. + + This is a regression test for a bug where spots were assigned to incorrect round/channel + combinations due to a mismatch between the sorted (r, ch) tuples and the iteration order + from _iter_axes({Axes.ROUND, Axes.CH}). + """ + # Create a test ImageStack with 2 rounds, 3 channels, 1 z-plane + # Each (round, channel) pair gets a spot at a unique location + data = np.zeros((2, 3, 1, 100, 100), dtype=np.float32) + + # Add spots at different y-coordinates for each (round, channel) + # Round 0, Channel 0: y=10 + data[0, 0, 0, 8:13, 8:13] = 1.0 + # Round 0, Channel 1: y=20 + data[0, 1, 0, 18:23, 18:23] = 1.0 + # Round 0, Channel 2: y=30 + data[0, 2, 0, 28:33, 28:33] = 1.0 + # Round 1, Channel 0: y=40 + data[1, 0, 0, 38:43, 38:43] = 1.0 + # Round 1, Channel 1: y=50 + data[1, 1, 0, 48:53, 48:53] = 1.0 + # Round 1, Channel 2: y=60 + data[1, 2, 0, 58:63, 58:63] = 1.0 + + image_stack = ImageStack.from_numpy(data) + + # Test with is_volume=False + detector_2d = BlobDetector( + min_sigma=1, + max_sigma=3, + num_sigma=5, + threshold=0.01, + is_volume=False, + measurement_type='mean' + ) + + spots_2d = detector_2d.run(image_stack=image_stack) + + # Test with is_volume=True for comparison + detector_3d = BlobDetector( + min_sigma=1, + max_sigma=3, + num_sigma=5, + threshold=0.01, + is_volume=True, + measurement_type='mean' + ) + + spots_3d = detector_3d.run(image_stack=image_stack) + + # Define expected y-coordinates for each (round, channel) pair + expected_y = { + (0, 0): 10, + (0, 1): 20, + (0, 2): 30, + (1, 0): 40, + (1, 1): 50, + (1, 2): 60, + } + + # Check that both is_volume=False and is_volume=True produce the same results + for r in range(2): + for ch in range(3): + # Get spots for this (round, channel) + spots_2d_data = spots_2d[{Axes.ROUND: r, Axes.CH: ch}].spot_attrs.data + spots_3d_data = spots_3d[{Axes.ROUND: r, Axes.CH: ch}].spot_attrs.data + + # Both should have found the spot + assert len(spots_2d_data) > 0, \ + f"No spots found for Round={r}, CH={ch} with is_volume=False" + assert len(spots_3d_data) > 0, \ + f"No spots found for Round={r}, CH={ch} with is_volume=True" + + # Check that the y-coordinate matches the expected value + y_2d = spots_2d_data['y'].values[0] + y_3d = spots_3d_data['y'].values[0] + expected = expected_y[(r, ch)] + + assert np.abs(y_2d - expected) < 5, \ + f"is_volume=False: Round={r}, CH={ch} has y={y_2d:.0f}, expected ~{expected}" + assert np.abs(y_3d - expected) < 5, \ + f"is_volume=True: Round={r}, CH={ch} has y={y_3d:.0f}, expected ~{expected}" + + # Both should produce the same y-coordinate + assert np.abs(y_2d - y_3d) < 2, \ + f"Round={r}, CH={ch}: is_volume=False has y={y_2d:.0f} " \ + f"but is_volume=True has y={y_3d:.0f}" diff --git a/starfish/test/full_pipelines/api/test_iss_api.py b/starfish/test/full_pipelines/api/test_iss_api.py index 1b0466b1b..a7907b8e8 100644 --- a/starfish/test/full_pipelines/api/test_iss_api.py +++ b/starfish/test/full_pipelines/api/test_iss_api.py @@ -103,14 +103,14 @@ def test_iss_pipeline_cropped_data(tmpdir): # decode decoded = iss.decoded - # decoding identifies 4 genes, each with 1 count + # decoding identifies 15 genes, each with at least 1 count genes, gene_counts = iss.genes, iss.counts - assert np.array_equal(genes, np.array(['ACTB', 'CD68', 'CTSL2', 'EPCAM', + assert np.array_equal(genes, np.array(['ACTB', 'CCNB1', 'CD68', 'CTSL2', 'EPCAM', 'ETV4', 'GAPDH', 'GUS', 'HER2', 'RAC1', - 'TFRC', 'TP53', 'VEGF'])) + 'RPLP0', 'STK15', 'TFRC', 'TP53', 'VEGF'])) - assert np.array_equal(gene_counts, [19, 1, 5, 2, 1, 11, 1, 3, 2, 1, 1, 2]) - assert decoded.sizes[Features.AXIS] == 99 + assert np.array_equal(gene_counts, [21, 1, 1, 6, 2, 1, 12, 1, 3, 3, 1, 1, 1, 1, 2]) + assert decoded.sizes[Features.AXIS] == 102 masks = iss.masks @@ -145,11 +145,11 @@ def test_iss_pipeline_cropped_data(tmpdir): assert pipeline_log[2]['method'] == 'BlobDetector' assert pipeline_log[3]['method'] == 'PerRoundMaxChannel' - # 28 of the spots are assigned to cell 0 (although most spots do not decode!) - assert np.sum(assigned['cell_id'] == '1') == 28 + # 29 of the spots are assigned to cell 0 (although most spots do not decode!) + assert np.sum(assigned['cell_id'] == '1') == 29 expression_matrix = iss.cg # test that nans were properly removed from the expression matrix assert 'nan' not in expression_matrix.genes.data # test the number of spots that did not decode per cell - assert np.array_equal(expression_matrix.number_of_undecoded_spots.data, [13, 1, 36]) + assert np.array_equal(expression_matrix.number_of_undecoded_spots.data, [10, 1, 34])