2 changes: 1 addition & 1 deletion install.sh
100755 → 100644
@@ -210,7 +210,7 @@ configure() {
log_info "Configuring SeeSpot..."

# Default values
DEFAULT_CACHE_DIR="$HOME/.seespot/cache"
DEFAULT_CACHE_DIR="$HOME/capsule/scratch/.seespot/cache"
DEFAULT_PORT=5555
DEFAULT_HOST="0.0.0.0"

122 changes: 110 additions & 12 deletions src/see_spot/app.py
@@ -10,6 +10,7 @@
import os
from pathlib import Path
import polars as pl
import pandas as pd
import itertools
from typing import List, Tuple, Dict, Any
import yaml
@@ -300,10 +301,13 @@ def calculate_sankey_data(df: Any) -> Dict[str, Any]:
async def get_real_spots_data(
sample_size: int = SAMPLE_SIZE,
force_refresh: bool = False,
valid_spots_only: bool = False
valid_spots_only: bool = False,
sampling_type: str = "class_balanced",
display_chan: str = "mixed"
):
logger.info(f"Real spots data requested with sample size: {sample_size}, "
f"force_refresh: {force_refresh}, valid_spots_only: {valid_spots_only}")
f"force_refresh: {force_refresh}, valid_spots_only: {valid_spots_only}, "
f"sampling_type: {sampling_type}, display_chan: {display_chan}")

# Check if a dataset has been selected
if DATA_PREFIX is None:
@@ -452,7 +456,53 @@ async def get_real_spots_data(
# 4. Subsample the data
if len(df) > sample_size:
logger.info(f"Subsampling DataFrame from {len(df)} to {sample_size} rows.")
plot_df = df.sample(n=sample_size, random_state=None).copy()

if sampling_type == "class_balanced":
# Class-balanced sampling: sample equally from each channel
# Use the appropriate channel column based on display mode
channel_col = 'chan' if display_chan == 'mixed' else 'unmixed_chan'
logger.info(f"Using class-balanced sampling on column: {channel_col}")

# Get unique channels and their counts
unique_channels = df[channel_col].unique()
num_channels = len(unique_channels)
samples_per_channel = sample_size // num_channels

logger.info(f"Found {num_channels} unique channels, sampling {samples_per_channel} per channel")

# Sample from each channel
sampled_dfs = []
import secrets
for channel in unique_channels:
channel_df = df[df[channel_col] == channel]
n_samples = min(len(channel_df), samples_per_channel)
random_seed = secrets.randbelow(2**32)
sampled = channel_df.sample(n=n_samples, random_state=random_seed)
sampled_dfs.append(sampled)
logger.info(f"Channel {channel}: sampled {n_samples} from {len(channel_df)} spots")

# Concatenate all samples (keep original indices so the fill step below can exclude them)
plot_df = pd.concat(sampled_dfs)

# If we're short of target sample size, add random samples to fill
if len(plot_df) < sample_size:
remaining = sample_size - len(plot_df)
# Get spots not already sampled
remaining_df = df[~df.index.isin(plot_df.index)]
if len(remaining_df) > 0:
random_seed = secrets.randbelow(2**32)
additional = remaining_df.sample(n=min(remaining, len(remaining_df)), random_state=random_seed)
plot_df = pd.concat([plot_df, additional])
logger.info(f"Added {len(additional)} additional random samples to reach target")

plot_df = plot_df.reset_index(drop=True).copy()
logger.info(f"Class-balanced sampling complete: {len(plot_df)} total samples")
else:
# Random sampling
import secrets
random_seed = secrets.randbelow(2**32)
plot_df = df.sample(n=sample_size, random_state=random_seed).copy()
logger.info(f"Random sampling with seed: {random_seed}")
else:
plot_df = df.copy()
logger.info(f"Plotting DataFrame shape: {plot_df.shape}")
@@ -516,7 +566,8 @@ async def get_real_spots_data(
else:
spot_details = {
str(int(row['spot_id'])): {
col: row[col] for col in available_detail_cols if col != 'spot_id'
col: (row[col].item() if hasattr(row[col], 'item') else row[col])
for col in available_detail_cols if col != 'spot_id'
}
for _, row in spot_details_df.iterrows()
}
@@ -549,6 +600,11 @@ async def get_real_spots_data(
# 8. Convert DataFrame to list of records (dictionaries)
try:
data_for_frontend = plot_df_subset.to_dict(orient='records')
# Convert numpy types to native Python types for JSON serialization
for record in data_for_frontend:
for key, value in record.items():
if hasattr(value, 'item'): # numpy scalar
record[key] = value.item()
logger.info(f"Prepared {len(data_for_frontend)} records for frontend.")
except Exception as e:
logger.error(f"Error converting DataFrame to dict: {e}", exc_info=True)
@@ -662,13 +718,41 @@ async def create_neuroglancer_link(request: Request):
cross_section_scale,
)

# Determine JSON file name (env override allowed) and full S3 path
ng_json_filename = os.getenv(
"SEE_SPOT_NG_JSON_NAME", "phase_correlation_stitching_neuroglancer.json"
)
ng_json_path = f"s3://{S3_BUCKET}/{DATA_PREFIX}/{ng_json_filename}"
s3_key_for_json = f"{DATA_PREFIX}/{ng_json_filename}" # key relative to bucket
logger.info("Constructed Neuroglancer JSON path: %s", ng_json_path)
# Determine dataset context (tile-specific JSONs live under image_spot_detection)
import re

tile_pattern = re.compile(r"_X_\d+_Y_\d+_Z_\d+$")
tile_match = tile_pattern.search(DATA_PREFIX) if DATA_PREFIX else None
base_dataset_name = DATA_PREFIX
tile_folder = None

if tile_match and DATA_PREFIX:
parts = DATA_PREFIX.rsplit('_', 6)
if len(parts) > 1:
base_dataset_name = parts[0]
tile_suffix = '_'.join(parts[1:])
tile_folder = f"Tile_{tile_suffix}"
logger.info(
"Detected tile dataset for Neuroglancer request | base=%s tile=%s",
base_dataset_name,
tile_folder,
)

# Determine JSON file name (env override allowed for non-tile datasets) and full S3 path
if tile_folder:
ng_json_filename = f"{tile_folder}_spot_annotation_ng_link.json"
s3_key_for_json = (
f"{base_dataset_name}/image_spot_detection/{ng_json_filename}"
)
ng_json_path = f"s3://{S3_BUCKET}/{s3_key_for_json}"
logger.info("Using tile-specific Neuroglancer JSON: %s", ng_json_path)
else:
ng_json_filename = os.getenv(
"SEE_SPOT_NG_JSON_NAME", "phase_correlation_stitching_neuroglancer.json"
)
ng_json_path = f"s3://{S3_BUCKET}/{DATA_PREFIX}/{ng_json_filename}"
s3_key_for_json = f"{DATA_PREFIX}/{ng_json_filename}" # key relative to bucket
logger.info("Using default Neuroglancer JSON: %s", ng_json_path)

# Check existence of JSON on S3 (metadata only) for better diagnostics
json_metadata = None
Expand All @@ -692,7 +776,20 @@ async def create_neuroglancer_link(request: Request):
)

# Decide strategy: prefer JSON-based method when file exists; fall back otherwise
use_json_method = json_metadata is not None or "merged" in unmixed_spots_filename.lower()
if tile_folder:
use_json_method = json_metadata is not None
if not use_json_method:
logger.warning(
"Tile-specific Neuroglancer JSON missing: %s | falling back to direct method",
ng_json_path,
)
else:
merged_flag = (
"merged" in unmixed_spots_filename.lower()
if isinstance(unmixed_spots_filename, str)
else False
)
use_json_method = json_metadata is not None or merged_flag
logger.info("Use JSON method decision: %s", use_json_method)

try:
@@ -711,6 +808,7 @@
annotation_color=annotation_color,
spacing=3.0,
cross_section_scale=cross_section_scale,
hide_existing_annotations=True,
)
logger.info("Successfully built Neuroglancer link from JSON")
except Exception as json_err:
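The tile handling above hinges on the `_X_*_Y_*_Z_*` suffix convention; a small sanity check of the parsing logic, using the same regex and `rsplit` as the diff (the dataset prefix below is hypothetical):

```python
import re

tile_pattern = re.compile(r"_X_\d+_Y_\d+_Z_\d+$")

data_prefix = "HCR_sample_X_0001_Y_0002_Z_0003"  # hypothetical dataset prefix
if tile_pattern.search(data_prefix):
    parts = data_prefix.rsplit("_", 6)           # split off the 6 trailing tokens
    base_dataset_name = parts[0]                 # "HCR_sample"
    tile_folder = "Tile_" + "_".join(parts[1:])  # "Tile_X_0001_Y_0002_Z_0003"
    s3_key = f"{base_dataset_name}/image_spot_detection/{tile_folder}_spot_annotation_ng_link.json"
    print(s3_key)
```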
95 changes: 48 additions & 47 deletions src/see_spot/ng_utils.py
@@ -259,6 +259,7 @@ def create_link_from_json(
spacing=3.0,
cross_section_scale=None,
base_url="https://neuroglancer-demo.appspot.com",
hide_existing_annotations=True,
):
"""
Create a Neuroglancer link from an existing JSON file with updated position and annotation.
@@ -274,6 +275,9 @@
cross_section_scale (float, optional): Scale for cross-section view. If None, keeps existing value
base_url (str, optional): Base Neuroglancer URL. Default: "https://neuroglancer-demo.appspot.com"

hide_existing_annotations (bool, optional): When True, sets existing annotation
layers to invisible before adding the new spot annotation. Default: True

Returns:
--------
str: Direct Neuroglancer URL with updated state
@@ -318,55 +322,52 @@
state_dict["crossSectionScale"] = cross_section_scale
print(f"Updated crossSectionScale to: {cross_section_scale}")

# Find or create annotation layer
annotation_layer_found = False

if "layers" in state_dict:
    # Look for existing annotation layer
    for i, layer in enumerate(state_dict["layers"]):
        if layer.get("type") == "annotation":
            # Update existing annotation layer
            annotation = {
                "type": "point",
                "id": str(spot_id),
                "point": point_annotation,
            }

            # Update the layer properties
            state_dict["layers"][i]["name"] = f"Spot {spot_id}"
            state_dict["layers"][i]["annotationColor"] = annotation_color
            state_dict["layers"][i][
                "crossSectionAnnotationSpacing"
            ] = spacing
            state_dict["layers"][i]["annotations"] = [annotation]

            annotation_layer_found = True
            print(f"Updated existing annotation layer with spot {spot_id}")
            break

    # If no annotation layer exists, create one
    if not annotation_layer_found:
        annotation_layer = {
            "type": "annotation",
            "name": f"Spot {spot_id}",
            "tab": "annotations",
            "visible": True,
            "annotationColor": annotation_color,
            "crossSectionAnnotationSpacing": spacing,
            "projectionAnnotationSpacing": 10,
            "tool": "annotatePoint",
            "annotations": [
                {
                    "type": "point",
                    "id": str(spot_id),
                    "point": point_annotation,
                }
            ],
        }
        state_dict["layers"].append(annotation_layer)
        print(f"Created new annotation layer with spot {spot_id}")
else:
    print("Warning: No 'layers' found in Neuroglancer state")
# Hide existing annotation layers if requested
if hide_existing_annotations and "layers" in state_dict:
    hidden_layers = 0
    for layer in state_dict["layers"]:
        if layer.get("type") == "annotation":
            layer["visible"] = False
            hidden_layers += 1
    if hidden_layers:
        print(f"Hid {hidden_layers} existing annotation layer(s) before adding spot {spot_id}")

# Ensure layers list exists and append fresh annotation layer for the selected spot
if "layers" not in state_dict or not isinstance(state_dict["layers"], list):
    state_dict["layers"] = []

spot_layer_name = f"Spot {spot_id}"

# Remove any prior custom layer for this spot to avoid duplication
state_dict["layers"] = [
    layer
    for layer in state_dict["layers"]
    if not (
        layer.get("type") == "annotation"
        and layer.get("name") == spot_layer_name
        and layer.get("tab") == "annotations"
    )
]

annotation_layer = {
    "type": "annotation",
    "name": spot_layer_name,
    "tab": "annotations",
    "visible": True,
    "annotationColor": annotation_color,
    "crossSectionAnnotationSpacing": spacing,
    "projectionAnnotationSpacing": 10,
    "tool": "annotatePoint",
    "annotations": [
        {
            "type": "point",
            "id": str(spot_id),
            "point": point_annotation,
        }
    ],
}
state_dict["layers"].append(annotation_layer)
print(f"Appended new annotation layer with spot {spot_id}")

# Generate direct URL
direct_url = create_direct_neuroglancer_url(state_dict, base_url=base_url)
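The net effect of the rewritten block is hide-then-append rather than update-in-place; a minimal illustration on a bare state dict (layer names and coordinates hypothetical):

```python
state_dict = {
    "layers": [
        {"type": "image", "name": "channel_0", "visible": True},
        {"type": "annotation", "name": "detected spots", "visible": True},
    ]
}
spot_id = 42

# Existing annotation layers are hidden, not removed, so they can be re-enabled in the UI.
for layer in state_dict["layers"]:
    if layer.get("type") == "annotation":
        layer["visible"] = False

# A fresh, visible layer is appended for the selected spot.
state_dict["layers"].append({
    "type": "annotation",
    "name": f"Spot {spot_id}",
    "tab": "annotations",
    "visible": True,
    "annotations": [{"type": "point", "id": str(spot_id), "point": [10.0, 20.0, 5.0]}],
})
```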