Skip to content

Remove serialem code and revise context determination #632

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Jul 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 35 additions & 58 deletions src/murfey/client/analyser.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,6 @@ def _find_context(self, file_path: Path) -> bool:
in the Context classes themselves.
"""
logger.debug(f"Finding context using file {str(file_path)!r}")
if "atlas" in file_path.parts:
self._context = SPAMetadataContext("epu", self._basepath)
return True

# CLEM workflow checks
# Look for LIF and XLIF files
Expand All @@ -129,75 +126,60 @@ def _find_context(self, file_path: Path) -> bool:
return True
# Look for TIFF files associated with CLEM workflow
# Leica's autosave mode seems to name the TIFFs in the format
# PostionXX--ZXX-CXX.tif
if (
"--" in file_path.name
and file_path.suffix in (".tiff", ".tif")
and self._environment
):
created_directories = set(
get_machine_config_client(
str(self._environment.url.geturl()),
instrument_name=self._environment.instrument_name,
demo=self._environment.demo,
).get("analyse_created_directories", [])
)
if created_directories.intersection(set(file_path.parts)):
self._context = CLEMContext("leica", self._basepath)
return True
# PositionXX--ZXX--CXX.tif
if all(
pattern in file_path.name for pattern in ("--Z", "--C")
) and file_path.suffix in (".tiff", ".tif"):
self._context = CLEMContext("leica", self._basepath)
return True

# Tomography and SPA workflow checks
split_file_name = file_path.name.split("_")
if split_file_name:
# Skip context for gain files
if "gain" in split_file_name[-1]:
if "atlas" in file_path.parts:
self._context = SPAMetadataContext("epu", self._basepath)
return True

if "Metadata" in file_path.parts or file_path.name == "EpuSession.dm":
self._context = SPAMetadataContext("epu", self._basepath)
return True
elif (
"Batch" in file_path.parts
or "SearchMaps" in file_path.parts
or "Thumbnails" in file_path.parts
or file_path.name == "Session.dm"
):
self._context = TomographyMetadataContext("tomo", self._basepath)
return True

split_file_stem = file_path.stem.split("_")
if split_file_stem:
if split_file_stem[-1] == "gain":
return False

# Files starting with "FoilHole" belong to the SPA workflow
if split_file_name[0].startswith("FoilHole"):
if split_file_stem[0].startswith("FoilHole") and split_file_stem[-1] in [
"Fractions",
"fractions",
"EER",
]:
if not self._context:
logger.info("Acquisition software: EPU")
self._context = SPAModularContext("epu", self._basepath)
self.parameters_model = ProcessingParametersSPA
return True

# Files starting with "Position" belong to the standard tomography workflow
# NOTE: not completely reliable, mdocs can be in tomography metadata as well
if (
split_file_name[0] == "Position"
split_file_stem[0] == "Position"
or "[" in file_path.name
or "Fractions" in split_file_name[-1]
or "fractions" in split_file_name[-1]
or "EER" in split_file_name[-1]
or split_file_stem[-1] in ["Fractions", "fractions", "EER"]
or file_path.suffix == ".mdoc"
):
if not self._context:
logger.info("Acquisition software: tomo")
self._context = TomographyContext("tomo", self._basepath)
self.parameters_model = ProcessingParametersTomo
return True

# Files with these suffixes belong to the serial EM tomography workflow
if file_path.suffix in (".mrc", ".tiff", ".tif", ".eer"):
# Ignore batch files and search maps
if any(p in file_path.parts for p in ("Batch", "SearchMaps")):
return False
# Ignore JPG files
if file_path.with_suffix(".jpg").is_file():
return False
# Ignore the averaged movies written out by the Falcon
if (
len(
list(
file_path.parent.glob(
f"{file_path.name}*{file_path.suffix}"
)
)
)
> 1
):
return False
self._context = TomographyContext("serialem", self._basepath)
self.parameters_model = ProcessingParametersTomo
return True
return False

def post_transfer(self, transferred_file: Path):
Expand Down Expand Up @@ -288,12 +270,7 @@ def _analyse(self):
if not dc_metadata:
try:
dc_metadata = self._context.gather_metadata(
(
transferred_file.with_suffix(".mdoc")
if self._context._acquisition_software
== "serialem"
else self._xml_file(transferred_file)
),
self._xml_file(transferred_file),
environment=self._environment,
)
except NotImplementedError:
Expand Down
33 changes: 2 additions & 31 deletions src/murfey/client/contexts/tomo.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,33 +400,6 @@ def _add_tomo_tilt(
required_strings=required_strings,
)

def _add_serialem_tilt(
self, file_path: Path, environment: MurfeyInstanceEnvironment | None = None
) -> List[str]:
delimiters = ("_", "-")
for d in delimiters:
if file_path.name.count(d) > 1:
delimiter = d
break
else:
delimiter = delimiters[0]

def _extract_tilt_series(p: Path) -> str:
split = p.name.split(delimiter)
for s in split:
if s.isdigit():
return s
raise ValueError(
f"No digits found in {p.name} after splitting on {delimiter}"
)

return self._add_tilt(
file_path,
lambda x: ".".join(x.name.split(delimiter)[-1].split(".")[:-1]),
environment=environment,
required_strings=[],
)

def post_transfer(
self,
transferred_file: Path,
Expand Down Expand Up @@ -464,10 +437,8 @@ def post_transfer(
required_strings=kwargs.get("required_strings")
or required_strings,
)
elif self._acquisition_software == "serialem":
completed_tilts = self._add_serialem_tilt(
transferred_file, environment=environment
)
else:
logger.warning(f"Unknown data file {transferred_file}")
if transferred_file.suffix == ".mdoc":
with open(transferred_file, "r") as md:
tilt_series = transferred_file.stem
Expand Down
2 changes: 1 addition & 1 deletion src/murfey/client/multigrid_control.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from murfey.util.api import url_path_for
from murfey.util.client import capture_delete, capture_post, get_machine_config_client

log = logging.getLogger("murfey.client.mutligrid_control")
log = logging.getLogger("murfey.client.multigrid_control")


@dataclass
Expand Down
2 changes: 1 addition & 1 deletion src/murfey/util/dummy_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def initialise(dummy_location: Path) -> Path:
yaml.dump(
{
"m12": {
"acquisition_software": ["epu", "tomo", "serialem"],
"acquisition_software": ["epu", "tomo"],
"data_directories": [str(detector_dir)],
"rsync_basepath": str(dummy_location),
"calibrations": {"dummy": 0},
Expand Down
118 changes: 117 additions & 1 deletion tests/client/test_analyser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,122 @@
from __future__ import annotations

import pytest

from murfey.client.analyser import Analyser
from murfey.client.contexts.clem import CLEMContext
from murfey.client.contexts.spa import SPAModularContext
from murfey.client.contexts.spa_metadata import SPAMetadataContext
from murfey.client.contexts.tomo import TomographyContext
from murfey.client.contexts.tomo_metadata import TomographyMetadataContext
from murfey.util.models import ProcessingParametersSPA, ProcessingParametersTomo

# Parametrisation table for test_find_context: each entry pairs an example
# file path (relative to the session directory) with the workflow context
# class the Analyser is expected to select for that file.
example_files = [
    # Tomography
    ["visit/Position_1_001_0.0_20250715_012434_fractions.tiff", TomographyContext],
    ["visit/Position_1_2_002_3.0_20250715_012434_Fractions.mrc", TomographyContext],
    ["visit/Position_1_2_003_6.0_20250715_012434_EER.eer", TomographyContext],
    ["visit/name1_004_9.0_20250715_012434_fractions.tiff", TomographyContext],
    ["visit/Position_1_[30.0].tiff", TomographyContext],
    ["visit/Position_1.mdoc", TomographyContext],
    ["visit/name1_2.mdoc", TomographyContext],
    # Tomography metadata
    ["visit/Session.dm", TomographyMetadataContext],
    ["visit/SearchMaps/SearchMap.xml", TomographyMetadataContext],
    ["visit/Batch/BatchPositionsList.xml", TomographyMetadataContext],
    ["visit/Thumbnails/file.mrc", TomographyMetadataContext],
    # SPA
    ["visit/FoilHole_01234_fractions.tiff", SPAModularContext],
    ["visit/FoilHole_01234_EER.eer", SPAModularContext],
    # SPA metadata
    ["atlas/atlas.mrc", SPAMetadataContext],
    ["visit/EpuSession.dm", SPAMetadataContext],
    ["visit/Metadata/GridSquare.dm", SPAMetadataContext],
    # CLEM LIF file
    ["visit/images/test_file.lif", CLEMContext],
    # CLEM TIFF files
    [
        "visit/images/2024_03_14_12_34_56--Project001/grid1/Position 12--Z02--C01.tif",
        CLEMContext,
    ],
    [
        "visit/images/2024_03_14_12_34_56--Project001/grid1/Position 12_Lng_LVCC--Z02--C01.tif",
        CLEMContext,
    ],
    [
        "visit/images/2024_03_14_12_34_56--Project001/grid1/Series001--Z00--C00.tif",
        CLEMContext,
    ],
    [
        "visit/images/2024_03_14_12_34_56--Project001/grid1/Series001_Lng_LVCC--Z00--C00.tif",
        CLEMContext,
    ],
    # CLEM TIFF file accompanying metadata
    [
        "visit/images/2024_03_14_12_34_56--Project001/grid1/Metadata/Position 12.xlif",
        CLEMContext,
    ],
    [
        "visit/images/2024_03_14_12_34_56--Project001/grid1/Metadata/Position 12_Lng_LVCC.xlif",
        CLEMContext,
    ],
    [
        "visit/images/2024_03_14_12_34_56--Project001/grid1/Position 12/Metadata/Position 12_histo.xlif",
        CLEMContext,
    ],
    [
        "visit/images/2024_03_14_12_34_56--Project001/grid1/Position 12/Metadata/Position 12_Lng_LVCC_histo.xlif",
        CLEMContext,
    ],
    [
        "visit/images/2024_03_14_12_34_56--Project001/grid1/Metadata/Series001.xlif",
        CLEMContext,
    ],
    [
        "visit/images/2024_03_14_12_34_56--Project001/grid1/Metadata/Series001_Lng_LVCC.xlif",
        CLEMContext,
    ],
]


@pytest.mark.parametrize("file_and_context", example_files)
def test_find_context(file_and_context, tmp_path):
    """
    The Analyser should select the expected workflow context class purely
    from the structure of the file path it is shown and, for the acquisition
    workflows, also set the matching processing parameters model.
    """
    file_name, expected_context = file_and_context

    # Feed the file to a fresh Analyser and confirm a context was assigned
    analyser = Analyser(basepath_local=tmp_path)
    assert analyser._find_context(tmp_path / file_name)
    assert isinstance(analyser._context, expected_context)

    # Acquisition workflow contexts must also configure a parameters model
    context_to_params = {
        TomographyContext: ProcessingParametersTomo,
        SPAModularContext: ProcessingParametersSPA,
    }
    for context_type, params_model in context_to_params.items():
        if isinstance(analyser._context, context_type):
            assert analyser.parameters_model == params_model


# File paths for which the Analyser should NOT assign any workflow context
# (e.g. gain reference files, sidecar .jpg/.xml files, and Leica project,
# lock, and .lof/.lifext files).
contextless_files = [
    "visit/Position_1_gain.tiff",
    "visit/FoilHole_01234_gain.tiff",
    "visit/file_1.mrc",
    "visit/FoilHole_01234.mrc",
    "visit/FoilHole_01234.jpg",
    "visit/FoilHole_01234.xml",
    "visit/images/test_file.lifext",
    "visit/images/2024_03_14_12_34_56--Project001/Project001.xlef",
    "visit/images/2024_03_14_12_34_56--Project001/Project001.xlef.lock",
    "visit/images/2024_03_14_12_34_56--Project001/grid1/Position 12/Position 12_histo.lof",
    "visit/images/2024_03_14_12_34_56--Project001/grid1/Position 12/Series001_histo.lof",
]


@pytest.mark.parametrize("bad_file", contextless_files)
def test_ignore_contextless_files(bad_file, tmp_path):
    """
    Files that belong to no known workflow should leave the Analyser
    without a context, and _find_context should report failure.
    """
    # Use the keyword argument for consistency with test_find_context,
    # which constructs the Analyser as Analyser(basepath_local=tmp_path)
    analyser = Analyser(basepath_local=tmp_path)
    assert not analyser._find_context(tmp_path / bad_file)
    assert not analyser._context
assert not analyser._context


def test_analyser_setup_and_stopping(tmp_path):
Expand All @@ -23,7 +139,7 @@ def test_analyser_tomo_determination(tmp_path):


def test_analyser_epu_determination(tmp_path):
tomo_file = tmp_path / "FoilHole_12345_Data_6789.tiff"
tomo_file = tmp_path / "FoilHole_12345_Data_6789_Fractions.tiff"
analyser = Analyser(tmp_path)
analyser.start()
analyser.queue.put(tomo_file)
Expand Down