diff --git a/src/murfey/client/analyser.py b/src/murfey/client/analyser.py index 86c07b37..18b28b2b 100644 --- a/src/murfey/client/analyser.py +++ b/src/murfey/client/analyser.py @@ -118,9 +118,6 @@ def _find_context(self, file_path: Path) -> bool: in the Context classes themselves. """ logger.debug(f"Finding context using file {str(file_path)!r}") - if "atlas" in file_path.parts: - self._context = SPAMetadataContext("epu", self._basepath) - return True # CLEM workflow checks # Look for LIF and XLIF files @@ -129,32 +126,41 @@ def _find_context(self, file_path: Path) -> bool: return True # Look for TIFF files associated with CLEM workflow # Leica's autosave mode seems to name the TIFFs in the format - # PostionXX--ZXX-CXX.tif - if ( - "--" in file_path.name - and file_path.suffix in (".tiff", ".tif") - and self._environment - ): - created_directories = set( - get_machine_config_client( - str(self._environment.url.geturl()), - instrument_name=self._environment.instrument_name, - demo=self._environment.demo, - ).get("analyse_created_directories", []) - ) - if created_directories.intersection(set(file_path.parts)): - self._context = CLEMContext("leica", self._basepath) - return True + # PositionXX--ZXX--CXX.tif + if all( + pattern in file_path.name for pattern in ("--Z", "--C") + ) and file_path.suffix in (".tiff", ".tif"): + self._context = CLEMContext("leica", self._basepath) + return True # Tomography and SPA workflow checks - split_file_name = file_path.name.split("_") - if split_file_name: - # Skip context for gain files - if "gain" in split_file_name[-1]: + if "atlas" in file_path.parts: + self._context = SPAMetadataContext("epu", self._basepath) + return True + + if "Metadata" in file_path.parts or file_path.name == "EpuSession.dm": + self._context = SPAMetadataContext("epu", self._basepath) + return True + elif ( + "Batch" in file_path.parts + or "SearchMaps" in file_path.parts + or "Thumbnails" in file_path.parts + or file_path.name == "Session.dm" + ): + 
self._context = TomographyMetadataContext("tomo", self._basepath) + return True + + split_file_stem = file_path.stem.split("_") + if split_file_stem: + if split_file_stem[-1] == "gain": return False # Files starting with "FoilHole" belong to the SPA workflow - if split_file_name[0].startswith("FoilHole"): + if split_file_stem[0].startswith("FoilHole") and split_file_stem[-1] in [ + "Fractions", + "fractions", + "EER", + ]: if not self._context: logger.info("Acquisition software: EPU") self._context = SPAModularContext("epu", self._basepath) @@ -162,42 +168,18 @@ def _find_context(self, file_path: Path) -> bool: return True # Files starting with "Position" belong to the standard tomography workflow + # NOTE: not completely reliable, mdocs can be in tomography metadata as well if ( - split_file_name[0] == "Position" + split_file_stem[0] == "Position" or "[" in file_path.name - or "Fractions" in split_file_name[-1] - or "fractions" in split_file_name[-1] - or "EER" in split_file_name[-1] + or split_file_stem[-1] in ["Fractions", "fractions", "EER"] + or file_path.suffix == ".mdoc" ): if not self._context: logger.info("Acquisition software: tomo") self._context = TomographyContext("tomo", self._basepath) self.parameters_model = ProcessingParametersTomo return True - - # Files with these suffixes belong to the serial EM tomography workflow - if file_path.suffix in (".mrc", ".tiff", ".tif", ".eer"): - # Ignore batch files and search maps - if any(p in file_path.parts for p in ("Batch", "SearchMaps")): - return False - # Ignore JPG files - if file_path.with_suffix(".jpg").is_file(): - return False - # Ignore the averaged movies written out by the Falcon - if ( - len( - list( - file_path.parent.glob( - f"{file_path.name}*{file_path.suffix}" - ) - ) - ) - > 1 - ): - return False - self._context = TomographyContext("serialem", self._basepath) - self.parameters_model = ProcessingParametersTomo - return True return False def post_transfer(self, transferred_file: Path): @@ 
-288,12 +270,7 @@ def _analyse(self): if not dc_metadata: try: dc_metadata = self._context.gather_metadata( - ( - transferred_file.with_suffix(".mdoc") - if self._context._acquisition_software - == "serialem" - else self._xml_file(transferred_file) - ), + self._xml_file(transferred_file), environment=self._environment, ) except NotImplementedError: diff --git a/src/murfey/client/contexts/tomo.py b/src/murfey/client/contexts/tomo.py index 08fe22c2..a9204259 100644 --- a/src/murfey/client/contexts/tomo.py +++ b/src/murfey/client/contexts/tomo.py @@ -400,33 +400,6 @@ def _add_tomo_tilt( required_strings=required_strings, ) - def _add_serialem_tilt( - self, file_path: Path, environment: MurfeyInstanceEnvironment | None = None - ) -> List[str]: - delimiters = ("_", "-") - for d in delimiters: - if file_path.name.count(d) > 1: - delimiter = d - break - else: - delimiter = delimiters[0] - - def _extract_tilt_series(p: Path) -> str: - split = p.name.split(delimiter) - for s in split: - if s.isdigit(): - return s - raise ValueError( - f"No digits found in {p.name} after splitting on {delimiter}" - ) - - return self._add_tilt( - file_path, - lambda x: ".".join(x.name.split(delimiter)[-1].split(".")[:-1]), - environment=environment, - required_strings=[], - ) - def post_transfer( self, transferred_file: Path, @@ -464,10 +437,8 @@ def post_transfer( required_strings=kwargs.get("required_strings") or required_strings, ) - elif self._acquisition_software == "serialem": - completed_tilts = self._add_serialem_tilt( - transferred_file, environment=environment - ) + else: + logger.warning(f"Unknown data file {transferred_file}") if transferred_file.suffix == ".mdoc": with open(transferred_file, "r") as md: tilt_series = transferred_file.stem diff --git a/src/murfey/client/multigrid_control.py b/src/murfey/client/multigrid_control.py index 1ce69e5b..a016b49f 100644 --- a/src/murfey/client/multigrid_control.py +++ b/src/murfey/client/multigrid_control.py @@ -23,7 +23,7 @@ from 
murfey.util.api import url_path_for from murfey.util.client import capture_delete, capture_post, get_machine_config_client -log = logging.getLogger("murfey.client.mutligrid_control") +log = logging.getLogger("murfey.client.multigrid_control") @dataclass diff --git a/src/murfey/util/dummy_setup.py b/src/murfey/util/dummy_setup.py index ca89645a..6ec45039 100644 --- a/src/murfey/util/dummy_setup.py +++ b/src/murfey/util/dummy_setup.py @@ -24,7 +24,7 @@ def initialise(dummy_location: Path) -> Path: yaml.dump( { "m12": { - "acquisition_software": ["epu", "tomo", "serialem"], + "acquisition_software": ["epu", "tomo"], "data_directories": [str(detector_dir)], "rsync_basepath": str(dummy_location), "calibrations": {"dummy": 0}, diff --git a/tests/client/test_analyser.py b/tests/client/test_analyser.py index f9256a0c..175916aa 100644 --- a/tests/client/test_analyser.py +++ b/tests/client/test_analyser.py @@ -1,6 +1,122 @@ from __future__ import annotations +import pytest + from murfey.client.analyser import Analyser +from murfey.client.contexts.clem import CLEMContext +from murfey.client.contexts.spa import SPAModularContext +from murfey.client.contexts.spa_metadata import SPAMetadataContext +from murfey.client.contexts.tomo import TomographyContext +from murfey.client.contexts.tomo_metadata import TomographyMetadataContext +from murfey.util.models import ProcessingParametersSPA, ProcessingParametersTomo + +example_files = [ + # Tomography + ["visit/Position_1_001_0.0_20250715_012434_fractions.tiff", TomographyContext], + ["visit/Position_1_2_002_3.0_20250715_012434_Fractions.mrc", TomographyContext], + ["visit/Position_1_2_003_6.0_20250715_012434_EER.eer", TomographyContext], + ["visit/name1_004_9.0_20250715_012434_fractions.tiff", TomographyContext], + ["visit/Position_1_[30.0].tiff", TomographyContext], + ["visit/Position_1.mdoc", TomographyContext], + ["visit/name1_2.mdoc", TomographyContext], + # Tomography metadata + ["visit/Session.dm", TomographyMetadataContext], + 
["visit/SearchMaps/SearchMap.xml", TomographyMetadataContext], + ["visit/Batch/BatchPositionsList.xml", TomographyMetadataContext], + ["visit/Thumbnails/file.mrc", TomographyMetadataContext], + # SPA + ["visit/FoilHole_01234_fractions.tiff", SPAModularContext], + ["visit/FoilHole_01234_EER.eer", SPAModularContext], + # SPA metadata + ["atlas/atlas.mrc", SPAMetadataContext], + ["visit/EpuSession.dm", SPAMetadataContext], + ["visit/Metadata/GridSquare.dm", SPAMetadataContext], + # CLEM LIF file + ["visit/images/test_file.lif", CLEMContext], + # CLEM TIFF files + [ + "visit/images/2024_03_14_12_34_56--Project001/grid1/Position 12--Z02--C01.tif", + CLEMContext, + ], + [ + "visit/images/2024_03_14_12_34_56--Project001/grid1/Position 12_Lng_LVCC--Z02--C01.tif", + CLEMContext, + ], + [ + "visit/images/2024_03_14_12_34_56--Project001/grid1/Series001--Z00--C00.tif", + CLEMContext, + ], + [ + "visit/images/2024_03_14_12_34_56--Project001/grid1/Series001_Lng_LVCC--Z00--C00.tif", + CLEMContext, + ], + # CLEM TIFF file accompanying metadata + [ + "visit/images/2024_03_14_12_34_56--Project001/grid1/Metadata/Position 12.xlif", + CLEMContext, + ], + [ + "visit/images/2024_03_14_12_34_56--Project001/grid1/Metadata/Position 12_Lng_LVCC.xlif", + CLEMContext, + ], + [ + "visit/images/2024_03_14_12_34_56--Project001/grid1/Position 12/Metadata/Position 12_histo.xlif", + CLEMContext, + ], + [ + "visit/images/2024_03_14_12_34_56--Project001/grid1/Position 12/Metadata/Position 12_Lng_LVCC_histo.xlif", + CLEMContext, + ], + [ + "visit/images/2024_03_14_12_34_56--Project001/grid1/Metadata/Series001.xlif", + CLEMContext, + ], + [ + "visit/images/2024_03_14_12_34_56--Project001/grid1/Metadata/Series001_Lng_LVCC.xlif", + CLEMContext, + ], +] + + +@pytest.mark.parametrize("file_and_context", example_files) +def test_find_context(file_and_context, tmp_path): + # Unpack parametrised variables + file_name, context = file_and_context + + # Pass the file to the Analyser; add environment as needed + 
analyser = Analyser(basepath_local=tmp_path) + + # Check that the results are as expected + assert analyser._find_context(tmp_path / file_name) + assert isinstance(analyser._context, context) + + # Checks for the specific workflow contexts + if isinstance(analyser._context, TomographyContext): + assert analyser.parameters_model == ProcessingParametersTomo + if isinstance(analyser._context, SPAModularContext): + assert analyser.parameters_model == ProcessingParametersSPA + + +contextless_files = [ + "visit/Position_1_gain.tiff", + "visit/FoilHole_01234_gain.tiff", + "visit/file_1.mrc", + "visit/FoilHole_01234.mrc", + "visit/FoilHole_01234.jpg", + "visit/FoilHole_01234.xml", + "visit/images/test_file.lifext", + "visit/images/2024_03_14_12_34_56--Project001/Project001.xlef", + "visit/images/2024_03_14_12_34_56--Project001/Project001.xlef.lock", + "visit/images/2024_03_14_12_34_56--Project001/grid1/Position 12/Position 12_histo.lof", + "visit/images/2024_03_14_12_34_56--Project001/grid1/Position 12/Series001_histo.lof", +] + + +@pytest.mark.parametrize("bad_file", contextless_files) +def test_ignore_contextless_files(bad_file, tmp_path): + analyser = Analyser(tmp_path) + assert not analyser._find_context(tmp_path / bad_file) + assert not analyser._context def test_analyser_setup_and_stopping(tmp_path): @@ -23,7 +139,7 @@ def test_analyser_tomo_determination(tmp_path): def test_analyser_epu_determination(tmp_path): - tomo_file = tmp_path / "FoilHole_12345_Data_6789.tiff" + tomo_file = tmp_path / "FoilHole_12345_Data_6789_Fractions.tiff" analyser = Analyser(tmp_path) analyser.start() analyser.queue.put(tomo_file)