diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml index 5b3dd093..a032cc9a 100644 --- a/.github/workflows/doc.yml +++ b/.github/workflows/doc.yml @@ -32,7 +32,7 @@ jobs: - name: Install dependencies run: | - uv sync --dev + uv sync --dev --extra viewer - name: Compile documentation run: | diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 26d53e48..33d5eeae 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -60,7 +60,7 @@ jobs: - name: Install dependencies (Linux) if: runner.os == 'Linux' run: | - uv sync --dev + uv sync --dev --extra viewer - name: Run tests (Linux) if: runner.os == 'Linux' diff --git a/CHANGELOG.md b/CHANGELOG.md index 0d1282f3..415e7a56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- (dataset-viewer) add a trame app for dataset visual exploration. - (sample/features) add_field: check field size consistency with geometrical support. - (sample) add `set_trees` to `Sample` delegated methods: `sample.set_trees(...)` now works as a direct proxy to `SampleFeatures.set_trees`, consistent with other delegated tree methods. diff --git a/docs/source/core_concepts.rst b/docs/source/core_concepts.rst index 720a6c77..6ac29ba6 100644 --- a/docs/source/core_concepts.rst +++ b/docs/source/core_concepts.rst @@ -20,3 +20,4 @@ For more details and examples, see the :doc:`core_concepts` and :doc:`examples_t core_concepts/defaults core_concepts/disk_format core_concepts/interoperability + core_concepts/viewer diff --git a/docs/source/core_concepts/viewer.md b/docs/source/core_concepts/viewer.md new file mode 100644 index 00000000..56da518c --- /dev/null +++ b/docs/source/core_concepts/viewer.md @@ -0,0 +1,187 @@ +# Dataset viewer + +The dataset viewer is a small trame/VTK web application that lets +you browse PLAID datasets stored on disk and inspect their samples in 3D. +It ships as the `plaid-viewer` console script. + +## Architecture + +The viewer runs as a single trame server process: + +- `plaid.viewer.services.PlaidDatasetService` discovers datasets and + loads `plaid.Sample` instances. It uses + `plaid.storage.init_from_disk` to obtain `(dataset_dict, + converter_dict)` and materialises a sample on demand with + `converter.to_plaid(dataset, index)`, so every PLAID backend + (`hf_datasets`, `cgns`, `zarr`, ...) is supported uniformly. + Hugging Face Hub datasets are also supported: when a dataset id is + registered as a repo id, the service dispatches to + `plaid.storage.init_streaming_from_hub` instead, so samples are + streamed lazily without a full local copy. +- `plaid.viewer.services.ParaviewArtifactService` writes each selected + sample to a CGNS file (or `.cgns.series` sidecar for time-dependent + samples) in a per-process cache directory. +- `plaid.viewer.trame_app.server.build_server` assembles the UI + (Vuetify side drawer with dataset/split/sample selectors and display + options) and a VTK pipeline: `vtkCGNSReader` → optional cut plane → + optional threshold → composite-data geometry → mapper/actor. + +There is no separate FastAPI backend and no second port: dataset +discovery, CGNS export and the 3D view are all served by trame. 
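+
+The sketch below illustrates what `PlaidDatasetService` does for a
+disk-backed dataset (the dataset path and split name are placeholders;
+check the exact `plaid.storage.init_from_disk` signature against your
+PLAID version):
+
+```python
+from plaid.storage import init_from_disk
+
+# Path to one dataset directory; one (dataset, converter) pair per split.
+dataset_dict, converter_dict = init_from_disk("/path/to/datasets/my_dataset")
+
+split = "train"  # illustrative split name
+dataset = dataset_dict[split]
+converter = converter_dict[split]
+
+# Materialise one sample as a plaid.Sample, whatever the on-disk backend
+# (hf_datasets, cgns, zarr, ...).
+sample = converter.to_plaid(dataset, 0)
+```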
+
+## Launching the viewer
+
+```bash
+uv run plaid-viewer --datasets-root /path/to/datasets
+```
+
+Useful options:
+
+| Option | Default | Description |
+| ----------------- | ----------- | ------------------------------------------------------------------------------------------------ |
+| `--datasets-root` | *(none)* | Directory containing one sub-directory per PLAID dataset. A single-dataset directory also works. Optional: when omitted, the viewer reuses the last root picked in a previous session or lets you choose one from the UI. |
+| `--host` | `127.0.0.1` | Bind address for the trame HTTP server. |
+| `--port` | `8080` | Port exposed by the trame HTTP server. |
+| `--backend-id` | `disk` | PLAID backend identifier embedded in sample references and the cache key. |
+| `--hub-repo` | `None` | Hugging Face Hub repo id (`namespace/name`) streamed via `init_streaming_from_hub`. Repeat the flag to pre-register multiple repos. |
+
+Open `http://<host>:<port>/` in your browser.
+
+### Streaming from the Hugging Face Hub
+
+Hub datasets can be added at launch time with `--hub-repo` or from the
+running UI through the **Hub** tab in the side drawer (the drawer now
+groups the local datasets root and the Hugging Face repo input under a
+`Local / Hub` tab selector, hidden when `--disable-root-change` is set).
+Each registered repo shows up as a removable chip and as a new entry in
+the **Dataset** dropdown. Samples are loaded on demand through
+`plaid.storage.init_streaming_from_hub`, so only the selected sample's
+shards are fetched.
+
+```bash
+# Start with one or more hub datasets pre-registered.
+uv run plaid-viewer --hub-repo PLAID-lib/VKI-LS59 --hub-repo PLAID-lib/Rotor37
+```
+
+Streaming splits returned by PLAID are forward-only
+`datasets.IterableDataset` objects without `__len__`. The viewer adapts
+accordingly:
+
+- A `streaming` chip appears in the toolbar to advertise the mode.
+- The **Sample** slider starts at a single reachable step and grows by
+  one every time the user moves it to the right; each right-arrow press
+  consumes the next element from the iterator.
+- Revisiting an already-fetched index simply re-renders the cached
+  sample; the slider cannot be rewound because the underlying iterator
+  cannot.
+- Switching split or dataset rebuilds a fresh iterator from the Hub.
+- When the stream is exhausted the slider caps at the last consumed
+  index and the counter label shows `(end of stream)`.
+
+
+## Using the UI
+
+The side drawer provides, from top to bottom:
+
+1. **Dataset / Split** - two `VSelect` controls that pick the active
+   dataset and split.
+2. **Sample** - a `VSlider` over the integer sample index of the current
+   split; the selected `sample_id` (and the total count) is shown under
+   the slider.
+3. **Base** - a `VBtnToggle` with exclusive, mandatory selection: exactly
+   one renderable CGNS base exposed by `vtkCGNSReader.GetBaseSelection()`
+   is active at any time. Bases that contain no `Zone_t` children (for
+   example, a `Global` base storing only reference scalars or
+   free-standing tensors) are not rendered but are summarised in the
+   **Non-visual bases** accordion further down the drawer: each
+   `DataArray_t` is listed with its name, dtype, shape and a short value
+   preview.
+4. **Field / Colormap / Show edges** - colour the geometry by any point
+   or cell array (all point and cell arrays are enabled on the reader
+   by default so every field shows up in the dropdown), pick from a set
+   of built-in colormaps and optionally overlay wireframe edges.
+5. 
**Cut plane** - toggle a `vtkCutter` and interactively adjust its
+   normal and signed offset along that normal (the plane origin is the
+   current dataset's bounding-box centre).
+6. **Threshold** - toggle a `vtkThreshold` filter on the currently
+   selected field and set the `[min, max]` range. Defaults are populated
+   from the field's data range.
+7. **Select features** - an expandable panel listing the field paths
+   available for the current dataset (retrieved from the PLAID metadata
+   schema). Toggling checkboxes and clicking **Apply** filters the loaded
+   samples down to the selected fields:
+   - For disk-backed datasets the selection is forwarded to
+     `converter.to_plaid(dataset, index, features=...)`. PLAID expands
+     the list internally with
+     `plaid.utils.cgns_helper.update_features_for_CGNS_compatibility`
+     to preserve the CGNS conventions (coordinates, zones, grid
+     locations, etc. that make the kept fields renderable). The
+     user-facing selection is first intersected with the active split's
+     own feature catalogue, so paths that only live in another split
+     (for example a field present in `train` but not in `test`) do not
+     trigger a `Missing features` error.
+   - For streaming (Hugging Face Hub) datasets the expansion must be
+     done ahead of `init_streaming_from_hub`. The viewer calls
+     `update_features_for_CGNS_compatibility` itself and hands the
+     expanded list to the streaming loader, then invalidates the
+     current iterator so the next sample is materialised with the new
+     filter.
+   The **Clear** / **Select all** buttons in the panel header provide
+   shortcuts; an empty selection loads only the geometric support
+   (mesh + zones + metadata).
+8. **Reset camera** - re-frames the current actor.
+
+The 3D view is a server-side `VtkRemoteView` (images are rendered on the
+server and streamed to the browser). Camera manipulation uses the
+ParaView-like trackball style:
+
+- Left mouse button: rotate.
+- Middle mouse button (or Shift + left): pan.
+- Mouse wheel (or right button drag): zoom.
+
+A status line at the bottom of the drawer reports the last action or
+error.
+
+## Cache layout
+
+Artifacts are written under an **ephemeral** per-process temp directory
+created by `plaid.viewer.cache.CacheRoot` (named
+`plaid-viewer-{pid}-{token}` under `tempfile.gettempdir()`):
+
+```
+<cache-root>/datasets/<dataset_id>/<split>/<sample_id>/<cache_key[:16]>/
+    meshes/              # one CGNS per timestep (time-dependent)
+    meshes.cgns.series   # ParaView file-series sidecar (time-dependent)
+    mesh.cgns            # single static mesh
+    metadata.json        # cache key, sample ref, export version, ...
+```
+
+The cache holds **at most one artifact at a time**: once VTK has loaded
+a sample's CGNS into memory the on-disk copy is no longer needed, so
+the next `ensure_artifact` call removes the previous folder before
+writing the new one.
+
+The whole cache root is deleted at shutdown through four complementary
+layers: `atexit`, `SIGINT` / `SIGTERM` handlers, the `with CacheRoot()`
+context manager used by the CLI, and an orphan sweep at startup that
+removes directories left behind by previously-crashed processes.
+
+The cache key is a SHA-256 of the sample reference, backend id, PLAID
+version and `ViewerConfig.export_version`.
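+
+Conceptually the key is derived as follows (a simplified sketch of
+`_build_cache_key` in `plaid.viewer.services.paraview_artifact_service`;
+the field values are illustrative and the real payload carries a few
+more bookkeeping fields):
+
+```python
+import hashlib
+import json
+
+payload = {
+    "backend_id": "disk",
+    "dataset_id": "my_dataset",
+    "split": "train",
+    "sample_id": "0",
+    "plaid_version": "x.y.z",
+    "export_version": "1",
+}
+cache_key = hashlib.sha256(
+    json.dumps(payload, sort_keys=True, separators=(",", ":")).encode("utf-8")
+).hexdigest()
+```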
+ +## Programmatic usage + +```python +from pathlib import Path +from plaid.viewer.cache import CacheRoot +from plaid.viewer.config import ViewerConfig +from plaid.viewer.services import ParaviewArtifactService, PlaidDatasetService +from plaid.viewer.trame_app.server import build_server + +config = ViewerConfig(datasets_root=Path("/path/to/datasets")) +with CacheRoot() as cache: + datasets = PlaidDatasetService(config) + artifacts = ParaviewArtifactService(datasets, cache.path) + server = build_server(datasets, artifacts) + server.start(host="127.0.0.1", port=8080, open_browser=False) +``` diff --git a/pyproject.toml b/pyproject.toml index 70bdd20e..bed9b34a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,6 +9,7 @@ name = "pyplaid" authors = [{name = "Safran", email = "fabien.casenave@safrangroup.com"}] description = "A package that implements a data model tailored for AI and ML in the context of physics problems" requires-python = ">=3.11, <3.14" +dynamic = ["version"] keywords=[ "machine learning", "physics", @@ -37,8 +38,13 @@ dependencies = [ "matplotlib>=3.8,<4", "pydantic>=2.6,<3", ] - -dynamic = ["version"] +[project.optional-dependencies] +viewer = [ + "trame>=3.6,<4.0", + "trame-vtk>=2.8,<3.0", + "trame-vuetify>=2.7,<3.0", + "vtk>=9.6.1", +] [tool.setuptools_scm] write_to = "src/plaid/_version.py" @@ -91,3 +97,6 @@ omit = ["src/plaid/examples/*"] [tool.pytest.ini_options] filterwarnings = "ignore::DeprecationWarning" + +[project.scripts] +plaid-viewer = "plaid.viewer.cli:main" diff --git a/src/plaid/viewer/__init__.py b/src/plaid/viewer/__init__.py new file mode 100644 index 00000000..9880bdd2 --- /dev/null +++ b/src/plaid/viewer/__init__.py @@ -0,0 +1,11 @@ +"""Dataset viewer for PLAID. + +This package hosts the raw PLAID dataset viewer: a FastAPI backend plus an +embedded trame/ParaView visualization server. PLAID owns the UI shell and +the page; PLAID owns data loading, sample interpretation, and CGNS export; +ParaView/trame owns the scientific visualization. +""" + +from plaid.viewer.models import ParaviewArtifact, SampleRef + +__all__ = ["ParaviewArtifact", "SampleRef"] diff --git a/src/plaid/viewer/cache.py b/src/plaid/viewer/cache.py new file mode 100644 index 00000000..451988b6 --- /dev/null +++ b/src/plaid/viewer/cache.py @@ -0,0 +1,179 @@ +"""Ephemeral artifact cache for the dataset viewer. + +The cache lives under a per-process temporary directory and is removed at +shutdown. Four cleanup layers cover all practical failure modes: + +1. ``atexit.register`` for normal Python exit. +2. Signal handlers for ``SIGINT`` / ``SIGTERM``. +3. A context manager (``with CacheRoot() as cache:`` in the CLI). +4. An orphan sweep at startup that removes directories left behind by + previously-crashed processes. +""" + +from __future__ import annotations + +import atexit +import errno +import logging +import os +import re +import shutil +import signal +import tempfile +import uuid +from pathlib import Path + +logger = logging.getLogger(__name__) + +# Ephemeral tempdir naming: ``plaid-viewer-{pid}-{uuid4.hex}``. 
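+# An on-disk example (hypothetical pid and token): ``plaid-viewer-12345-0a1b2c3d4e5f``.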
_EPHEMERAL_PREFIX = "plaid-viewer-"
+_EPHEMERAL_PATTERN = re.compile(r"^plaid-viewer-(?P<pid>\d+)-(?P<token>[0-9a-f]+)$")
+
+
+def _windows_process_is_alive(pid: int) -> bool:  # pragma: no cover
+    """Return process liveness on Windows without sending a signal."""
+    import ctypes  # noqa: PLC0415
+
+    error_access_denied = 5
+    process_query_limited_information = 0x1000
+    still_active = 259
+
+    windll = getattr(ctypes, "WinDLL")
+    get_last_error = getattr(ctypes, "get_last_error")
+    kernel32 = windll("kernel32", use_last_error=True)
+    handle = kernel32.OpenProcess(process_query_limited_information, False, pid)
+    if not handle:
+        return get_last_error() == error_access_denied
+
+    try:
+        exit_code = ctypes.c_ulong()
+        if not kernel32.GetExitCodeProcess(handle, ctypes.byref(exit_code)):
+            return False
+        return exit_code.value == still_active
+    finally:
+        kernel32.CloseHandle(handle)
+
+
+def _process_is_alive(pid: int) -> bool:
+    """Return ``True`` if a process with the given pid is still running."""
+    if pid <= 0:
+        return False
+    if os.name == "nt":
+        return _windows_process_is_alive(pid)
+    try:
+        os.kill(pid, 0)
+    except ProcessLookupError:
+        return False
+    except PermissionError:
+        # The process exists but is owned by someone else.
+        return True
+    except OSError as exc:
+        return exc.errno != errno.ESRCH
+    return True
+
+
+def sweep_orphans(temp_root: Path | None = None) -> list[Path]:
+    """Remove viewer tempdirs whose owning process is no longer running.
+
+    Args:
+        temp_root: Base temp directory to scan. Defaults to
+            :func:`tempfile.gettempdir`.
+
+    Returns:
+        List of directories that were removed.
+    """
+    root = Path(temp_root) if temp_root is not None else Path(tempfile.gettempdir())
+    removed: list[Path] = []
+    if not root.is_dir():
+        return removed
+    for entry in root.iterdir():
+        if not entry.is_dir():
+            continue
+        match = _EPHEMERAL_PATTERN.match(entry.name)
+        if match is None:
+            continue
+        pid = int(match.group("pid"))
+        if _process_is_alive(pid):
+            continue
+        try:
+            shutil.rmtree(entry, ignore_errors=True)
+            removed.append(entry)
+            logger.info("Removed orphan viewer cache: %s", entry)
+        except OSError as exc:
+            logger.warning("Could not remove orphan viewer cache %s: %s", entry, exc)
+    return removed
+
+
+class CacheRoot:
+    """Context-manager-friendly ephemeral artifact cache directory.
+
+    Creates a new tempdir named ``plaid-viewer-{pid}-{token}`` under the OS
+    temp root. The directory is removed at process exit (``atexit``), on
+    ``SIGINT`` / ``SIGTERM``, and when the context manager is closed.
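+
+    Example (usage sketch)::
+
+        from plaid.viewer.cache import CacheRoot
+
+        with CacheRoot() as cache:
+            ...  # write artifacts somewhere under ``cache.path``
+        # the directory has been removed at this point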
+ """ + + def __init__( + self, + *, + install_signal_handlers: bool = True, + run_orphan_sweep: bool = True, + ) -> None: + if run_orphan_sweep: + sweep_orphans() + token = uuid.uuid4().hex[:12] + base = Path(tempfile.gettempdir()) + self._path = base / f"{_EPHEMERAL_PREFIX}{os.getpid()}-{token}" + self._path.mkdir(parents=True, exist_ok=False) + atexit.register(self._safe_cleanup) + if install_signal_handlers: + self._install_signal_handlers() + self._closed = False + + # ------------------------------------------------------------------ API + + @property + def path(self) -> Path: + """Root directory of the cache.""" + return self._path + + def close(self) -> None: + """Remove the cache directory.""" + if self._closed: + return + self._closed = True + self._safe_cleanup() + + def __enter__(self) -> "CacheRoot": # noqa: D105 + return self + + def __exit__(self, exc_type, exc, tb) -> None: # noqa: D105 + self.close() + + # -------------------------------------------------------------- Internals + + def _safe_cleanup(self) -> None: + try: + shutil.rmtree(self._path, ignore_errors=True) + except Exception as exc: + logger.warning("Failed to clean viewer cache %s: %s", self._path, exc) + + def _install_signal_handlers(self) -> None: + for sig in (signal.SIGINT, signal.SIGTERM): + try: + previous = signal.getsignal(sig) + except (ValueError, OSError): + continue + + def handler(signum, frame, _prev=previous): + self._safe_cleanup() + if callable(_prev) and _prev not in (signal.SIG_DFL, signal.SIG_IGN): + _prev(signum, frame) + # Re-raise the default behaviour to keep expected exit codes. + signal.signal(signum, signal.SIG_DFL) + os.kill(os.getpid(), signum) + + try: + signal.signal(sig, handler) + except (ValueError, OSError) as exc: + logger.debug("Unable to install handler for signal %s: %s", sig, exc) + continue diff --git a/src/plaid/viewer/cli.py b/src/plaid/viewer/cli.py new file mode 100644 index 00000000..90e94636 --- /dev/null +++ b/src/plaid/viewer/cli.py @@ -0,0 +1,160 @@ +"""Command-line entry point for the dataset viewer. + +Starts a single self-contained trame server. There is no FastAPI backend +and no separate port: dataset discovery, sample loading, CGNS export and +the 3D view are all served by the same trame process. +""" + +from __future__ import annotations + +import argparse +import logging +from pathlib import Path + +from plaid.viewer.cache import CacheRoot +from plaid.viewer.config import ViewerConfig +from plaid.viewer.preferences import get_last_datasets_root +from plaid.viewer.services import ParaviewArtifactService, PlaidDatasetService + +logger = logging.getLogger(__name__) + + +def _build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + prog="plaid-viewer", + description="Launch the dataset viewer (trame + VTK).", + ) + parser.add_argument( + "--datasets-root", + type=Path, + default=None, + help=( + "Directory containing one subdirectory per PLAID dataset. " + "When omitted, the viewer starts without a root and the user " + "selects one from the UI (unless --disable-root-change is set)." + ), + ) + parser.add_argument( + "--browse-roots", + type=Path, + nargs="+", + default=None, + help=( + "Directories the UI is allowed to expose through the datasets " + "root text field and file browser. Defaults to the user home " + "directory. Any path outside these roots is rejected." 
+ ), + ) + parser.add_argument( + "--disable-root-change", + action="store_true", + help=( + "Hide the 'Datasets root' UI panel; the root stays fixed to " + "--datasets-root for the lifetime of the server. Recommended " + "for public deployments (e.g. Hugging Face Spaces)." + ), + ) + parser.add_argument( + "--dataset-id", + default=None, + help=( + "Dataset id selected when the viewer starts. Use together with " + "--disable-dataset-change to pin the UI to that dataset." + ), + ) + parser.add_argument( + "--disable-dataset-change", + action="store_true", + help=( + "Hide the 'Dataset' dropdown; the selected dataset stays fixed " + "for the lifetime of the server." + ), + ) + + parser.add_argument("--host", default="127.0.0.1", help="Trame server host.") + parser.add_argument("--port", type=int, default=8080, help="Trame server port.") + parser.add_argument( + "--backend-id", + default="disk", + help="PLAID backend identifier embedded in SampleRefs.", + ) + parser.add_argument( + "--hub-repo", + action="append", + default=None, + metavar="NAMESPACE/NAME", + help=( + "Register a Hugging Face Hub repo id streamed through " + "plaid.storage.init_streaming_from_hub. Repeat the flag to " + "pre-register multiple repos. Additional repos can be added " + "at runtime from the UI (unless --disable-root-change is set)." + ), + ) + return parser + + +def main(argv: list[str] | None = None) -> int: + """Run the viewer until interrupted. + + Args: + argv: Optional override of ``sys.argv[1:]`` for tests. + + Returns: + Process exit code. + """ + args = _build_parser().parse_args(argv) + logging.basicConfig( + level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s: %(message)s" + ) + + # Permanently silence the process's file-descriptor 2 so the HDF5 / + # CGNS C libraries (used by both VTK's ``vtkCGNSReader`` and PLAID's + # pyCGNS loader) cannot pollute the console with messages like + # ``Mismatch in number of children and child IDs read``. Python's + # ``sys.stderr`` is preserved so tracebacks and the logger keep + # working. See ``_reroute_c_stderr`` for the details. + from plaid.viewer.trame_app.server import ( # noqa: PLC0415 + _reroute_c_stderr, + ) + + _reroute_c_stderr() + + # When no explicit ``--datasets-root`` is passed, fall back to the + # last local root the user selected in a previous session (persisted + # under ``$XDG_CONFIG_HOME/plaid/viewer.json``). This makes the + # viewer "remember" the last dataset directory without requiring the + # CLI flag on every launch. + effective_datasets_root = args.datasets_root + if effective_datasets_root is None: + effective_datasets_root = get_last_datasets_root() + if effective_datasets_root is not None: + logger.info("Using persisted datasets root: %s", effective_datasets_root) + browse_roots = tuple(args.browse_roots) if args.browse_roots else () + config = ViewerConfig( + datasets_root=effective_datasets_root, + backend_id=args.backend_id, + browse_roots=browse_roots, + allow_root_change=not args.disable_root_change, + initial_dataset_id=args.dataset_id, + allow_dataset_change=not args.disable_dataset_change, + ) + + with CacheRoot() as cache: + dataset_service = PlaidDatasetService(config) + for repo_id in args.hub_repo or []: + try: + dataset_service.add_hub_dataset(repo_id) + except ValueError as exc: + logger.warning("Ignoring --hub-repo %r: %s", repo_id, exc) + artifact_service = ParaviewArtifactService(dataset_service, cache.path) + + # Deferred import so ``--help`` works without trame installed. 
+ from plaid.viewer.trame_app.server import build_server # noqa: PLC0415 + + server = build_server(dataset_service, artifact_service) + server.start(host=args.host, port=args.port, open_browser=False) + return 0 + + +if __name__ == "__main__": # pragma: no cover - CLI entry + raise SystemExit(main()) diff --git a/src/plaid/viewer/config.py b/src/plaid/viewer/config.py new file mode 100644 index 00000000..0c075d69 --- /dev/null +++ b/src/plaid/viewer/config.py @@ -0,0 +1,47 @@ +"""Runtime configuration for the dataset viewer.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from pathlib import Path + + +@dataclass(frozen=True) +class ViewerConfig: + """Static configuration for a viewer instance. + + Attributes: + datasets_root: Directory scanned to discover datasets. A dataset is a + subdirectory containing both ``data/`` and ``problem_definitions/`` + (or the root may itself be such a folder). When ``None``, the + viewer starts without a root and the user is expected to pick one + interactively (when ``allow_root_change`` is True). + backend_id: PLAID backend identifier embedded in :class:`SampleRef` + objects and in the artifact cache key. + export_version: Opaque string mixed into the artifact cache key. Bump + when export logic changes. + extra_cache_key_fields: Extra fields serialised into the cache key. + browse_roots: Directories the viewer is allowed to expose through the + built-in file browser / datasets-root text field. Every candidate + path must be a descendant of at least one of these roots. When + empty, defaults to ``(Path.home(),)`` at the service level. + allow_root_change: When ``True`` (default), the trame UI exposes a + panel to change the datasets root at runtime. Set to ``False`` for + public deployments (e.g. Hugging Face Spaces) where the root must + remain fixed to what the operator configured. + initial_dataset_id: Dataset selected when the viewer starts. When + ``None``, the first discovered local dataset is selected, falling + back to the first Hub dataset. + allow_dataset_change: When ``True`` (default), the trame UI exposes the + dataset dropdown. Set to ``False`` to pin the selection configured + by ``initial_dataset_id`` / startup discovery. + """ + + datasets_root: Path | None = None + backend_id: str = "disk" + export_version: str = "1" + extra_cache_key_fields: dict[str, str] = field(default_factory=dict) + browse_roots: tuple[Path, ...] = () + allow_root_change: bool = True + initial_dataset_id: str | None = None + allow_dataset_change: bool = True diff --git a/src/plaid/viewer/models.py b/src/plaid/viewer/models.py new file mode 100644 index 00000000..f6026c8e --- /dev/null +++ b/src/plaid/viewer/models.py @@ -0,0 +1,178 @@ +"""Data models for the gdataset viewer. + +Contains both immutable dataclasses used by services (`SampleRef`, +`ParaviewArtifact`) and pydantic models used as FastAPI response payloads. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path + +from pydantic import BaseModel, Field + + +@dataclass(frozen=True) +class SampleRef: + """Backend-agnostic reference to a PLAID sample. + + Attributes: + backend_id: Identifier of the PLAID storage backend (e.g. ``"disk"``, + ``"hf_datasets"``, ``"zarr"``). + dataset_id: Identifier of the dataset (typically the dataset directory + name). + split: Optional split name (``"train"``, ``"test"``, ...). ``None`` + when the dataset is not split. + sample_id: Identifier of the sample within the split. 
For disk-backed + datasets this is the zero-based index rendered as a string. + """ + + backend_id: str + dataset_id: str + split: str | None + sample_id: str + + def encode(self) -> str: + """Return a URL-safe string identifier usable as a route parameter.""" + split = self.split if self.split is not None else "_" + return f"{self.backend_id}:{self.dataset_id}:{split}:{self.sample_id}" + + @classmethod + def decode(cls, value: str) -> "SampleRef": + """Parse a string produced by :meth:`encode`.""" + parts = value.split(":") + if len(parts) != 4: + raise ValueError(f"Invalid sample reference: {value!r}") + backend_id, dataset_id, split, sample_id = parts + return cls( + backend_id=backend_id, + dataset_id=dataset_id, + split=None if split == "_" else split, + sample_id=sample_id, + ) + + +@dataclass(frozen=True) +class ParaviewArtifact: + """A ParaView-readable artifact produced from a PLAID sample. + + For time-dependent samples, ``cgns_path`` points to a ``.cgns.series`` + sidecar file that groups multiple CGNS files into a single time sequence. + For single-timestep samples, it points to the single CGNS file directly. + + Attributes: + artifact_id: Stable identifier used in API routes. Derived from the + cache key. + cgns_path: Path to the file ParaView should open. Either a + ``.cgns.series`` sidecar (multi-time) or a ``.cgns`` file. + state_path: Optional ParaView state file (``.pvsm``) providing a + reasonable default scene. + metadata_path: Optional JSON metadata file describing the artifact. + cache_key: Deterministic SHA256 key over the artifact inputs. + created: ``True`` if the artifact was newly created, ``False`` if it + was already present in the cache. + """ + + artifact_id: str + cgns_path: Path + state_path: Path | None + metadata_path: Path | None + cache_key: str + created: bool + + +# --------------------------------------------------------------------------- +# API response models +# --------------------------------------------------------------------------- + + +class DatasetInfo(BaseModel): + """Summary information about an available dataset. + + ``backend_id`` identifies the loading mode: ``"disk"`` for datasets + opened with :func:`plaid.storage.init_from_disk` and ``"hub"`` for + Hugging Face repositories streamed through + :func:`plaid.storage.init_streaming_from_hub`. Streamed datasets do + not always expose a total sample count and may need to be navigated + sequentially through a streaming cursor. + """ + + dataset_id: str + backend_id: str + path: str + has_infos: bool = False + has_problem_definitions: bool = False + + +class DatasetDetail(DatasetInfo): + """Full detail view of a dataset. + + ``splits`` maps each split name to its sample count. The count is + ``None`` for streaming datasets where the total is unknown. 
+ """ + + splits: dict[str, int | None] = Field(default_factory=dict) + infos: dict | None = None + problem_definitions: list[str] = Field(default_factory=list) + + +class SampleRefDTO(BaseModel): + """Serializable form of :class:`SampleRef` used by the API.""" + + backend_id: str + dataset_id: str + split: str | None + sample_id: str + encoded: str + + @classmethod + def from_ref(cls, ref: SampleRef) -> "SampleRefDTO": + """Build the DTO from a :class:`SampleRef`.""" + return cls( + backend_id=ref.backend_id, + dataset_id=ref.dataset_id, + split=ref.split, + sample_id=ref.sample_id, + encoded=ref.encode(), + ) + + +class SampleSummary(BaseModel): + """Minimal metadata describing a PLAID sample.""" + + ref: SampleRefDTO + n_times: int + time_values: list[float] + bases: list[str] + zones_by_base: dict[str, list[str]] = Field(default_factory=dict) + globals: dict[str, str] = Field(default_factory=dict) + fields_by_base: dict[str, list[str]] = Field(default_factory=dict) + + +class ValidationResult(BaseModel): + """Validation outcome for a PLAID sample.""" + + ref: SampleRefDTO + ok: bool + warnings: list[str] = Field(default_factory=list) + errors: list[str] = Field(default_factory=list) + + +class ArtifactInfo(BaseModel): + """Public view of a :class:`ParaviewArtifact`.""" + + artifact_id: str + cache_key: str + created: bool + cgns_path: str + state_path: str | None + metadata_path: str | None + is_time_series: bool + n_files: int + + +class ViewerUrl(BaseModel): + """Response model for the ``viewer-url`` endpoint.""" + + artifact_id: str + url: str diff --git a/src/plaid/viewer/preferences.py b/src/plaid/viewer/preferences.py new file mode 100644 index 00000000..220759c3 --- /dev/null +++ b/src/plaid/viewer/preferences.py @@ -0,0 +1,87 @@ +"""Persistent user preferences for the dataset viewer. + +The viewer stores a tiny JSON document under the OS-standard user config +directory so a handful of settings (currently only the last local +``datasets_root``) survive across sessions. The file is best-effort: +read/write errors are silently swallowed so a broken preferences file +never prevents the viewer from starting. + +Location: ``$XDG_CONFIG_HOME/plaid/viewer.json`` (falling back to +``~/.config/plaid/viewer.json``), overridable by setting +``PLAID_VIEWER_CONFIG_FILE``. 
+""" + +from __future__ import annotations + +import json +import logging +import os +from pathlib import Path + +logger = logging.getLogger(__name__) + + +def _preferences_path() -> Path: + """Return the path to the persistent preferences file.""" + override = os.environ.get("PLAID_VIEWER_CONFIG_FILE") + if override: + return Path(override).expanduser() + base = os.environ.get("XDG_CONFIG_HOME") + root = Path(base).expanduser() if base else Path.home() / ".config" + return root / "plaid" / "viewer.json" + + +def load_preferences() -> dict[str, object]: + """Return the persisted preferences dict, or an empty dict on failure.""" + path = _preferences_path() + if not path.is_file(): + return {} + try: + return json.loads(path.read_text()) + except (OSError, json.JSONDecodeError) as exc: # noqa: BLE001 + logger.debug("Ignoring unreadable viewer preferences at %s: %s", path, exc) + return {} + + +def save_preferences(data: dict[str, object]) -> None: + """Persist ``data`` to the preferences file, creating parents as needed.""" + path = _preferences_path() + try: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(data, indent=2, sort_keys=True)) + except OSError as exc: # noqa: BLE001 + logger.debug("Failed to persist viewer preferences to %s: %s", path, exc) + + +def update_preferences(**updates: object) -> dict[str, object]: + """Merge ``updates`` into the persisted preferences and return the result. + + Keys whose value is ``None`` are removed from the stored document so + clearing a setting (e.g. the datasets root) does not leave a stale + entry behind. + """ + current = load_preferences() + for key, value in updates.items(): + if value is None: + current.pop(key, None) + else: + current[key] = value + save_preferences(current) + return current + + +def get_last_datasets_root() -> Path | None: + """Return the persisted last-used datasets root, or ``None``.""" + value = load_preferences().get("datasets_root") + if not isinstance(value, str) or not value: + return None + candidate = Path(value).expanduser() + return candidate if candidate.is_dir() else None + + +def set_last_datasets_root(path: Path | str | None) -> None: + """Persist (or clear) the last-used datasets root.""" + if path is None: + update_preferences(datasets_root=None) + return + update_preferences(datasets_root=str(Path(path).expanduser().resolve())) diff --git a/src/plaid/viewer/services/__init__.py b/src/plaid/viewer/services/__init__.py new file mode 100644 index 00000000..c79ceef3 --- /dev/null +++ b/src/plaid/viewer/services/__init__.py @@ -0,0 +1,13 @@ +"""Services for the dataset viewer.""" + +from plaid.viewer.services.paraview_artifact_service import ( + ParaviewArtifactService, + ensure_paraview_artifact, +) +from plaid.viewer.services.plaid_dataset_service import PlaidDatasetService + +__all__ = [ + "ParaviewArtifactService", + "PlaidDatasetService", + "ensure_paraview_artifact", +] diff --git a/src/plaid/viewer/services/paraview_artifact_service.py b/src/plaid/viewer/services/paraview_artifact_service.py new file mode 100644 index 00000000..e9e4488e --- /dev/null +++ b/src/plaid/viewer/services/paraview_artifact_service.py @@ -0,0 +1,296 @@ +"""Produce ParaView-readable artifacts from PLAID samples. + +This module is the one place in PLAID that writes CGNS files on disk. 
It +delegates the actual CGNS export to PLAID (``Sample.save_to_dir`` writes one +CGNS per timestep under ``meshes/``), then adds: + +* A ``.cgns.series`` sidecar JSON file that ParaView's ``vtkCGNSReader`` / + ``vtkCGNSFileSeriesReader`` understands for multi-timestep samples. +* A deterministic artifact id derived from a SHA256 cache key so the same + inputs always resolve to the same folder. +* An optional ``scene.pvsm`` placeholder for future preset work. +* A ``metadata.json`` describing the artifact. +""" + +from __future__ import annotations + +import hashlib +import json +import logging +import shutil +from dataclasses import dataclass +from pathlib import Path + +from plaid.viewer.models import ParaviewArtifact, SampleRef +from plaid.viewer.services.plaid_dataset_service import PlaidDatasetService + +logger = logging.getLogger(__name__) + +EXPORT_VERSION = "1" +ARTIFACT_TYPE = "raw" + + +@dataclass(frozen=True) +class _ArtifactLayout: + """Internal paths for a single artifact folder.""" + + root: Path + meshes_dir: Path + series_path: Path + single_cgns_path: Path + metadata_path: Path + state_path: Path + + +def _plaid_version() -> str: + try: + from importlib.metadata import PackageNotFoundError, version + + return version("pyplaid") + except PackageNotFoundError: + return "unknown" + + +def _build_cache_key( + ref: SampleRef, *, export_version: str, extra: dict[str, str] | None = None +) -> str: + """Return a deterministic SHA256 cache key for a sample export.""" + payload = { + "backend_id": ref.backend_id, + "dataset_id": ref.dataset_id, + "split": ref.split, + "sample_id": ref.sample_id, + "export_mode": "default", + "artifact_type": ARTIFACT_TYPE, + "plaid_version": _plaid_version(), + "export_version": export_version, + } + if extra: + payload["extra"] = dict(sorted(extra.items())) + digest = hashlib.sha256( + json.dumps(payload, sort_keys=True, separators=(",", ":")).encode("utf-8") + ).hexdigest() + return digest + + +def _artifact_layout( + cache_root: Path, ref: SampleRef, cache_key: str +) -> _ArtifactLayout: + split = ref.split if ref.split is not None else "_default" + root = ( + cache_root + / "datasets" + / ref.dataset_id + / split + / ref.sample_id + / cache_key[:16] + ) + return _ArtifactLayout( + root=root, + meshes_dir=root / "meshes", + series_path=root / "meshes.cgns.series", + single_cgns_path=root / "mesh.cgns", + metadata_path=root / "metadata.json", + state_path=root / "scene.pvsm", + ) + + +def _write_series_sidecar( + series_path: Path, cgns_files: list[tuple[Path, float]] +) -> None: + """Write a ParaView ``.cgns.series`` sidecar for the given file list. + + Each entry's ``name`` is stored as a POSIX-style path relative to the + sidecar file so ``vtkCGNSFileSeriesReader`` can resolve it consistently + across platforms. Notably, time-series CGNS files live in the + ``meshes/`` subdirectory, so we keep that prefix instead of only the + file name. + """ + payload = { + "file-series-version": "1.0", + "files": [ + {"name": Path(path).as_posix(), "time": time} for path, time in cgns_files + ], + } + series_path.write_text(json.dumps(payload, indent=2)) + + +def _collect_time_values(sample) -> list[float]: + data = getattr(sample.features, "data", None) + if not data: + return [] + return sorted(float(t) for t in data.keys()) + + +class ParaviewArtifactService: + """Create and look up ParaView-readable artifacts in a cache directory. + + Args: + dataset_service: Used to load :class:`plaid.Sample` instances. + cache_root: Root of the artifact cache. 
Usually owned by a + :class:`plaid.viewer.cache.CacheRoot` instance. + export_version: Opaque string included in the cache key. Bump this + whenever the export logic changes in a backwards-incompatible way. + extra_cache_key_fields: Extra fields to mix into the cache key (for + example to invalidate artifacts when a preset template changes). + """ + + def __init__( + self, + dataset_service: PlaidDatasetService, + cache_root: Path, + *, + export_version: str = EXPORT_VERSION, + extra_cache_key_fields: dict[str, str] | None = None, + ) -> None: + self._dataset_service = dataset_service + self._cache_root = Path(cache_root) + self._cache_root.mkdir(parents=True, exist_ok=True) + self._export_version = export_version + self._extra = dict(extra_cache_key_fields or {}) + self._by_id: dict[str, ParaviewArtifact] = {} + # Path of the most recently ensured artifact. The cache keeps at most + # one artifact on disk at any time: once VTK has read the CGNS file + # into memory (``vtkCGNSReader.Update()`` in the trame pipeline), the + # on-disk copy is no longer needed, so we delete it as soon as the + # user asks for another sample. + self._current_root: Path | None = None + + # ------------------------------------------------------------ Public API + + def ensure_artifact( + self, ref: SampleRef, *, force: bool = False + ) -> ParaviewArtifact: + """Return a :class:`ParaviewArtifact` for ``ref``, creating it if needed. + + The cache holds at most one artifact: any previously-ensured artifact + whose layout root differs from ``ref``'s is removed from disk. + """ + cache_key = _build_cache_key( + ref, export_version=self._export_version, extra=self._extra + ) + layout = _artifact_layout(self._cache_root, ref, cache_key) + + # Evict the previous artifact (if any) as soon as the user requests + # a different one. ``force`` always rebuilds the current one. + if ( + self._current_root is not None + and self._current_root != layout.root + and self._current_root.exists() + ): + shutil.rmtree(self._current_root, ignore_errors=True) + self._by_id.clear() + if force and layout.root.exists(): + shutil.rmtree(layout.root) + + if layout.metadata_path.is_file() and not force: + artifact = self._load_existing(layout, cache_key) + else: + layout.root.mkdir(parents=True, exist_ok=True) + artifact = self._create(ref, layout, cache_key) + self._by_id = {artifact.artifact_id: artifact} + self._current_root = layout.root + return artifact + + def get(self, artifact_id: str) -> ParaviewArtifact: + """Return a previously-created artifact by id. + + Raises: + KeyError: If no artifact with this id has been created. + """ + if artifact_id not in self._by_id: + raise KeyError(f"Unknown artifact id: {artifact_id}") + return self._by_id[artifact_id] + + # -------------------------------------------------------------- Internals + + def _create( + self, + ref: SampleRef, + layout: _ArtifactLayout, + cache_key: str, + ) -> ParaviewArtifact: + sample = self._dataset_service.load_sample(ref) + times = _collect_time_values(sample) + + layout.meshes_dir.mkdir(exist_ok=True) + # PLAID writes one CGNS per timestep as ``meshes/mesh_{i:09d}.cgns``. 
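+        # e.g. meshes/mesh_000000000.cgns, meshes/mesh_000000001.cgns, ... (illustrative names).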
+ sample.save_to_dir(layout.root, overwrite=True) + + cgns_files = sorted(layout.meshes_dir.glob("mesh_*.cgns")) + if not cgns_files: + raise RuntimeError( + f"PLAID produced no CGNS files for sample {ref.encode()}" + ) + + is_time_series = len(cgns_files) > 1 or len(times) > 1 + if is_time_series: + pairs = [ + (layout.meshes_dir.relative_to(layout.root) / f.name, t) + for f, t in zip( + cgns_files, times or range(len(cgns_files)), strict=False + ) + ] + # Reformat to full-path-relative-to-series-file entries. + _write_series_sidecar( + layout.series_path, + [(Path("meshes") / pair[0].name, float(pair[1])) for pair in pairs], + ) + cgns_path = layout.series_path + else: + # Move the single CGNS file up one level for convenience. + cgns_files[0].replace(layout.single_cgns_path) + cgns_path = layout.single_cgns_path + + metadata = { + "artifact_type": ARTIFACT_TYPE, + "cache_key": cache_key, + "export_version": self._export_version, + "plaid_version": _plaid_version(), + "sample_ref": { + "backend_id": ref.backend_id, + "dataset_id": ref.dataset_id, + "split": ref.split, + "sample_id": ref.sample_id, + }, + "cgns_path": str(cgns_path.relative_to(layout.root)), + "is_time_series": is_time_series, + "n_files": len(cgns_files), + "time_values": list(times), + } + layout.metadata_path.write_text(json.dumps(metadata, indent=2)) + + return ParaviewArtifact( + artifact_id=cache_key[:16], + cgns_path=cgns_path, + state_path=None, + metadata_path=layout.metadata_path, + cache_key=cache_key, + created=True, + ) + + @staticmethod + def _load_existing(layout: _ArtifactLayout, cache_key: str) -> ParaviewArtifact: + metadata = json.loads(layout.metadata_path.read_text()) + cgns_path = layout.root / metadata["cgns_path"] + state_path = layout.state_path if layout.state_path.is_file() else None + return ParaviewArtifact( + artifact_id=cache_key[:16], + cgns_path=cgns_path, + state_path=state_path, + metadata_path=layout.metadata_path, + cache_key=cache_key, + created=False, + ) + + +def ensure_paraview_artifact( + sample_ref: SampleRef, + *, + cache_dir: Path, + dataset_service: PlaidDatasetService, + force: bool = False, +) -> ParaviewArtifact: + """Functional wrapper around :meth:`ParaviewArtifactService.ensure_artifact`.""" + service = ParaviewArtifactService(dataset_service, cache_dir) + return service.ensure_artifact(sample_ref, force=force) diff --git a/src/plaid/viewer/services/plaid_dataset_service.py b/src/plaid/viewer/services/plaid_dataset_service.py new file mode 100644 index 00000000..2d13f3e1 --- /dev/null +++ b/src/plaid/viewer/services/plaid_dataset_service.py @@ -0,0 +1,1162 @@ +"""Dataset discovery and sample introspection for the PLAID viewer. + +This service owns all PLAID-facing logic used by the viewer: + +- Discover datasets under a configured root directory. +- Load a split-wise ``(dataset_dict, converter_dict)`` pair through + :func:`plaid.storage.init_from_disk` and cache it for subsequent calls. +- Materialize PLAID :class:`plaid.Sample` instances via + ``converter.to_plaid(dataset, index)``, regardless of the underlying + backend (``hf_datasets``, ``cgns``, ``zarr`` ...). +- Summarize sample contents (bases, zones, fields, times, scalars). +- Report basic validation status via :meth:`Sample.check_completeness`. 
+""" + +from __future__ import annotations + +import json +import logging +from dataclasses import dataclass, field +from functools import lru_cache +from pathlib import Path +from typing import Any, Iterator + +from plaid.viewer.config import ViewerConfig +from plaid.viewer.models import ( + DatasetDetail, + DatasetInfo, + SampleRef, + SampleRefDTO, + SampleSummary, + ValidationResult, +) + +logger = logging.getLogger(__name__) + + +# Sentinel ``sample_id`` used for streaming datasets, where the only +# addressable sample is "the one currently produced by the iterator". +STREAM_CURSOR_ID = "cursor" + + +@dataclass +class _StreamCursor: + """Forward-only cursor over a streaming (``IterableDataset``) split. + + Streaming datasets returned by + :func:`plaid.storage.init_streaming_from_hub` do not support + indexing or ``len``. This cursor consumes the underlying iterable + one sample at a time and caches the most recently produced raw + record so repeated ``load_sample`` calls (e.g. when the UI loads + summary then full sample) do not advance the stream. + """ + + iterator: Iterator[Any] | None = None + position: int = -1 # -1 means "no sample fetched yet". + current_record: Any | None = None + exhausted: bool = False + extras: dict = field(default_factory=dict) + + +def _safe_list_dir(path: Path) -> list[Path]: + if not path.is_dir(): + return [] + return sorted(p for p in path.iterdir()) + + +def _array_preview(value, *, max_items: int = 6) -> str | None: + """Return a short string preview of a numpy-like array value.""" + if value is None: + return None + try: + import numpy as np # noqa: PLC0415 + except ImportError: # pragma: no cover - numpy is a transitive dep + return None + try: + arr = np.asarray(value) + except Exception: # noqa: BLE001 + return None + if arr.size == 0: + return "[]" + flat = arr.ravel() + if flat.size <= max_items: + return np.array2string(arr, separator=", ", threshold=max_items + 1) + head = np.array2string(flat[:max_items], separator=", ") + return f"{head[:-1]}, ...] (total {flat.size} values)" + + +def _collect_data_arrays(cgns_node) -> list[dict[str, object]]: + """Recursively collect ``DataArray_t`` descriptors under ``cgns_node``. + + Each entry contains the array name, its shape as a list, dtype as a + string, and a short string preview of the values. + """ + try: + from CGNS.PAT import cgnskeywords as CK # noqa: PLC0415 + except ImportError: # pragma: no cover + return [] + + entries: list[dict[str, object]] = [] + + def _walk(node) -> None: + name, value, children, label = node + if label == CK.DataArray_ts: + shape = list(getattr(value, "shape", ())) if value is not None else [] + dtype = str(getattr(value, "dtype", "")) + entries.append( + { + "name": name, + "shape": shape, + "dtype": dtype, + "preview": _array_preview(value), + } + ) + return + for child in children or []: + _walk(child) + + for child in cgns_node[2] or []: + _walk(child) + return entries + + +class PlaidDatasetService: + """High-level access to PLAID datasets stored under a root directory. + + A dataset is a subdirectory of ``config.datasets_root`` that contains a + ``data/`` directory readable by :func:`plaid.storage.init_from_disk`. + The function returns a ``dataset_dict`` and a ``converter_dict`` keyed + by split name; the viewer iterates splits and addresses samples by + integer index in ``range(len(dataset_dict[split]))``. 
+ """ + + def __init__(self, config: ViewerConfig) -> None: + self._config = config + # Datasets root is kept on the service (not on the frozen config) + # so it can be changed at runtime through ``set_datasets_root``. + # ``None`` means no root has been selected yet: discovery methods + # return empty lists and the UI is expected to prompt the user. + self._datasets_root: Path | None = ( + Path(config.datasets_root) if config.datasets_root is not None else None + ) + # Sandbox for interactive root selection. Defaults to the user's + # home directory when no explicit ``browse_roots`` is configured. + # The configured ``datasets_root`` is always implicitly allowed so + # ``list_subdirs`` can start from there. + browse_roots: list[Path] = [Path(p).expanduser() for p in config.browse_roots] + if not browse_roots: + browse_roots = [Path.home()] + if self._datasets_root is not None: + # Make sure the startup root is always reachable even if + # ``browse_roots`` is more restrictive. + browse_roots.append(self._datasets_root) + self._browse_roots: tuple[Path, ...] = tuple( + dict.fromkeys(p.resolve() for p in browse_roots) + ) + # Cache of (dataset_dict, converter_dict) keyed by dataset_id to + # avoid re-parsing large arrow/zarr datasets on every call. + self._store_cache: dict[str, tuple[dict, dict]] = {} + # Registered Hugging Face Hub repositories that should be exposed + # as datasets through :func:`plaid.storage.init_streaming_from_hub`. + # The ``dataset_id`` used throughout the viewer is the raw + # ``repo_id`` string (e.g. ``"PLAID-lib/VKI-LS59"``), which never + # collides with a local directory name (it always contains a + # forward slash). + self._hub_repos: list[str] = [] + # Per-(dataset_id, split) streaming cursors. Streaming datasets + # are ``datasets.IterableDataset`` instances without ``__len__`` + # so we cannot index them. We maintain a forward-only cursor + # instead: ``_cursors[(dataset_id, split)] = (iterator, position, + # cached_sample)``. ``Next`` consumes the iterator and advances + # ``position``; ``Reset`` discards the iterator so a fresh one is + # built on the next access. + self._cursors: dict[tuple[str, str], _StreamCursor] = {} + # User-selected feature filter per dataset. ``None`` means "no + # filter" (load every feature, current default behaviour). An + # empty list means "all features unselected". + self._features: dict[str, list[str] | None] = {} + # Memoised ``(constant_feature_keys, variable_feature_keys)`` per + # dataset, retrieved through ``load_metadata_from_disk`` or + # ``load_metadata_from_hub``. Used to (a) populate the UI + # checkbox list through :meth:`list_available_features` and (b) + # expand user-selected feature paths with + # :func:`plaid.utils.cgns_helper.update_features_for_CGNS_compatibility` + # before handing them to ``init_streaming_from_hub`` (which, unlike + # :meth:`Converter.to_plaid`, does not expand features by itself). + self._feature_metadata: dict[str, tuple[list[str], list[str]]] = {} + # Memoised per-split feature catalogue for a dataset. Unlike + # ``_feature_metadata`` (which aggregates constants across + # splits so the UI can offer a union of fields), this mapping + # preserves the split boundary so :meth:`load_sample` can + # filter the user's selection down to what a specific split + # actually carries. ``PlaidSampleConverter.to_plaid`` otherwise + # raises ``KeyError('Missing features in dataset/converter: + # ...')`` whenever the request names a path that the split in + # hand does not know about. 
+ self._split_feature_metadata: dict[str, dict[str, set[str]]] = {} + + # ----------------------------------------------------------- Discovery + + @property + def datasets_root(self) -> Path | None: + """Return the currently active datasets root, or ``None``.""" + return self._datasets_root + + @property + def browse_roots(self) -> tuple[Path, ...]: + """Return the sandbox directories for interactive path selection.""" + return self._browse_roots + + def set_datasets_root(self, path: Path | str | None) -> Path | None: + """Change the active datasets root at runtime. + + The new path (when not ``None``) must exist, be a directory, and be + located under one of ``browse_roots``. All per-dataset caches are + invalidated so the next discovery call reflects the new root. + + Args: + path: The new datasets root. ``None`` clears the current root. + + Returns: + The resolved new datasets root, or ``None`` if cleared. + + Raises: + ValueError: If the path does not exist, is not a directory, or + escapes ``browse_roots``. + """ + # Deferred import so the service module stays importable without + # write access to the user config directory (e.g. in read-only + # CI sandboxes that don't touch ``set_datasets_root`` anyway). + from plaid.viewer.preferences import ( # noqa: PLC0415 + set_last_datasets_root, + ) + + if path is None: + self._datasets_root = None + self._store_cache.clear() + set_last_datasets_root(None) + return None + resolved = Path(path).expanduser().resolve() + if not resolved.is_dir(): + raise ValueError(f"Not a directory: {resolved}") + self._ensure_within_browse_roots(resolved) + self._datasets_root = resolved + self._store_cache.clear() + # Persist the new root so the next launch of the viewer picks it + # up automatically when ``--datasets-root`` is not provided. + set_last_datasets_root(resolved) + return resolved + + def list_subdirs(self, path: Path | str | None = None) -> dict[str, object]: + """Return immediate subdirectories of ``path`` for the file browser. + + Each entry is tagged with ``is_plaid_candidate`` (``True`` when it + looks like a PLAID dataset, i.e. contains a ``data/`` subdirectory) + so the UI can highlight it. The returned ``path`` is always an + absolute resolved path inside ``browse_roots``. + + Args: + path: Directory to list. When ``None`` the first browse root is + used (typically ``$HOME``). + + Returns: + A dict ``{"path": str, "parent": str | None, + "entries": [{"name": str, "path": str, + "is_plaid_candidate": bool}, ...]}``. + + Raises: + ValueError: If ``path`` is not a directory or escapes the + sandbox. + """ + if path is None: + target = self._browse_roots[0] + else: + target = Path(path).expanduser().resolve() + if not target.is_dir(): + raise ValueError(f"Not a directory: {target}") + self._ensure_within_browse_roots(target) + entries: list[dict[str, object]] = [] + for entry in sorted(target.iterdir()): + if not entry.is_dir(): + continue + if entry.name.startswith("."): + continue + entries.append( + { + "name": entry.name, + "path": str(entry), + "is_plaid_candidate": (entry / "data").is_dir(), + } + ) + # Rank PLAID candidates first, then alphabetical (stable). 
+ entries.sort(key=lambda e: (not e["is_plaid_candidate"], e["name"].lower())) + parent: str | None = None + if any( + target != root and root in target.parents for root in self._browse_roots + ): + parent = str(target.parent) + elif ( + target.parent != target + and any( # pragma: no cover - alternate browse-root ancestry guard + target.parent == root or root in target.parent.parents + for root in self._browse_roots + ) + ): + parent = str(target.parent) + return { + "path": str(target), + "parent": parent, + "entries": entries, + } + + def _ensure_within_browse_roots(self, path: Path) -> None: + for root in self._browse_roots: + try: + path.relative_to(root) + except ValueError: + continue + return + roots = ", ".join(str(r) for r in self._browse_roots) + raise ValueError(f"Path {path} is outside the allowed browse roots ({roots}).") + + def list_datasets(self) -> list[DatasetInfo]: + """Return a summary of every dataset available to the viewer. + + Local datasets (subdirectories of ``datasets_root``) and registered + Hugging Face Hub repositories (added via :meth:`add_hub_dataset`) + are both included, in that order. + """ + infos: list[DatasetInfo] = [] + root = self._datasets_root + if root is not None: + for entry in _safe_list_dir(root): + if not entry.is_dir(): + continue + if not (entry / "data").is_dir(): + continue + infos.append( + DatasetInfo( + dataset_id=entry.name, + backend_id="disk", + path=str(entry), + has_infos=(entry / "infos.yaml").exists() + or (entry / "infos.json").exists(), + has_problem_definitions=( + entry / "problem_definitions" + ).is_dir(), + ) + ) + for repo_id in self._hub_repos: + infos.append( + DatasetInfo( + dataset_id=repo_id, + backend_id="hub", + path=f"hf://{repo_id}", + has_infos=False, + has_problem_definitions=False, + ) + ) + + return infos + + @property + def hub_repos(self) -> tuple[str, ...]: + """Return the list of registered Hugging Face Hub repositories.""" + return tuple(self._hub_repos) + + def add_hub_dataset(self, repo_id: str) -> str: + """Register a Hugging Face Hub dataset to stream from. + + The dataset is exposed through :func:`plaid.storage.init_streaming_from_hub` + and appears in :meth:`list_datasets` with ``dataset_id == repo_id``. + + Args: + repo_id: Hugging Face repository identifier, e.g. + ``"PLAID-lib/VKI-LS59"``. Must contain a ``/`` separator. + + Returns: + The normalised ``repo_id``. + + Raises: + ValueError: If ``repo_id`` is empty or does not look like a + ``namespace/name`` pair. + """ + normalised = (repo_id or "").strip() + if not normalised: + raise ValueError("repo_id must be a non-empty string.") + if "/" not in normalised: + raise ValueError( + f"repo_id {normalised!r} must be of the form 'namespace/name'." + ) + if normalised in self._hub_repos: + return normalised + self._hub_repos.append(normalised) + return normalised + + def remove_hub_dataset(self, repo_id: str) -> None: + """Unregister a previously added Hugging Face Hub dataset.""" + if repo_id in self._hub_repos: + self._hub_repos.remove(repo_id) + self._store_cache.pop(repo_id, None) + self._features.pop(repo_id, None) + self._feature_metadata.pop(repo_id, None) + # Drop any streaming cursors owned by the removed dataset. 
+ self._cursors = { + key: cur for key, cur in self._cursors.items() if key[0] != repo_id + } + + # ------------------------------------------------------- Feature filter + + def _load_feature_metadata(self, dataset_id: str) -> tuple[list[str], list[str]]: + """Return ``(constant_feature_keys, variable_feature_keys)`` for a dataset. + + Uses :func:`plaid.storage.common.reader.load_metadata_from_disk` for + local datasets and :func:`plaid.storage.common.reader.load_metadata_from_hub` + for registered Hugging Face Hub repositories. The result is + memoised on the service instance. + + Constant features are aggregated across splits (constant schemas + in PLAID are split-specific), variable features are global. + """ + if dataset_id in self._feature_metadata: + return self._feature_metadata[dataset_id] + # Deferred imports so the module stays importable without PLAID. + from plaid.storage.common.reader import ( # noqa: PLC0415 + load_metadata_from_disk, + load_metadata_from_hub, + ) + + if self._is_hub_dataset(dataset_id): + _flat_cst, variable_schema, constant_schema, _cgns_types = ( + load_metadata_from_hub(dataset_id) + ) + else: + base = self._dataset_dir(dataset_id) + _flat_cst, variable_schema, constant_schema, _cgns_types = ( + load_metadata_from_disk(str(base)) + ) + constant_keys: set[str] = set() + for split_const in (constant_schema or {}).values(): + constant_keys.update(split_const.keys()) + variable_keys = list((variable_schema or {}).keys()) + metadata = (sorted(constant_keys), sorted(variable_keys)) + self._feature_metadata[dataset_id] = metadata + # Build the per-split catalogue in one pass: variable features + # are global so every split shares them, but constant features + # are keyed by split. + per_split: dict[str, set[str]] = { + split: set(variable_keys) | set(split_const.keys()) + for split, split_const in (constant_schema or {}).items() + } + self._split_feature_metadata[dataset_id] = per_split + return metadata + + def _split_feature_keys(self, dataset_id: str, split_key: str) -> set[str]: + """Return the feature catalogue of a single split. + + Ensures the per-split mapping is populated (it is filled as a + side effect of :meth:`_load_feature_metadata`). Falls back to + the dataset-wide union when the split name is not recorded + (typical for streaming datasets that expose a single + ``__default__`` split). + """ + if dataset_id not in self._split_feature_metadata: + self._load_feature_metadata(dataset_id) + per_split = self._split_feature_metadata.get(dataset_id, {}) + if split_key in per_split: + return per_split[split_key] + constant_keys, variable_keys = self._load_feature_metadata(dataset_id) + return set(constant_keys) | set(variable_keys) + + def list_available_features(self, dataset_id: str) -> list[str]: + """Return the feature paths offered to the user for filtering. + + The viewer only exposes paths that are CGNS *fields* (i.e. what + :func:`plaid.containers.utils.get_feature_details_from_path` + classifies as ``type == "field"``). Globals, coordinates, + element connectivities, boundary conditions, etc. are hidden + because they are not what the user means when they want to + "filter the displayed features" in a 3D viewer. + + Paths ending in ``_times`` (time-series bookkeeping duplicates + of a field, e.g. ``Base_.../FlowSolution/Pressure_times``) are + also filtered out: they are artefacts of the temporal storage + layout, not distinct physical quantities the user would want to + toggle. 
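+
+ Example (given an already-constructed ``service``; the dataset id and
+ returned paths are illustrative)::
+
+     fields = service.list_available_features("my_dataset")
+     # e.g. ["Base/Zone/FlowSolution/Mach", "Base/Zone/FlowSolution/Pressure"]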
+ """ + # Deferred import - the helper lives in PLAID's containers module. + from plaid.containers.utils import ( # noqa: PLC0415 + get_feature_details_from_path, + ) + + constant_keys, variable_keys = self._load_feature_metadata(dataset_id) + candidates = set(constant_keys) | set(variable_keys) + fields: list[str] = [] + for path in candidates: + if path.endswith("_times"): + continue + try: + details = get_feature_details_from_path(path) + except Exception: # noqa: BLE001 - malformed path, skip + continue + # Only expose "genuine" field paths - i.e. those that carry + # a ``name`` entry in ``details``. Some variants returned by + # :func:`get_feature_details_from_path` are typed as + # ``"field"`` but describe a container (e.g. a + # ``FlowSolution_t`` node) rather than a specific data array, + # and therefore have no ``name``. Filtering on ``name`` + # removes those from the UI while keeping every real scalar + # / vector field the user can actually plot. + # ``GridLocation`` nodes are CGNS metadata (they describe + # *where* a field lives, e.g. ``Vertex`` vs ``CellCenter``) + # rather than a plottable field, so they must not appear in + # the viewer's feature selection. + name = details.get("name") + if details.get("type") == "field" and name and name != "GridLocation": + fields.append(path) + return sorted(fields) + + def get_features(self, dataset_id: str) -> list[str] | None: + """Return the active feature filter for ``dataset_id``. + + ``None`` means "no filter": every feature is loaded (default + behaviour). An explicit empty list means "no feature selected". + """ + return self._features.get(dataset_id) + + def set_features( + self, dataset_id: str, features: list[str] | None + ) -> list[str] | None: + """Set (or clear) the active feature filter for ``dataset_id``. + + Only the *user-visible* field paths (those returned by + :meth:`list_available_features`) are stored. Geometric supports + (coordinates, element connectivities, boundary conditions, + ``GridLocation`` metadata, ``_times`` bookkeeping paths, ...) + required to render the selected fields are handled transparently + by :meth:`Converter.to_plaid`, which runs + :func:`~plaid.utils.cgns_helper.update_features_for_CGNS_compatibility` + internally against its *own* per-split + ``constant_features`` / ``variable_features`` catalogues. We + therefore never pre-expand the selection here - doing so would + use the dataset-wide (union) catalogue and, on splits whose + data does not contain the selected fields, would hand PLAID a + list of coordinates *without the fields that justify them* and + trigger ``Missing features in dataset/converter`` in the CGNS + expander. + + For disk-backed datasets the filter is applied on every call to + :meth:`Converter.to_plaid` during :meth:`load_sample`. For + streaming (Hugging Face Hub) datasets it is injected into + :func:`plaid.storage.init_streaming_from_hub` *before* any + sample is consumed; we therefore invalidate the cached + ``(datasetdict, converterdict)`` and any open streaming cursors + so the next :meth:`_open` call rebuilds them with the new + feature list. + + Args: + dataset_id: Target dataset identifier. + features: Field paths to keep (subset of + :meth:`list_available_features`), or ``None`` to clear + the filter and load every feature. + + Returns: + The normalised, deduplicated feature list (``None`` when no + filter is active). + + Raises: + ValueError: If ``features`` contains paths not declared in + the dataset metadata. 
+ """ + if features is None: + normalised: list[str] | None = None + else: + normalised = sorted(dict.fromkeys(str(f) for f in features)) + all_keys = set(self._load_feature_metadata(dataset_id)[0]) | set( + self._load_feature_metadata(dataset_id)[1] + ) + unknown = [f for f in normalised if f not in all_keys] + if unknown: + raise ValueError( + f"Unknown features for dataset {dataset_id!r}: {unknown}" + ) + self._features[dataset_id] = normalised + # Invalidate store cache so streaming datasets rebuild their + # IterableDataset with the new feature list. For disk datasets + # this is not strictly required (features are applied on each + # ``to_plaid`` call) but keeping a single invalidation policy is + # simpler and does not hurt performance measurably. + self._store_cache.pop(dataset_id, None) + self._cursors = { + key: cur for key, cur in self._cursors.items() if key[0] != dataset_id + } + return normalised + + def is_streaming(self, dataset_id: str) -> bool: + """Return ``True`` when ``dataset_id`` is a Hugging Face Hub stream. + + Streaming datasets have no ``__len__`` on their splits and must be + navigated forward-only through :meth:`advance_stream_cursor` / + :meth:`reset_stream_cursor` rather than indexed. + """ + if not self._is_hub_dataset(dataset_id): + return False + try: + datasetdict, _ = self._open(dataset_id) + except Exception: # noqa: BLE001 + return True + return not all(hasattr(ds, "__len__") for ds in datasetdict.values()) + + def get_dataset(self, dataset_id: str) -> DatasetDetail: + """Return detailed information about a single dataset.""" + if self._is_hub_dataset(dataset_id): + splits = self._splits_with_counts(dataset_id) + return DatasetDetail( + dataset_id=dataset_id, + backend_id="hub", + path=f"hf://{dataset_id}", + has_infos=False, + has_problem_definitions=False, + splits=splits, + infos=None, + problem_definitions=[], + ) + base = self._dataset_dir(dataset_id) + splits = self._splits_with_counts(dataset_id) + pb_defs_dir = base / "problem_definitions" + pb_defs = ( + [ + p.stem + for p in _safe_list_dir(pb_defs_dir) + if p.suffix in {".yaml", ".yml"} + ] + if pb_defs_dir.is_dir() + else [] + ) + return DatasetDetail( + dataset_id=dataset_id, + backend_id="disk", + path=str(base), + has_infos=(base / "infos.yaml").exists() or (base / "infos.json").exists(), + has_problem_definitions=bool(pb_defs), + splits=splits, + infos=self._load_infos(base), + problem_definitions=pb_defs, + ) + + def list_samples(self, dataset_id: str) -> list[SampleRefDTO]: + """Return every sample reference available in a dataset. + + For disk-backed datasets, sample ids are the zero-based integer + indices used with ``converter.to_plaid(dataset, index)``. For + streaming datasets (Hugging Face Hub), each split contributes a + single reference whose ``sample_id`` is the + :data:`STREAM_CURSOR_ID` sentinel; the actual sample is obtained + by advancing the per-split cursor with + :meth:`advance_stream_cursor`. 
+ """ + datasetdict, _ = self._open(dataset_id) + streaming = self.is_streaming(dataset_id) + backend_id = "hub" if self._is_hub_dataset(dataset_id) else "disk" + + refs: list[SampleRef] = [] + for split, ds in datasetdict.items(): + split_key = None if split == "__default__" else split + if streaming: + refs.append( + SampleRef( + backend_id=backend_id, + dataset_id=dataset_id, + split=split_key, + sample_id=STREAM_CURSOR_ID, + ) + ) + continue + for index in range(len(ds)): + refs.append( + SampleRef( + backend_id=backend_id, + dataset_id=dataset_id, + split=split_key, + sample_id=str(index), + ) + ) + return [SampleRefDTO.from_ref(ref) for ref in refs] + + # --------------------------------------------------- Streaming cursors + + def stream_cursor_position(self, dataset_id: str, split: str | None) -> int: + """Return the current forward position of a streaming cursor. + + Returns ``-1`` before the first call to :meth:`advance_stream_cursor`. + """ + cursor = self._cursors.get(self._cursor_key(dataset_id, split)) + return cursor.position if cursor is not None else -1 + + def advance_stream_cursor(self, dataset_id: str, split: str | None) -> SampleRef: + """Consume the next record from the stream and return its ref. + + The returned :class:`SampleRef` always carries the + :data:`STREAM_CURSOR_ID` sentinel in its ``sample_id``; the + underlying record is cached on the service so a subsequent + :meth:`load_sample` call returns the freshly fetched sample. + + Raises: + StopIteration: If the underlying stream is exhausted. + """ + key = self._cursor_key(dataset_id, split) + cursor = self._cursors.get(key) + if cursor is None or cursor.iterator is None: + cursor = self._build_cursor(dataset_id, split) + self._cursors[key] = cursor + try: + record = next(cursor.iterator) + except StopIteration: + cursor.exhausted = True + raise + cursor.current_record = record + cursor.position += 1 + return SampleRef( + backend_id="hub", + dataset_id=dataset_id, + split=split, + sample_id=STREAM_CURSOR_ID, + ) + + def reset_stream_cursor(self, dataset_id: str, split: str | None) -> None: + """Rebuild a fresh iterator for ``(dataset_id, split)``. + + The cached record is discarded and the position reset to ``-1`` + so the next :meth:`advance_stream_cursor` call yields the first + sample again. + """ + key = self._cursor_key(dataset_id, split) + self._cursors[key] = self._build_cursor(dataset_id, split) + + @staticmethod + def _cursor_key(dataset_id: str, split: str | None) -> tuple[str, str]: + return dataset_id, split if split is not None else "__default__" + + def _build_cursor(self, dataset_id: str, split: str | None) -> _StreamCursor: + datasetdict, _ = self._open(dataset_id) + split_key = split if split is not None else "__default__" + if split_key not in datasetdict and len(datasetdict) == 1: + split_key = next(iter(datasetdict)) + if split_key not in datasetdict: + raise KeyError( + f"Split {split!r} not found in dataset {dataset_id!r}; " + f"available splits: {sorted(datasetdict.keys())}" + ) + return _StreamCursor(iterator=iter(datasetdict[split_key])) + + # -------------------------------------------------------------- Samples + + def load_sample(self, ref: SampleRef): + """Return a PLAID :class:`plaid.Sample` for the given reference. + + Uses ``converter.to_plaid(dataset, index)`` to rebuild the sample + from whatever backend store (hf_datasets, cgns, zarr) is in use. 
+ """ + datasetdict, converterdict = self._open(ref.dataset_id) + split_key = ref.split if ref.split is not None else "__default__" + if split_key not in datasetdict: + # Fallback: some converters return a single unnamed split. + if len(datasetdict) == 1: + split_key = next(iter(datasetdict)) + else: + raise KeyError( + f"Split {ref.split!r} not found in dataset {ref.dataset_id!r}; " + f"available splits: {sorted(datasetdict.keys())}" + ) + dataset = datasetdict[split_key] + converter = converterdict[split_key] + # Streaming datasets expose a forward-only cursor rather than + # random access. The viewer drives the cursor explicitly via + # ``advance_stream_cursor`` and then calls ``load_sample`` with + # ``sample_id == STREAM_CURSOR_ID`` to materialise the PLAID + # sample from the most recently consumed raw record. + if ref.sample_id == STREAM_CURSOR_ID: + cursor = self._cursors.get(self._cursor_key(ref.dataset_id, ref.split)) + if cursor is None or cursor.current_record is None: + # Auto-advance once so a fresh selection behaves like + # "show me the first sample". + self.advance_stream_cursor(ref.dataset_id, ref.split) + cursor = self._cursors[self._cursor_key(ref.dataset_id, ref.split)] + # Streaming converters use ``sample_to_plaid`` (single record) + # rather than ``to_plaid(dataset, index)`` (random access). + return converter.sample_to_plaid(cursor.current_record) + + try: + index = int(ref.sample_id) + except ValueError as exc: + raise ValueError( + f"Invalid sample id {ref.sample_id!r}; expected an integer index." + ) from exc + features = self._features.get(ref.dataset_id) + if features is None: + # No filter active: load every feature. + return converter.to_plaid(dataset, index) + # ``features`` is a (possibly empty) list: the filter IS active. + # We must not fall through to the unfiltered branch, otherwise + # an empty selection would load every feature instead of none. + # + # Feature schemas are split-specific in PLAID: the UI dropdown + # aggregates every split's catalogue, so a user-selected field + # may be absent from the current split. ``Converter.to_plaid`` + # runs :func:`~plaid.utils.cgns_helper.update_features_for_CGNS_compatibility` + # internally against its own per-split ``constant_features`` / + # ``variable_features`` and raises + # ``KeyError('Missing features in dataset/converter: ...')`` + # for any unknown path. We therefore intersect the user's + # field selection with the split's catalogue first. Geometric + # supports required to render the kept fields are added by the + # converter itself on the ``to_plaid`` call. + split_constant = set(getattr(converter, "constant_features", set())) + split_variable = set(getattr(converter, "variable_features", set())) + split_keys = split_constant | split_variable + selected = [f for f in features if f in split_keys] + # The split's feature catalogue contains more than the fields + # the user can toggle in the UI: it also carries CGNS + # bookkeeping paths (coordinates, element connectivities, + # ``GridLocation`` metadata, ``_times`` series, ...) and the + # paths backing the sample's globals / scalars. Those entries + # must always be loaded, otherwise the rendered sample would + # lose its mesh and the "Globals" panel would be empty. + # + # We therefore compute the set of "user-controllable" field + # paths (the same set the UI exposes through + # :meth:`list_available_features`) and re-inject *only* the + # remaining split paths. 
Filtering by + # ``set(user_visible) - set(selected)`` is not enough: we have + # to build the complement inside the current split so that + # constant fields the user deselected are genuinely dropped. + user_visible = set(self.list_available_features(ref.dataset_id)) + # ``_times`` bookkeeping paths are hidden from the UI but + # semantically follow their companion field: toggling ``sdf`` on + # or off must also toggle ``sdf_times``. Treat them as linked + # to their base path so deselecting a field genuinely drops + # both entries (and re-selecting a field adds both back). + user_visible_linked = user_visible | {f"{path}_times" for path in user_visible} + selected_linked = set(selected) | { + f"{path}_times" for path in selected if f"{path}_times" in split_keys + } + always_keep = split_keys - user_visible_linked + augmented = sorted(selected_linked | always_keep) + if not augmented: + # Split has no bookkeeping paths AND user-selected fields + # were all absent from this split: nothing sensible to + # filter with. Fall back to the unfiltered load so the user + # still sees *something* (the raw sample). + return converter.to_plaid(dataset, index) + try: + return converter.to_plaid(dataset, index, features=augmented) + except KeyError: + # ``augmented`` can itself contain paths that the CGNS + # expander or the HF bridge reject (bookkeeping entries not + # materialised as columns in the backend store). A + # ``KeyError("Missing features in …")`` from that code path + # should not be user-facing: degrade gracefully to an + # unfiltered load. + return converter.to_plaid(dataset, index) + + def get_sample_summary(self, ref: SampleRef) -> SampleSummary: + """Return a minimal summary of the PLAID sample.""" + sample = self.load_sample(ref) + times = self._time_keys(sample) + bases, zones_by_base, fields_by_base = self._describe_tree(sample, times) + globals_dict = { + name: str(sample.get_scalar(name)) for name in sample.get_scalar_names() + } + return SampleSummary( + ref=SampleRefDTO.from_ref(ref), + n_times=len(times), + time_values=list(times), + bases=bases, + zones_by_base=zones_by_base, + fields_by_base=fields_by_base, + globals=globals_dict, + ) + + def list_time_values(self, ref: SampleRef) -> list[float]: + """Return the sorted list of time values available for a sample. + + Thin wrapper around :meth:`plaid.Sample.features.get_all_time_values` + that always returns a ``list[float]`` (it may be empty for static + samples). + """ + sample = self.load_sample(ref) + try: + times = sample.features.get_all_time_values() + except Exception: # noqa: BLE001 - defensive, PLAID shouldn't raise + return [] + return sorted(float(t) for t in times) + + def describe_globals( + self, ref: SampleRef, *, time: float | None = None + ) -> list[dict[str, object]]: + """Return PLAID global scalars/tensors reported by the sample. + + Uses :meth:`plaid.Sample.get_global_names` to enumerate globals + and :meth:`plaid.Sample.get_global` to fetch each value, so only + the "real" globals exposed by PLAID's API are reported. The CGNS + bookkeeping arrays ``IterationValues`` and ``TimeValues`` (which + describe time steps, not physical scalars) are filtered out. + + Args: + ref: The sample to inspect. + time: Optional time value; when ``None`` the sample's first + available time (or the static value) is used. + + Returns: + A list of ``{"name": str, "shape": list[int], "dtype": str, + "preview": str | None}`` descriptors, one per global. 
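+
+ Example (the global name and values are illustrative)::
+
+     service.describe_globals(ref)
+     # [{"name": "angle_in", "shape": [], "dtype": "float64",
+     #   "preview": "42.5"}]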
+ """ + sample = self.load_sample(ref) + kwargs = {"time": time} if time is not None else {} + try: + names = sample.get_global_names(**kwargs) + except TypeError: + names = sample.get_global_names() + entries: list[dict[str, object]] = [] + for name in names: + if name in {"IterationValues", "TimeValues"}: + continue + try: + value = sample.get_global(name, **kwargs) + except TypeError: + value = sample.get_global(name) + except Exception: # noqa: BLE001 - skip unreadable globals + continue + shape = list(getattr(value, "shape", ())) if value is not None else [] + dtype = str(getattr(value, "dtype", type(value).__name__)) + entries.append( + { + "name": name, + "shape": shape, + "dtype": dtype, + "preview": _array_preview(value), + } + ) + return entries + + def describe_non_visual_bases( + self, ref: SampleRef + ) -> dict[str, list[dict[str, object]]]: + """Return data arrays of CGNS bases that carry no zones. + + Some datasets store auxiliary tensors (constants, global reference + values, look-up tables, ...) inside a CGNS base that has no + ``Zone_t`` children, so VTK cannot render them as geometry. This + method returns, for each zone-less base, a list of descriptors + ``{"name": str, "shape": list[int], "dtype": str, + "preview": str | None}`` suitable for display in the viewer. + + Args: + ref: The sample to inspect. + + Returns: + A mapping from base name to a list of data-array descriptors. + Bases that do contain zones are omitted. + """ + sample = self.load_sample(ref) + times = self._time_keys(sample) + if not times: + return {} + try: + from CGNS.PAT import cgnskeywords as CK # noqa: PLC0415 + from CGNS.PAT import cgnsutils as CU # noqa: PLC0415 + except ImportError: # pragma: no cover - defensive + return {} + tree = sample.features.data[times[0]] + summary: dict[str, list[dict[str, object]]] = {} + for base_node in ( + CU.hasChildType(tree, CK.CGNSBase_ts) or [] + ): # pragma: no cover - CGNS tree introspection + if CU.hasChildType(base_node, CK.Zone_ts): + continue + summary[base_node[0]] = _collect_data_arrays(base_node) + return summary + + def get_sample_validation(self, ref: SampleRef) -> ValidationResult: + """Check basic sample completeness using PLAID's built-in validator.""" + warnings: list[str] = [] + errors: list[str] = [] + try: + sample = self.load_sample(ref) + except Exception as exc: # noqa: BLE001 - surface error to API caller + return ValidationResult( + ref=SampleRefDTO.from_ref(ref), + ok=False, + errors=[f"Failed to load sample: {exc}"], + ) + try: + report = sample.check_completeness() + except Exception as exc: # noqa: BLE001 + return ValidationResult( + ref=SampleRefDTO.from_ref(ref), + ok=False, + errors=[f"Completeness check failed: {exc}"], + ) + ok = isinstance(report, str) and "error" not in report.lower() + if report and not ok: + errors.append(report) + elif report: + warnings.append(report) + return ValidationResult( + ref=SampleRefDTO.from_ref(ref), + ok=ok, + warnings=warnings, + errors=errors, + ) + + # -------------------------------------------------------------- Helpers + + def _dataset_dir(self, dataset_id: str) -> Path: + if self._datasets_root is None: + raise FileNotFoundError( + "No datasets root selected; call set_datasets_root first." 
+ ) + base = self._datasets_root / dataset_id + if not base.is_dir(): + raise FileNotFoundError(f"Dataset not found: {dataset_id}") + return base + + def _is_hub_dataset(self, dataset_id: str) -> bool: + """Return ``True`` when ``dataset_id`` refers to a registered HF repo.""" + return dataset_id in self._hub_repos + + def _open(self, dataset_id: str) -> tuple[dict, dict]: + """Load (and cache) ``(dataset_dict, converter_dict)`` for a dataset. + + Dispatches between :func:`plaid.storage.init_from_disk` for local + datasets and :func:`plaid.storage.init_streaming_from_hub` for + registered Hugging Face Hub repositories. + """ + if dataset_id in self._store_cache: + return self._store_cache[dataset_id] + if self._is_hub_dataset(dataset_id): + # Deferred import so the module can be loaded without PLAID present. + from plaid.storage import init_streaming_from_hub # noqa: PLC0415 + from plaid.utils.cgns_helper import ( # noqa: PLC0415 + update_features_for_CGNS_compatibility, + ) + + features = self._features.get(dataset_id) + # ``features is None`` means "no filter active" - let PLAID + # materialise every feature, as before. An *empty* list is + # a deliberate user choice ("show me only the geometry"): + # we hand PLAID the union of every constant feature path + # (so ``init_streaming_from_hub`` keeps the mesh and zone + # metadata) and nothing else. Passing ``features=[]`` + # directly is not an option because PLAID's ``if features`` + # gate treats empty lists as "unfiltered". + if features is None: + datasetdict, converterdict = init_streaming_from_hub(dataset_id) + else: + constant_keys, variable_keys = self._load_feature_metadata(dataset_id) + base_features = list(features) if features else list(constant_keys) + expanded_features = update_features_for_CGNS_compatibility( + base_features, constant_keys, variable_keys + ) + try: + datasetdict, converterdict = init_streaming_from_hub( + dataset_id, features=expanded_features + ) + except KeyError: + # ``expanded_features`` is derived from the + # dataset-wide metadata union and can therefore name + # paths that are not materialised as columns in a + # given split's HF table. The HF bridge then raises + # ``KeyError("Missing features in hf_dataset: …")``. + # Degrade gracefully to an unfiltered stream so the + # user still sees the geometry instead of a hard + # failure. + datasetdict, converterdict = init_streaming_from_hub(dataset_id) + else: + # Deferred import so the module can be loaded without PLAID present. + from plaid.storage import init_from_disk # noqa: PLC0415 + + base = self._dataset_dir(dataset_id) + datasetdict, converterdict = init_from_disk(str(base)) + # Normalise split-less case to a stable "__default__" key. 
+ if not datasetdict: + raise RuntimeError(f"Dataset {dataset_id!r} is empty.") + self._store_cache[dataset_id] = (datasetdict, converterdict) + return datasetdict, converterdict + + def _splits_with_counts(self, dataset_id: str) -> dict[str, int | None]: + """Return ``{split: len(ds)}``; ``None`` for streaming splits.""" + datasetdict, _ = self._open(dataset_id) + counts: dict[str, int | None] = {} + for split, ds in datasetdict.items(): + try: + counts[split] = len(ds) + except TypeError: + counts[split] = None + return counts + + @staticmethod + def _load_infos(base: Path) -> dict | None: + for candidate in (base / "infos.json", base / "infos.yaml", base / "infos.yml"): + if not candidate.is_file(): + continue + try: + text = candidate.read_text() + except OSError: + return None + if candidate.suffix == ".json": + try: + return json.loads(text) + except json.JSONDecodeError: + return None + try: + import yaml # type: ignore # noqa: PLC0415 + except ImportError: # pragma: no cover - pyyaml is transitive + return None + try: + return yaml.safe_load(text) + except yaml.YAMLError: + return None + return None + + @staticmethod + def _time_keys(sample) -> list[float]: + data = getattr(sample.features, "data", None) + if not data: + return [] + return sorted(float(t) for t in data.keys()) + + @staticmethod + def _describe_tree(sample, times: list[float]): + """Walk the CGNS tree of the first timestep and return bases, zones, fields.""" + bases: list[str] = [] + zones_by_base: dict[str, list[str]] = {} + fields_by_base: dict[str, list[str]] = {} + if not times: + return bases, zones_by_base, fields_by_base + tree = sample.features.data[times[0]] + # Deferred import - CGNS helpers live inside pyCGNS. + try: + from CGNS.PAT import cgnskeywords as CK # noqa: PLC0415 + from CGNS.PAT import cgnsutils as CU # noqa: PLC0415 + except ImportError: # pragma: no cover - defensive + return bases, zones_by_base, fields_by_base + for base_node in CU.hasChildType(tree, CK.CGNSBase_ts) or []: + base_name = base_node[0] + bases.append(base_name) + zones_by_base[base_name] = [] + field_names: set[str] = set() + for zone_node in CU.hasChildType(base_node, CK.Zone_ts) or []: + zones_by_base[base_name].append(zone_node[0]) + for sol_node in CU.hasChildType(zone_node, CK.FlowSolution_ts) or []: + for da in CU.hasChildType(sol_node, CK.DataArray_ts) or []: + field_names.add(da[0]) + fields_by_base[base_name] = sorted(field_names) + return bases, zones_by_base, fields_by_base + + +@lru_cache(maxsize=8) +def _cached_service(root: str, backend_id: str) -> PlaidDatasetService: + return PlaidDatasetService( + ViewerConfig(datasets_root=Path(root), backend_id=backend_id) + ) diff --git a/src/plaid/viewer/trame_app/__init__.py b/src/plaid/viewer/trame_app/__init__.py new file mode 100644 index 00000000..97c16f8f --- /dev/null +++ b/src/plaid/viewer/trame_app/__init__.py @@ -0,0 +1,5 @@ +"""Trame/ParaView visualization server for the dataset viewer.""" + +from plaid.viewer.trame_app.server import build_server + +__all__ = ["build_server"] diff --git a/src/plaid/viewer/trame_app/server.py b/src/plaid/viewer/trame_app/server.py new file mode 100644 index 00000000..7f39c910 --- /dev/null +++ b/src/plaid/viewer/trame_app/server.py @@ -0,0 +1,2071 @@ +"""Trame server for the dataset viewer. + +This module builds a self-contained trame application that lets users +browse PLAID datasets and visualize their samples. 
All UI is exposed as +trame/Vuetify widgets in a side drawer; the 3D view is a VTK *remote* +view (server-side rendering, streamed as images) driven by a lightweight +VTK pipeline (reader -> geometry -> mapper). Remote rendering avoids the +rare vtk.js rendering artefacts observed when geometry with several +disjoint 1D connected components (e.g. VKI-LS59 ``Base_1_2`` with two +airfoil profiles) is streamed to the browser. + + + +Architecture: + +- A :class:`PlaidDatasetService` is used to discover datasets and load + samples. +- A :class:`ParaviewArtifactService` converts a sample to a single CGNS + file (or ``.cgns.series`` sidecar for time-dependent samples). +- ``vtkCGNSReader`` (optionally wrapped in ``vtkCGNSFileSeriesReader``) feeds + the VTK pipeline. +- The user can colour the geometry by any point or cell field and + choose a colormap preset. + + +The server is started by :mod:`plaid.viewer.cli` but can also be used +as a library. +""" + +from __future__ import annotations + +import asyncio +import contextlib +import json +import logging +import os +from pathlib import Path + +from plaid.viewer.models import SampleRef +from plaid.viewer.services import ParaviewArtifactService, PlaidDatasetService +from plaid.viewer.services.plaid_dataset_service import STREAM_CURSOR_ID + +logger = logging.getLogger(__name__) + +_COLORMAPS = ["viridis", "plasma", "inferno", "magma", "coolwarm", "turbo", "jet"] + +_VTK_LOG_ROUTER_INSTALLED = False +_C_STDERR_REROUTED = False + + +def _select_initial_dataset_id( + configured_id: str | None, + local_dataset_ids: list[str], + hub_dataset_ids: list[str], +) -> str | None: + """Return the startup dataset id for the given discovered datasets. + + A CLI-provided id wins when it exists in either source list. Otherwise the + viewer keeps its historical default: first local dataset, then first Hub + dataset, then ``None``. + """ + if configured_id in (local_dataset_ids + hub_dataset_ids): + return configured_id + if local_dataset_ids: + return local_dataset_ids[0] + return hub_dataset_ids[0] if hub_dataset_ids else None + + +def _reroute_c_stderr() -> None: # pragma: no cover - process fd manipulation + """Permanently redirect the process's stderr file descriptor to /dev/null. + + VTK's CGNS reader and the underlying HDF5 library emit informational + messages such as ``Mismatch in number of children and child IDs read`` + directly via ``fprintf(stderr, ...)``. Those are not routed through + ``vtkOutputWindow`` and cannot be captured by a Python logger without + hijacking file descriptor 2. + + To keep Python's ``sys.stderr`` functional (pytest, tracebacks, etc.) we + save the current fd 2, reopen ``sys.stderr`` on top of the saved fd, and + only *then* redirect fd 2 itself to ``/dev/null``. C libraries that + write directly to ``stderr`` are silenced while Python ``print(..., + file=sys.stderr)`` and logging handlers keep working. + + Installed once per process. 
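+
+ Behavioural sketch (after the reroute has been installed)::
+
+     import sys
+
+     _reroute_c_stderr()
+     print("still visible", file=sys.stderr)  # goes through the saved fd
+     # fprintf(stderr, ...) from C extensions now ends up in /dev/null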
+ """ + global _C_STDERR_REROUTED + if _C_STDERR_REROUTED: + return + import sys # noqa: PLC0415 + + try: + saved_fd = os.dup(2) + except OSError: # pragma: no cover - no fd 2 + return + try: + sys.stderr.flush() + except Exception: # noqa: BLE001 + pass + try: + sys.stderr = os.fdopen(saved_fd, "w", buffering=1) + except OSError: # pragma: no cover - defensive + os.close(saved_fd) + return + devnull_fd = os.open(os.devnull, os.O_WRONLY) + os.dup2(devnull_fd, 2) + os.close(devnull_fd) + _C_STDERR_REROUTED = True + + +def _install_vtk_log_router() -> None: + """Route VTK / HDF5 warnings to the Python ``logger`` at DEBUG level. + + ``vtkCGNSReader`` (through HDF5) emits chatty but harmless warnings such + as ``Mismatch in number of children and child IDs read`` when opening + CGNS files that contain bases without zones (e.g. ``Global``). By default + VTK writes those to ``stderr`` through a ``vtkOutputWindow``, which + pollutes the trame server console. We redirect all VTK messages to the + Python logger so users can opt in with ``PLAID_VIEWER_LOG=DEBUG`` + without any noise at INFO level. + + Installed once per process. + """ + global _VTK_LOG_ROUTER_INSTALLED + if _VTK_LOG_ROUTER_INSTALLED: + return + try: + import vtk # noqa: PLC0415 + except ImportError: + return + + # ``vtkPythonStdStreamCaptureHelper`` is not available in every VTK wheel, + # so we subclass ``vtkOutputWindow`` in Python and forward all messages. + class _LoggingOutputWindow(vtk.vtkOutputWindow): # type: ignore[misc] + def DisplayText(self, text: str) -> None: # noqa: N802 - VTK API + logger.debug("vtk: %s", text.rstrip()) + + def DisplayErrorText(self, text: str) -> None: # noqa: N802 - VTK API + logger.debug("vtk error: %s", text.rstrip()) + + def DisplayWarningText(self, text: str) -> None: # noqa: N802 - VTK API + logger.debug("vtk warning: %s", text.rstrip()) + + def DisplayGenericWarningText( # noqa: N802 - VTK API + self, text: str + ) -> None: + logger.debug("vtk warning: %s", text.rstrip()) + + def DisplayDebugText(self, text: str) -> None: # noqa: N802 - VTK API + logger.debug("vtk debug: %s", text.rstrip()) + + vtk.vtkOutputWindow.SetInstance(_LoggingOutputWindow()) + # Also silence VTK's own warning channel entirely; the logger now owns it. + vtk.vtkObject.GlobalWarningDisplayOff() + # VTK 9 routes most reader warnings (e.g. CGNS ``Mismatch in number of + # children and child IDs read``) through loguru via ``vtkLogger``, which + # writes to stderr independently from ``vtkOutputWindow``. Silence that + # channel as well so the server console stays clean. + if hasattr(vtk, "vtkLogger"): + try: + vtk.vtkLogger.SetStderrVerbosity(vtk.vtkLogger.VERBOSITY_OFF) + except AttributeError: + # Some VTK builds expose ``vtkLogger`` but not this verbosity API. + # Ignore to keep compatibility and continue without hard failure. + pass + _VTK_LOG_ROUTER_INSTALLED = True + + +@contextlib.contextmanager +def _silence_stderr(): + """Temporarily redirect file descriptor 2 to ``/dev/null``. + + Needed around ``vtkCGNSReader`` updates because the CGNS C library + writes messages such as ``Mismatch in number of children and child IDs + read`` directly to ``stderr`` (via ``fprintf``), bypassing VTK's + ``vtkOutputWindow`` and therefore our Python logger override. 
+ """ + try: + saved = os.dup(2) + except OSError: # pragma: no cover - no fd 2 (unlikely) + yield + return + devnull_fd = os.open(os.devnull, os.O_WRONLY) + try: + os.dup2(devnull_fd, 2) + yield + finally: + os.dup2(saved, 2) + os.close(saved) + os.close(devnull_fd) + + +# --------------------------------------------------------------------------- +# VTK helpers +# --------------------------------------------------------------------------- + + +def _enable_all_selections(cgns_reader) -> None: + """Enable every base / point / cell array known to a ``vtkCGNSReader``. + + ``vtkCGNSReader`` selections are OFF by default for arrays (and for + any base beyond the first one) so the VTK output would otherwise miss + half of the data. We enable everything after ``UpdateInformation`` so + the UI can expose it to the user. + """ + cgns_reader.UpdateInformation() + cgns_reader.EnableAllBases() + cgns_reader.EnableAllPointArrays() + cgns_reader.EnableAllCellArrays() + + +def _disable_bases_on_reader(reader, base_names: list[str]) -> None: + """Disable the given bases on the reader's base selection. + + Keeps every other base enabled. Useful to hide zone-less CGNS bases + from ``vtkCGNSReader`` which otherwise logs ``No zones in base ...`` + warnings on every update. + """ + cgns = _cgns_reader_of(reader) + selection = cgns.GetBaseSelection() + for name in base_names: + if selection.ArrayExists(name): + selection.DisableArray(name) + cgns.Modified() + + +def _load_reader(cgns_path: Path): + """Return a ready-to-use VTK reader for ``cgns_path``. + + For a ``.cgns.series`` sidecar, the reader is wrapped in + ``vtkCGNSFileSeriesReader`` so ParaView's time controls work out of the + box. (Note: the generic ``vtkFileSeriesReader`` is not exposed by the + ``vtk`` PyPI wheel, only the CGNS-specialised series reader is.) + + All bases, point arrays and cell arrays are enabled by default; the + side drawer lets the user narrow the selection later. + """ + import vtk # noqa: PLC0415 + + if cgns_path.suffix == ".series": + payload = json.loads(cgns_path.read_text()) + entries = sorted( + payload.get("files", []), + key=lambda entry: float(entry.get("time", 0.0)), + ) + base_dir = cgns_path.parent + inner = vtk.vtkCGNSReader() + series = vtk.vtkCGNSFileSeriesReader() + series.SetReader(inner) + for entry in entries: + series.AddFileName(str((base_dir / entry["name"]).resolve())) + # ``vtkCGNSFileSeriesReader`` does not expose per-entry time setters: + # the timestep values are read from each CGNS file itself when the + # series reader pulls information from the underlying reader. + series.UpdateInformation() + inner.EnableAllBases() + inner.EnableAllPointArrays() + inner.EnableAllCellArrays() + # Do not call Update() here: the caller disables zone-less bases + # first (see ``_refresh_sample_view``) to avoid ``vtkCGNSReader`` + # logging ``No zones in base ...`` warnings. The pipeline's + # ``_apply_base_selection`` triggers the first Update(). 
+ return series + + reader = vtk.vtkCGNSReader() + reader.SetFileName(str(cgns_path)) + _enable_all_selections(reader) + return reader + + +def _cgns_reader_of(reader): + """Return the underlying ``vtkCGNSReader`` for a plain or series reader.""" + if hasattr(reader, "GetReader"): + return reader.GetReader() + return reader + + +def _selection_names(selection) -> list[str]: + """Return the array names exposed by a ``vtkDataArraySelection``.""" + return [selection.GetArrayName(i) for i in range(selection.GetNumberOfArrays())] + + +def _reader_bases_and_fields(reader) -> tuple[list[str], list[str], list[str]]: + """Return ``(bases, point_fields, cell_fields)`` exposed by the reader.""" + cgns = _cgns_reader_of(reader) + bases = _selection_names(cgns.GetBaseSelection()) + point_fields = _selection_names(cgns.GetPointDataArraySelection()) + cell_fields = _selection_names(cgns.GetCellDataArraySelection()) + return bases, point_fields, cell_fields + + +def _advance_reader_time(reader, time_value: float) -> None: + """Ask a VTK reader to update to the given time value. + + Works both on a plain ``vtkCGNSReader`` (static sample, no-op on the + reader itself) and on a ``vtkCGNSFileSeriesReader`` wrapping it. We call + ``UpdateTimeStep`` when available and otherwise fall back to the + executive's ``SetUpdateTimeStep`` API. Any failure is logged but does + not propagate to the UI. + """ + try: + with _silence_stderr(): + update_time_step = getattr(reader, "UpdateTimeStep", None) + if callable(update_time_step): + update_time_step(time_value) + else: + executive = reader.GetExecutive() + executive.SetUpdateTimeStep(0, time_value) + reader.Update() + except Exception as exc: # noqa: BLE001 - defensive, VTK may be strict + logger.warning("Failed to advance reader to time %s: %s", time_value, exc) + + +def _apply_base_selection(reader, active_bases: list[str]) -> None: + """Enable exactly ``active_bases`` on the reader's base selection.""" + cgns = _cgns_reader_of(reader) + selection = cgns.GetBaseSelection() + selection.DisableAllArrays() + for name in active_bases: + selection.EnableArray(name) + cgns.Modified() + with _silence_stderr(): + reader.Update() + + +def _list_point_and_cell_fields(dataset) -> tuple[list[str], list[str]]: + """Return the point and cell field names available on ``dataset``.""" + point_fields: set[str] = set() + cell_fields: set[str] = set() + + def _visit(obj): + if obj is None: + return + if hasattr(obj, "GetNumberOfBlocks"): + for i in range(obj.GetNumberOfBlocks()): + _visit(obj.GetBlock(i)) + return + pd = obj.GetPointData() if hasattr(obj, "GetPointData") else None + cd = obj.GetCellData() if hasattr(obj, "GetCellData") else None + if pd is not None: + for i in range(pd.GetNumberOfArrays()): + point_fields.add(pd.GetArrayName(i)) + if cd is not None: + for i in range(cd.GetNumberOfArrays()): + cell_fields.add(cd.GetArrayName(i)) + + _visit(dataset) + return sorted(point_fields), sorted(cell_fields) + + +def _compute_field_range( + dataset, field_name: str, association: str +) -> tuple[float, float]: + """Return the (min, max) range of ``field_name`` across ``dataset``.""" + lo = float("inf") + hi = float("-inf") + + def _visit(obj): + nonlocal lo, hi + if obj is None: + return + if hasattr(obj, "GetNumberOfBlocks"): + for i in range(obj.GetNumberOfBlocks()): + _visit(obj.GetBlock(i)) + return + data = obj.GetPointData() if association == "point" else obj.GetCellData() + if data is None: + return + arr = data.GetArray(field_name) + if arr is None: + return + r = 
arr.GetRange(-1) + lo = min(lo, r[0]) + hi = max(hi, r[1]) + + _visit(dataset) + if lo == float("inf"): + return 0.0, 1.0 + return lo, hi + + +def _show_scalar_bar_for_field( + scalar_bar, lut, field_name: str, association: str +) -> None: + """Display ``scalar_bar`` as the legend for the active coloured field.""" + scalar_bar.SetLookupTable(lut) + scalar_bar.SetTitle(f"{field_name} ({association})") + scalar_bar.SetVisibility(True) + + +def _hide_scalar_bar(scalar_bar) -> None: + """Hide ``scalar_bar`` when no scalar field is currently coloured.""" + scalar_bar.SetVisibility(False) + + +# --------------------------------------------------------------------------- +# Pipeline +# --------------------------------------------------------------------------- + + +class _VtkPipeline: # pragma: no cover - requires real VTK rendering/display stack + """Minimal reader -> (cut) -> (threshold) -> geometry -> actor pipeline.""" + + def __init__(self) -> None: + import vtk # noqa: PLC0415 + + self.render_window = vtk.vtkRenderWindow() + # Off-screen rendering is required on headless servers (no X + # display). It does not prevent the interactor from receiving + # events forwarded from the browser by ``VtkRemoteView``: the + # events are dispatched to the interactor style, which mutates + # the server-side camera before the next frame is streamed. + self.render_window.OffScreenRenderingOn() + self.renderer = vtk.vtkRenderer() + self.renderer.SetBackground(0.12, 0.12, 0.14) + self.render_window.AddRenderer(self.renderer) + self.interactor = vtk.vtkRenderWindowInteractor() + self.interactor.SetRenderWindow(self.render_window) + # Without an explicit interactor style, ``vtkRenderWindowInteractor`` + # does not translate mouse events into camera manipulation, so the + # remote view appears frozen in the browser even though events are + # correctly forwarded. ``vtkInteractorStyleTrackballCamera`` is the + # standard ParaView-like style (LMB rotate, MMB pan, wheel zoom). + interactor_style = vtk.vtkInteractorStyleTrackballCamera() + self.interactor.SetInteractorStyle(interactor_style) + self.interactor.Initialize() + self._interactor_style = interactor_style # keep a reference alive + + # ParaView-like orientation marker anchored in the bottom-left corner. + # The widget is attached to the server-side interactor so it is rendered + # directly into the frames streamed by ``VtkRemoteView``. + self.axes_actor = vtk.vtkAxesActor() + self.orientation_marker = vtk.vtkOrientationMarkerWidget() + self.orientation_marker.SetOrientationMarker(self.axes_actor) + self.orientation_marker.SetInteractor(self.interactor) + self.orientation_marker.SetViewport(0.0, 0.0, 0.18, 0.18) + self.orientation_marker.SetEnabled(1) + self.orientation_marker.InteractiveOff() + + self.reader = None + self.actor = vtk.vtkActor() + # Gouraud shading (per-vertex normals interpolated across the + # triangle) looks noticeably smoother than flat shading on curved + # surfaces. Combined with a ``vtkPolyDataNormals`` step below, it + # gives a nice continuous lighting on CFD meshes without changing + # the geometry. + self.actor.GetProperty().SetInterpolationToGouraud() + self.mapper = vtk.vtkCompositePolyDataMapper() + self.actor.SetMapper(self.mapper) + self.renderer.AddActor(self.actor) + + self.lut = vtk.vtkLookupTable() + self.lut.SetHueRange(0.667, 0.0) # blue -> red + self.lut.Build() + + # Colour legend for the selected point/cell field. It stays hidden + # until scalar colouring is enabled by the field dropdown. 
+ self.scalar_bar = vtk.vtkScalarBarActor() + self.scalar_bar.SetLookupTable(self.lut) + self.scalar_bar.SetNumberOfLabels(5) + self.scalar_bar.SetPosition(0.88, 0.08) + self.scalar_bar.SetWidth(0.1) + self.scalar_bar.SetHeight(0.35) + self.scalar_bar.GetTitleTextProperty().SetColor(1.0, 1.0, 1.0) + self.scalar_bar.GetLabelTextProperty().SetColor(1.0, 1.0, 1.0) + _hide_scalar_bar(self.scalar_bar) + self.renderer.AddActor2D(self.scalar_bar) + + self._current_dataset = None + + def load(self, cgns_path: Path) -> None: + """Load a new CGNS/series file and reset the pipeline.""" + self.reader = _load_reader(cgns_path) + self._rebuild() + + def update( + self, + *, + field: str | None, + association: str, + cmap: str, + show_edges: bool, + ) -> None: + """Rebuild the downstream pipeline with the current options.""" + if self.reader is None: + return + import vtk # noqa: PLC0415 + + pipeline_output = self.reader.GetOutputPort() + + geom = vtk.vtkCompositeDataGeometryFilter() + geom.SetInputConnection(pipeline_output) + geom.Update() + self._current_dataset = geom.GetOutput() + self.mapper.SetInputConnection(geom.GetOutputPort()) + + if field is not None: + self.mapper.SelectColorArray(field) + + if association == "point": + self.mapper.SetScalarModeToUsePointFieldData() + else: + self.mapper.SetScalarModeToUseCellFieldData() + self.mapper.SetColorModeToMapScalars() + self.mapper.ScalarVisibilityOn() + lo, hi = _compute_field_range(self.reader.GetOutput(), field, association) + self.lut = _build_lut(cmap, lo, hi) + self.mapper.SetLookupTable(self.lut) + self.mapper.SetScalarRange(lo, hi) + _show_scalar_bar_for_field(self.scalar_bar, self.lut, field, association) + else: + self.mapper.ScalarVisibilityOff() + _hide_scalar_bar(self.scalar_bar) + + self.actor.GetProperty().SetEdgeVisibility(bool(show_edges)) + self.actor.GetProperty().SetLineWidth(1.0) + + def reset_camera(self) -> None: + """Reset the camera to the default view orientation and framing. + + ``vtkRenderer.ResetCamera()`` only adjusts the camera *distance* + so the current actor fits in the viewport; it leaves the camera + orientation (position direction, view up) untouched. To match the + first-load behaviour after the user has rotated the scene, we + also reset the orientation to the VTK defaults (looking down + ``-Z`` with ``+Y`` up) before reframing. + """ + camera = self.renderer.GetActiveCamera() + camera.SetPosition(0.0, 0.0, 1.0) + camera.SetFocalPoint(0.0, 0.0, 0.0) + camera.SetViewUp(0.0, 1.0, 0.0) + camera.SetViewAngle(30.0) + self.renderer.ResetCamera() + + def _rebuild(self) -> None: + self.renderer.ResetCamera() + + +def _build_lut(cmap: str, lo: float, hi: float): + """Build a simple ``vtkLookupTable`` approximating a matplotlib colormap.""" + import vtk # noqa: PLC0415 + + # Minimal built-in approximations - use HueRange for the common cases. 
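+ # The (h0, h1) pairs below only sweep the HSV hue wheel between two
+ # endpoints; they give a visually similar gradient, not the exact
+ # matplotlib colour tables.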
+ lut = vtk.vtkLookupTable() + lut.SetTableRange(lo, hi) + lut.SetNumberOfColors(256) + presets = { + "viridis": (0.75, 0.0), + "plasma": (0.8, 0.05), + "inferno": (0.0, 0.15), + "magma": (0.85, 0.0), + "coolwarm": (0.667, 0.0), + "turbo": (0.7, 0.0), + "jet": (0.667, 0.0), + } + h0, h1 = presets.get(cmap, (0.667, 0.0)) + lut.SetHueRange(h0, h1) + lut.SetSaturationRange(1.0, 1.0) + lut.SetValueRange(1.0, 1.0) + lut.Build() + return lut + + +# --------------------------------------------------------------------------- +# Trame server +# --------------------------------------------------------------------------- + + +def build_server( # pragma: no cover - trame/VTK UI startup is not CI-headless safe + dataset_service: PlaidDatasetService, + artifact_service: ParaviewArtifactService, +): + """Create a configured trame :class:`Server` instance. + + Args: + dataset_service: Discovers datasets and loads PLAID samples. + artifact_service: Converts a :class:`SampleRef` to a ParaView-readable + artifact on disk. + + Returns: + The configured ``trame.app.Server``. Call ``.start(host=..., port=...)`` + to run it. + """ + from trame.app import ( + asynchronous, # noqa: PLC0415 + get_server, # noqa: PLC0415 + ) + from trame.ui.vuetify3 import SinglePageWithDrawerLayout # noqa: PLC0415 + from trame.widgets import html # noqa: PLC0415 + from trame.widgets import vtk as vtk_widgets # noqa: PLC0415 + from trame.widgets import vuetify3 as v3 # noqa: PLC0415 + + _install_vtk_log_router() + + server = get_server(client_type="vue3") + state, ctrl = server.state, server.controller + + pipeline = _VtkPipeline() + # Background task handle for the time-series playback loop (see + # ``_on_playing`` below). Kept here so successive toggles cancel the + # previous task instead of spawning duplicates. + play_task: dict[str, object] = {"task": None} + # One-shot flag raised by ``_apply_features`` so the next + # ``_refresh_sample_view_impl`` call rebuilds the ParaView artifact + # from scratch (its on-disk cache key does not include the feature + # filter, so without this force-refresh the renderer would keep + # showing the pre-filter CGNS file). + force_artifact_refresh: dict[str, bool] = {"pending": False} + + with _silence_stderr(): + datasets = dataset_service.list_datasets() + # Dataset ids are kept in two disjoint lists driven by the + # Local / Hub tabs so the dropdown always matches the active source + # (``init_from_disk`` vs ``init_streaming_from_hub``). The UI reads + # the right list via a ternary expression on ``source_tab``. + hub_ids_set = set(dataset_service.hub_repos) + local_dataset_ids = [ + d.dataset_id for d in datasets if d.dataset_id not in hub_ids_set + ] + hub_dataset_ids = [d.dataset_id for d in datasets if d.dataset_id in hub_ids_set] + dataset_ids = local_dataset_ids + hub_dataset_ids + + # --- Default state ---------------------------------------------------- + # Datasets root panel. ``allow_root_change`` gates the UI on the + # client: when False, the panel is hidden so a public deployment can + # pin the root from the CLI (``--datasets-root /data + # --disable-root-change``). + state.setdefault( + "datasets_root_text", + str(dataset_service.datasets_root) if dataset_service.datasets_root else "", + ) + state.setdefault("allow_root_change", dataset_service._config.allow_root_change) + state.setdefault("browse_dialog", False) + state.setdefault("browse_cwd", "") + state.setdefault("browse_parent", None) + state.setdefault("browse_entries", []) + + # Hugging Face Hub streaming. 
``hub_repos`` mirrors the service state + # and ``hub_repo_input`` is the text field bound to the "Add hub + # dataset" panel. Hub datasets are exposed alongside local ones in + # ``dataset_ids``; the service dispatches to + # ``plaid.storage.init_streaming_from_hub`` when the selected dataset + # is a registered repo id. + state.setdefault("hub_repos", list(dataset_service.hub_repos)) + state.setdefault("hub_repo_input", "") + # Initial ``dataset_id`` follows the default ``source_tab`` ("local"): + # pick the first local dataset when any is available, otherwise fall + # back to the first hub dataset (so a viewer launched with only + # ``--hub-repo`` still has something selected). + initial_dataset_id = _select_initial_dataset_id( + dataset_service._config.initial_dataset_id, + local_dataset_ids, + hub_dataset_ids, + ) + if ( + dataset_service._config.initial_dataset_id is not None + and initial_dataset_id != dataset_service._config.initial_dataset_id + ): + logger.warning( + "Configured initial dataset %r was not found; falling back to %r", + dataset_service._config.initial_dataset_id, + initial_dataset_id, + ) + initial_source_tab = "hub" if initial_dataset_id in hub_dataset_ids else "local" + state.setdefault( + "allow_dataset_change", dataset_service._config.allow_dataset_change + ) + state.setdefault("dataset_id", initial_dataset_id) + # Separate lists per source so the dropdown only shows datasets that + # match the active tab. ``dataset_ids`` is kept for backwards + # compatibility (e.g. tests that inspect the full list) but the UI + # reads from ``local_dataset_ids`` / ``hub_dataset_ids`` directly. + state.setdefault("local_dataset_ids", local_dataset_ids) + state.setdefault("hub_dataset_ids", hub_dataset_ids) + state.setdefault("dataset_ids", dataset_ids) + + state.setdefault("splits", []) + state.setdefault("split", None) + # Active side-panel tab: "local" drives ``datasets_root_text`` and + # directory browsing, "hub" drives the Hugging Face repo input. When an + # initial Hub dataset is configured, start on the Hub tab so state and UI + # remain coherent. + state.setdefault("source_tab", initial_source_tab) + state.setdefault("sample_ids", []) + state.setdefault("sample_id", None) + state.setdefault("sample_index", 0) + state.setdefault("sample_count", 0) + # Streaming (Hugging Face Hub) navigation. Hub datasets expose + # ``IterableDataset`` splits without a ``__len__``, so the slider is + # driven by a forward-only cursor rather than a random-access index + # list. ``stream_position`` mirrors the service cursor (-1 before any + # fetch), ``stream_exhausted`` is set when the iterator raises + # ``StopIteration`` so the slider caps at the last consumed index. + state.setdefault("is_streaming", False) + state.setdefault("stream_position", -1) + state.setdefault("stream_exhausted", False) + + # Feature filtering state. ``available_features`` is the full list of + # feature paths declared in the dataset metadata (populated whenever + # ``dataset_id`` changes), ``selected_features`` is the subset the + # user kept through the checkbox panel. An empty ``selected_features`` + # means "no filter": every feature is loaded (default behaviour). + state.setdefault("available_features", []) + state.setdefault("selected_features", []) + + state.setdefault("base_options", []) + # Single active base (exclusive selection). Kept as a list internally + # so `_apply_base_selection` has a uniform interface, but the UI + # exposes it as a ``VBtnToggle`` with ``multiple=False``. 
+ state.setdefault("active_base", None) + # PLAID globals (``sample.get_global_names`` / ``sample.get_global``) + + # for the current sample, minus the ``IterationValues`` / ``TimeValues`` + # bookkeeping arrays which describe time steps rather than physical + # scalars. + state.setdefault("sample_globals", []) + # Time axis. ``time_values`` mirrors ``sample.features.get_all_time_values()`` + # and ``time_index`` is the index of the currently displayed step. + state.setdefault("time_values", []) + state.setdefault("time_index", 0) + state.setdefault("time_count", 0) + state.setdefault("current_time", None) + state.setdefault("field_options", []) + state.setdefault("field", None) # "point:name" or "cell:name" + state.setdefault("cmap", "viridis") + state.setdefault("cmaps", _COLORMAPS) + state.setdefault("show_edges", False) + state.setdefault("field_range", [0.0, 1.0]) + state.setdefault("status", "Select a dataset to start.") + # Loading indicator: True while the VTK reader is opening a new sample + # or advancing to a new time step. Consumed by a ``VProgressLinear`` in + # the header and an overlay on top of the 3D view. + state.setdefault("loading", False) + # Time-series playback controls. + state.setdefault("playing", False) + state.setdefault("play_fps", 5) + state.setdefault("play_loop", True) + + # --- Helpers ---------------------------------------------------------- + + def _refresh_splits() -> None: + if not state.dataset_id: + state.splits = [] + state.split = None + # Propagate "no dataset" to sample list + 3D scene so the + # view does not linger on the last local sample when the + # user switches to the Hub tab without any registered repo. + _refresh_samples() + return + + try: + with _silence_stderr(): + detail = dataset_service.get_dataset(state.dataset_id) + splits = list(detail.splits.keys()) + except Exception as exc: # noqa: BLE001 + state.status = f"Failed to load dataset: {exc}" + splits = [] + state.splits = splits + new_split = splits[0] if splits else None + # When the new dataset exposes the same first split name as the + # previous one (e.g. both default to ``train``), ``state.split`` + # does not change and the ``@state.change("split")`` listener is + # skipped: the sample list would keep pointing at the old dataset. + # Force a refresh in that case. + same_split = state.split == new_split + state.split = new_split + if same_split: + _refresh_samples() + + def _clear_scene(status: str | None = None) -> None: + """Empty the VTK view and all sample-related panels. + + Used whenever no sample should be displayed (no dataset + selected, streaming dataset waiting for the first ``Next`` + click, ...). Keeping this in a single place ensures the 3D + view never lingers on a stale frame from a previous selection. + """ + pipeline.reader = None + pipeline.mapper.RemoveAllInputConnections(0) + pipeline.mapper.ScalarVisibilityOff() + _hide_scalar_bar(pipeline.scalar_bar) + state.base_options = [] + state.active_base = None + state.field_options = [] + state.field = None + state.sample_globals = [] + state.time_values = [] + state.time_count = 0 + state.time_index = 0 + state.current_time = None + state.sample_ids = [] + state.sample_id = None + state.sample_count = 0 + state.sample_index = 0 + if status is not None: + state.status = status + ctrl.view_update() + + def _refresh_samples() -> None: + if not state.dataset_id: + # No dataset selected: clear everything, including the 3D + # scene. 
This matters when the user switches to the Hub tab + # without any registered repo - otherwise the view would + # keep showing the last local sample. + state.is_streaming = False + _clear_scene(status="Select a dataset to start.") + return + + split_key = state.split + if split_key == "__default__": + split_key = None + # Streaming datasets (HF Hub) are not random-access. The service + # returns a single synthetic ``SampleRef`` with the + # ``STREAM_CURSOR_ID`` sentinel per split, and we advance the + # cursor forward through ``advance_stream_cursor`` as the user + # moves the slider to the right. The slider exposes indices + # ``[0 .. cursor_position + 1]`` so the user can still revisit + # already-fetched samples via the converter cache but never + # rewind the underlying iterator (which is by construction + # forward-only). + try: + streaming = dataset_service.is_streaming(state.dataset_id) + except Exception: # noqa: BLE001 + streaming = False + state.is_streaming = streaming + if streaming: + # Reset the cursor so each (dataset, split) selection starts + # at the first available sample regardless of previous state. + try: + dataset_service.reset_stream_cursor(state.dataset_id, split_key) + except Exception as exc: # noqa: BLE001 + state.status = f"Failed to reset stream cursor: {exc}" + return + state.stream_position = -1 + state.stream_exhausted = False + state.sample_ids = [] + state.sample_count = 0 + state.sample_index = 0 + # No sample has been fetched yet: the status bar invites the + # user to click "Next" to consume the first element of the + # stream. ``sample_id`` stays ``None`` so ``_refresh_sample_view`` + # short-circuits until the cursor has actually advanced. + state.sample_id = None + # Clear the VTK scene so the 3D view is empty while waiting + # for the first ``Next`` click. Without this, switching back + # to the Hub tab would still show the mesh of the previously + # loaded local dataset (or the previous streaming sample), + # which is confusing since no hub sample has been fetched yet. + pipeline.reader = None + pipeline.mapper.RemoveAllInputConnections(0) + pipeline.mapper.ScalarVisibilityOff() + _hide_scalar_bar(pipeline.scalar_bar) + state.base_options = [] + state.active_base = None + state.field_options = [] + state.field = None + state.sample_globals = [] + state.time_values = [] + state.time_count = 0 + state.time_index = 0 + state.current_time = None + ctrl.view_update() + state.status = "Streaming: click Next to fetch the first sample." + return + + try: + with _silence_stderr(): + refs = dataset_service.list_samples(state.dataset_id) + except Exception as exc: # noqa: BLE001 + state.status = f"Failed to list samples: {exc}" + refs = [] + ids = [r.sample_id for r in refs if r.split == split_key] + state.sample_ids = ids + state.sample_count = len(ids) + state.sample_index = 0 + new_sample_id = ids[0] if ids else None + # Switching dataset/split may leave ``state.sample_id`` unchanged + # (e.g. both new and old first sample are "0"); in that case the + # ``@state.change("sample_id")`` hook would not fire and the 3D + # view would keep the previous sample. Force a refresh whenever + # the sample id is the same but the dataset/split context changed. + same_id = state.sample_id == new_sample_id + state.sample_id = new_sample_id + if same_id and new_sample_id is not None: + _refresh_sample_view() + + def _refresh_field_options() -> None: + """Restrict the field dropdown to arrays present in the active base. 
+ + ``_list_point_and_cell_fields`` walks the reader's current output, + which reflects the currently enabled base selection, so fields + belonging to unselected bases are hidden. + """ + if pipeline.reader is None: + state.field_options = [] + state.field = None + return + points, cells = _list_point_and_cell_fields(pipeline.reader.GetOutput()) + options = [f"point:{n}" for n in points] + [f"cell:{n}" for n in cells] + state.field_options = options + # Preserve the previously selected field if it is still available. + if state.field not in options: + state.field = options[0] if options else None + + def _refresh_sample_view() -> None: + """Reload the current sample and refresh the full UI state. + + The call is intentionally synchronous: trame schedules state + broadcasts after the callback returns, so we rely on the + ``VProgressLinear`` shown while ``state.loading`` is True to + indicate activity. A previous async variant that ran the VTK work + in an executor caused the viewer to appear frozen, so we keep the + simple blocking flow and just expose ``state.loading`` for visual + feedback. + """ + if not (state.dataset_id and state.sample_id is not None): + return + state.loading = True + try: + _refresh_sample_view_impl() + finally: + state.loading = False + + def _refresh_sample_view_impl() -> None: + split = state.split if state.split != "__default__" else None + # Streaming datasets expose a "hub" backend regardless of the + # CLI-default backend id, so ``SampleRef`` carries the correct + # loader hint and the paraview artifact cache remains coherent + # across local/streaming switches. + backend_id = "hub" if state.is_streaming else dataset_service._config.backend_id + ref = SampleRef( + backend_id=backend_id, + dataset_id=state.dataset_id, + split=split, + sample_id=str(state.sample_id), + ) + + # Refresh time axis + globals panel (independent of VTK rendering). + # PLAID's CGNS loading (pyCGNS / CHLone) writes low-level HDF5 + # warnings such as "Mismatch in number of children and child IDs + # read" directly to stderr. Wrap every call that can trigger a + # CGNS read with ``_silence_stderr`` so the server console stays + # clean. + try: + with _silence_stderr(): + times = dataset_service.list_time_values(ref) + except Exception as exc: # noqa: BLE001 + logger.warning("Failed to list time values: %s", exc) + times = [] + state.time_values = times + state.time_count = len(times) + state.time_index = 0 + state.current_time = times[0] if times else None + try: + with _silence_stderr(): + state.sample_globals = dataset_service.describe_globals( + ref, time=state.current_time + ) + except Exception as exc: # noqa: BLE001 + logger.warning("Failed to describe globals: %s", exc) + state.sample_globals = [] + try: + # Streaming samples all share the same ``SampleRef`` (the + # ``STREAM_CURSOR_ID`` sentinel) and would therefore hit the + # paraview artifact cache on every Next click, returning the + # first consumed sample forever. ``force=True`` tells + # ``ensure_artifact`` to rebuild the on-disk CGNS from the + # freshly advanced stream cursor instead. + # + # Disk datasets additionally set ``force_artifact_refresh`` + # after the user applies a new feature filter: the artifact + # cache key is derived from ``SampleRef`` alone (no feature + # list), so without forcing a rebuild the renderer would + # keep displaying the pre-filter CGNS file. 
+ force = state.is_streaming or force_artifact_refresh["pending"] + force_artifact_refresh["pending"] = False + with _silence_stderr(): + artifact = artifact_service.ensure_artifact(ref, force=force) + pipeline.load(artifact.cgns_path) + if pipeline.reader is None: + raise RuntimeError("VTK reader was not initialised") + # Disable zone-less bases *before* the reader's first Update() + # so ``vtkCGNSReader`` does not log ``No zones in base ...`` + # warnings for auxiliary bases like ``Global``. + try: + with _silence_stderr(): + non_visual_names = list( + dataset_service.describe_non_visual_bases(ref).keys() + ) + except Exception: # noqa: BLE001 + non_visual_names = [] + if non_visual_names: + _disable_bases_on_reader(pipeline.reader, non_visual_names) + with _silence_stderr(): + pipeline.reader.Update() + bases, _points, _cells = _reader_bases_and_fields(pipeline.reader) + non_visual_set = set(non_visual_names) + # The ``Global`` CGNS base is a PLAID bookkeeping base used to + # store sample-level metadata (scalar inputs/outputs, time + # values, ...). It is surfaced separately in the "Globals" + # panel of the drawer and should never appear alongside the + # ``Base__`` rendering bases in the base + # toggle: selecting it would hide every ``Base_x_y`` base and + # leave the 3D view empty. + visual_bases = [ + name + for name in bases + if name not in non_visual_set and name != "Global" + ] + state.base_options = visual_bases + + # Preserve the user's base selection across samples when the + # same base still exists; otherwise fall back to the first + # renderable base. + previous = state.active_base + if previous in visual_bases: + state.active_base = previous + else: + state.active_base = visual_bases[0] if visual_bases else None + if state.active_base is not None: + _apply_base_selection(pipeline.reader, [state.active_base]) + _refresh_field_options() + # For streaming datasets the sentinel ``cursor`` sample id + # would look like ``hub:repo:split:cursor``; replace it with + # a 0-based step counter that is meaningful to the user. + if state.is_streaming: + state.status = ( + f"Loaded streaming sample #{state.stream_position} " + f"from {state.dataset_id}" + + (f" / {state.split}" if state.split else "") + ) + else: + state.status = f"Loaded sample {ref.encode()}" + _apply_pipeline(reset_camera=True) + except Exception as exc: # noqa: BLE001 + # "Missing features" errors bubble up from the PLAID converter + # when a feature path selected by the user does not exist in + # the current split's schema (constant/variable features are + # declared per-split). The raw exception dumps the full list + # of missing paths, which is both noisy and unactionable in + # the viewer. We shorten it to a hint that the user should + # check the split-specific availability of the filter. + message = str(exc) + if "Missing features" in message: + state.status = ( + "Failed to load sample: Missing features in dataset, check split" + ) + else: + state.status = f"Failed to load sample: {exc}" + + def _apply_pipeline(*, reset_camera: bool = False) -> None: + """Rebuild the VTK pipeline and push the result to the client. + + With ``VtkRemoteView`` the VTK camera lives on the server, so + resetting it server-side and calling ``ctrl.view_update`` is + sufficient: the next rendered frame sent to the browser already + reflects the default orientation and reframed bounds. 
+ """ + if pipeline.reader is None: + return + association = "point" + name: str | None = None + if state.field: + association, name = state.field.split(":", 1) + if name is not None: + lo, hi = _compute_field_range( + pipeline.reader.GetOutput(), name, association + ) + state.field_range = [float(lo), float(hi)] + pipeline.update( + field=name, + association=association, + cmap=state.cmap, + show_edges=bool(state.show_edges), + ) + if reset_camera: + pipeline.reset_camera() + ctrl.view_update() + + # --- State change handlers ------------------------------------------- + + def _refresh_available_features() -> None: + """Populate ``available_features`` and ``selected_features`` from PLAID. + + Called whenever the active ``dataset_id`` changes so the feature + checkbox panel in the drawer reflects what the current dataset + actually exposes. Errors during metadata loading (missing + ``variable_schema.yaml`` on non-PLAID directories, network + failures for Hub datasets, ...) are caught and logged: the panel + is simply emptied in that case. + """ + if not state.dataset_id: + state.available_features = [] + state.selected_features = [] + return + try: + with _silence_stderr(): + available = dataset_service.list_available_features(state.dataset_id) + except Exception as exc: # noqa: BLE001 + logger.warning("Failed to list features: %s", exc) + state.available_features = [] + state.selected_features = [] + return + state.available_features = available + current = dataset_service.get_features(state.dataset_id) + state.selected_features = list(current) if current else [] + + @ctrl.set("apply_features") + def _apply_features() -> None: + """Push ``selected_features`` to the service and reload the sample. + + The selection is forwarded verbatim to + :meth:`PlaidDatasetService.set_features`. In particular an + empty list is kept empty (not converted to ``None``): the user + then sees a sample that only contains the auto-injected non-field + paths (globals, mesh coordinates, ...), which removes every + coloured array from the 3D view. To restore the full dataset + the user can click the "Load all" shortcut or re-check every + feature manually. + """ + if not state.dataset_id: + return + features = list(state.selected_features or []) + try: + with _silence_stderr(): + # Pass the list unconditionally: ``None`` means "no + # filter at all" and is reserved for the initial state / + # explicit reset via :meth:`PlaidDatasetService.set_features`. + dataset_service.set_features(state.dataset_id, features) + except Exception as exc: # noqa: BLE001 + state.status = f"Failed to set features: {exc}" + return + # Changing the feature filter invalidates the in-memory store + # cache (for streaming datasets, the iterator is rebuilt) and + # any cached paraview artifact for this dataset. The simplest + # way to propagate the change to the view is to run the full + # split/sample refresh cascade. + state.status = ( + f"Applied feature filter ({len(features)} selected)." + if features + else "Feature filter cleared (no field loaded)." + ) + # Force the next ``ensure_artifact`` call to rebuild the CGNS + # file; otherwise the cache would still return the pre-filter + # artifact and the renderer's field list would not change. + force_artifact_refresh["pending"] = True + _refresh_samples() + + @ctrl.set("clear_features") + def _clear_features() -> None: + """Clear the feature selection. 
+ + After calling this, the sample contains only the auto-injected + non-field paths (globals, coordinates, connectivities) so the + 3D view shows the mesh with no coloured field. Use the + top-level "Load all" shortcut to restore every feature. + """ + state.selected_features = [] + _apply_features() + + @ctrl.set("select_all_features") + def _select_all_features() -> None: + """Select every available feature and apply the filter. + + Used by the top-level "Load all" shortcut button so the user + can restore the full-dataset view in a single click without + having to open the checkbox panel. Internally this is + equivalent to clearing the filter (an empty / full selection + both load every feature once non-field paths are re-injected + by :meth:`PlaidDatasetService.set_features`), but reflecting + the selection in the checkboxes gives clearer visual feedback. + """ + state.selected_features = list(state.available_features or []) + _apply_features() + + @state.change("dataset_id") + def _on_dataset(**_: object) -> None: + _refresh_available_features() + _refresh_splits() + + @state.change("source_tab") + def _on_source_tab(**_: object) -> None: + """Switch ``dataset_id`` to the first entry of the active source. + + The dropdown's ``items`` binding filters by ``source_tab`` on the + client, but the currently selected ``dataset_id`` may belong to + the other source and would then display as a stale selection. We + proactively pick the first id from the active list (or ``None`` + when empty) so the dropdown always reflects the active tab. + """ + if not state.allow_dataset_change: + return + active_ids = ( + list(state.hub_dataset_ids or []) + if state.source_tab == "hub" + else list(state.local_dataset_ids or []) + ) + new_id = active_ids[0] if active_ids else None + if state.dataset_id == new_id: + # ``@state.change('dataset_id')`` would not fire; refresh + # splits explicitly so the split dropdown and sample list + # stay coherent with the active tab. + _refresh_splits() + else: + state.dataset_id = new_id + + @state.change("split") + def _on_split(**_: object) -> None: + # Clear the active feature selection on every split switch so + # the user starts from a predictable, lightweight state: only + # the geometric supports (mesh coordinates, connectivities, + # globals, ...) associated with the split's available features + # are loaded, and no field is coloured in the 3D view. This + # avoids "Missing features in dataset, check split" errors when + # the previously-selected fields do not exist in the new split, + # and lets the user opt-in to specific fields through the + # checkbox panel. ``_apply_features`` triggers ``_refresh_samples`` + # under the hood, so we do not need to call it again here. + # + # Streaming (Hugging Face Hub) datasets are handled differently: + # they typically expose a single default split, so the multi- + # split "Missing features" issue does not apply. Pushing an + # empty feature filter through ``set_features`` would invalidate + # the store cache and force :meth:`_open` to re-instantiate the + # streaming iterator with an ``update_features_for_CGNS_compatibility`` + # expansion derived from the dataset-wide metadata union, which + # may not match the hub split's actual schema and ends up + # loading the wrong feature catalogue. We therefore skip the + # auto-clear for streaming datasets and let the user apply + # filters explicitly through the checkbox panel. 
+ if not state.dataset_id: + _refresh_samples() + return + try: + streaming = dataset_service.is_streaming(state.dataset_id) + except Exception: # noqa: BLE001 + streaming = False + if streaming: + _refresh_samples() + return + state.selected_features = [] + _apply_features() + + @state.change("sample_index") + def _on_sample_index(**_: object) -> None: + try: + idx = int(state.sample_index) + except (TypeError, ValueError): + idx = 0 + # Streaming datasets: drive the forward-only cursor. The slider's + # maximum (``sample_count - 1``) always matches the most recent + # position the user has reached, so a right-arrow press grows the + # cursor by exactly one step; when the stream is exhausted the + # index is clamped back to the last valid position. + if state.is_streaming: + if state.dataset_id is None: + return + split = state.split if state.split != "__default__" else None + position = int(state.stream_position) + if idx <= position: + # Already-visited step: a streaming iterator cannot be + # rewound, so the view keeps the most recently fetched + # sample. We simply update the slider label. + state.sample_index = max(0, position) + return + # Advance the cursor step-by-step until it matches ``idx`` + # (the slider can only advance by one in normal use, but we + # stay robust to multi-step jumps). + while int(state.stream_position) < idx: + try: + dataset_service.advance_stream_cursor(state.dataset_id, split) + except StopIteration: + state.stream_exhausted = True + # Clamp back to the last fetched position. + state.sample_index = max(0, int(state.stream_position)) + state.status = "Stream exhausted." + return + state.stream_position = int(state.stream_position) + 1 + # Grow the slider's reachable range by one so the user can + # fetch the next sample on the next right-arrow press. + state.sample_count = int(state.stream_position) + 2 + state.sample_id = "cursor" + # ``sample_id`` did not actually change ("cursor" both times), + # so the ``@state.change("sample_id")`` listener is skipped. + # Force a refresh explicitly. + _refresh_sample_view() + return + ids = list(state.sample_ids or []) + if not ids: + state.sample_id = None + return + idx = max(0, min(idx, len(ids) - 1)) + state.sample_id = ids[idx] + + @state.change("sample_id") + def _on_sample(**_: object) -> None: + _refresh_sample_view() + + def _apply_time_step_impl() -> None: + """Synchronous work behind a time-axis update. + + Pushes the selected time step into the VTK pipeline and refreshes + the globals panel for the new time. Both are safe to call at + playback rates now that ``_on_time_index`` short-circuits during + playback, so the loop only performs one VTK update and one + globals read per frame. 
+ """ + if pipeline.reader is not None and state.current_time is not None: + _advance_reader_time(pipeline.reader, float(state.current_time)) + _apply_pipeline() + if state.dataset_id and state.sample_id is not None: + split = state.split if state.split != "__default__" else None + ref = SampleRef( + backend_id=dataset_service._config.backend_id, + dataset_id=state.dataset_id, + split=split, + sample_id=str(state.sample_id), + ) + try: + with _silence_stderr(): + state.sample_globals = dataset_service.describe_globals( + ref, time=state.current_time + ) + except Exception as exc: # noqa: BLE001 + logger.warning("Failed to describe globals: %s", exc) + + @state.change("time_index") + def _on_time_index(**_: object) -> None: + times = list(state.time_values or []) + if not times: + state.current_time = None + return + try: + idx = int(state.time_index) + except (TypeError, ValueError): + idx = 0 + idx = max(0, min(idx, len(times) - 1)) + state.current_time = times[idx] + # During playback the loop (``_play_loop``) already advances the + # time step itself; without this short-circuit the listener + # would run a second ``_apply_time_step_impl`` per frame (double + # VTK update + double PLAID read), which saturates the trame + # WebSocket and stalls playback. + if state.playing: + return + state.loading = True + try: + _apply_time_step_impl() + finally: + state.loading = False + + async def _play_loop() -> None: + """Advance ``time_index`` at ``play_fps`` while ``playing`` is True. + + The loop directly updates ``time_index``, ``current_time`` and + runs the VTK time-step update synchronously (the VTK calls are + fast enough for typical CFD meshes). Relying on the + ``@state.change("time_index")`` listener was unreliable because + trame dispatches it asynchronously, so the playback could end + before the last frame was actually rendered. + + When the end of the time axis is reached, the loop either wraps + around (``play_loop=True``) or stops playback + (``play_loop=False``). The loop exits cleanly on + :class:`asyncio.CancelledError` so the Stop button can cancel the + task immediately. + """ + try: + while state.playing: + count = int(state.time_count or 0) + if count <= 1: + with state: + state.playing = False + break + nxt = int(state.time_index or 0) + 1 + if nxt >= count: + if state.play_loop: + nxt = 0 + else: + with state: + state.playing = False + break + times = list(state.time_values or []) + # Trame state mutations inside an asyncio task must be + # wrapped in ``with state:`` for the ``@state.change`` + # handlers to actually fire and for the client to receive + # the broadcast. Without this block, the slider / time + # label on the client do not update during playback. + with state: + state.time_index = nxt + state.current_time = times[nxt] if nxt < len(times) else None + _apply_time_step_impl() + fps = max(1, int(state.play_fps or 1)) + await asyncio.sleep(1.0 / fps) + except asyncio.CancelledError: + # Expected when playback is stopped or restarted: allow task to exit silently. 
+ return + + @state.change("playing") + def _on_playing(**_: object) -> None: + existing = play_task.get("task") + if existing is not None and not existing.done(): # type: ignore[union-attr] + existing.cancel() # type: ignore[union-attr] + play_task["task"] = None + if state.playing and int(state.time_count or 0) > 1: + play_task["task"] = asynchronous.create_task(_play_loop()) + + @ctrl.set("toggle_play") + def _toggle_play() -> None: + state.playing = not bool(state.playing) + + @ctrl.set("stop_playback") + def _stop_playback() -> None: + """Stop playback and reset the time axis back to the first step. + + Using a controller callback is more robust than the inline + ``click="playing = false; time_index = 0"`` expression: if the + slider is already at index 0 the client-side assignment is a + no-op and no ``@state.change("time_index")`` listener runs, so + the VTK view would keep showing the last-played frame. Here we + always force a refresh by calling ``_apply_time_step_impl``. + """ + state.playing = False + times = list(state.time_values or []) + state.time_index = 0 + state.current_time = times[0] if times else None + state.loading = True + try: + _apply_time_step_impl() + finally: + state.loading = False + + @state.change("active_base") + def _on_base(**_: object) -> None: + if pipeline.reader is None: + return + active = [state.active_base] if state.active_base else [] + try: + _apply_base_selection(pipeline.reader, active) + except Exception as exc: # noqa: BLE001 + state.status = f"Failed to update base: {exc}" + return + # Narrow the field dropdown to arrays that actually exist on the + # newly-selected base. + _refresh_field_options() + _apply_pipeline(reset_camera=True) + + @state.change("field", "cmap", "show_edges") + def _on_view_params(**_: object) -> None: + _apply_pipeline() + + # --- Datasets root management ---------------------------------------- + + def _reload_dataset_list() -> None: + """Re-discover datasets under the (possibly new) datasets root.""" + try: + with _silence_stderr(): + new_datasets = dataset_service.list_datasets() + except Exception as exc: # noqa: BLE001 + state.status = f"Failed to list datasets: {exc}" + new_datasets = [] + hub_set = set(dataset_service.hub_repos) + local_ids = [d.dataset_id for d in new_datasets if d.dataset_id not in hub_set] + hub_ids = [d.dataset_id for d in new_datasets if d.dataset_id in hub_set] + new_ids = local_ids + hub_ids + state.local_dataset_ids = local_ids + state.hub_dataset_ids = hub_ids + state.dataset_ids = new_ids + # Force ``dataset_id`` to change so ``@state.change('dataset_id')`` + # fires and cascades through splits / samples / view refresh. + # Pick from the list that matches the active source tab. + if state.allow_dataset_change: + active_ids = hub_ids if state.source_tab == "hub" else local_ids + state.dataset_id = active_ids[0] if active_ids else None + elif state.dataset_id not in new_ids: + state.dataset_id = _select_initial_dataset_id( + dataset_service._config.initial_dataset_id, + local_ids, + hub_ids, + ) + + if not new_ids: + state.splits = [] + state.split = None + state.sample_ids = [] + state.sample_id = None + state.sample_count = 0 + state.base_options = [] + state.active_base = None + state.field_options = [] + state.field = None + state.sample_globals = [] + state.status = "No dataset found under the configured root." 
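The `_reload_dataset_list` helper above, like the initial-state block earlier in `build_server`, defers to `_select_initial_dataset_id`, whose body does not appear in this hunk. Below is a minimal sketch of the selection rule the surrounding comments describe: the configured dataset wins when it is actually discoverable, otherwise the first local dataset, otherwise the first registered Hub repo. The real helper's signature and details may differ.

```python
def _select_initial_dataset_id(
    configured: str | None,
    local_ids: list[str],
    hub_ids: list[str],
) -> str | None:
    """Pick the dataset shown when the viewer starts or the root changes."""
    # An explicitly configured dataset wins, but only when it was actually
    # discovered; otherwise the caller logs a warning and falls back.
    if configured is not None and configured in (*local_ids, *hub_ids):
        return configured
    # Default "local" tab behaviour: first local dataset when any exists,
    # else the first registered Hub repo, else nothing selected.
    if local_ids:
        return local_ids[0]
    if hub_ids:
        return hub_ids[0]
    return None
```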
+ + @ctrl.set("apply_datasets_root") + def _apply_datasets_root() -> None: + """Change the datasets root from the text field.""" + if not state.allow_root_change: + return + raw = (state.datasets_root_text or "").strip() + if not raw: + try: + dataset_service.set_datasets_root(None) + except Exception as exc: # noqa: BLE001 + state.status = f"Failed to clear datasets root: {exc}" + return + _reload_dataset_list() + state.status = "Datasets root cleared." + return + try: + resolved = dataset_service.set_datasets_root(raw) + except Exception as exc: # noqa: BLE001 + state.status = f"Invalid datasets root: {exc}" + return + state.datasets_root_text = str(resolved) if resolved else "" + _reload_dataset_list() + state.status = f"Datasets root set to {resolved}" + + def _load_browse_view(path: str | None) -> None: + try: + listing = dataset_service.list_subdirs(path) + except Exception as exc: # noqa: BLE001 + state.status = f"Cannot browse: {exc}" + return + state.browse_cwd = listing["path"] + state.browse_parent = listing["parent"] + state.browse_entries = listing["entries"] + + @ctrl.set("open_browse_dialog") + def _open_browse_dialog() -> None: + if not state.allow_root_change: + return + start = (state.datasets_root_text or "").strip() or None + try: + _load_browse_view(start) + except Exception: # noqa: BLE001 + _load_browse_view(None) + state.browse_dialog = True + + @ctrl.set("browse_cd") + def _browse_cd(path: str) -> None: + _load_browse_view(path) + + @ctrl.set("browse_up") + def _browse_up() -> None: + if state.browse_parent: + _load_browse_view(state.browse_parent) + + @ctrl.set("browse_select") + def _browse_select() -> None: + """Use ``browse_cwd`` as the new datasets root.""" + state.datasets_root_text = state.browse_cwd + state.browse_dialog = False + _apply_datasets_root() + + @ctrl.set("add_hub_repo") + def _add_hub_repo() -> None: + """Register the repo id from the text field for streaming. + + Calls :meth:`PlaidDatasetService.add_hub_dataset`, then rebuilds + the dataset list so the new entry is immediately selectable from + the dropdown. + """ + if not state.allow_root_change: + return + raw = (state.hub_repo_input or "").strip() + if not raw: + state.status = "Enter a Hugging Face repo id (e.g. namespace/name)." + return + try: + normalised = dataset_service.add_hub_dataset(raw) + except Exception as exc: # noqa: BLE001 + state.status = f"Invalid repo id: {exc}" + return + state.hub_repos = list(dataset_service.hub_repos) + state.hub_repo_input = "" + _reload_dataset_list() + # Select the newly added hub dataset to give immediate feedback when + # dataset selection is user-controlled. Pinned deployments keep their + # configured dataset. + if state.allow_dataset_change and normalised in (state.dataset_ids or []): + state.dataset_id = normalised + state.status = f"Streaming from {normalised}" + + @ctrl.set("remove_hub_repo") + def _remove_hub_repo(repo_id: str) -> None: + """Unregister a previously added hub repo.""" + if not state.allow_root_change: + return + dataset_service.remove_hub_dataset(repo_id) + state.hub_repos = list(dataset_service.hub_repos) + _reload_dataset_list() + state.status = f"Removed hub dataset {repo_id}" + + @ctrl.set("stream_next") + def _stream_next() -> None: + """Advance the streaming cursor and load the next sample. + + Handler behind the "Next" button shown (instead of the sample + slider) when the active dataset is a Hugging Face Hub stream. 
+ The cursor is advanced one step on the service-side + ``_StreamCursor``; ``sample_id`` is then set to the new 0-based + step number so the existing ``@state.change("sample_id")`` + plumbing fires and pushes the fresh sample through the VTK + pipeline. + """ + if not state.is_streaming or state.dataset_id is None: + return + if state.stream_exhausted: + return + split = state.split if state.split != "__default__" else None + try: + dataset_service.advance_stream_cursor(state.dataset_id, split) + except StopIteration: + state.stream_exhausted = True + state.status = "Stream exhausted." + return + # Advance the UI counters. ``sample_id`` stays at the + # ``STREAM_CURSOR_ID`` sentinel ("cursor") because + # :meth:`PlaidDatasetService.load_sample` needs that sentinel to + # route through ``converter.sample_to_plaid`` (IterableDatasets + # have no ``to_plaid(dataset, index)`` random-access path). + # Instead of mutating ``sample_id`` we refresh the view + # directly; the service-side cursor has already moved one step + # forward so ``load_sample`` will pick up the new record. + new_position = int(state.stream_position) + 1 + state.stream_position = new_position + state.sample_count = new_position + 1 + state.sample_index = new_position + state.sample_id = STREAM_CURSOR_ID + # ``sample_id`` did not actually change (both times the sentinel + # ``STREAM_CURSOR_ID``), so the ``@state.change("sample_id")`` + # listener is skipped. Refresh the view directly instead. The + # status bar text is set inside ``_refresh_sample_view_impl`` as + # a 0-based step label for streaming mode. + _refresh_sample_view() + + @ctrl.set("reset_camera") + def _reset_camera() -> None: + + # With VtkRemoteView the camera lives on the server, so resetting + # it server-side in ``pipeline.reset_camera`` and pushing a new + # frame via ``ctrl.view_update`` is enough: the browser only + # renders the images we send it. + _apply_pipeline(reset_camera=True) + + # --- UI --------------------------------------------------------------- + + with SinglePageWithDrawerLayout(server) as layout: + layout.title.set_text("Dataset Viewer") + + with layout.drawer as drawer: + # Wider drawer to accommodate long CGNS feature paths such as + # ``Base_2_2/Zone/FlowSolution/Pressure`` without wrapping. + drawer.width = 460 + with v3.VContainer(classes="pa-2"): + # Source-selection tabs: pick between a local datasets + # root (``init_from_disk``) and Hugging Face Hub streaming + # (``init_streaming_from_hub``). The tabs only drive which + # form is rendered; registered datasets from either + # source always land in ``dataset_ids`` together. Hidden + # when ``--disable-root-change`` was passed on the CLI so + # a public deployment can pin the root for good. + with html.Div(v_if=("allow_root_change",), classes="mb-2"): + with v3.VTabs( + v_model=("source_tab",), + density="compact", + grow=True, + classes="mb-2", + ): + v3.VTab("Local", value="local") + v3.VTab("Hub", value="hub") + # Local datasets root form. 
+ with html.Div(v_if=("source_tab === 'local'",)): + html.Div("Datasets root", classes="text-caption") + with html.Div(classes="d-flex align-center"): + v3.VTextField( + v_model=("datasets_root_text",), + density="compact", + hide_details=True, + placeholder="/absolute/path/to/datasets", + classes="mr-2", + clearable=True, + __events=[("keyup_enter", "keyup.enter")], + keyup_enter=ctrl.apply_datasets_root, + ) + v3.VBtn( + icon="mdi-folder-open", + click=ctrl.open_browse_dialog, + density="compact", + variant="tonal", + classes="mr-1", + ) + v3.VBtn( + icon="mdi-check", + click=ctrl.apply_datasets_root, + density="compact", + variant="tonal", + color="primary", + ) + # Hugging Face Hub streaming form. + with html.Div(v_if=("source_tab === 'hub'",)): + html.Div( + "Hugging Face Hub dataset", + classes="text-caption", + ) + with html.Div(classes="d-flex align-center"): + v3.VTextField( + v_model=("hub_repo_input",), + density="compact", + hide_details=True, + placeholder="namespace/name", + prepend_inner_icon="mdi-cloud-download", + classes="mr-2", + clearable=True, + __events=[("keyup_enter", "keyup.enter")], + keyup_enter=ctrl.add_hub_repo, + ) + v3.VBtn( + icon="mdi-plus", + click=ctrl.add_hub_repo, + density="compact", + variant="tonal", + color="primary", + ) + # Chip list of registered repos with a remove button. + with html.Div( + v_if=("(hub_repos || []).length > 0",), + classes="mt-1 d-flex flex-wrap", + ): + v3.VChip( + "{{ repo }}", + v_for="repo in hub_repos", + key="repo", + closable=True, + size="small", + classes="mr-1 mb-1", + click_close=(ctrl.remove_hub_repo, "[repo]"), + ) + v3.VDivider(classes="my-2") + + # The dropdown ``items`` are filtered by ``source_tab``: + # Local tab -> ``local_dataset_ids`` (``init_from_disk`` + # datasets), Hub tab -> ``hub_dataset_ids`` + # (``init_streaming_from_hub`` datasets). The user never + # sees ids from the inactive source in the same menu. + with html.Div(v_if=("allow_dataset_change",)): + v3.VSelect( + label="Dataset", + v_model=("dataset_id",), + items=( + "source_tab === 'hub' ? hub_dataset_ids : local_dataset_ids", + ), + density="compact", + ) + + v3.VSelect( + label="Split", + v_model=("split",), + items=("splits",), + density="compact", + ) + # Sample picker. Two mutually-exclusive widgets: + # - Local datasets expose a random-access slider over + # the integer sample indices. + # - Hub streaming datasets have no ``__len__`` and can + # only be consumed forward, so we expose a "Next" + # button that advances the ``_StreamCursor`` by one + # step via ``ctrl.stream_next``. + html.Div("Sample", classes="text-caption mt-2") + v3.VSlider( + v_if=("!is_streaming",), + v_model_number=("sample_index",), + min=0, + max=("sample_count > 0 ? sample_count - 1 : 0",), + step=1, + thumb_label=True, + hide_details=True, + disabled=("sample_count === 0",), + ) + with html.Div( + v_if=("is_streaming",), + classes="d-flex align-center mb-1", + ): + v3.VBtn( + "Next", + prepend_icon="mdi-arrow-right", + click=ctrl.stream_next, + disabled=("stream_exhausted",), + color="primary", + variant="tonal", + density="compact", + classes="mr-2", + ) + # Sample counter: for local datasets the slider exposes + # all ids up-front; for streaming datasets we report the + # step number (the total is unknown until the iterator + # is exhausted, at which point "end of stream" appears). + html.Div( + "{{ is_streaming" + " ? ('step ' + (stream_position + 1) + (stream_exhausted" + " ? ' (end of stream)' : ' (streaming)'))" + " : ((sample_id ?? 
'-') + ' / ' + sample_count + ' samples') }}", + classes="text-caption text-medium-emphasis mb-2", + ) + + # Time axis slider, only shown when the sample actually + # exposes a time axis (time-dependent samples). + with html.Div(v_if=("time_count > 1",), classes="mb-2"): + html.Div("Time", classes="text-caption mt-2") + v3.VSlider( + v_model_number=("time_index",), + min=0, + max=("time_count > 0 ? time_count - 1 : 0",), + step=1, + thumb_label=True, + hide_details=True, + ) + html.Div( + "t = {{ current_time }} " + "" + "({{ time_index + 1 }} / {{ time_count }})", + classes="text-caption text-medium-emphasis", + ) + # Playback controls: Play/Pause + FPS slider + loop. + with html.Div(classes="d-flex align-center mt-2"): + v3.VBtn( + icon=("playing ? 'mdi-pause' : 'mdi-play'",), + click="playing = !playing", + density="compact", + variant="tonal", + classes="mr-2", + ) + v3.VBtn( + icon="mdi-stop", + click=ctrl.stop_playback, + density="compact", + variant="tonal", + classes="mr-2", + ) + v3.VBtn( + icon=("play_loop ? 'mdi-repeat' : 'mdi-repeat-off'",), + click="play_loop = !play_loop", + density="compact", + variant="tonal", + ) + html.Div("FPS: {{ play_fps }}", classes="text-caption mt-1") + v3.VSlider( + v_model_number=("play_fps",), + min=1, + max=30, + step=1, + hide_details=True, + density="compact", + ) + v3.VDivider(classes="my-2") + html.Div("Base", classes="text-caption") + + with v3.VBtnToggle( + v_model=("active_base",), + mandatory=True, + density="compact", + divided=True, + classes="flex-wrap mb-2", + ): + v3.VBtn( + "{{ base }}", + v_for="base in base_options", + key="base", + value=("base",), + size="small", + ) + v3.VSelect( + label="Field", + v_model=("field",), + items=("field_options",), + density="compact", + ) + v3.VSelect( + label="Colormap", + v_model=("cmap",), + items=("cmaps",), + density="compact", + ) + v3.VSwitch( + label="Show edges", + v_model=("show_edges",), + density="compact", + hide_details=True, + ) + v3.VDivider(classes="my-2") + v3.VBtn("Reset camera", click=ctrl.reset_camera, block=True) + + # Feature filter panel. Only rendered when the active + # dataset exposes any feature path (otherwise the panel + # would be empty and misleading). Driven by the + # ``available_features`` / ``selected_features`` state + # vectors populated by ``_refresh_available_features``; + # the Apply button forwards the selection to + # :meth:`PlaidDatasetService.set_features`, which in turn + # invalidates the store cache and (for streaming + # datasets) rebuilds the iterator with an + # ``update_features_for_CGNS_compatibility`` expansion of + # the user selection. + # Feature filter panel. The expansion panel starts + # collapsed: most users only need the "Load all" shortcut + # button exposed above it, and the full checkbox list is + # only expanded when they actually want to subset the + # dataset. The top-level "Load all" button clears the + # current selection and forces a reload without the user + # having to open the panel at all. + # Hidden for streaming (Hugging Face Hub) datasets: + # feature filtering goes through ``init_streaming_from_hub`` + # which rebuilds the iterator from the dataset-wide + # metadata union, a workflow that does not fit the + # per-split viewer model and led to confusing "Missing + # features" errors. Streaming users therefore always see + # the full feature payload; local disk datasets keep the + # complete feature selection UI unchanged. 
+ with html.Div( + v_if=("!is_streaming && (available_features || []).length > 0",), + classes="mt-3", + ): + v3.VDivider(classes="my-2") + with html.Div(classes="d-flex align-center mb-1"): + html.Div("Features", classes="text-subtitle-2 flex-grow-1") + v3.VBtn( + "Load all", + click=ctrl.select_all_features, + size="x-small", + color="primary", + variant="tonal", + ) + with v3.VExpansionPanels(variant="accordion", multiple=True): + with v3.VExpansionPanel(): + v3.VExpansionPanelTitle( + "Select features ({{ (selected_features" + " || []).length }} / {{ (available_features" + " || []).length }})" + ) + with v3.VExpansionPanelText(): + html.Div( + "Empty selection loads every feature.", + classes="text-caption text-medium-emphasis mb-1", + ) + with html.Div(classes="d-flex mb-1"): + v3.VBtn( + "Clear", + click="selected_features = []", + size="x-small", + variant="text", + classes="mr-1", + ) + v3.VBtn( + "Apply", + click=ctrl.apply_features, + size="x-small", + color="primary", + variant="tonal", + ) + with html.Div( + style="max-height: 240px; overflow: auto;", + classes="pa-1", + ): + v3.VCheckbox( + v_for="feat in available_features", + key="feat", + v_model=("selected_features",), + value=("feat",), + label=("feat",), + density="compact", + hide_details=True, + multiple=True, + ) + + html.Div("{{ status }}", classes="text-caption mt-2") + + # PLAID globals for the current sample (filtered out of + # ``IterationValues`` / ``TimeValues`` bookkeeping arrays). + with html.Div( + v_if=("(sample_globals || []).length > 0",), + classes="mt-3", + ): + html.Div("Globals", classes="text-subtitle-2 mb-1") + with v3.VList(density="compact"): + with v3.VListItem(v_for="g in sample_globals", key="g.name"): + v3.VListItemTitle( + "{{ g.name }} " + "" + "({{ g.dtype }}, shape={{ g.shape }})" + "" + ) + v3.VListItemSubtitle( + "{{ g.preview }}", classes="text-caption" + ) + + # File-system browser dialog for the datasets root. Scoped to the + # server's ``browse_roots`` sandbox so the user can only reach + # directories explicitly allowed by the operator. + with v3.VDialog(v_model=("browse_dialog",), max_width="640"): + with v3.VCard(): + v3.VCardTitle("Select datasets root") + v3.VCardSubtitle( + "{{ browse_cwd }}", classes="text-caption text-medium-emphasis" + ) + with v3.VCardText(style="max-height: 50vh; overflow: auto;"): + with v3.VList(density="compact"): + v3.VListItem( + prepend_icon="mdi-arrow-up", + title="..", + click=ctrl.browse_up, + v_if=("browse_parent",), + ) + with v3.VListItem( + v_for="e in browse_entries", + key="e.path", + click=(ctrl.browse_cd, "[e.path]"), + ): + v3.VListItemTitle("{{ e.name }}") + v3.VListItemSubtitle( + "PLAID dataset", + v_if=("e.is_plaid_candidate",), + classes="text-success", + ) + with v3.VCardActions(): + v3.VSpacer() + v3.VBtn( + "Cancel", + click="browse_dialog = false", + variant="text", + ) + v3.VBtn( + "Use this directory", + click=ctrl.browse_select, + color="primary", + variant="tonal", + ) + + # Indeterminate progress bar shown under the app bar while a sample + # or time step is being loaded on the server. + with layout.toolbar: + # Small chip in the toolbar that advertises whether the + # current dataset is streamed from the Hugging Face Hub (the + # sample slider is then forward-only) or browsed from a + # local PLAID directory (random access). 
+ v3.VChip( + "streaming", + v_if=("is_streaming",), + size="small", + color="secondary", + prepend_icon="mdi-cloud-download", + classes="mr-2", + ) + v3.VProgressLinear( + indeterminate=True, + absolute=True, + location="bottom", + color="primary", + v_if=("loading",), + ) + + with layout.content: + with v3.VContainer(fluid=True, classes="fill-height pa-0 ma-0"): + view = vtk_widgets.VtkRemoteView(pipeline.render_window, ref="view") + + ctrl.view_update = view.update + ctrl.view_reset_camera = view.reset_camera + + # Trigger initial population. + _refresh_splits() + + return server diff --git a/tests/viewer/__init__.py b/tests/viewer/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/viewer/conftest.py b/tests/viewer/conftest.py new file mode 100644 index 00000000..b3b8c59a --- /dev/null +++ b/tests/viewer/conftest.py @@ -0,0 +1,28 @@ +"""Shared fixtures for viewer tests. + +The viewer persists user preferences (currently the last-used datasets +root) to ``$XDG_CONFIG_HOME/plaid/viewer.json``. Tests that exercise +:meth:`PlaidDatasetService.set_datasets_root` would otherwise mutate the +real user preferences file, polluting interactive sessions with a path +from ``tmp_path``. We redirect preference persistence to a temporary +location for every viewer test through the +``PLAID_VIEWER_CONFIG_FILE`` environment variable honoured by +:mod:`plaid.viewer.preferences`. +""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + + +@pytest.fixture(autouse=True) +def _isolated_viewer_preferences( + tmp_path_factory: pytest.TempPathFactory, + monkeypatch: pytest.MonkeyPatch, +) -> Path: + """Redirect viewer preference persistence to a unique temporary file.""" + prefs_file = tmp_path_factory.mktemp("viewer_prefs") / "viewer.json" + monkeypatch.setenv("PLAID_VIEWER_CONFIG_FILE", str(prefs_file)) + return prefs_file diff --git a/tests/viewer/test_cache.py b/tests/viewer/test_cache.py new file mode 100644 index 00000000..4bd4a9ac --- /dev/null +++ b/tests/viewer/test_cache.py @@ -0,0 +1,198 @@ +"""Tests for the viewer artifact cache.""" + +from __future__ import annotations + +from collections.abc import Callable +from pathlib import Path + +import pytest + +from plaid.viewer import cache as cache_mod +from plaid.viewer.cache import CacheRoot, _process_is_alive, sweep_orphans + + +def test_ephemeral_cache_is_cleaned_up_on_close(tmp_path: Path, monkeypatch) -> None: + monkeypatch.setenv("TMPDIR", str(tmp_path)) + cache = CacheRoot(install_signal_handlers=False, run_orphan_sweep=False) + path = cache.path + assert path.exists() + cache.close() + assert not path.exists() + + +def test_context_manager_removes_ephemeral_dir(tmp_path: Path, monkeypatch) -> None: + monkeypatch.setenv("TMPDIR", str(tmp_path)) + with CacheRoot(install_signal_handlers=False, run_orphan_sweep=False) as cache: + path = cache.path + assert path.exists() + assert not path.exists() + + +def test_sweep_orphans_removes_dead_pid_dir( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setattr(cache_mod, "_process_is_alive", lambda _pid: False) + victim = tmp_path / "plaid-viewer-999999-deadbeefcafe" + victim.mkdir() + removed = sweep_orphans(tmp_path) + assert victim in removed + assert not victim.exists() + + +def test_sweep_orphans_keeps_live_pid_dir(tmp_path: Path) -> None: + import os + + live = tmp_path / f"plaid-viewer-{os.getpid()}-abc123def456" + live.mkdir() + removed = sweep_orphans(tmp_path) + assert live not in removed + assert live.exists() + 
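The cache tests above and below pin down the orphan-sweep contract: only directories named `plaid-viewer-<pid>-<suffix>` are candidates, directories owned by live PIDs are kept, dead-PID directories are removed, and `rmtree` failures are logged rather than raised. The following is a minimal sketch of logic satisfying that contract; the directory-name pattern and the POSIX-only probe are assumptions, and the real `plaid.viewer.cache` module (which also handles Windows via a separate probe) may differ.

```python
import errno
import logging
import os
import re
import shutil
from pathlib import Path

logger = logging.getLogger(__name__)

# Assumed naming convention for per-process cache dirs: "plaid-viewer-<pid>-<hex>".
_CACHE_DIR_RE = re.compile(r"^plaid-viewer-(\d+)-[0-9a-f]+$")


def _process_is_alive(pid: int) -> bool:
    """POSIX liveness probe; the real module delegates to a Windows probe on nt."""
    if pid <= 0:
        return False
    try:
        os.kill(pid, 0)  # signal 0: existence check only, nothing is delivered
    except ProcessLookupError:
        return False
    except PermissionError:
        return True  # the process exists but belongs to another user
    except OSError as exc:
        return exc.errno != errno.ESRCH
    return True


def sweep_orphans(root: Path) -> list[Path]:
    """Remove cache directories whose owning viewer process is gone."""
    removed: list[Path] = []
    if not root.is_dir():
        return removed
    for entry in root.iterdir():
        match = _CACHE_DIR_RE.match(entry.name)
        if not entry.is_dir() or match is None:
            continue  # plain files and unrelated directories are left alone
        if _process_is_alive(int(match.group(1))):
            continue  # another live viewer still owns this cache
        try:
            shutil.rmtree(entry)
        except OSError:
            logger.warning("Could not remove orphan viewer cache %s", entry)
            continue
        removed.append(entry)
    return removed
```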
+ +def test_process_is_alive_branches(monkeypatch: pytest.MonkeyPatch) -> None: + # Force the POSIX-style branch; Windows delegation is tested separately below. + monkeypatch.setattr(cache_mod.os, "name", "posix") + + assert _process_is_alive(0) is False + + def missing(_pid: int, _sig: int) -> None: + raise ProcessLookupError + + monkeypatch.setattr(cache_mod.os, "kill", missing) + assert _process_is_alive(123) is False + + def denied(_pid: int, _sig: int) -> None: + raise PermissionError + + monkeypatch.setattr(cache_mod.os, "kill", denied) + assert _process_is_alive(123) is True + + def other_os_error(_pid: int, _sig: int) -> None: + raise OSError(5, "other") + + monkeypatch.setattr(cache_mod.os, "kill", other_os_error) + assert _process_is_alive(123) is True + + def no_such_process(_pid: int, _sig: int) -> None: + raise OSError(cache_mod.errno.ESRCH, "missing") + + monkeypatch.setattr(cache_mod.os, "kill", no_such_process) + assert _process_is_alive(123) is False + + +def test_process_is_alive_uses_windows_probe(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(cache_mod.os, "name", "nt") + monkeypatch.setattr(cache_mod, "_windows_process_is_alive", lambda pid: pid == 123) + + assert _process_is_alive(123) is True + assert _process_is_alive(456) is False + + +def test_sweep_orphans_ignores_non_dirs_and_non_matching_names(tmp_path: Path) -> None: + (tmp_path / "plain-file").write_text("x") + keep = tmp_path / "not-plaid-viewer" + keep.mkdir() + assert sweep_orphans(tmp_path / "missing") == [] + assert sweep_orphans(tmp_path) == [] + assert keep.exists() + + +def test_cache_runs_orphan_sweep_and_close_is_idempotent( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setattr(cache_mod.tempfile, "gettempdir", lambda: str(tmp_path)) + monkeypatch.setattr(cache_mod, "_process_is_alive", lambda _pid: False) + victim = tmp_path / "plaid-viewer-999999-deadbeef" + victim.mkdir() + cache = CacheRoot(install_signal_handlers=False, run_orphan_sweep=True) + assert not victim.exists() + path = cache.path + cache.close() + cache.close() + assert not path.exists() + + +def test_cache_signal_handler_cleans_then_delegates( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setattr(cache_mod.tempfile, "gettempdir", lambda: str(tmp_path)) + calls: list[tuple[str, object]] = [] + handlers: dict[int, Callable[[int, object], None]] = {} + + def previous(signum, _frame): + calls.append(("previous", signum)) + + def fake_getsignal(_sig): + return previous + + def fake_signal(sig, handler: Callable[[int, object], None]): + handlers[sig] = handler + calls.append(("signal", sig)) + + def fake_kill(_pid, sig): + calls.append(("kill", sig)) + + monkeypatch.setattr(cache_mod.signal, "getsignal", fake_getsignal) + monkeypatch.setattr(cache_mod.signal, "signal", fake_signal) + monkeypatch.setattr(cache_mod.os, "kill", fake_kill) + + cache = CacheRoot(install_signal_handlers=True, run_orphan_sweep=False) + path = cache.path + handler = handlers[cache_mod.signal.SIGINT] + handler(cache_mod.signal.SIGINT, None) + + assert not path.exists() + assert ("previous", cache_mod.signal.SIGINT) in calls + assert ("kill", cache_mod.signal.SIGINT) in calls + + +def test_sweep_orphans_logs_rmtree_errors( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture +) -> None: + victim = tmp_path / "plaid-viewer-999999-deadbeefcafe" + victim.mkdir() + + def broken_rmtree(_path: Path, ignore_errors: bool = False) -> None: # noqa: ARG001, FBT001, 
FBT002 + raise OSError("boom") + + monkeypatch.setattr(cache_mod.shutil, "rmtree", broken_rmtree) + removed = sweep_orphans(tmp_path) + assert removed == [] + assert "Could not remove orphan viewer cache" in caplog.text + + +def test_cache_safe_cleanup_logs_errors( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture +) -> None: + monkeypatch.setattr(cache_mod.tempfile, "gettempdir", lambda: str(tmp_path)) + cache = CacheRoot(install_signal_handlers=False, run_orphan_sweep=False) + + def broken_rmtree(_path: Path, ignore_errors: bool = False) -> None: # noqa: ARG001, FBT001, FBT002 + raise RuntimeError("boom") + + monkeypatch.setattr(cache_mod.shutil, "rmtree", broken_rmtree) + cache.close() + assert "Failed to clean viewer cache" in caplog.text + + +def test_cache_signal_handler_install_ignores_signal_errors( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setattr(cache_mod.tempfile, "gettempdir", lambda: str(tmp_path)) + + calls = {"getsignal": 0, "signal": 0} + + def flaky_getsignal(_sig): + calls["getsignal"] += 1 + if calls["getsignal"] == 1: + raise ValueError("not main thread") + return cache_mod.signal.SIG_IGN + + def broken_signal(_sig, _handler): + calls["signal"] += 1 + raise OSError("not main thread") + + monkeypatch.setattr(cache_mod.signal, "getsignal", flaky_getsignal) + monkeypatch.setattr(cache_mod.signal, "signal", broken_signal) + with CacheRoot(install_signal_handlers=True, run_orphan_sweep=False): + assert calls == {"getsignal": 2, "signal": 1} diff --git a/tests/viewer/test_cli.py b/tests/viewer/test_cli.py new file mode 100644 index 00000000..ef010d1b --- /dev/null +++ b/tests/viewer/test_cli.py @@ -0,0 +1,155 @@ +"""Tests for the viewer CLI parser that do not start the VTK/trame runtime.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from plaid.viewer import cli as cli_mod +from plaid.viewer.cli import _build_parser + + +def test_build_parser_defaults() -> None: + args = _build_parser().parse_args([]) + + assert args.datasets_root is None + assert args.browse_roots is None + assert args.disable_root_change is False + assert args.dataset_id is None + assert args.disable_dataset_change is False + assert args.host == "127.0.0.1" + assert args.port == 8080 + assert args.backend_id == "disk" + assert args.hub_repo is None + + +def test_build_parser_accepts_all_options(tmp_path: Path) -> None: + datasets_root = tmp_path / "datasets" + browse_a = tmp_path / "a" + browse_b = tmp_path / "b" + + args = _build_parser().parse_args( + [ + "--datasets-root", + str(datasets_root), + "--browse-roots", + str(browse_a), + str(browse_b), + "--disable-root-change", + "--dataset-id", + "dataset-b", + "--disable-dataset-change", + "--host", + "0.0.0.0", + "--port", + "9000", + "--backend-id", + "zarr", + "--hub-repo", + "org/one", + "--hub-repo", + "org/two", + ] + ) + + assert args.datasets_root == datasets_root + assert args.browse_roots == [browse_a, browse_b] + assert args.disable_root_change is True + assert args.dataset_id == "dataset-b" + assert args.disable_dataset_change is True + assert args.host == "0.0.0.0" + assert args.port == 9000 + assert args.backend_id == "zarr" + assert args.hub_repo == ["org/one", "org/two"] + + +def test_main_wires_services_without_starting_real_runtime( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + calls: list[tuple[str, object]] = [] + + class FakeCache: + def __init__(self): + calls.append(("cache", None)) + self.path = tmp_path / 
"cache-root" + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + calls.append(("cache-exit", exc_type)) + + class FakeDatasetService: + def __init__(self, config): + self.config = config + calls.append(("dataset-root", config.datasets_root)) + calls.append(("allow-root-change", config.allow_root_change)) + calls.append(("initial-dataset-id", config.initial_dataset_id)) + calls.append(("allow-dataset-change", config.allow_dataset_change)) + + def add_hub_dataset(self, repo_id: str) -> str: + calls.append(("hub", repo_id)) + if repo_id == "bad/repo": + raise ValueError("bad") + return repo_id + + class FakeArtifactService: + def __init__(self, _dataset_service, cache_path): + calls.append(("artifact-cache", cache_path)) + + class FakeServer: + def start(self, *, host: str, port: int, open_browser: bool) -> None: + calls.append(("start", (host, port, open_browser))) + + def fake_import(name, globals=None, locals=None, fromlist=(), level=0): # noqa: A002, ANN001, ANN002 + if name == "plaid.viewer.trame_app.server" and "_reroute_c_stderr" in fromlist: + return type( + "ServerModule", + (), + {"_reroute_c_stderr": lambda: calls.append(("stderr", None))}, + ) + if name == "plaid.viewer.trame_app.server" and "build_server" in fromlist: + return type( + "ServerModule", (), {"build_server": lambda _ds, _as: FakeServer()} + ) + return real_import(name, globals, locals, fromlist, level) + + real_import = __import__ + monkeypatch.setattr(cli_mod, "CacheRoot", FakeCache) + monkeypatch.setattr(cli_mod, "PlaidDatasetService", FakeDatasetService) + monkeypatch.setattr(cli_mod, "ParaviewArtifactService", FakeArtifactService) + monkeypatch.setattr( + cli_mod, "get_last_datasets_root", lambda: tmp_path / "persisted" + ) + monkeypatch.setattr("builtins.__import__", fake_import) + + assert ( + cli_mod.main( + [ + "--host", + "0.0.0.0", + "--port", + "9001", + "--disable-root-change", + "--dataset-id", + "dataset-b", + "--disable-dataset-change", + "--hub-repo", + "org/repo", + "--hub-repo", + "bad/repo", + ] + ) + == 0 + ) + + assert ("stderr", None) in calls + assert ("dataset-root", tmp_path / "persisted") in calls + assert ("allow-root-change", False) in calls + assert ("initial-dataset-id", "dataset-b") in calls + assert ("allow-dataset-change", False) in calls + assert ("hub", "org/repo") in calls + assert ("hub", "bad/repo") in calls + assert ("artifact-cache", tmp_path / "cache-root") in calls + assert ("start", ("0.0.0.0", 9001, False)) in calls diff --git a/tests/viewer/test_models.py b/tests/viewer/test_models.py new file mode 100644 index 00000000..953fe76b --- /dev/null +++ b/tests/viewer/test_models.py @@ -0,0 +1,31 @@ +"""Tests for viewer data models.""" + +from __future__ import annotations + +import pytest + +from plaid.viewer.models import SampleRef, SampleRefDTO + + +def test_sample_ref_roundtrip_with_split() -> None: + ref = SampleRef(backend_id="disk", dataset_id="ds", split="train", sample_id="0") + assert SampleRef.decode(ref.encode()) == ref + + +def test_sample_ref_roundtrip_without_split() -> None: + ref = SampleRef(backend_id="disk", dataset_id="ds", split=None, sample_id="42") + encoded = ref.encode() + assert "_" in encoded # sentinel for missing split + assert SampleRef.decode(encoded) == ref + + +def test_sample_ref_decode_invalid() -> None: + with pytest.raises(ValueError): + SampleRef.decode("too:few:parts") + + +def test_sample_ref_dto_round_trip() -> None: + ref = SampleRef(backend_id="b", dataset_id="d", split=None, sample_id="s") + dto = 
SampleRefDTO.from_ref(ref) + assert dto.encoded == ref.encode() + assert dto.split is None diff --git a/tests/viewer/test_paraview_artifact_service.py b/tests/viewer/test_paraview_artifact_service.py new file mode 100644 index 00000000..013d9620 --- /dev/null +++ b/tests/viewer/test_paraview_artifact_service.py @@ -0,0 +1,193 @@ +"""Tests for the ParaView artifact service. + +These tests only exercise the caching and file-layout logic. The real +``Sample.save_to_dir`` call is replaced by a fake service that writes fixture +CGNS files, so the tests do not depend on pyCGNS or a concrete PLAID sample. +""" + +from __future__ import annotations + +import json +import types +from pathlib import Path + +import pytest + +from plaid.viewer.models import SampleRef +from plaid.viewer.services.paraview_artifact_service import ( + ParaviewArtifactService, + _build_cache_key, + _collect_time_values, + _plaid_version, + ensure_paraview_artifact, +) + + +class _FakeSample: + def __init__(self, meshes_dir: Path, n_times: int) -> None: + self._meshes_dir = meshes_dir + self.features = type( + "F", (), {"data": {float(i): None for i in range(n_times)}} + )() + + def save_to_dir( + self, + path: Path, + overwrite: bool = False, # noqa: ARG002 + memory_safe: bool = False, # noqa: ARG002 + ) -> None: + meshes = Path(path) / "meshes" + meshes.mkdir(parents=True, exist_ok=True) + for i in range(len(self.features.data)): + (meshes / f"mesh_{i:09d}.cgns").write_bytes(b"CGNS_FAKE") + + +class _FakeDatasetService: + def __init__(self, n_times: int = 1) -> None: + self._n_times = n_times + + def load_sample(self, ref: SampleRef): # noqa: ARG002 - interface match + return _FakeSample(Path("."), self._n_times) + + +@pytest.fixture +def ref() -> SampleRef: + return SampleRef(backend_id="disk", dataset_id="ds", split="train", sample_id="0") + + +def test_ensure_artifact_single_timestep_creates_single_cgns( + tmp_path: Path, ref: SampleRef +) -> None: + service = ParaviewArtifactService(_FakeDatasetService(n_times=1), tmp_path) + artifact = service.ensure_artifact(ref) + assert artifact.created is True + assert artifact.cgns_path.suffix == ".cgns" + assert artifact.cgns_path.exists() + + +def test_ensure_artifact_time_series_writes_series_sidecar( + tmp_path: Path, ref: SampleRef +) -> None: + service = ParaviewArtifactService(_FakeDatasetService(n_times=3), tmp_path) + artifact = service.ensure_artifact(ref) + assert artifact.cgns_path.name.endswith(".cgns.series") + payload = json.loads(artifact.cgns_path.read_text()) + assert payload["file-series-version"] == "1.0" + assert len(payload["files"]) == 3 + assert payload["files"][0]["time"] == 0.0 + # Each entry must reference an existing CGNS file relative to the + # sidecar: CGNS files live in the ``meshes/`` subdirectory, so the + # ``name`` field has to keep that prefix (regression: previously only + # the file name was stored, which broke vtkFileSeriesReader). 
+ sidecar_dir = artifact.cgns_path.parent + for entry in payload["files"]: + assert entry["name"].startswith("meshes/"), entry + assert (sidecar_dir / entry["name"]).is_file() + + +def test_ensure_artifact_is_idempotent(tmp_path: Path, ref: SampleRef) -> None: + service = ParaviewArtifactService(_FakeDatasetService(), tmp_path) + first = service.ensure_artifact(ref) + assert first.created is True + second = service.ensure_artifact(ref) + assert second.created is False + assert second.artifact_id == first.artifact_id + + +def test_force_recreates_artifact(tmp_path: Path, ref: SampleRef) -> None: + service = ParaviewArtifactService(_FakeDatasetService(), tmp_path) + first = service.ensure_artifact(ref) + second = service.ensure_artifact(ref, force=True) + assert second.created is True + assert second.artifact_id == first.artifact_id # cache key is deterministic + + +def test_ensure_artifact_evicts_previous_artifact(tmp_path: Path) -> None: + """The cache keeps at most one artifact on disk.""" + service = ParaviewArtifactService(_FakeDatasetService(), tmp_path) + ref_a = SampleRef(backend_id="disk", dataset_id="ds", split="train", sample_id="0") + ref_b = SampleRef(backend_id="disk", dataset_id="ds", split="train", sample_id="1") + + first = service.ensure_artifact(ref_a) + first_root = first.cgns_path.parent + assert first_root.exists() + + second = service.ensure_artifact(ref_b) + second_root = second.cgns_path.parent + assert second_root.exists() + assert not first_root.exists() + # The by-id lookup only exposes the current artifact. + with pytest.raises(KeyError): + service.get(first.artifact_id) + assert service.get(second.artifact_id) is second + + +def test_cache_key_is_deterministic(ref: SampleRef) -> None: + key_a = _build_cache_key(ref, export_version="1") + key_b = _build_cache_key(ref, export_version="1") + assert key_a == key_b + key_c = _build_cache_key(ref, export_version="2") + assert key_c != key_a + key_d = _build_cache_key(ref, export_version="1", extra={"preset": "a"}) + assert key_d != key_a + + +def test_get_unknown_artifact_raises(tmp_path: Path) -> None: + service = ParaviewArtifactService(_FakeDatasetService(), tmp_path) + with pytest.raises(KeyError): + service.get("unknown") + + +def test_get_returns_created_artifact(tmp_path: Path, ref: SampleRef) -> None: + service = ParaviewArtifactService(_FakeDatasetService(), tmp_path) + artifact = service.ensure_artifact(ref) + assert service.get(artifact.artifact_id) is artifact + + +def test_collect_time_values_empty() -> None: + assert ( + _collect_time_values( + types.SimpleNamespace(features=types.SimpleNamespace(data={})) + ) + == [] + ) + assert _collect_time_values( + types.SimpleNamespace(features=types.SimpleNamespace(data={2: None, 1: None})) + ) == [1.0, 2.0] + + +def test_ensure_artifact_raises_when_sample_writes_no_cgns( + tmp_path: Path, ref: SampleRef +) -> None: + class EmptySample: + features = types.SimpleNamespace(data={0.0: None}) + + def save_to_dir(self, path: Path, overwrite: bool = False) -> None: # noqa: ARG002 + (Path(path) / "meshes").mkdir(parents=True, exist_ok=True) + + class EmptyService: + def load_sample(self, _ref: SampleRef): + return EmptySample() + + service = ParaviewArtifactService(EmptyService(), tmp_path) # type: ignore[arg-type] + with pytest.raises(RuntimeError, match="produced no CGNS"): + service.ensure_artifact(ref) + + +def test_functional_wrapper_creates_artifact(tmp_path: Path, ref: SampleRef) -> None: + artifact = ensure_paraview_artifact( + ref, + cache_dir=tmp_path, + 
dataset_service=_FakeDatasetService(), # type: ignore[arg-type] + ) + assert artifact.cgns_path.exists() + + +def test_plaid_version_unknown(monkeypatch: pytest.MonkeyPatch) -> None: + import importlib.metadata + + def raise_not_found(_name: str) -> str: + raise importlib.metadata.PackageNotFoundError + + monkeypatch.setattr(importlib.metadata, "version", raise_not_found) + assert _plaid_version() == "unknown" diff --git a/tests/viewer/test_plaid_dataset_service.py b/tests/viewer/test_plaid_dataset_service.py new file mode 100644 index 00000000..63fb36e5 --- /dev/null +++ b/tests/viewer/test_plaid_dataset_service.py @@ -0,0 +1,1294 @@ +"""Tests for dataset discovery and indexing in :class:`PlaidDatasetService`. + +The service builds on ``plaid.storage.init_from_disk``. To keep these tests +lightweight and free from real CGNS/arrow fixtures, we monkey-patch that +function to return small in-memory stand-ins for ``dataset_dict`` and +``converter_dict``. +""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from plaid.viewer.config import ViewerConfig +from plaid.viewer.models import SampleRef +from plaid.viewer.services import PlaidDatasetService +from plaid.viewer.services.plaid_dataset_service import ( + _array_preview, + _cached_service, + _safe_list_dir, +) + + +class _FakeDataset(list): + """Minimal list-like stand-in for ``datasets.Dataset``.""" + + +class _FakeConverter: + def __init__(self, samples_by_index: dict[int, object]) -> None: + self._samples = samples_by_index + + def to_plaid(self, dataset, index: int): # noqa: ARG002 - interface match + return self._samples[index] + + +def _make_dataset_dir(root: Path, name: str) -> Path: + base = root / name + (base / "data").mkdir(parents=True, exist_ok=True) + return base + + +def test_small_helpers_cover_edge_cases(tmp_path: Path) -> None: + import numpy as np + + assert _safe_list_dir(tmp_path / "missing") == [] + (tmp_path / "b").mkdir() + (tmp_path / "a").mkdir() + assert [p.name for p in _safe_list_dir(tmp_path)] == ["a", "b"] + assert _array_preview(None) is None + assert _array_preview([]) == "[]" + assert "total 8 values" in (_array_preview(np.arange(8), max_items=3) or "") + + class BadArray: + def __array__(self, *_args): + raise RuntimeError("bad") + + assert _array_preview(BadArray()) is None + + +def _install_fake_init_from_disk( + monkeypatch: pytest.MonkeyPatch, + payload: dict[str, tuple[dict, dict]], +) -> None: + """Patch ``plaid.storage.init_from_disk`` to return per-directory fixtures.""" + + def _fake(path: str): + base_name = Path(path).name + return payload[base_name] + + import plaid.storage as storage # noqa: PLC0415 + + monkeypatch.setattr(storage, "init_from_disk", _fake) + + +def test_list_datasets_returns_all_subdirectories_with_data(tmp_path: Path) -> None: + _make_dataset_dir(tmp_path, "ds_a") + _make_dataset_dir(tmp_path, "ds_b") + (tmp_path / "not_a_dataset").mkdir() # missing data/ subfolder + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + ids = {d.dataset_id for d in service.list_datasets()} + assert ids == {"ds_a", "ds_b"} + + +def test_service_properties_and_dataset_listing_metadata(tmp_path: Path) -> None: + ds = _make_dataset_dir(tmp_path, "ds") + (tmp_path / "file").write_text("x") + (ds / "infos.json").write_text("{}") + (ds / "problem_definitions").mkdir() + service = PlaidDatasetService( + ViewerConfig(datasets_root=tmp_path, browse_roots=(tmp_path,)) + ) + assert service.datasets_root == tmp_path + assert service.browse_roots == 
(tmp_path.resolve(),) + info = service.list_datasets()[0] + assert info.has_infos is True + assert info.has_problem_definitions is True + assert service.hub_repos == () + + +def test_list_samples_uses_converter_to_plaid_indices( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + _make_dataset_dir(tmp_path, "ds") + dataset_dict = { + "train": _FakeDataset(range(2)), + "test": _FakeDataset(range(1)), + } + converter_dict = { + "train": _FakeConverter({0: object(), 1: object()}), + "test": _FakeConverter({0: object()}), + } + _install_fake_init_from_disk(monkeypatch, {"ds": (dataset_dict, converter_dict)}) + + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + refs = service.list_samples("ds") + assert len(refs) == 3 + assert {(r.split, r.sample_id) for r in refs} == { + ("train", "0"), + ("train", "1"), + ("test", "0"), + } + + +def test_load_sample_calls_converter_to_plaid_with_integer_index( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + _make_dataset_dir(tmp_path, "ds") + target = object() + dataset_dict = {"train": _FakeDataset(range(3))} + converter_dict = {"train": _FakeConverter({2: target})} + _install_fake_init_from_disk(monkeypatch, {"ds": (dataset_dict, converter_dict)}) + + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + ref = SampleRef(backend_id="disk", dataset_id="ds", split="train", sample_id="2") + assert service.load_sample(ref) is target + + +def test_get_dataset_reports_split_counts_from_dataset_dict( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + _make_dataset_dir(tmp_path, "ds") + dataset_dict = { + "train": _FakeDataset(range(3)), + "test": _FakeDataset(range(2)), + } + converter_dict = { + "train": _FakeConverter({}), + "test": _FakeConverter({}), + } + _install_fake_init_from_disk(monkeypatch, {"ds": (dataset_dict, converter_dict)}) + + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + detail = service.get_dataset("ds") + assert detail.splits == {"train": 3, "test": 2} + + +def test_describe_non_visual_bases_lists_zoneless_bases_only( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """Bases that carry no ``Zone_t`` child are reported with their arrays.""" + import types + + import numpy as np + from CGNS.PAT import cgnskeywords as CK + + _make_dataset_dir(tmp_path, "ds") + + # Build a minimal CGNS tree with one visual base (has a zone) and one + # non-visual base (only DataArrays under a UserDefinedData_t node). 
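+    # Each node uses the pyCGNS list layout: [name, value, children, CGNS type].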
+ pressure = np.array([1.5], dtype=np.float32) + visual_base = ["Geom", None, [["Zone1", None, [], CK.Zone_ts]], CK.CGNSBase_ts] + aux_base = [ + "Constants", + None, + [ + [ + "UD", + None, + [["Pressure", pressure, [], CK.DataArray_ts]], + "UserDefinedData_t", + ], + ], + CK.CGNSBase_ts, + ] + tree = ["CGNSTree", None, [visual_base, aux_base], "CGNSTree_t"] + + features = types.SimpleNamespace(data={0.0: tree}) + sample = types.SimpleNamespace(features=features) + + dataset_dict = {"train": _FakeDataset(range(1))} + converter_dict = {"train": _FakeConverter({0: sample})} + _install_fake_init_from_disk(monkeypatch, {"ds": (dataset_dict, converter_dict)}) + + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + ref = SampleRef(backend_id="disk", dataset_id="ds", split="train", sample_id="0") + summary = service.describe_non_visual_bases(ref) + + assert list(summary.keys()) == ["Constants"] + entries = summary["Constants"] + assert len(entries) == 1 + entry = entries[0] + assert entry["name"] == "Pressure" + assert entry["shape"] == [1] + assert "float32" in entry["dtype"] + assert "1.5" in entry["preview"] + + +def test_describe_non_visual_bases_returns_empty_for_sample_without_times( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + import types + + _make_dataset_dir(tmp_path, "ds") + sample = types.SimpleNamespace(features=types.SimpleNamespace(data={})) + _install_fake_init_from_disk( + monkeypatch, + { + "ds": ( + {"train": _FakeDataset(range(1))}, + {"train": _FakeConverter({0: sample})}, + ) + }, + ) + + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + ref = SampleRef(backend_id="disk", dataset_id="ds", split="train", sample_id="0") + assert service.describe_non_visual_bases(ref) == {} + + +def test_load_sample_rejects_non_integer_sample_id( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + _make_dataset_dir(tmp_path, "ds") + dataset_dict = {"train": _FakeDataset(range(1))} + converter_dict = {"train": _FakeConverter({0: object()})} + _install_fake_init_from_disk(monkeypatch, {"ds": (dataset_dict, converter_dict)}) + + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + ref = SampleRef( + backend_id="disk", dataset_id="ds", split="train", sample_id="not-an-int" + ) + with pytest.raises(ValueError): + service.load_sample(ref) + + +def test_set_datasets_root_rejects_outside_sandbox(tmp_path: Path) -> None: + sandbox = tmp_path / "sandbox" + sandbox.mkdir() + outside = tmp_path / "outside" + outside.mkdir() + service = PlaidDatasetService( + ViewerConfig(datasets_root=sandbox, browse_roots=(sandbox,)) + ) + with pytest.raises(Exception): + service.set_datasets_root(outside) + + +def test_set_datasets_root_updates_config(tmp_path: Path) -> None: + sandbox = tmp_path / "sandbox" + sandbox.mkdir() + sub = sandbox / "sub" + sub.mkdir() + service = PlaidDatasetService( + ViewerConfig(datasets_root=sandbox, browse_roots=(sandbox,)) + ) + resolved = service.set_datasets_root(sub) + assert resolved == sub.resolve() + assert service.datasets_root == sub.resolve() + + +def test_set_datasets_root_clear_and_rejects_non_directory(tmp_path: Path) -> None: + service = PlaidDatasetService( + ViewerConfig(datasets_root=tmp_path, browse_roots=(tmp_path,)) + ) + assert service.set_datasets_root(None) is None + assert service.datasets_root is None + with pytest.raises(ValueError): + service.set_datasets_root(tmp_path / "missing") + + +def test_list_subdirs_returns_entries(tmp_path: Path) -> None: + sandbox = tmp_path / 
"sandbox" + sandbox.mkdir() + (sandbox / "a").mkdir() + (sandbox / "b").mkdir() + (sandbox / "b" / "data").mkdir() + (sandbox / "b" / "problem_definitions").mkdir() + service = PlaidDatasetService( + ViewerConfig(datasets_root=sandbox, browse_roots=(sandbox,)) + ) + listing = service.list_subdirs(sandbox) + names = {e["name"] for e in listing["entries"]} + assert names == {"a", "b"} + plaid_entry = next(e for e in listing["entries"] if e["name"] == "b") + assert plaid_entry["is_plaid_candidate"] is True + + +def test_list_subdirs_default_hidden_files_and_parent(tmp_path: Path) -> None: + sandbox = tmp_path / "sandbox" + nested = sandbox / "nested" + nested.mkdir(parents=True) + (nested / ".hidden").mkdir() + (nested / "file.txt").write_text("x") + service = PlaidDatasetService( + ViewerConfig(datasets_root=sandbox, browse_roots=(sandbox,)) + ) + root_listing = service.list_subdirs(None) + assert root_listing["path"] == str(sandbox.resolve()) + nested_listing = service.list_subdirs(nested) + assert nested_listing["parent"] == str(sandbox.resolve()) + assert nested_listing["entries"] == [] + with pytest.raises(ValueError): + service.list_subdirs(nested / "missing") + + +def test_list_subdirs_parent_when_browse_roots_overlap(tmp_path: Path) -> None: + outer = tmp_path / "outer" + inner = outer / "inner" + inner.mkdir(parents=True) + service = PlaidDatasetService( + ViewerConfig(datasets_root=inner, browse_roots=(outer, inner)) + ) + listing = service.list_subdirs(inner) + assert listing["parent"] == str(outer.resolve()) + + +def test_list_subdirs_rejects_outside_sandbox(tmp_path: Path) -> None: + sandbox = tmp_path / "sandbox" + sandbox.mkdir() + outside = tmp_path / "outside" + outside.mkdir() + service = PlaidDatasetService( + ViewerConfig(datasets_root=sandbox, browse_roots=(sandbox,)) + ) + with pytest.raises(Exception): + service.list_subdirs(outside) + + +# --------------------------------------------------------------------------- +# Hugging Face Hub streaming +# --------------------------------------------------------------------------- + + +def _install_fake_init_streaming_from_hub( + monkeypatch: pytest.MonkeyPatch, + payload: dict[str, tuple[dict, dict]], +) -> None: + """Patch ``plaid.storage.init_streaming_from_hub`` to return fixtures.""" + + def _fake(repo_id: str): + return payload[repo_id] + + import plaid.storage as storage # noqa: PLC0415 + + monkeypatch.setattr(storage, "init_streaming_from_hub", _fake, raising=False) + + +def test_add_hub_dataset_rejects_invalid_repo_id(tmp_path: Path) -> None: + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + with pytest.raises(ValueError): + service.add_hub_dataset("") + with pytest.raises(ValueError): + service.add_hub_dataset("missing-slash") + + +def test_add_hub_dataset_is_listed_alongside_local(tmp_path: Path) -> None: + _make_dataset_dir(tmp_path, "local_ds") + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + service.add_hub_dataset("PLAID-lib/VKI-LS59") + entries = service.list_datasets() + ids = {d.dataset_id: d.backend_id for d in entries} + assert ids == {"local_ds": "disk", "PLAID-lib/VKI-LS59": "hub"} + # Idempotent add + service.add_hub_dataset("PLAID-lib/VKI-LS59") + assert len(service.list_datasets()) == 2 + + +def test_list_samples_streams_from_hub( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + repo_id = "PLAID-lib/VKI-LS59" + dataset_dict = { + "train": _FakeDataset(range(2)), + } + converter_dict = { + "train": _FakeConverter({0: object(), 1: object()}), + } 
+ _install_fake_init_streaming_from_hub( + monkeypatch, {repo_id: (dataset_dict, converter_dict)} + ) + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + service.add_hub_dataset(repo_id) + refs = service.list_samples(repo_id) + assert {(r.backend_id, r.split, r.sample_id) for r in refs} == { + ("hub", "train", "0"), + ("hub", "train", "1"), + } + + +def test_remove_hub_dataset_clears_cache( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + repo_id = "org/ds" + _install_fake_init_streaming_from_hub( + monkeypatch, + { + repo_id: ( + {"train": _FakeDataset(range(1))}, + {"train": _FakeConverter({0: object()})}, + ) + }, + ) + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + service.add_hub_dataset(repo_id) + service.list_samples(repo_id) # populates cache + assert repo_id in service._store_cache # noqa: SLF001 + service.remove_hub_dataset(repo_id) + assert repo_id not in service._store_cache # noqa: SLF001 + assert repo_id not in [d.dataset_id for d in service.list_datasets()] + + +# --------------------------------------------------------------------------- +# Streaming cursor behaviour (IterableDataset without __len__) +# --------------------------------------------------------------------------- + + +class _FakeIterableDataset: + """Stand-in for ``datasets.IterableDataset`` - no ``__len__``.""" + + def __init__(self, records: list[object]) -> None: + self._records = records + + def __iter__(self): + return iter(self._records) + + +class _FakeStreamingConverter: + """Converter exposing ``sample_to_plaid`` (streaming API).""" + + def __init__(self, mapping: dict[int, object]) -> None: + # Maps the raw record itself to a PLAID sample, using id() lookup + # so we can assert the correct record was forwarded. + self._mapping = mapping + + def sample_to_plaid(self, record): + return self._mapping[record] + + # Intentionally no ``to_plaid`` method: streaming paths must not use it. + + +def _install_fake_streaming_dataset( + monkeypatch: pytest.MonkeyPatch, repo_id: str +) -> tuple[list[object], dict[int, object]]: + """Register a 3-record streaming dataset and return (records, mapping).""" + records = [object(), object(), object()] + mapping = {rec: object() for rec in records} + dataset_dict = {"train": _FakeIterableDataset(records)} + converter_dict = {"train": _FakeStreamingConverter(mapping)} + _install_fake_init_streaming_from_hub( + monkeypatch, {repo_id: (dataset_dict, converter_dict)} + ) + return records, mapping + + +def test_streaming_dataset_is_detected_as_streaming( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + repo_id = "org/stream" + _install_fake_streaming_dataset(monkeypatch, repo_id) + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + service.add_hub_dataset(repo_id) + assert service.is_streaming(repo_id) is True + # Splits without __len__ report a ``None`` count in the detail view. 
+ detail = service.get_dataset(repo_id) + assert detail.splits == {"train": None} + + +def test_is_streaming_returns_true_when_hub_open_fails( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + service.add_hub_dataset("org/broken") + + def broken(_dataset_id: str): + raise RuntimeError("network") + + monkeypatch.setattr(service, "_open", broken) + assert service.is_streaming("org/broken") is True + + +def test_list_samples_emits_single_cursor_ref_for_streaming( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + repo_id = "org/stream" + _install_fake_streaming_dataset(monkeypatch, repo_id) + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + service.add_hub_dataset(repo_id) + refs = service.list_samples(repo_id) + # Streaming splits surface a single synthetic reference using the + # sentinel sample id, regardless of how many records the stream holds. + assert len(refs) == 1 + assert refs[0].backend_id == "hub" + assert refs[0].sample_id == "cursor" + assert refs[0].split == "train" + + +def test_advance_stream_cursor_walks_records_forward( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + repo_id = "org/stream" + records, mapping = _install_fake_streaming_dataset(monkeypatch, repo_id) + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + service.add_hub_dataset(repo_id) + + # No sample fetched yet. + assert service.stream_cursor_position(repo_id, "train") == -1 + + ref0 = service.advance_stream_cursor(repo_id, "train") + assert service.stream_cursor_position(repo_id, "train") == 0 + # ``load_sample`` must materialise the record that the cursor just + # consumed, going through ``converter.sample_to_plaid``. + sample0 = service.load_sample(ref0) + assert sample0 is mapping[records[0]] + + # Advancing again moves forward and does not re-consume the first + # record. + ref1 = service.advance_stream_cursor(repo_id, "train") + assert service.stream_cursor_position(repo_id, "train") == 1 + assert service.load_sample(ref1) is mapping[records[1]] + + +def test_advance_stream_cursor_raises_when_exhausted( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + repo_id = "org/stream" + _install_fake_streaming_dataset(monkeypatch, repo_id) + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + service.add_hub_dataset(repo_id) + # Three records in the fake stream; the fourth advance must stop. 
+ for _ in range(3): + service.advance_stream_cursor(repo_id, "train") + with pytest.raises(StopIteration): + service.advance_stream_cursor(repo_id, "train") + + +def test_reset_stream_cursor_rewinds_to_first_record( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + repo_id = "org/stream" + records, mapping = _install_fake_streaming_dataset(monkeypatch, repo_id) + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + service.add_hub_dataset(repo_id) + service.advance_stream_cursor(repo_id, "train") + service.advance_stream_cursor(repo_id, "train") + assert service.stream_cursor_position(repo_id, "train") == 1 + + service.reset_stream_cursor(repo_id, "train") + assert service.stream_cursor_position(repo_id, "train") == -1 + ref = service.advance_stream_cursor(repo_id, "train") + assert service.load_sample(ref) is mapping[records[0]] + + +def test_build_cursor_split_alias_and_missing_split( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + repo_id = "org/stream" + records = [object()] + _install_fake_init_streaming_from_hub( + monkeypatch, + { + repo_id: ( + {"only": _FakeIterableDataset(records)}, + {"only": _FakeStreamingConverter({records[0]: object()})}, + ) + }, + ) + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + service.add_hub_dataset(repo_id) + + cursor = service._build_cursor(repo_id, None) # noqa: SLF001 + assert next(cursor.iterator) is records[0] + + service._store_cache[repo_id] = ( # noqa: SLF001 + {"a": _FakeIterableDataset([]), "b": _FakeIterableDataset([])}, + {"a": object(), "b": object()}, + ) + with pytest.raises(KeyError): + service._build_cursor(repo_id, "missing") # noqa: SLF001 + + +# --------------------------------------------------------------------------- +# Feature filtering +# --------------------------------------------------------------------------- + + +def _install_fake_metadata( + monkeypatch: pytest.MonkeyPatch, + *, + variable_schema: dict[str, object], + constant_schema: dict[str, dict[str, object]], +) -> None: + """Patch ``load_metadata_from_disk`` / ``load_metadata_from_hub``.""" + from plaid.storage.common import reader as reader_mod # noqa: PLC0415 + + def _fake(*_args, **_kwargs): + return ({}, variable_schema, constant_schema, {}) + + monkeypatch.setattr(reader_mod, "load_metadata_from_disk", _fake, raising=False) + monkeypatch.setattr(reader_mod, "load_metadata_from_hub", _fake, raising=False) + + +class _FeatureAwareConverter: + """Converter recording the feature list handed to ``to_plaid``.""" + + def __init__( + self, + samples_by_index: dict[int, object], + *, + constant_features: set[str] | None = None, + variable_features: set[str] | None = None, + ) -> None: + self._samples = samples_by_index + self.constant_features = constant_features or set() + self.variable_features = variable_features or set() + self.last_features: list[str] | None = None + + def to_plaid(self, dataset, index: int, features=None): # noqa: ARG002 + self.last_features = list(features) if features is not None else None + return self._samples[index] + + +def test_list_available_features_only_exposes_field_paths( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + _make_dataset_dir(tmp_path, "ds") + variable = { + "Base_2_2/Zone/VertexFields/pressure": None, + "Base_2_2/Zone/GridCoordinates/CoordinateX": None, + } + constant = { + "train": { + "Base_2_2/Zone/VertexFields/sdf": None, + "Base_2_2/Zone/VertexFields/sdf_times": None, + "Base_2_2/Zone/VertexFields/GridLocation": None, + 
"Global/angle_in": None, + } + } + _install_fake_metadata( + monkeypatch, variable_schema=variable, constant_schema=constant + ) + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + fields = service.list_available_features("ds") + assert "Base_2_2/Zone/VertexFields/pressure" in fields + assert "Base_2_2/Zone/VertexFields/sdf" in fields + # Coordinates, time bookkeeping, GridLocation metadata and scalars + # must not appear in the user-facing feature list. + assert "Base_2_2/Zone/GridCoordinates/CoordinateX" not in fields + assert "Base_2_2/Zone/VertexFields/sdf_times" not in fields + assert "Base_2_2/Zone/VertexFields/GridLocation" not in fields + assert "Global/angle_in" not in fields + + +def test_set_features_rejects_unknown_path( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + _make_dataset_dir(tmp_path, "ds") + _install_fake_metadata( + monkeypatch, + variable_schema={"Base_2_2/Zone/VertexFields/pressure": None}, + constant_schema={"train": {}}, + ) + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + with pytest.raises(ValueError): + service.set_features("ds", ["not/a/feature"]) + + +def test_load_sample_forwards_selected_features_on_disk( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """Disk path: ``to_plaid`` receives the filtered feature list.""" + _make_dataset_dir(tmp_path, "ds") + variable = {"Base_2_2/Zone/VertexFields/pressure": None} + constant = { + "train": { + "Base": None, + "Base_2_2/Zone": None, + "Base_2_2/Zone/VertexFields": None, + } + } + _install_fake_metadata( + monkeypatch, variable_schema=variable, constant_schema=constant + ) + target = object() + dataset_dict = {"train": _FakeDataset(range(1))} + converter = _FeatureAwareConverter( + {0: target}, + constant_features=set(constant["train"].keys()), + variable_features=set(variable.keys()), + ) + converter_dict = {"train": converter} + _install_fake_init_from_disk(monkeypatch, {"ds": (dataset_dict, converter_dict)}) + + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + service.set_features("ds", ["Base_2_2/Zone/VertexFields/pressure"]) + ref = SampleRef(backend_id="disk", dataset_id="ds", split="train", sample_id="0") + assert service.load_sample(ref) is target + # The user-selected field is forwarded, but the split's constant + # features (mesh supports + globals) are always appended so the + # rendered sample keeps its scalars/globals on top of the + # user-selected variable fields. + assert converter.last_features is not None + assert "Base_2_2/Zone/VertexFields/pressure" in converter.last_features + for path in constant["train"]: + assert path in converter.last_features + + +def test_load_sample_without_filter_does_not_forward_features( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + _make_dataset_dir(tmp_path, "ds") + target = object() + dataset_dict = {"train": _FakeDataset(range(1))} + converter = _FeatureAwareConverter({0: target}) + converter_dict = {"train": converter} + _install_fake_init_from_disk(monkeypatch, {"ds": (dataset_dict, converter_dict)}) + + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + ref = SampleRef(backend_id="disk", dataset_id="ds", split="train", sample_id="0") + assert service.load_sample(ref) is target + assert converter.last_features is None + + +def test_streaming_open_expands_features_via_cgns_helper( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """Streaming path: ``init_streaming_from_hub`` receives the expanded list. 
+ + The expansion is delegated to + ``plaid.utils.cgns_helper.update_features_for_CGNS_compatibility``; + we patch that helper to a deterministic stub and assert the service + hands the stub's output through. + """ + repo_id = "org/stream_filter" + variable = {"Base_2_2/Zone/VertexFields/pressure": None} + constant = {"train": {"Base": None, "Base_2_2/Zone": None}} + _install_fake_metadata( + monkeypatch, variable_schema=variable, constant_schema=constant + ) + + captured: dict[str, object] = {} + + def _fake_init_streaming_from_hub(_repo, features=None): + captured["features"] = features + return ( + {"train": _FakeDataset(range(1))}, + {"train": _FakeConverter({0: object()})}, + ) + + import plaid.storage as storage # noqa: PLC0415 + + monkeypatch.setattr( + storage, + "init_streaming_from_hub", + _fake_init_streaming_from_hub, + raising=False, + ) + + from plaid.utils import cgns_helper # noqa: PLC0415 + + def _fake_expand(features, _constant, _variable): + # Deterministic: append a sentinel so we can verify that the + # service actually routes through the helper instead of + # forwarding the raw user selection. + return sorted(set(features) | {"__expanded__"}) + + monkeypatch.setattr( + cgns_helper, + "update_features_for_CGNS_compatibility", + _fake_expand, + ) + + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + service.add_hub_dataset(repo_id) + service.set_features(repo_id, ["Base_2_2/Zone/VertexFields/pressure"]) + service.list_samples(repo_id) # triggers ``_open`` + assert captured["features"] == [ + "Base_2_2/Zone/VertexFields/pressure", + "__expanded__", + ] + + +def test_set_features_invalidates_store_cache( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """Changing the feature selection must force a reload of the dataset.""" + _make_dataset_dir(tmp_path, "ds") + variable = {"Base_2_2/Zone/VertexFields/pressure": None} + _install_fake_metadata( + monkeypatch, + variable_schema=variable, + constant_schema={"train": {}}, + ) + dataset_dict = {"train": _FakeDataset(range(1))} + converter = _FeatureAwareConverter( + {0: object()}, + variable_features=set(variable.keys()), + ) + _install_fake_init_from_disk( + monkeypatch, {"ds": (dataset_dict, {"train": converter})} + ) + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + service.list_samples("ds") # populates cache + assert "ds" in service._store_cache # noqa: SLF001 + service.set_features("ds", ["Base_2_2/Zone/VertexFields/pressure"]) + assert "ds" not in service._store_cache # noqa: SLF001 + + +def test_get_and_clear_features( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + _make_dataset_dir(tmp_path, "ds") + _install_fake_metadata( + monkeypatch, + variable_schema={"Base_2_2/Zone/VertexFields/pressure": None}, + constant_schema={"train": {}}, + ) + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + assert service.get_features("ds") is None + assert service.set_features("ds", None) is None + assert service.get_features("ds") is None + + +def test_split_feature_keys_falls_back_to_dataset_union( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + _make_dataset_dir(tmp_path, "ds") + _install_fake_metadata( + monkeypatch, + variable_schema={"var": None}, + constant_schema={"train": {"const": None}}, + ) + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + assert service._split_feature_keys("ds", "train") == {"var", "const"} # noqa: SLF001 + assert service._split_feature_keys("ds", "missing") == {"var", "const"} 
# noqa: SLF001 + + +def test_open_raises_for_empty_dataset( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + _make_dataset_dir(tmp_path, "ds") + _install_fake_init_from_disk(monkeypatch, {"ds": ({}, {})}) + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + with pytest.raises(RuntimeError, match="empty"): + service.list_samples("ds") + + +def test_open_hub_feature_keyerror_falls_back_unfiltered( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + repo_id = "org/fallback" + _install_fake_metadata( + monkeypatch, + variable_schema={"Base_2_2/Zone/VertexFields/pressure": None}, + constant_schema={"train": {"Base_2_2": None, "Base_2_2/Zone": None}}, + ) + calls: list[object] = [] + + def fake_init(_repo: str, features=None): + calls.append(features) + if features is not None: + raise KeyError("missing") + return {"train": _FakeDataset(range(1))}, { + "train": _FakeConverter({0: object()}) + } + + import plaid.storage as storage # noqa: PLC0415 + + monkeypatch.setattr(storage, "init_streaming_from_hub", fake_init, raising=False) + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + service.add_hub_dataset(repo_id) + service.set_features(repo_id, ["Base_2_2/Zone/VertexFields/pressure"]) + service.list_samples(repo_id) + assert calls[0] is not None + assert calls[-1] is None + + +def test_load_sample_default_split_fallback_and_missing_split( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + _make_dataset_dir(tmp_path, "ds") + target = object() + _install_fake_init_from_disk( + monkeypatch, + { + "ds": ( + {"only": _FakeDataset(range(1))}, + {"only": _FakeConverter({0: target})}, + ) + }, + ) + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + assert service.load_sample(SampleRef("disk", "ds", "missing", "0")) is target + + _make_dataset_dir(tmp_path, "ds2") + _install_fake_init_from_disk( + monkeypatch, + { + "ds": ( + {"only": _FakeDataset(range(1))}, + {"only": _FakeConverter({0: target})}, + ), + "ds2": ( + {"a": _FakeDataset(range(1)), "b": _FakeDataset(range(1))}, + { + "a": _FakeConverter({0: object()}), + "b": _FakeConverter({0: object()}), + }, + ), + }, + ) + service2 = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + with pytest.raises(KeyError): + service2.load_sample(SampleRef("disk", "ds2", "missing", "0")) + + +def test_load_sample_empty_augmented_falls_back_unfiltered( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + _make_dataset_dir(tmp_path, "ds") + _install_fake_metadata( + monkeypatch, + variable_schema={"Base_2_2/Zone/VertexFields/pressure": None}, + constant_schema={"train": {}}, + ) + target = object() + dataset_dict = {"train": _FakeDataset(range(1))} + converter = _FeatureAwareConverter({0: target}) + _install_fake_init_from_disk( + monkeypatch, {"ds": (dataset_dict, {"train": converter})} + ) + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + service.set_features("ds", ["Base_2_2/Zone/VertexFields/pressure"]) + assert service.load_sample(SampleRef("disk", "ds", "train", "0")) is target + assert converter.last_features is None + + +class _SummarySample: + def __init__(self, report: str = "warning") -> None: + import types + + self.features = types.SimpleNamespace( + data={}, get_all_time_values=lambda: [2, 1] + ) + self._report = report + + def get_scalar_names(self): + return ["s"] + + def get_scalar(self, _name: str): + return 3 + + def get_global_names(self, **_kwargs): + return ["IterationValues", "TimeValues", "g", "bad"] + 
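+    # "IterationValues" and "TimeValues" mimic CGNS time bookkeeping and "bad"
+    # raises below, so only "g" is expected to survive in describe_globals().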
+ def get_global(self, name: str, **_kwargs): + if name == "bad": + raise RuntimeError("skip") + return 4 + + def check_completeness(self): + return self._report + + +def test_summary_time_globals_and_validation( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + _make_dataset_dir(tmp_path, "ds") + sample = _SummarySample() + _install_fake_init_from_disk( + monkeypatch, + { + "ds": ( + {"train": _FakeDataset(range(1))}, + {"train": _FakeConverter({0: sample})}, + ) + }, + ) + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + ref = SampleRef("disk", "ds", "train", "0") + summary = service.get_sample_summary(ref) + assert summary.globals == {"s": "3"} + assert service.list_time_values(ref) == [1.0, 2.0] + assert service.describe_globals(ref) == [ + {"name": "g", "shape": [], "dtype": "int", "preview": "4"} + ] + validation = service.get_sample_validation(ref) + assert validation.ok is True + assert validation.warnings == ["warning"] + + +def test_time_globals_and_validation_error_branches( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + _make_dataset_dir(tmp_path, "ds") + + class BadTimes(_SummarySample): + def __init__(self): + super().__init__("error: bad") + self.features.get_all_time_values = lambda: (_ for _ in ()).throw( + RuntimeError("bad") + ) + + def get_global_names(self, **_kwargs): + raise TypeError + + def get_global(self, _name: str, **_kwargs): + raise TypeError + + sample = BadTimes() + sample.get_global_names = lambda: ["g"] + sample.get_global = lambda _name: 5 + _install_fake_init_from_disk( + monkeypatch, + { + "ds": ( + {"train": _FakeDataset(range(1))}, + {"train": _FakeConverter({0: sample})}, + ) + }, + ) + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + ref = SampleRef("disk", "ds", "train", "0") + assert service.list_time_values(ref) == [] + assert service.describe_globals(ref, time=1.0)[0]["name"] == "g" + assert service.get_sample_validation(ref).errors == ["error: bad"] + + class RaisingService(PlaidDatasetService): + def load_sample(self, ref): # noqa: ARG002 + raise RuntimeError("load") + + assert RaisingService(ViewerConfig()).get_sample_validation(ref).ok is False + + class BadCheck(_SummarySample): + def check_completeness(self): + raise RuntimeError("check") + + _install_fake_init_from_disk( + monkeypatch, + { + "ds": ( + {"train": _FakeDataset(range(1))}, + {"train": _FakeConverter({0: BadCheck()})}, + ) + }, + ) + assert ( + PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + .get_sample_validation(ref) + .ok + is False + ) + + +def test_dataset_dir_and_infos_helpers(tmp_path: Path) -> None: + service = PlaidDatasetService(ViewerConfig()) + with pytest.raises(FileNotFoundError): + service._dataset_dir("ds") # noqa: SLF001 + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + with pytest.raises(FileNotFoundError): + service._dataset_dir("missing") # noqa: SLF001 + base = _make_dataset_dir(tmp_path, "ds") + assert PlaidDatasetService._load_infos(base) is None # noqa: SLF001 + (base / "infos.json").write_text("bad") + assert PlaidDatasetService._load_infos(base) is None # noqa: SLF001 + (base / "infos.json").write_text('{"a": 1}') + assert PlaidDatasetService._load_infos(base) == {"a": 1} # noqa: SLF001 + (base / "infos.json").unlink() + (base / "infos.yaml").write_text("a: 2") + assert PlaidDatasetService._load_infos(base) == {"a": 2} # noqa: SLF001 + (base / "infos.yaml").write_text("a: [") + assert PlaidDatasetService._load_infos(base) is None # noqa: SLF001 
+ + +def test_load_infos_handles_read_errors( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + base = _make_dataset_dir(tmp_path, "ds") + (base / "infos.json").write_text('{"a": 1}') + + original_read_text = Path.read_text + + def broken_read_text(self: Path, *args, **kwargs): # noqa: ANN002, ANN003 + if self.name == "infos.json": + raise OSError("boom") + return original_read_text(self, *args, **kwargs) + + monkeypatch.setattr(Path, "read_text", broken_read_text) + assert PlaidDatasetService._load_infos(base) is None # noqa: SLF001 + + +def test_time_keys_describe_tree_empty_and_cached_service(tmp_path: Path) -> None: + import types + + from CGNS.PAT import cgnskeywords as CK + + sample = types.SimpleNamespace(features=types.SimpleNamespace(data={})) + assert PlaidDatasetService._time_keys(sample) == [] # noqa: SLF001 + assert PlaidDatasetService._describe_tree(sample, []) == ( + {}, + {}, + {}, + ) or PlaidDatasetService._describe_tree(sample, []) == ([], {}, {}) # noqa: SLF001 + + visual_base = [ + "Base", + None, + [ + [ + "Zone", + None, + [ + [ + "FlowSolution", + None, + [["Pressure", None, [], CK.DataArray_ts]], + CK.FlowSolution_ts, + ] + ], + CK.Zone_ts, + ] + ], + CK.CGNSBase_ts, + ] + tree = ["CGNSTree", None, [visual_base], "CGNSTree_t"] + sample = types.SimpleNamespace(features=types.SimpleNamespace(data={0.0: tree})) + assert PlaidDatasetService._describe_tree(sample, [0.0]) == ( # noqa: SLF001 + ["Base"], + {"Base": ["Zone"]}, + {"Base": ["Pressure"]}, + ) + _cached_service.cache_clear() + assert _cached_service(str(tmp_path), "disk") is _cached_service( + str(tmp_path), "disk" + ) + + +def test_load_sample_auto_advances_cursor_on_first_access( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """Calling ``load_sample`` with a cursor ref before any advance acts + like "give me the first sample". + """ + from plaid.viewer.models import SampleRef + + repo_id = "org/stream" + records, mapping = _install_fake_streaming_dataset(monkeypatch, repo_id) + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + service.add_hub_dataset(repo_id) + ref = SampleRef( + backend_id="hub", dataset_id=repo_id, split="train", sample_id="cursor" + ) + sample = service.load_sample(ref) + assert sample is mapping[records[0]] + assert service.stream_cursor_position(repo_id, "train") == 0 + + +class _KeyErrorOnFilteredConverter: + """Converter whose filtered ``to_plaid`` path raises like PLAID does. + + Mirrors the real failure mode: the converter declares + ``constant_features`` containing a path that its backing store + cannot materialise, so passing ``features=sorted(constant_features)`` + triggers ``KeyError("Missing features in …")`` deep inside PLAID. + The service must degrade gracefully and fall back to an unfiltered + load instead of letting the error surface to the user. 
+ """ + + def __init__( + self, + samples_by_index: dict[int, object], + *, + constant_features: set[str], + variable_features: set[str] | None = None, + ) -> None: + self._samples = samples_by_index + self.constant_features = constant_features + self.variable_features = variable_features or set() + self.unfiltered_calls = 0 + + def to_plaid(self, dataset, index: int, features=None): # noqa: ARG002 + if features is not None: + raise KeyError("Missing features in dataset/converter: ['bogus']") + self.unfiltered_calls += 1 + return self._samples[index] + + +def test_load_sample_falls_back_when_empty_filter_triggers_missing_features( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """Clearing the selection on a split whose ``constant_features`` trip + the CGNS expander should not raise ``Missing features``. + + Reproduces the viewer bug where, on a split that shares none of the + user-selected fields, the "geometry-only" fallback in + :meth:`PlaidDatasetService.load_sample` used to hand the split's + ``constant_features`` straight to ``Converter.to_plaid`` and crash + with ``KeyError("Missing features in …")``. The service must now + degrade to an unfiltered load so the user still sees the mesh. + """ + _make_dataset_dir(tmp_path, "ds") + variable = {"Base_2_2/Zone/VertexFields/pressure": None} + constant = {"train": {"Base": None, "Base_2_2/Zone": None}} + _install_fake_metadata( + monkeypatch, variable_schema=variable, constant_schema=constant + ) + target = object() + dataset_dict = {"train": _FakeDataset(range(1))} + converter = _KeyErrorOnFilteredConverter( + {0: target}, + constant_features=set(constant["train"].keys()), + variable_features=set(), # split has no variable features at all + ) + converter_dict = {"train": converter} + _install_fake_init_from_disk(monkeypatch, {"ds": (dataset_dict, converter_dict)}) + + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + # Emulate the UI: the user selected a field that exists elsewhere in + # the dataset metadata but not in this split. + service.set_features("ds", ["Base_2_2/Zone/VertexFields/pressure"]) + ref = SampleRef(backend_id="disk", dataset_id="ds", split="train", sample_id="0") + + assert service.load_sample(ref) is target + assert converter.unfiltered_calls == 1 + + +def test_load_sample_does_not_reinject_deselected_constant_fields( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """A user-visible *field* declared as a split constant must not be + silently re-added to the request when the user deselects it. + + In PLAID, ``constant_features`` can hold genuine field paths (a + field whose values happen to be constant across the split's + samples, e.g. a signed-distance field precomputed offline). Those + fields appear in the UI feature list and are toggleable. An + earlier fix for the "Missing features" crash blindly re-injected + every split constant on top of the user's selection, which + defeated the filter: deselecting ``sdf`` still loaded ``sdf``. + + The service must only re-inject CGNS bookkeeping paths + (coordinates, connectivities, ...), not user-visible fields. + """ + _make_dataset_dir(tmp_path, "ds") + variable = {"Base_2_2/Zone/VertexFields/pressure": None} + constant = { + "train": { + # User-visible field -> must drop when deselected. + "Base_2_2/Zone/VertexFields/sdf": None, + # Time-series bookkeeping for ``sdf`` -> must drop with it. + "Base_2_2/Zone/VertexFields/sdf_times": None, + # CGNS bookkeeping -> must always be kept. 
+ "Base_2_2": None, + "Base_2_2/Zone": None, + "Base_2_2/Zone/GridCoordinates/CoordinateX": None, + } + } + _install_fake_metadata( + monkeypatch, variable_schema=variable, constant_schema=constant + ) + target = object() + dataset_dict = {"train": _FakeDataset(range(1))} + converter = _FeatureAwareConverter( + {0: target}, + constant_features=set(constant["train"].keys()), + variable_features=set(variable.keys()), + ) + _install_fake_init_from_disk( + monkeypatch, {"ds": (dataset_dict, {"train": converter})} + ) + + service = PlaidDatasetService(ViewerConfig(datasets_root=tmp_path)) + # User clears the selection -> load only the geometry. + service.set_features("ds", []) + ref = SampleRef(backend_id="disk", dataset_id="ds", split="train", sample_id="0") + assert service.load_sample(ref) is target + # Bookkeeping paths are preserved so the renderer can draw the mesh... + assert converter.last_features is not None + assert "Base_2_2/Zone/GridCoordinates/CoordinateX" in converter.last_features + # ... but the deselected user-visible field must NOT be re-injected, + # and its ``_times`` bookkeeping path must follow the same fate. + assert "Base_2_2/Zone/VertexFields/sdf" not in converter.last_features + assert "Base_2_2/Zone/VertexFields/sdf_times" not in converter.last_features + assert "Base_2_2/Zone/VertexFields/pressure" not in converter.last_features diff --git a/tests/viewer/test_preferences.py b/tests/viewer/test_preferences.py new file mode 100644 index 00000000..49d788ae --- /dev/null +++ b/tests/viewer/test_preferences.py @@ -0,0 +1,69 @@ +"""Tests for viewer preference persistence.""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + +from plaid.viewer import preferences as prefs + + +def test_preferences_path_uses_xdg_config_home( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.delenv("PLAID_VIEWER_CONFIG_FILE", raising=False) + monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path)) + assert prefs._preferences_path() == tmp_path / "plaid" / "viewer.json" + + +def test_load_preferences_handles_missing_invalid_and_valid_files( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + path = tmp_path / "viewer.json" + monkeypatch.setenv("PLAID_VIEWER_CONFIG_FILE", str(path)) + assert prefs.load_preferences() == {} + path.write_text("not json") + assert prefs.load_preferences() == {} + path.write_text(json.dumps({"datasets_root": str(tmp_path)})) + assert prefs.load_preferences() == {"datasets_root": str(tmp_path)} + + +def test_update_and_last_datasets_root( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + path = tmp_path / "viewer.json" + monkeypatch.setenv("PLAID_VIEWER_CONFIG_FILE", str(path)) + + prefs.save_preferences({"datasets_root": str(tmp_path), "other": 1}) + assert prefs.get_last_datasets_root() == tmp_path + updated = prefs.update_preferences(datasets_root=None) + assert updated == {"other": 1} + assert prefs.get_last_datasets_root() is None + prefs.set_last_datasets_root(tmp_path) + assert prefs.get_last_datasets_root() == tmp_path.resolve() + prefs.set_last_datasets_root(None) + assert prefs.get_last_datasets_root() is None + + +def test_get_last_datasets_root_rejects_bad_values( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + path = tmp_path / "viewer.json" + monkeypatch.setenv("PLAID_VIEWER_CONFIG_FILE", str(path)) + path.write_text(json.dumps({"datasets_root": ""})) + assert prefs.get_last_datasets_root() is None + 
path.write_text(json.dumps({"datasets_root": str(tmp_path / "missing")})) + assert prefs.get_last_datasets_root() is None + + +def test_save_preferences_ignores_os_errors(monkeypatch: pytest.MonkeyPatch) -> None: + class BadPath: + parent = Path("/") + + def write_text(self, _text: str) -> None: + raise OSError("boom") + + monkeypatch.setattr(prefs, "_preferences_path", lambda: BadPath()) + prefs.save_preferences({"x": 1}) diff --git a/tests/viewer/test_trame_helpers.py b/tests/viewer/test_trame_helpers.py new file mode 100644 index 00000000..06f2b745 --- /dev/null +++ b/tests/viewer/test_trame_helpers.py @@ -0,0 +1,347 @@ +"""Headless tests for trame server helper functions using fakes.""" + +from __future__ import annotations + +import sys +import types +from pathlib import Path + +import pytest + +from plaid.viewer.trame_app import server as srv + + +class _Selection: + def __init__(self, names: list[str]) -> None: + self.names = names + self.enabled: list[str] = [] + self.disabled: list[str] = [] + + def GetNumberOfArrays(self) -> int: # noqa: N802 + return len(self.names) + + def GetArrayName(self, i: int) -> str: # noqa: N802 + return self.names[i] + + def ArrayExists(self, name: str) -> bool: # noqa: N802 + return name in self.names + + def DisableArray(self, name: str) -> None: # noqa: N802 + self.disabled.append(name) + + def DisableAllArrays(self) -> None: # noqa: N802 + self.disabled.extend(self.names) + + def EnableArray(self, name: str) -> None: # noqa: N802 + self.enabled.append(name) + + +class _Reader: + def __init__(self) -> None: + self.base = _Selection(["Base", "Global"]) + self.point = _Selection(["p"]) + self.cell = _Selection(["c"]) + self.modified = False + self.updated = False + + def GetBaseSelection(self): # noqa: N802 + return self.base + + def GetPointDataArraySelection(self): # noqa: N802 + return self.point + + def GetCellDataArraySelection(self): # noqa: N802 + return self.cell + + def Modified(self) -> None: # noqa: N802 + self.modified = True + + def Update(self) -> None: # noqa: N802 + self.updated = True + + +def test_reader_selection_helpers() -> None: + reader = _Reader() + wrapper = types.SimpleNamespace(GetReader=lambda: reader) + srv._disable_bases_on_reader(wrapper, ["Global", "Missing"]) + assert reader.base.disabled == ["Global"] + assert reader.modified is True + assert srv._reader_bases_and_fields(wrapper) == (["Base", "Global"], ["p"], ["c"]) + srv._apply_base_selection(reader, ["Base"]) + assert reader.base.enabled == ["Base"] + assert reader.updated is True + + +def test_advance_reader_time_update_and_fallback() -> None: + calls: list[object] = [] + + class WithUpdate: + def UpdateTimeStep(self, value: float) -> None: # noqa: N802 + calls.append(("time", value)) + + def Update(self) -> None: # noqa: N802 + calls.append("update") + + srv._advance_reader_time(WithUpdate(), 2.5) + assert calls == [("time", 2.5), "update"] + + class Exec: + def SetUpdateTimeStep(self, port: int, value: float) -> None: # noqa: N802 + calls.append(("exec", port, value)) + + class WithoutUpdate: + def GetExecutive(self): # noqa: N802 + return Exec() + + def Update(self) -> None: # noqa: N802 + calls.append("fallback-update") + + srv._advance_reader_time(WithoutUpdate(), 3.0) + assert ("exec", 0, 3.0) in calls + + +def test_advance_reader_time_swallows_reader_errors() -> None: + class Broken: + def UpdateTimeStep(self, _value: float) -> None: # noqa: N802 + raise RuntimeError("boom") + + srv._advance_reader_time(Broken(), 1.0) + + +class _Data: + def __init__(self, 
arrays: dict[str, tuple[float, float]]) -> None: + self.arrays = arrays + + def GetNumberOfArrays(self) -> int: # noqa: N802 + return len(self.arrays) + + def GetArrayName(self, i: int) -> str: # noqa: N802 + return list(self.arrays)[i] + + def GetArray(self, name: str): # noqa: N802 + rng = self.arrays.get(name) + return None if rng is None else types.SimpleNamespace(GetRange=lambda _idx: rng) + + +class _Leaf: + def __init__( + self, + point: dict[str, tuple[float, float]], + cell: dict[str, tuple[float, float]], + ) -> None: + self.point = _Data(point) + self.cell = _Data(cell) + + def GetPointData(self): # noqa: N802 + return self.point + + def GetCellData(self): # noqa: N802 + return self.cell + + +class _Blocks: + def __init__(self, blocks: list[object | None]) -> None: + self.blocks = blocks + + def GetNumberOfBlocks(self) -> int: # noqa: N802 + return len(self.blocks) + + def GetBlock(self, i: int): # noqa: N802 + return self.blocks[i] + + +def test_dataset_field_helpers() -> None: + dataset = _Blocks( + [ + None, + _Leaf({"p": (1.0, 2.0)}, {}), + _Leaf({"p": (-1.0, 4.0)}, {"c": (5.0, 6.0)}), + ] + ) + assert srv._list_point_and_cell_fields(dataset) == (["p"], ["c"]) + assert srv._compute_field_range(dataset, "p", "point") == (-1.0, 4.0) + assert srv._compute_field_range(dataset, "missing", "point") == (0.0, 1.0) + + class NoData: + def GetPointData(self): # noqa: N802 + return None + + def GetCellData(self): # noqa: N802 + return _Data({}) + + assert srv._compute_field_range(_Blocks([NoData()]), "p", "point") == (0.0, 1.0) + + +def test_scalar_bar_helpers() -> None: + class ScalarBar: + def __init__(self) -> None: + self.lut = None + self.title = None + self.visible = None + + def SetLookupTable(self, lut) -> None: # noqa: N802, ANN001 + self.lut = lut + + def SetTitle(self, title: str) -> None: # noqa: N802 + self.title = title + + def SetVisibility(self, visible: bool) -> None: # noqa: N802, FBT001 + self.visible = visible + + scalar_bar = ScalarBar() + lut = object() + + srv._show_scalar_bar_for_field(scalar_bar, lut, "pressure", "point") + assert scalar_bar.lut is lut + assert scalar_bar.title == "pressure (point)" + assert scalar_bar.visible is True + + srv._hide_scalar_bar(scalar_bar) + assert scalar_bar.visible is False + + +def test_load_reader_plain_and_build_lut( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + class FakeCGNSReader: + def __init__(self) -> None: + self.file_name = None + self.calls: list[str] = [] + + def SetFileName(self, name: str) -> None: # noqa: N802 + self.file_name = name + + def UpdateInformation(self) -> None: # noqa: N802 + self.calls.append("info") + + def EnableAllBases(self) -> None: # noqa: N802 + self.calls.append("bases") + + def EnableAllPointArrays(self) -> None: # noqa: N802 + self.calls.append("points") + + def EnableAllCellArrays(self) -> None: # noqa: N802 + self.calls.append("cells") + + class FakeLookupTable: + def __init__(self) -> None: + self.hue = None + + def SetTableRange(self, *_args): + pass # noqa: ANN002 + + def SetNumberOfColors(self, *_args): + pass # noqa: ANN002 + + def SetHueRange(self, *args): + self.hue = args # noqa: ANN002 + + def SetSaturationRange(self, *_args): + pass # noqa: ANN002 + + def SetValueRange(self, *_args): + pass # noqa: ANN002 + + def Build(self): + pass + + fake_vtk = types.SimpleNamespace( + vtkCGNSReader=FakeCGNSReader, vtkLookupTable=FakeLookupTable + ) + monkeypatch.setitem(sys.modules, "vtk", fake_vtk) + path = tmp_path / "mesh.cgns" + reader = srv._load_reader(path) + 
assert reader.file_name == str(path) + assert reader.calls == ["info", "bases", "points", "cells"] + assert srv._build_lut("unknown", 0.0, 1.0).hue == (0.667, 0.0) + + +def test_install_vtk_log_router_with_fake_vtk(monkeypatch: pytest.MonkeyPatch) -> None: + calls: list[object] = [] + captured: dict[str, object] = {} + + class FakeOutputWindow: + @staticmethod + def SetInstance(instance) -> None: # noqa: N802 + captured["instance"] = instance + calls.append(("output", instance.__class__.__name__)) + + class FakeObject: + @staticmethod + def GlobalWarningDisplayOff() -> None: # noqa: N802 + calls.append("warnings-off") + + class FakeLogger: + VERBOSITY_OFF = 0 + + @staticmethod + def SetStderrVerbosity(value: int) -> None: # noqa: N802 + calls.append(("verbosity", value)) + + fake_vtk = types.SimpleNamespace( + vtkOutputWindow=FakeOutputWindow, + vtkObject=FakeObject, + vtkLogger=FakeLogger, + ) + monkeypatch.setitem(sys.modules, "vtk", fake_vtk) + monkeypatch.setattr(srv, "_VTK_LOG_ROUTER_INSTALLED", False) + + srv._install_vtk_log_router() + srv._install_vtk_log_router() + + output = captured["instance"] + output.DisplayText("text") + output.DisplayErrorText("error") + output.DisplayWarningText("warning") + output.DisplayGenericWarningText("generic") + output.DisplayDebugText("debug") + assert calls == [ + ("output", "_LoggingOutputWindow"), + "warnings-off", + ("verbosity", 0), + ] + + +def test_install_vtk_log_router_ignores_missing_and_old_vtk( + monkeypatch: pytest.MonkeyPatch, +) -> None: + real_import = __import__ + + def missing_vtk(name, globals=None, locals=None, fromlist=(), level=0): # noqa: A002, ANN001, ANN002 + if name == "vtk": + raise ImportError("no vtk") + return real_import(name, globals, locals, fromlist, level) + + monkeypatch.delitem(sys.modules, "vtk", raising=False) + monkeypatch.setattr("builtins.__import__", missing_vtk) + monkeypatch.setattr(srv, "_VTK_LOG_ROUTER_INSTALLED", False) + srv._install_vtk_log_router() + assert srv._VTK_LOG_ROUTER_INSTALLED is False + + class FakeOutputWindow: + @staticmethod + def SetInstance(_instance) -> None: # noqa: N802 + pass + + class FakeObject: + @staticmethod + def GlobalWarningDisplayOff() -> None: # noqa: N802 + pass + + class OldLogger: + @staticmethod + def SetStderrVerbosity(_value: int) -> None: # noqa: N802 + raise AttributeError("old") + + monkeypatch.setattr("builtins.__import__", real_import) + monkeypatch.setitem( + sys.modules, + "vtk", + types.SimpleNamespace( + vtkOutputWindow=FakeOutputWindow, + vtkObject=FakeObject, + vtkLogger=OldLogger, + ), + ) + srv._install_vtk_log_router() + assert srv._VTK_LOG_ROUTER_INSTALLED is True diff --git a/tests/viewer/test_trame_server.py b/tests/viewer/test_trame_server.py new file mode 100644 index 00000000..5f237a43 --- /dev/null +++ b/tests/viewer/test_trame_server.py @@ -0,0 +1,123 @@ +"""Smoke tests for the trame dataset viewer server.""" + +from __future__ import annotations + +import json +import sys +import types +from pathlib import Path + +import pytest + + +@pytest.fixture +def empty_datasets_root(tmp_path: Path) -> Path: + """Return an existing but empty datasets directory.""" + root = tmp_path / "datasets" + root.mkdir() + return root + + +class _FakeCGNSReader: + def __init__(self) -> None: + self.file_name: str | None = None + self.enable_calls: list[str] = [] + + def SetFileName(self, name: str) -> None: # noqa: N802 - VTK API + self.file_name = name + + def UpdateInformation(self) -> None: # noqa: N802 - VTK API + 
self.enable_calls.append("UpdateInformation") + + def EnableAllBases(self) -> None: # noqa: N802 - VTK API + self.enable_calls.append("EnableAllBases") + + def EnableAllPointArrays(self) -> None: # noqa: N802 - VTK API + self.enable_calls.append("EnableAllPointArrays") + + def EnableAllCellArrays(self) -> None: # noqa: N802 - VTK API + self.enable_calls.append("EnableAllCellArrays") + + +class _FakeCGNSFileSeriesReader: + def __init__(self) -> None: + self.inner: _FakeCGNSReader | None = None + self.file_names: list[str] = [] + self.update_information_calls = 0 + + def SetReader(self, inner) -> None: # noqa: N802 - VTK API + self.inner = inner + + def AddFileName(self, name: str) -> None: # noqa: N802 - VTK API + self.file_names.append(name) + + def UpdateInformation(self) -> None: # noqa: N802 - VTK API + self.update_information_calls += 1 + + +def test_load_reader_series_uses_vtk_cgns_file_series_reader( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """A ``.cgns.series`` sidecar must drive a ``vtkCGNSFileSeriesReader``. + + This guards against regressing to ``vtkFileSeriesReader``, which is not + available in the ``vtk`` PyPI wheel and would silently break time-series + rendering. + """ + series_path = tmp_path / "meshes.cgns.series" + sidecar = { + "file-series-version": "1.0", + "files": [ + {"name": "meshes/mesh_000000001.cgns", "time": 1.5}, + {"name": "meshes/mesh_000000000.cgns", "time": 0.0}, + ], + } + series_path.write_text(json.dumps(sidecar)) + + fake_vtk = types.SimpleNamespace( + vtkCGNSReader=_FakeCGNSReader, + vtkCGNSFileSeriesReader=_FakeCGNSFileSeriesReader, + ) + monkeypatch.setitem(sys.modules, "vtk", fake_vtk) + + from plaid.viewer.trame_app.server import _load_reader # noqa: PLC0415 + + reader = _load_reader(series_path) + + assert isinstance(reader, _FakeCGNSFileSeriesReader) + assert isinstance(reader.inner, _FakeCGNSReader) + # File names are added in ascending time order, not sidecar order. + expected_order = [ + str((tmp_path / "meshes/mesh_000000000.cgns").resolve()), + str((tmp_path / "meshes/mesh_000000001.cgns").resolve()), + ] + assert reader.file_names == expected_order + assert reader.update_information_calls == 1 + # Inner reader must have had its selections enabled so the pipeline + # produces non-empty output. 
+ assert reader.inner.enable_calls == [ + "EnableAllBases", + "EnableAllPointArrays", + "EnableAllCellArrays", + ] + + +def test_select_initial_dataset_id_prefers_configured_dataset() -> None: + from plaid.viewer.trame_app.server import ( + _select_initial_dataset_id, # noqa: PLC0415 + ) + + assert _select_initial_dataset_id("b", ["a", "b"], ["org/repo"]) == "b" + assert ( + _select_initial_dataset_id("org/repo", ["a", "b"], ["org/repo"]) == "org/repo" + ) + + +def test_select_initial_dataset_id_falls_back_to_existing_dataset() -> None: + from plaid.viewer.trame_app.server import ( + _select_initial_dataset_id, # noqa: PLC0415 + ) + + assert _select_initial_dataset_id("missing", ["a", "b"], ["org/repo"]) == "a" + assert _select_initial_dataset_id(None, [], ["org/repo"]) == "org/repo" + assert _select_initial_dataset_id(None, [], []) is None diff --git a/uv.lock b/uv.lock index d71bc633..d25a8384 100644 --- a/uv.lock +++ b/uv.lock @@ -1067,7 +1067,7 @@ wheels = [ [[package]] name = "huggingface-hub" -version = "1.12.0" +version = "1.13.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, @@ -1080,9 +1080,9 @@ dependencies = [ { name = "typer" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/56/52/1b54cb569509c725a32c1315261ac9fd0e6b91bbbf74d86fca10d3376164/huggingface_hub-1.12.0.tar.gz", hash = "sha256:7c3fe85e24b652334e5d456d7a812cd9a071e75630fac4365d9165ab5e4a34b6", size = 763091, upload-time = "2026-04-24T13:32:08.674Z" } +sdist = { url = "https://files.pythonhosted.org/packages/89/ff/ec7ed2eb43bd7ce8bb2233d109cc235c3e807ffe5e469dc09db261fac05e/huggingface_hub-1.13.0.tar.gz", hash = "sha256:f6df2dac5abe82ce2fe05873d10d5ff47bc677d616a2f521f4ee26db9415d9d0", size = 781788, upload-time = "2026-04-30T11:57:33.858Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7e/2b/ef03ddb96bd1123503c2bd6932001020292deea649e9bf4caa2cb65a85bf/huggingface_hub-1.12.0-py3-none-any.whl", hash = "sha256:d74939969585ee35748bd66de09baf84099d461bda7287cd9043bfb99b0e424d", size = 646806, upload-time = "2026-04-24T13:32:06.717Z" }, + { url = "https://files.pythonhosted.org/packages/93/db/4b1cdae9460ae1f3ca020cd767f013430ce23eb1d9c890ae3a0609b38d26/huggingface_hub-1.13.0-py3-none-any.whl", hash = "sha256:e942cb50d6a08dd5306688b1ac05bda157fd2fcc88b63dae405f7bd0d3234005", size = 660643, upload-time = "2026-04-30T11:57:31.802Z" }, ] [[package]] @@ -1573,6 +1573,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/89/5f/39cbadc320cd78f4834b0a9f7a2fa3c980dca942bf193f315837eacb8870/meshio-5.3.5-py3-none-any.whl", hash = "sha256:0736c6e34ecc768f62f2cde5d8233a3529512a9399b25c68ea2ca0d5900cdc10", size = 166162, upload-time = "2024-01-31T15:09:36.691Z" }, ] +[[package]] +name = "more-itertools" +version = "11.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/f7/139d22fef48ac78127d18e01d80cf1be40236ae489769d17f35c3d425293/more_itertools-11.0.2.tar.gz", hash = "sha256:392a9e1e362cbc106a2457d37cabf9b36e5e12efd4ebff1654630e76597df804", size = 144659, upload-time = "2026-04-09T15:01:33.297Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/98/6af411189d9413534c3eb691182bff1f5c6d44ed2f93f2edfe52a1bbceb8/more_itertools-11.0.2-py3-none-any.whl", hash = "sha256:6e35b35f818b01f691643c6c611bc0902f2e92b46c18fffa77ae1e7c46e912e4", size = 71939, upload-time = "2026-04-09T15:01:32.21Z" }, +] + [[package]] name = "mpmath" version = "1.3.0" @@ 
-1582,6 +1591,41 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" }, ] +[[package]] +name = "msgpack" +version = "1.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4d/f2/bfb55a6236ed8725a96b0aa3acbd0ec17588e6a2c3b62a93eb513ed8783f/msgpack-1.1.2.tar.gz", hash = "sha256:3b60763c1373dd60f398488069bcdc703cd08a711477b5d480eecc9f9626f47e", size = 173581, upload-time = "2025-10-08T09:15:56.596Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/97/560d11202bcd537abca693fd85d81cebe2107ba17301de42b01ac1677b69/msgpack-1.1.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2e86a607e558d22985d856948c12a3fa7b42efad264dca8a3ebbcfa2735d786c", size = 82271, upload-time = "2025-10-08T09:14:49.967Z" }, + { url = "https://files.pythonhosted.org/packages/83/04/28a41024ccbd67467380b6fb440ae916c1e4f25e2cd4c63abe6835ac566e/msgpack-1.1.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:283ae72fc89da59aa004ba147e8fc2f766647b1251500182fac0350d8af299c0", size = 84914, upload-time = "2025-10-08T09:14:50.958Z" }, + { url = "https://files.pythonhosted.org/packages/71/46/b817349db6886d79e57a966346cf0902a426375aadc1e8e7a86a75e22f19/msgpack-1.1.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:61c8aa3bd513d87c72ed0b37b53dd5c5a0f58f2ff9f26e1555d3bd7948fb7296", size = 416962, upload-time = "2025-10-08T09:14:51.997Z" }, + { url = "https://files.pythonhosted.org/packages/da/e0/6cc2e852837cd6086fe7d8406af4294e66827a60a4cf60b86575a4a65ca8/msgpack-1.1.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:454e29e186285d2ebe65be34629fa0e8605202c60fbc7c4c650ccd41870896ef", size = 426183, upload-time = "2025-10-08T09:14:53.477Z" }, + { url = "https://files.pythonhosted.org/packages/25/98/6a19f030b3d2ea906696cedd1eb251708e50a5891d0978b012cb6107234c/msgpack-1.1.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7bc8813f88417599564fafa59fd6f95be417179f76b40325b500b3c98409757c", size = 411454, upload-time = "2025-10-08T09:14:54.648Z" }, + { url = "https://files.pythonhosted.org/packages/b7/cd/9098fcb6adb32187a70b7ecaabf6339da50553351558f37600e53a4a2a23/msgpack-1.1.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bafca952dc13907bdfdedfc6a5f579bf4f292bdd506fadb38389afa3ac5b208e", size = 422341, upload-time = "2025-10-08T09:14:56.328Z" }, + { url = "https://files.pythonhosted.org/packages/e6/ae/270cecbcf36c1dc85ec086b33a51a4d7d08fc4f404bdbc15b582255d05ff/msgpack-1.1.2-cp311-cp311-win32.whl", hash = "sha256:602b6740e95ffc55bfb078172d279de3773d7b7db1f703b2f1323566b878b90e", size = 64747, upload-time = "2025-10-08T09:14:57.882Z" }, + { url = "https://files.pythonhosted.org/packages/2a/79/309d0e637f6f37e83c711f547308b91af02b72d2326ddd860b966080ef29/msgpack-1.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:d198d275222dc54244bf3327eb8cbe00307d220241d9cec4d306d49a44e85f68", size = 71633, upload-time = "2025-10-08T09:14:59.177Z" }, + { url = "https://files.pythonhosted.org/packages/73/4d/7c4e2b3d9b1106cd0aa6cb56cc57c6267f59fa8bfab7d91df5adc802c847/msgpack-1.1.2-cp311-cp311-win_arm64.whl", hash = "sha256:86f8136dfa5c116365a8a651a7d7484b65b13339731dd6faebb9a0242151c406", size = 64755, upload-time = 
"2025-10-08T09:15:00.48Z" }, + { url = "https://files.pythonhosted.org/packages/ad/bd/8b0d01c756203fbab65d265859749860682ccd2a59594609aeec3a144efa/msgpack-1.1.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:70a0dff9d1f8da25179ffcf880e10cf1aad55fdb63cd59c9a49a1b82290062aa", size = 81939, upload-time = "2025-10-08T09:15:01.472Z" }, + { url = "https://files.pythonhosted.org/packages/34/68/ba4f155f793a74c1483d4bdef136e1023f7bcba557f0db4ef3db3c665cf1/msgpack-1.1.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:446abdd8b94b55c800ac34b102dffd2f6aa0ce643c55dfc017ad89347db3dbdb", size = 85064, upload-time = "2025-10-08T09:15:03.764Z" }, + { url = "https://files.pythonhosted.org/packages/f2/60/a064b0345fc36c4c3d2c743c82d9100c40388d77f0b48b2f04d6041dbec1/msgpack-1.1.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c63eea553c69ab05b6747901b97d620bb2a690633c77f23feb0c6a947a8a7b8f", size = 417131, upload-time = "2025-10-08T09:15:05.136Z" }, + { url = "https://files.pythonhosted.org/packages/65/92/a5100f7185a800a5d29f8d14041f61475b9de465ffcc0f3b9fba606e4505/msgpack-1.1.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:372839311ccf6bdaf39b00b61288e0557916c3729529b301c52c2d88842add42", size = 427556, upload-time = "2025-10-08T09:15:06.837Z" }, + { url = "https://files.pythonhosted.org/packages/f5/87/ffe21d1bf7d9991354ad93949286f643b2bb6ddbeab66373922b44c3b8cc/msgpack-1.1.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2929af52106ca73fcb28576218476ffbb531a036c2adbcf54a3664de124303e9", size = 404920, upload-time = "2025-10-08T09:15:08.179Z" }, + { url = "https://files.pythonhosted.org/packages/ff/41/8543ed2b8604f7c0d89ce066f42007faac1eaa7d79a81555f206a5cdb889/msgpack-1.1.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:be52a8fc79e45b0364210eef5234a7cf8d330836d0a64dfbb878efa903d84620", size = 415013, upload-time = "2025-10-08T09:15:09.83Z" }, + { url = "https://files.pythonhosted.org/packages/41/0d/2ddfaa8b7e1cee6c490d46cb0a39742b19e2481600a7a0e96537e9c22f43/msgpack-1.1.2-cp312-cp312-win32.whl", hash = "sha256:1fff3d825d7859ac888b0fbda39a42d59193543920eda9d9bea44d958a878029", size = 65096, upload-time = "2025-10-08T09:15:11.11Z" }, + { url = "https://files.pythonhosted.org/packages/8c/ec/d431eb7941fb55a31dd6ca3404d41fbb52d99172df2e7707754488390910/msgpack-1.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:1de460f0403172cff81169a30b9a92b260cb809c4cb7e2fc79ae8d0510c78b6b", size = 72708, upload-time = "2025-10-08T09:15:12.554Z" }, + { url = "https://files.pythonhosted.org/packages/c5/31/5b1a1f70eb0e87d1678e9624908f86317787b536060641d6798e3cf70ace/msgpack-1.1.2-cp312-cp312-win_arm64.whl", hash = "sha256:be5980f3ee0e6bd44f3a9e9dea01054f175b50c3e6cdb692bc9424c0bbb8bf69", size = 64119, upload-time = "2025-10-08T09:15:13.589Z" }, + { url = "https://files.pythonhosted.org/packages/6b/31/b46518ecc604d7edf3a4f94cb3bf021fc62aa301f0cb849936968164ef23/msgpack-1.1.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4efd7b5979ccb539c221a4c4e16aac1a533efc97f3b759bb5a5ac9f6d10383bf", size = 81212, upload-time = "2025-10-08T09:15:14.552Z" }, + { url = "https://files.pythonhosted.org/packages/92/dc/c385f38f2c2433333345a82926c6bfa5ecfff3ef787201614317b58dd8be/msgpack-1.1.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:42eefe2c3e2af97ed470eec850facbe1b5ad1d6eacdbadc42ec98e7dcf68b4b7", size = 84315, upload-time = "2025-10-08T09:15:15.543Z" }, + { url = 
"https://files.pythonhosted.org/packages/d3/68/93180dce57f684a61a88a45ed13047558ded2be46f03acb8dec6d7c513af/msgpack-1.1.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1fdf7d83102bf09e7ce3357de96c59b627395352a4024f6e2458501f158bf999", size = 412721, upload-time = "2025-10-08T09:15:16.567Z" }, + { url = "https://files.pythonhosted.org/packages/5d/ba/459f18c16f2b3fc1a1ca871f72f07d70c07bf768ad0a507a698b8052ac58/msgpack-1.1.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fac4be746328f90caa3cd4bc67e6fe36ca2bf61d5c6eb6d895b6527e3f05071e", size = 424657, upload-time = "2025-10-08T09:15:17.825Z" }, + { url = "https://files.pythonhosted.org/packages/38/f8/4398c46863b093252fe67368b44edc6c13b17f4e6b0e4929dbf0bdb13f23/msgpack-1.1.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:fffee09044073e69f2bad787071aeec727183e7580443dfeb8556cbf1978d162", size = 402668, upload-time = "2025-10-08T09:15:19.003Z" }, + { url = "https://files.pythonhosted.org/packages/28/ce/698c1eff75626e4124b4d78e21cca0b4cc90043afb80a507626ea354ab52/msgpack-1.1.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5928604de9b032bc17f5099496417f113c45bc6bc21b5c6920caf34b3c428794", size = 419040, upload-time = "2025-10-08T09:15:20.183Z" }, + { url = "https://files.pythonhosted.org/packages/67/32/f3cd1667028424fa7001d82e10ee35386eea1408b93d399b09fb0aa7875f/msgpack-1.1.2-cp313-cp313-win32.whl", hash = "sha256:a7787d353595c7c7e145e2331abf8b7ff1e6673a6b974ded96e6d4ec09f00c8c", size = 65037, upload-time = "2025-10-08T09:15:21.416Z" }, + { url = "https://files.pythonhosted.org/packages/74/07/1ed8277f8653c40ebc65985180b007879f6a836c525b3885dcc6448ae6cb/msgpack-1.1.2-cp313-cp313-win_amd64.whl", hash = "sha256:a465f0dceb8e13a487e54c07d04ae3ba131c7c5b95e2612596eafde1dccf64a9", size = 72631, upload-time = "2025-10-08T09:15:22.431Z" }, + { url = "https://files.pythonhosted.org/packages/e5/db/0314e4e2db56ebcf450f277904ffd84a7988b9e5da8d0d61ab2d057df2b6/msgpack-1.1.2-cp313-cp313-win_arm64.whl", hash = "sha256:e69b39f8c0aa5ec24b57737ebee40be647035158f14ed4b40e6f150077e21a84", size = 64118, upload-time = "2025-10-08T09:15:23.402Z" }, +] + [[package]] name = "multidict" version = "6.7.1" @@ -2438,7 +2482,16 @@ dependencies = [ { name = "pyyaml" }, { name = "scikit-learn" }, { name = "tqdm" }, - { name = "zarr" }, + { name = "zarr", version = "3.1.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, + { name = "zarr", version = "3.2.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, +] + +[package.optional-dependencies] +viewer = [ + { name = "trame" }, + { name = "trame-vtk" }, + { name = "trame-vuetify" }, + { name = "vtk" }, ] [package.dev-dependencies] @@ -2471,8 +2524,13 @@ requires-dist = [ { name = "pyyaml", specifier = ">=6,<7" }, { name = "scikit-learn", specifier = ">=1.4,<2" }, { name = "tqdm", specifier = ">=4.60,<5" }, + { name = "trame", marker = "extra == 'viewer'", specifier = ">=3.6,<4.0" }, + { name = "trame-vtk", marker = "extra == 'viewer'", specifier = ">=2.8,<3.0" }, + { name = "trame-vuetify", marker = "extra == 'viewer'", specifier = ">=2.7,<3.0" }, + { name = "vtk", marker = "extra == 'viewer'", specifier = ">=9.6.1" }, { name = "zarr", specifier = ">=3.1,<4" }, ] +provides-extras = ["viewer"] [package.metadata.requires-dev] dev = [ @@ -3303,9 +3361,83 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359, upload-time = "2024-04-19T11:11:46.763Z" }, ] +[[package]] +name = "trame" +version = "3.12.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyyaml" }, + { name = "trame-client" }, + { name = "trame-common" }, + { name = "trame-server" }, + { name = "wslink" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/35/ac/ebd44ac237841d131314e41e0b1654926b77517b0553d7a7f4227778db07/trame-3.12.0.tar.gz", hash = "sha256:88b861162cb8b025e84e93f17dcfd43a84d02d2c1608c9f6d58e3cd646a50c05", size = 23493, upload-time = "2025-08-18T20:21:40.655Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/70/15/5869b2c7556fce52306b6b65b06ec7c088f063b865cdfa75ad30bc229b7c/trame-3.12.0-py3-none-any.whl", hash = "sha256:9b33020625e0d1710d060c0fabe7b3be0e31b5e5138439ec9a796faf6fe96915", size = 28516, upload-time = "2025-08-18T20:21:39.037Z" }, +] + +[[package]] +name = "trame-client" +version = "3.12.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "trame-common" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/eb/69/472f6e77e549b4a3129523ae959321ad751425fd92d75cbd5d0fe427685c/trame_client-3.12.1.tar.gz", hash = "sha256:7c310bce0a1d21e978f8c5e55d9b14e07111749164046f6678c2b2edbaf7bfc1", size = 246229, upload-time = "2026-04-29T22:33:55.69Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ac/98/4906ab32589659039a9dc4d3c1a606fd8cacbef436c27e191e74864d5d0d/trame_client-3.12.1-py3-none-any.whl", hash = "sha256:e72306222cd5520a468b5ca28bb65d8e44fe7981ddc861b78eea13c62abbcd43", size = 250749, upload-time = "2026-04-29T22:33:53.859Z" }, +] + +[[package]] +name = "trame-common" +version = "1.1.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ae/86/cbb08d6b5229783781a4a1ee882c95ab7c905d163f610b841335e6ddd759/trame_common-1.1.3.tar.gz", hash = "sha256:25a3894823bebf509d3bad2b0c545fbeee9eed5d6320d94f781ec595c18d8068", size = 18632, upload-time = "2026-03-17T22:52:35.223Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/40/bf161cf981eebf94bffbe9c23f4b35bf592b44d20b47d734258a17f1729c/trame_common-1.1.3-py3-none-any.whl", hash = "sha256:8d93cda32cfea869aaabaec5d91ded369882b1e7f28c0dba2a101a7896cfa5b2", size = 21977, upload-time = "2026-03-17T22:52:34.191Z" }, +] + +[[package]] +name = "trame-server" +version = "3.10.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "more-itertools" }, + { name = "wslink" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/52/54/e5a974c09f94bd795c20c311405ffa132f189ac609211305552d238a46ad/trame_server-3.10.0.tar.gz", hash = "sha256:0c341de976f758ff8e6076991e7f30be180384d4f386cf29aefa3915b801d118", size = 39765, upload-time = "2026-01-13T23:22:34.653Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/56/3a/d895d2069c9bf9288efde97aaa22845d3c711a7af031605863ac4019b7fc/trame_server-3.10.0-py3-none-any.whl", hash = "sha256:eb282f6bc6fa8fdbb2c65b8e6d22e088a27b56fe0b7a12f07cf2d9ea546bd935", size = 44458, upload-time = "2026-01-13T23:22:33.103Z" }, +] + +[[package]] +name = "trame-vtk" +version = "2.11.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "trame-client" }, +] 
+sdist = { url = "https://files.pythonhosted.org/packages/10/de/b72ec543cf8f70ee0ef4645d04e911155db3dcba545a9cf35d6c80e849c9/trame_vtk-2.11.8.tar.gz", hash = "sha256:bef4a35d86d57bf9b4af44dda8f361f917b141e4f624c9ab7278b6c48d171e74", size = 810254, upload-time = "2026-04-24T00:28:17.494Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/11/aff660ffcc0f65546da4340902cd064cafda26e0a7750f6468a27378c717/trame_vtk-2.11.8-py3-none-any.whl", hash = "sha256:31c8220f59dcc3b5f2fcfe6de8b9796e8bdb7db5dcf790ee01df83d44e79a413", size = 831787, upload-time = "2026-04-24T00:28:15.317Z" }, +] + +[[package]] +name = "trame-vuetify" +version = "2.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "trame-client" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/91/9b/2eba8ec5eeba08d15c4e3758c028d1504d0b73c409e33171185e1bb03839/trame_vuetify-2.9.0.tar.gz", hash = "sha256:86cfa1387b97e9f18d15ce98ee238b6e6c0e0f921935aab6737ae0bed74ee70a", size = 4910348, upload-time = "2025-03-28T22:40:15.543Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/34/02/6a8f4a46ca0470c4d9c3ba9f3d97d4b3b19b6889eee751bdd9cde78b9792/trame_vuetify-2.9.0-py3-none-any.whl", hash = "sha256:3db6a6b3384c313befb9d8f0eaf39a9e6cd1d2b882babd1c85d8779ff0f4f2bc", size = 4938098, upload-time = "2025-03-28T22:40:13.329Z" }, +] + [[package]] name = "typer" -version = "0.25.0" +version = "0.25.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-doc" }, @@ -3313,9 +3445,9 @@ dependencies = [ { name = "rich" }, { name = "shellingham" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/7b/27/ede8cec7596e0041ba7e7b80b47d132562f56ff454313a16f6084e555c9f/typer-0.25.0.tar.gz", hash = "sha256:123eaf9f19bb40fd268310e12a542c0c6b4fab9c98d9d23342a01ff95e3ce930", size = 120150, upload-time = "2026-04-26T08:46:14.767Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e4/51/9aed62104cea109b820bbd6c14245af756112017d309da813ef107d42e7e/typer-0.25.1.tar.gz", hash = "sha256:9616eb8853a09ffeabab1698952f33c6f29ffdbceb4eaeecf571880e8d7664cc", size = 122276, upload-time = "2026-04-30T19:32:16.964Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9a/72/193d4e586ec5a4db834a36bbeb47641a62f951f114ffd0fe5b1b46e8d56f/typer-0.25.0-py3-none-any.whl", hash = "sha256:ac01b48823d3db9a83c9e164338057eadbb1c9957a2a6b4eeb486669c560b5dc", size = 55993, upload-time = "2026-04-26T08:46:15.889Z" }, + { url = "https://files.pythonhosted.org/packages/3f/f9/2b3ff4e56e5fa7debfaf9eb135d0da96f3e9a1d5b27222223c7296336e5f/typer-0.25.1-py3-none-any.whl", hash = "sha256:75caa44ed46a03fb2dab8808753ffacdbfea88495e74c85a28c5eefcf5f39c89", size = 58409, upload-time = "2026-04-30T19:32:18.271Z" }, ] [[package]] @@ -3372,6 +3504,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4b/eb/03bfb1299d4c4510329e470f13f9a4ce793df7fcb5a2fd3510f911066f61/virtualenv-21.3.0-py3-none-any.whl", hash = "sha256:4d28ee41f6d9ec8f1f00cd472b9ffbcedda1b3d3b9a575b5c94a2d004fd51bd7", size = 7594690, upload-time = "2026-04-27T17:05:55.468Z" }, ] +[[package]] +name = "vtk" +version = "9.6.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "matplotlib" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/cf/70/8a68245293652aeba3448230ef30b90ab7aaa199fc158e7af8c4de66edf3/vtk-9.6.1-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:a2945c0320b5df8f697d49f7d759b2c230ac293188158574526c20bbcaf10241", size = 114551474, 
upload-time = "2026-03-26T23:34:29.585Z" }, + { url = "https://files.pythonhosted.org/packages/b4/4d/cdc2b1eb0ea3e322dc707a08e3d145ed556d897eb10385a923cbc932edc0/vtk-9.6.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b49b3c36e599f652077e60ead865957a65b557a1b53bcd60b26bdaabb81d170d", size = 106761418, upload-time = "2026-03-26T23:34:34.064Z" }, + { url = "https://files.pythonhosted.org/packages/72/92/5c9b9cdfe2738cc7b0dd51adacae67456ef53fcedae16b21a2cf9fbbd767/vtk-9.6.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3b3537cae99226f3082d3aeef2350b7329ee3cef7e7bd88d4ecacfcbfdadfaeb", size = 145873720, upload-time = "2026-03-26T23:34:39.925Z" }, + { url = "https://files.pythonhosted.org/packages/82/04/029bbc011f2346719e770e0ac961ff419948817a16fcda1249fe17a13525/vtk-9.6.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:30a21c6810d2465dc34dd5987f9fb566dcb8d4e65e06367d10a018c24eea6747", size = 135625426, upload-time = "2026-03-27T13:48:20.901Z" }, + { url = "https://files.pythonhosted.org/packages/ca/4f/bb831b2c46d63db2e6bfa11dcd8b405d526ed376390af66a27f6949749cf/vtk-9.6.1-cp311-cp311-win_amd64.whl", hash = "sha256:dde3627b9d33b75efebe2465183cbc682a9f9a7c1529cf027a8871e60e11b3b2", size = 81247644, upload-time = "2026-03-27T13:49:52.1Z" }, + { url = "https://files.pythonhosted.org/packages/95/89/c274101ec7b9bf7356333fdacf5e634803fe6b40f776e82c6ce9d941e0ad/vtk-9.6.1-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:b8125e3e3bc3160e18853a15be98101d0efe662c16036179ab15ddf1669b32af", size = 114729308, upload-time = "2026-03-27T13:50:37.547Z" }, + { url = "https://files.pythonhosted.org/packages/9d/1a/ecbebaf31724a00f85fc4dbf95992b507328f615362ee8fa5ea1a38cf9d6/vtk-9.6.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:956d05b8c53c6a9eba569de244e9c8229815bbb3e024bb9954fafe163407e66d", size = 106814956, upload-time = "2026-03-27T13:51:24.324Z" }, + { url = "https://files.pythonhosted.org/packages/46/66/ba3c8b277cfa8058e982bfbd47875d9c6b4c06e65f98d577c69a2628f8d4/vtk-9.6.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9728e8d41889a0f105b5d20a73a4da80f398b2cfe6057fa7a94cd61128c3ceb4", size = 145920093, upload-time = "2026-03-27T13:53:12.49Z" }, + { url = "https://files.pythonhosted.org/packages/f5/cb/0bbf91cd45a8d8f5453fe01cddf44c913db6316b3a2b15f41893ae0ca9ad/vtk-9.6.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3b5ec2e56bd6165189aa2e6e896edda29460e63040f897e1a123a1592810266d", size = 135683842, upload-time = "2026-03-27T13:52:15.218Z" }, + { url = "https://files.pythonhosted.org/packages/08/c0/653c94939498a3976157f054b830ade5c1da48ae288a23547f55fc25a262/vtk-9.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:4022fda8af46636f74c3c1932c2365da13a1dc8779a6b1ea4b13dc5bbcdb729f", size = 81262921, upload-time = "2026-03-27T13:53:50.192Z" }, + { url = "https://files.pythonhosted.org/packages/a8/8d/16e597f86241772fe188bbdd86a74ce48eadd2dd9513e2410b4ea07f78aa/vtk-9.6.1-cp313-cp313-macosx_10_10_x86_64.whl", hash = "sha256:88983bce26f7665ac6e4fb7de16cf53b896140a1a6cadd942d3c13e7c74a8530", size = 114747320, upload-time = "2026-03-27T13:54:33.138Z" }, + { url = "https://files.pythonhosted.org/packages/63/ca/8f0c19bded437423479d0d3ff0b7457cf6ef68def322666df867e6dacc0f/vtk-9.6.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:94ed369a54c6cfacea0b34f42d7d3ef41fa06c1aabfc75d93cabdc9047454293", size = 106817051, upload-time = "2026-03-27T13:55:21.903Z" }, + { url = 
"https://files.pythonhosted.org/packages/82/22/c1d98e6e191481af1e5c82ae3fa750798d868aa442a76db027f6a7901b95/vtk-9.6.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:deeb86794cd42f922ea75711b9717e45841777624203727eb84595b709af1382", size = 145920554, upload-time = "2026-03-27T13:57:14.258Z" }, + { url = "https://files.pythonhosted.org/packages/16/5d/658f60209de7b41b634178aee1f458bcad149aa2654d16bd023c09afd29c/vtk-9.6.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:fef8abc33168ad38b2622cf29048b7d5fe48a45789bf0a0421781f5cafa1e554", size = 135686060, upload-time = "2026-03-27T13:56:23.89Z" }, + { url = "https://files.pythonhosted.org/packages/f0/31/e4eb318901a8e736c936491e759ce03a1656792f728ae912db0e20997e9a/vtk-9.6.1-cp313-cp313-win_amd64.whl", hash = "sha256:a5db7b2ff8fc3f56b547c8b9b7bc117a869c902683c86ef5cd6197c087f66183", size = 81264861, upload-time = "2026-03-27T13:57:47.164Z" }, +] + [[package]] name = "wcwidth" version = "0.6.0" @@ -3381,6 +3538,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/68/5a/199c59e0a824a3db2b89c5d2dade7ab5f9624dbf6448dc291b46d5ec94d3/wcwidth-0.6.0-py3-none-any.whl", hash = "sha256:1a3a1e510b553315f8e146c54764f4fb6264ffad731b3d78088cdb1478ffbdad", size = 94189, upload-time = "2026-02-06T19:19:39.646Z" }, ] +[[package]] +name = "wslink" +version = "2.5.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohttp" }, + { name = "msgpack" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/67/06/8340b98693fe886af59a86b69ca0eb9f8095d6dbdd7a28496d9f3a8fb33f/wslink-2.5.6.tar.gz", hash = "sha256:12f3a6135cb3a74c4f1af758942c6a4b34a51fcb700839abfb91b13064a4244c", size = 29784, upload-time = "2026-03-12T00:35:26.018Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/26/d23eb1cc5c8f084d861bbb7035fa911ecb86be51810428dd6284398d021a/wslink-2.5.6-py3-none-any.whl", hash = "sha256:89f23bad3b3522dcb78be84907487f6cf742c6b4526a666fd3e4013f5f705015", size = 37165, upload-time = "2026-03-12T00:35:24.655Z" }, +] + [[package]] name = "xxhash" version = "3.7.0" @@ -3566,19 +3736,46 @@ wheels = [ name = "zarr" version = "3.1.6" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.12' and sys_platform == 'win32'", + "python_full_version < '3.12' and sys_platform == 'emscripten'", + "python_full_version < '3.12' and sys_platform != 'emscripten' and sys_platform != 'win32'", +] dependencies = [ - { name = "donfig" }, - { name = "google-crc32c" }, - { name = "numcodecs" }, - { name = "numpy" }, - { name = "packaging" }, - { name = "typing-extensions" }, + { name = "donfig", marker = "python_full_version < '3.12'" }, + { name = "google-crc32c", marker = "python_full_version < '3.12'" }, + { name = "numcodecs", marker = "python_full_version < '3.12'" }, + { name = "numpy", marker = "python_full_version < '3.12'" }, + { name = "packaging", marker = "python_full_version < '3.12'" }, + { name = "typing-extensions", marker = "python_full_version < '3.12'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/31/5a/b8a0cf39a14c770c30bd1f2d120c54000c8cd9e84e8e79f38d9a7ce58071/zarr-3.1.6.tar.gz", hash = "sha256:d95e72cbea4b90e9a70679468b8266400331756232576ae2b43400ac5108d0eb", size = 386531, upload-time = "2026-03-23T17:25:18.748Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/de/7c/ba8ca8cbe9dbef8e83a95fc208fed8e6686c98b4719aaa0aa7f3d31fe390/zarr-3.1.6-py3-none-any.whl", hash = 
"sha256:b5a82c5079d1c3d4ee8f06746fa3b9a98a7d804300fa3f4be154362a33e1207e", size = 295655, upload-time = "2026-03-23T17:25:17.189Z" }, ] +[[package]] +name = "zarr" +version = "3.2.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12' and sys_platform == 'win32'", + "python_full_version >= '3.12' and sys_platform == 'emscripten'", + "python_full_version >= '3.12' and sys_platform != 'emscripten' and sys_platform != 'win32'", +] +dependencies = [ + { name = "donfig", marker = "python_full_version >= '3.12'" }, + { name = "google-crc32c", marker = "python_full_version >= '3.12'" }, + { name = "numcodecs", marker = "python_full_version >= '3.12'" }, + { name = "numpy", marker = "python_full_version >= '3.12'" }, + { name = "packaging", marker = "python_full_version >= '3.12'" }, + { name = "typing-extensions", marker = "python_full_version >= '3.12'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/db/27/8f391a4304f503ab6f4df6e1724380ea2e35e78a5d1ba973ba2b1347df5b/zarr-3.2.0.tar.gz", hash = "sha256:5867fa8dd7910541075531368c8eaa6f35957ab5413c68c168830e83948665ed", size = 454948, upload-time = "2026-04-30T22:18:03.074Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0b/9e/2e99d08824f300046eba83b480d6be17f771f57eed80dd7c162381cbe4de/zarr-3.2.0-py3-none-any.whl", hash = "sha256:c693bd4ae24328f242e47e9e1ced221e919d9f62cad71030fd059e398320e555", size = 318784, upload-time = "2026-04-30T22:18:01.13Z" }, +] + [[package]] name = "zipp" version = "3.23.1"