Skip to content

Commit b4db6b7

Browse files
wip
1 parent 76d7194 commit b4db6b7

8 files changed

Lines changed: 82 additions & 83 deletions

File tree

docs/source/core_concepts/viewer.md

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ Useful options:
4040
| Option | Default | Description |
4141
| ----------------- | ----------- | ------------------------------------------------------------------------------------------------ |
4242
| `--datasets-root` | *required* | Directory containing one sub-directory per PLAID dataset. A single-dataset directory also works. |
43-
| `--cache-dir` | `None` | Persistent artifact cache. When omitted, an ephemeral temp dir is used and cleaned at shutdown. |
4443
| `--host` | `127.0.0.1` | Bind address for the trame HTTP server. |
4544
| `--port` | `8080` | Port exposed by the trame HTTP server. |
4645
| `--backend-id` | `disk` | PLAID backend identifier embedded in sample references and the cache key. |
@@ -145,7 +144,9 @@ error.
145144

146145
## Cache layout
147146

148-
Artifacts are written under:
147+
Artifacts are written under an **ephemeral** per-process temp directory
148+
created by `plaid.viewer.cache.CacheRoot` (named
149+
`plaid-viewer-{pid}-{token}` under `tempfile.gettempdir()`):
149150

150151
```
151152
<cache_root>/datasets/<dataset_id>/<split>/<sample_id>/<key_prefix>/
@@ -155,10 +156,18 @@ Artifacts are written under:
155156
metadata.json # cache key, sample ref, export version, ...
156157
```
157158

159+
The cache holds **at most one artifact at a time**: once VTK has loaded
160+
a sample's CGNS into memory the on-disk copy is no longer needed, so
161+
the next `ensure_artifact` call for a different sample removes the previous folder before
162+
writing the new one.
163+
164+
The whole cache root is deleted at shutdown; cleanup is ensured by four complementary
165+
layers: `atexit`, `SIGINT` / `SIGTERM` handlers, the `with CacheRoot()`
166+
context manager used by the CLI, and an orphan sweep at startup that
167+
removes directories left behind by previously-crashed processes.
168+
158169
The cache key is a SHA-256 of the sample reference, backend id, PLAID
159-
version and `ViewerConfig.export_version`. Re-running the viewer with
160-
the same inputs reuses existing artifacts; bumping `export_version`
161-
invalidates them.
170+
version and `ViewerConfig.export_version`.
162171

163172
## Programmatic usage
164173

@@ -170,7 +179,7 @@ from plaid.viewer.services import ParaviewArtifactService, PlaidDatasetService
170179
from plaid.viewer.trame_app.server import build_server
171180

172181
config = ViewerConfig(datasets_root=Path("/path/to/datasets"))
173-
with CacheRoot(persistent_dir=config.cache_dir) as cache:
182+
with CacheRoot() as cache:
174183
datasets = PlaidDatasetService(config)
175184
artifacts = ParaviewArtifactService(datasets, cache.path)
176185
server = build_server(datasets, artifacts)

src/plaid/viewer/cache.py

Lines changed: 19 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
"""Ephemeral-by-default artifact cache for the dataset viewer.
1+
"""Ephemeral artifact cache for the dataset viewer.
22
3-
The cache lives under a per-process temporary directory by default and is
4-
removed at shutdown. Four cleanup layers cover all practical failure modes:
3+
The cache lives under a per-process temporary directory and is removed at
4+
shutdown. Four cleanup layers cover all practical failure modes:
55
66
1. ``atexit.register`` for normal Python exit.
77
2. Signal handlers for ``SIGINT`` / ``SIGTERM``.
8-
3. A FastAPI lifespan context (provided by callers).
8+
3. A context manager (``with CacheRoot() as cache:`` in the CLI).
99
4. An orphan sweep at startup that removes directories left behind by
1010
previously-crashed processes.
1111
"""
@@ -105,39 +105,28 @@ def sweep_orphans(temp_root: Path | None = None) -> list[Path]:
105105

106106

107107
class CacheRoot:
108-
"""Context-manager-friendly artifact cache directory.
108+
"""Context-manager-friendly ephemeral artifact cache directory.
109109
110-
When ``persistent_dir`` is ``None`` (the default), a new ephemeral tempdir
111-
named ``plaid-viewer-{pid}-{token}`` is created. The directory is
112-
removed at process exit (``atexit``), on ``SIGINT`` / ``SIGTERM``, and
113-
when the context manager is closed.
114-
115-
When ``persistent_dir`` is provided, that directory is used as-is and is
116-
**not** removed. Callers wanting persistence pass this.
110+
Creates a new tempdir named ``plaid-viewer-{pid}-{token}`` under the OS
111+
temp root. The directory is removed at process exit (``atexit``), on
112+
``SIGINT`` / ``SIGTERM``, and when the context manager is closed.
117113
"""
118114

119115
def __init__(
120116
self,
121-
persistent_dir: Path | None = None,
122117
*,
123118
install_signal_handlers: bool = True,
124119
run_orphan_sweep: bool = True,
125120
) -> None:
126-
self._ephemeral = persistent_dir is None
127-
if self._ephemeral:
128-
if run_orphan_sweep:
129-
sweep_orphans()
130-
token = uuid.uuid4().hex[:12]
131-
base = Path(tempfile.gettempdir())
132-
self._path = base / f"{_EPHEMERAL_PREFIX}{os.getpid()}-{token}"
133-
self._path.mkdir(parents=True, exist_ok=False)
134-
atexit.register(self._safe_cleanup)
135-
if install_signal_handlers:
136-
self._install_signal_handlers()
137-
else:
138-
assert persistent_dir is not None
139-
self._path = Path(persistent_dir)
140-
self._path.mkdir(parents=True, exist_ok=True)
121+
if run_orphan_sweep:
122+
sweep_orphans()
123+
token = uuid.uuid4().hex[:12]
124+
base = Path(tempfile.gettempdir())
125+
self._path = base / f"{_EPHEMERAL_PREFIX}{os.getpid()}-{token}"
126+
self._path.mkdir(parents=True, exist_ok=False)
127+
atexit.register(self._safe_cleanup)
128+
if install_signal_handlers:
129+
self._install_signal_handlers()
141130
self._closed = False
142131

143132
# ------------------------------------------------------------------ API
@@ -147,18 +136,12 @@ def path(self) -> Path:
147136
"""Root directory of the cache."""
148137
return self._path
149138

150-
@property
151-
def is_ephemeral(self) -> bool:
152-
"""Whether the cache directory is automatically cleaned up."""
153-
return self._ephemeral
154-
155139
def close(self) -> None:
156-
"""Remove the cache directory if it is ephemeral."""
140+
"""Remove the cache directory."""
157141
if self._closed:
158142
return
159143
self._closed = True
160-
if self._ephemeral:
161-
self._safe_cleanup()
144+
self._safe_cleanup()
162145

163146
def __enter__(self) -> "CacheRoot": # noqa: D105
164147
return self

src/plaid/viewer/cli.py

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -71,15 +71,6 @@ def _build_parser() -> argparse.ArgumentParser:
7171
),
7272
)
7373

74-
parser.add_argument(
75-
"--cache-dir",
76-
type=Path,
77-
default=None,
78-
help=(
79-
"Persistent artifact cache directory. When omitted, an ephemeral "
80-
"per-process temp directory is used and cleaned up at shutdown."
81-
),
82-
)
8374
parser.add_argument("--host", default="127.0.0.1", help="Trame server host.")
8475
parser.add_argument("--port", type=int, default=8080, help="Trame server port.")
8576
parser.add_argument(
@@ -141,15 +132,14 @@ def main(argv: list[str] | None = None) -> int:
141132
browse_roots = tuple(args.browse_roots) if args.browse_roots else ()
142133
config = ViewerConfig(
143134
datasets_root=effective_datasets_root,
144-
cache_dir=args.cache_dir,
145135
backend_id=args.backend_id,
146136
browse_roots=browse_roots,
147137
allow_root_change=not args.disable_root_change,
148138
initial_dataset_id=args.dataset_id,
149139
allow_dataset_change=not args.disable_dataset_change,
150140
)
151141

152-
with CacheRoot(persistent_dir=config.cache_dir) as cache:
142+
with CacheRoot() as cache:
153143
dataset_service = PlaidDatasetService(config)
154144
for repo_id in args.hub_repo or []:
155145
try:

src/plaid/viewer/config.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,6 @@ class ViewerConfig:
1616
(or the root may itself be such a folder). When ``None``, the
1717
viewer starts without a root and the user is expected to pick one
1818
interactively (when ``allow_root_change`` is True).
19-
cache_dir: Root directory for ParaView artifacts. When ``None``, an
20-
ephemeral per-process directory is created under the OS temp root
21-
and cleaned up at shutdown.
2219
backend_id: PLAID backend identifier embedded in :class:`SampleRef`
2320
objects and in the artifact cache key.
2421
export_version: Opaque string mixed into the artifact cache key. Bump
@@ -41,7 +38,6 @@ class ViewerConfig:
4138
"""
4239

4340
datasets_root: Path | None = None
44-
cache_dir: Path | None = None
4541
backend_id: str = "disk"
4642
export_version: str = "1"
4743
extra_cache_key_fields: dict[str, str] = field(default_factory=dict)

src/plaid/viewer/services/paraview_artifact_service.py

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -149,29 +149,47 @@ def __init__(
149149
self._export_version = export_version
150150
self._extra = dict(extra_cache_key_fields or {})
151151
self._by_id: dict[str, ParaviewArtifact] = {}
152+
# Path of the most recently ensured artifact. The cache keeps at most
153+
# one artifact on disk at any time: once VTK has read the CGNS file
154+
# into memory (``vtkCGNSReader.Update()`` in the trame pipeline), the
155+
# on-disk copy is no longer needed, so we delete it as soon as the
156+
# user asks for another sample.
157+
self._current_root: Path | None = None
152158

153159
# ------------------------------------------------------------ Public API
154160

155161
def ensure_artifact(
156162
self, ref: SampleRef, *, force: bool = False
157163
) -> ParaviewArtifact:
158-
"""Return a :class:`ParaviewArtifact` for ``ref``, creating it if needed."""
164+
"""Return a :class:`ParaviewArtifact` for ``ref``, creating it if needed.
165+
166+
The cache holds at most one artifact: any previously-ensured artifact
167+
whose layout root differs from ``ref``'s is removed from disk.
168+
"""
159169
cache_key = _build_cache_key(
160170
ref, export_version=self._export_version, extra=self._extra
161171
)
162172
layout = _artifact_layout(self._cache_root, ref, cache_key)
163173

174+
# Evict the previous artifact (if any) as soon as the user requests
175+
# a different one. ``force`` always rebuilds the current one.
176+
if (
177+
self._current_root is not None
178+
and self._current_root != layout.root
179+
and self._current_root.exists()
180+
):
181+
shutil.rmtree(self._current_root, ignore_errors=True)
182+
self._by_id.clear()
164183
if force and layout.root.exists():
165184
shutil.rmtree(layout.root)
166185

167186
if layout.metadata_path.is_file() and not force:
168187
artifact = self._load_existing(layout, cache_key)
169-
self._by_id[artifact.artifact_id] = artifact
170-
return artifact
171-
172-
layout.root.mkdir(parents=True, exist_ok=True)
173-
artifact = self._create(ref, layout, cache_key)
174-
self._by_id[artifact.artifact_id] = artifact
188+
else:
189+
layout.root.mkdir(parents=True, exist_ok=True)
190+
artifact = self._create(ref, layout, cache_key)
191+
self._by_id = {artifact.artifact_id: artifact}
192+
self._current_root = layout.root
175193
return artifact
176194

177195
def get(self, artifact_id: str) -> ParaviewArtifact:

tests/viewer/test_cache.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,20 +16,10 @@ def test_ephemeral_cache_is_cleaned_up_on_close(tmp_path: Path, monkeypatch) ->
1616
cache = CacheRoot(install_signal_handlers=False, run_orphan_sweep=False)
1717
path = cache.path
1818
assert path.exists()
19-
assert cache.is_ephemeral is True
2019
cache.close()
2120
assert not path.exists()
2221

2322

24-
def test_persistent_cache_is_preserved(tmp_path: Path) -> None:
25-
target = tmp_path / "persistent"
26-
cache = CacheRoot(persistent_dir=target, install_signal_handlers=False)
27-
assert cache.path == target
28-
assert cache.is_ephemeral is False
29-
cache.close()
30-
assert target.exists()
31-
32-
3323
def test_context_manager_removes_ephemeral_dir(tmp_path: Path, monkeypatch) -> None:
3424
monkeypatch.setenv("TMPDIR", str(tmp_path))
3525
with CacheRoot(install_signal_handlers=False, run_orphan_sweep=False) as cache:

tests/viewer/test_cli.py

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ def test_build_parser_defaults() -> None:
1818
assert args.disable_root_change is False
1919
assert args.dataset_id is None
2020
assert args.disable_dataset_change is False
21-
assert args.cache_dir is None
2221
assert args.host == "127.0.0.1"
2322
assert args.port == 8080
2423
assert args.backend_id == "disk"
@@ -27,7 +26,6 @@ def test_build_parser_defaults() -> None:
2726

2827
def test_build_parser_accepts_all_options(tmp_path: Path) -> None:
2928
datasets_root = tmp_path / "datasets"
30-
cache_dir = tmp_path / "cache"
3129
browse_a = tmp_path / "a"
3230
browse_b = tmp_path / "b"
3331

@@ -42,8 +40,6 @@ def test_build_parser_accepts_all_options(tmp_path: Path) -> None:
4240
"--dataset-id",
4341
"dataset-b",
4442
"--disable-dataset-change",
45-
"--cache-dir",
46-
str(cache_dir),
4743
"--host",
4844
"0.0.0.0",
4945
"--port",
@@ -62,7 +58,6 @@ def test_build_parser_accepts_all_options(tmp_path: Path) -> None:
6258
assert args.disable_root_change is True
6359
assert args.dataset_id == "dataset-b"
6460
assert args.disable_dataset_change is True
65-
assert args.cache_dir == cache_dir
6661
assert args.host == "0.0.0.0"
6762
assert args.port == 9000
6863
assert args.backend_id == "zarr"
@@ -75,8 +70,8 @@ def test_main_wires_services_without_starting_real_runtime(
7570
calls: list[tuple[str, object]] = []
7671

7772
class FakeCache:
78-
def __init__(self, persistent_dir=None):
79-
calls.append(("cache", persistent_dir))
73+
def __init__(self):
74+
calls.append(("cache", None))
8075
self.path = tmp_path / "cache-root"
8176

8277
def __enter__(self):
@@ -132,8 +127,6 @@ def fake_import(name, globals=None, locals=None, fromlist=(), level=0): # noqa:
132127
assert (
133128
cli_mod.main(
134129
[
135-
"--cache-dir",
136-
str(tmp_path / "cache"),
137130
"--host",
138131
"0.0.0.0",
139132
"--port",

tests/viewer/test_paraview_artifact_service.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,26 @@ def test_force_recreates_artifact(tmp_path: Path, ref: SampleRef) -> None:
102102
assert second.artifact_id == first.artifact_id # cache key is deterministic
103103

104104

105+
def test_ensure_artifact_evicts_previous_artifact(tmp_path: Path) -> None:
106+
"""The cache keeps at most one artifact on disk."""
107+
service = ParaviewArtifactService(_FakeDatasetService(), tmp_path)
108+
ref_a = SampleRef(backend_id="disk", dataset_id="ds", split="train", sample_id="0")
109+
ref_b = SampleRef(backend_id="disk", dataset_id="ds", split="train", sample_id="1")
110+
111+
first = service.ensure_artifact(ref_a)
112+
first_root = first.cgns_path.parent
113+
assert first_root.exists()
114+
115+
second = service.ensure_artifact(ref_b)
116+
second_root = second.cgns_path.parent
117+
assert second_root.exists()
118+
assert not first_root.exists()
119+
# The by-id lookup only exposes the current artifact.
120+
with pytest.raises(KeyError):
121+
service.get(first.artifact_id)
122+
assert service.get(second.artifact_id) is second
123+
124+
105125
def test_cache_key_is_deterministic(ref: SampleRef) -> None:
106126
key_a = _build_cache_key(ref, export_version="1")
107127
key_b = _build_cache_key(ref, export_version="1")

0 commit comments

Comments
 (0)