From a96cdf5784a3f00ccfc7296d69751ff9739381ec Mon Sep 17 00:00:00 2001 From: clemsgrs Date: Fri, 17 Apr 2026 09:52:26 +0000 Subject: [PATCH 1/3] small improvements + added doc website --- .github/workflows/docs.yaml | 64 ++++++ README.md | 18 +- docs/_generate_reference.py | 196 +++++++++++++++++++ docs/_static/.gitkeep | 1 + docs/_static/sidebar.css | 119 +++++++++++ docs/_templates/.gitkeep | 1 + docs/_templates/base.html | 9 + docs/_templates/layout.html | 11 ++ docs/_templates/page.html | 57 ++++++ docs/_templates/sidebar/github.html | 15 ++ docs/cli.md | 2 +- docs/conf.py | 68 +++++++ docs/index.rst | 44 +++++ docs/models.md | 180 ++++++++++++++++- docs/python-api.md | 13 ++ docs/reference.rst | 170 ++++++++++++++++ pyproject.toml | 7 + slide2vec/__init__.py | 11 +- slide2vec/api.py | 21 ++ slide2vec/distributed/direct_embed_worker.py | 1 + slide2vec/distributed/pipeline_worker.py | 1 + slide2vec/inference.py | 34 +++- slide2vec/progress.py | 22 +++ tasks/lessons.md | 4 + tests/test_docs.py | 45 +++++ tests/test_progress.py | 103 +++++++++- tests/test_regression_core.py | 52 +++++ tests/test_regression_inference.py | 64 ++++++ 28 files changed, 1324 insertions(+), 9 deletions(-) create mode 100644 .github/workflows/docs.yaml create mode 100644 docs/_generate_reference.py create mode 100644 docs/_static/.gitkeep create mode 100644 docs/_static/sidebar.css create mode 100644 docs/_templates/.gitkeep create mode 100644 docs/_templates/base.html create mode 100644 docs/_templates/layout.html create mode 100644 docs/_templates/page.html create mode 100644 docs/_templates/sidebar/github.html create mode 100644 docs/conf.py create mode 100644 docs/index.rst create mode 100644 docs/reference.rst create mode 100644 tests/test_docs.py diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml new file mode 100644 index 0000000..0924afa --- /dev/null +++ b/.github/workflows/docs.yaml @@ -0,0 +1,64 @@ +name: Documentation + +on: + pull_request: + types: 
[opened, synchronize, reopened] + push: + branches: [main] + workflow_dispatch: + +concurrency: + group: docs + cancel-in-progress: true + +permissions: + contents: read + pages: write + id-token: write + +jobs: + build: + name: Build docs + runs-on: ubuntu-latest + timeout-minutes: 60 + env: + FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true + + steps: + - name: Check out repository + uses: actions/checkout@v5 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: "3.11" + + - name: Install docs dependencies + run: | + python -m pip install --upgrade pip + pip install -e ".[testing,docs]" + + - name: Run docs smoke test + run: python -m pytest -q -o addopts= -p no:cov tests/test_docs.py + + - name: Build Sphinx site + run: python -m sphinx -W -b html docs docs/_build/html + + - name: Upload Pages artifact + uses: actions/upload-pages-artifact@v5.0.0 + with: + path: docs/_build/html + + deploy: + name: Publish docs + if: github.event_name == 'workflow_dispatch' || (github.event_name == 'push' && github.ref == 'refs/heads/main') + needs: build + runs-on: ubuntu-latest + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/README.md b/README.md index 6c26bc8..a8b4d88 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,8 @@ `slide2vec` is a Python package for efficient encoding of whole-slide images using publicly available foundation models. It builds on [`hs2p`](https://pypi.org/project/hs2p/) for fast preprocessing and exposes a focused surface around `Model`, `Pipeline`, and `ExecutionOptions`. 
+Documentation site: [https://clemsgrs.github.io/slide2vec/](https://clemsgrs.github.io/slide2vec/) + ## Installation ```shell @@ -37,6 +39,17 @@ x = embedded.x y = embedded.y ``` +Use `list_models()` when you want to inspect the shipped presets programmatically: + +```python +from slide2vec import list_models + +all_models = list_models() +tile_models = list_models("tile") +slide_models = list_models("slide") +patient_models = list_models("patient") +``` + Use `Pipeline(...)` for manifest-driven batch processing when you want artifacts written to disk instead of only in-memory outputs: ```python @@ -135,7 +148,8 @@ docker run --rm -it \ ## Documentation -- [`docs/cli.md`](docs/cli.md) for the config-driven CLI guide +- [Documentation website](https://clemsgrs.github.io/slide2vec/) for the polished docs site - [`docs/python-api.md`](docs/python-api.md) for the detailed API reference -- [`tutorials/api_walkthrough.ipynb`](tutorials/api_walkthrough.ipynb) for a notebook walkthrough of the API +- [`docs/cli.md`](docs/cli.md) for the config-driven CLI guide - [`docs/models.md`](docs/models.md) for the full supported-model catalog +- [`tutorials/api_walkthrough.ipynb`](tutorials/api_walkthrough.ipynb) for a notebook walkthrough of the API diff --git a/docs/_generate_reference.py b/docs/_generate_reference.py new file mode 100644 index 0000000..bc5f92e --- /dev/null +++ b/docs/_generate_reference.py @@ -0,0 +1,196 @@ +"""Generate the compact Sphinx reference page from public slide2vec metadata.""" + +from __future__ import annotations + +from dataclasses import fields, is_dataclass +from inspect import signature +from pathlib import Path +from textwrap import dedent +import sys + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT)) + +from slide2vec.api import ( # noqa: E402 + EmbeddedPatient, + EmbeddedSlide, + ExecutionOptions, + Model, + Pipeline, + PreprocessingConfig, + RunResult, + list_models, +) +from slide2vec.encoders import ( # noqa: E402 
+ PatientEncoder, + SlideEncoder, + TileEncoder, + encoder_registry, + register_encoder, +) + + +def _field_names(cls: type) -> str: + if not is_dataclass(cls): + raise TypeError(f"{cls!r} is not a dataclass") + return ", ".join(f"``{field.name}``" for field in fields(cls)) + + +def _constructor_knobs(cls: type) -> str: + params = [ + f"``{param.name}``" + for param in signature(cls.__init__).parameters.values() + if param.name != "self" + ] + return ", ".join(params) + + +def _list_table(rows: list[tuple[str, str, str, str]]) -> str: + lines = [".. list-table::", " :header-rows: 1", ""] + lines.extend( + [ + " * - Name", + " - Class", + " - Constructor knobs", + " - Notes", + ] + ) + for name, cls_name, knobs, notes in rows: + lines.extend( + [ + f" * - ``{name}``", + f" - ``{cls_name}``", + f" - {knobs}", + f" - {notes}", + ] + ) + return "\n".join(lines) + + +def _api_table(rows: list[tuple[str, str]]) -> str: + lines = [".. list-table::", " :header-rows: 1", ""] + lines.extend( + [ + " * - Symbol", + " - Description", + ] + ) + for symbol, desc in rows: + lines.extend( + [ + f" * - ``{symbol}``", + f" - {desc}", + ] + ) + return "\n".join(lines) + + +def _config_table(rows: list[tuple[str, str, str]]) -> str: + lines = [".. 
list-table::", " :header-rows: 1", ""] + lines.extend( + [ + " * - Config", + " - Main fields", + " - Purpose", + ] + ) + for name, fields_text, purpose in rows: + lines.extend( + [ + f" * - ``{name}``", + f" - {fields_text}", + f" - {purpose}", + ] + ) + return "\n".join(lines) + + +def build_reference_rst() -> str: + """Return the full compact reference page as reStructuredText.""" + + api_rows = [ + ("Model", "Direct in-memory embedding API for slide, tile, and patient workflows"), + ("Pipeline", "Manifest-driven batch processing and artifact writing"), + ("list_models", "Return the registered preset names, optionally filtered by level"), + ("PreprocessingConfig", "Whole-slide tiling, read-back, and spacing configuration"), + ("ExecutionOptions", "Runtime settings for batch size, precision, outputs, and workers"), + ("EmbeddedSlide", "In-memory result from Model.embed_slide(...) / Model.embed_slides(...)"), + ("EmbeddedPatient", "In-memory result from Model.embed_patient(...) / Model.embed_patients(...)"), + ] + + config_rows = [ + ( + "PreprocessingConfig", + _field_names(PreprocessingConfig), + "Whole-slide segmentation, read strategy, and tiling geometry", + ), + ( + "ExecutionOptions", + _field_names(ExecutionOptions), + "Runtime behavior and persisted output controls", + ), + ( + "RunResult", + _field_names(RunResult), + "Summary of a manifest-driven pipeline run", + ), + ] + + preset_rows = [] + for name in sorted(list_models()): + info = encoder_registry.info(name) + cls = encoder_registry.require(name) + notes = [] + level = str(info["level"]) + notes.append(f"level={level}") + if "default_output_variant" in info: + notes.append(f"default={info['default_output_variant']}") + if "supported_spacing_um" in info: + notes.append(f"spacing={info['supported_spacing_um']}") + preset_rows.append((name, cls.__name__, _constructor_knobs(cls), "; ".join(notes))) + + body = dedent( + """\ + Compact Reference + ================= + + This page is a concise index of the 
public API and encoder registry. Use the + guide pages for workflow details and the docstrings for the exact contracts. + + Main entry points + ----------------- + + """ + ) + body += _api_table(api_rows) + body += "\n\nEncoder contract\n----------------\n\n" + body += _api_table( + [ + ("TileEncoder", "Base class for encoders that consume tiles directly"), + ("SlideEncoder", "Base class for encoders that pool tile features into slide features"), + ("PatientEncoder", "Base class for encoders that pool slide embeddings into patient embeddings"), + ("register_encoder", "Decorator used to register a custom encoder class and metadata"), + ] + ) + body += "\n\nConfiguration dataclasses\n-------------------------\n\n" + body += _config_table(config_rows) + body += "\n\nRegistered presets\n------------------\n\n" + body += _list_table(preset_rows) + body += "\n\nUse this page as a concise index. Use the guide pages for workflow and the\ndocstrings for the exact API contract.\n" + return body + + +def write_reference_rst(path: str | Path | None = None) -> Path: + """Write the generated reference page to disk.""" + + target = Path(path) if path is not None else Path(__file__).with_name("reference.rst") + target.write_text(build_reference_rst(), encoding="utf-8") + return target + + +def main() -> None: + write_reference_rst() + + +if __name__ == "__main__": + main() diff --git a/docs/_static/.gitkeep b/docs/_static/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/docs/_static/.gitkeep @@ -0,0 +1 @@ + diff --git a/docs/_static/sidebar.css b/docs/_static/sidebar.css new file mode 100644 index 0000000..9c91dbe --- /dev/null +++ b/docs/_static/sidebar.css @@ -0,0 +1,119 @@ +.s2v-sidebar-github { + margin: 1rem 1rem 1.25rem 0; +} + +.s2v-sidebar-github__main { + display: flex; + align-items: center; + gap: 0.7rem; + padding: 0.75rem 0.85rem; + border: 1px solid var(--color-background-border); + border-radius: 0.65rem; + text-decoration: none; + color: 
var(--color-foreground-primary); + background: var(--color-background-primary); + transition: border-color 0.15s ease, background 0.15s ease; +} + +.s2v-sidebar-github__main:hover { + border-color: var(--color-brand-primary); + background: var(--color-background-hover); + text-decoration: none; +} + +.s2v-sidebar-github__icon { + width: 1.1rem; + height: 1.1rem; + flex: 0 0 auto; + opacity: 0.7; +} + +.s2v-sidebar-github__text { + display: flex; + flex-direction: column; + gap: 0.08rem; + flex: 1; + min-width: 0; +} + +.s2v-sidebar-github__repo-name { + font-size: 0.84rem; + font-weight: 700; + line-height: 1.25; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +} + +.s2v-sidebar-github__cta { + font-size: 0.71rem; + font-weight: 400; + color: var(--color-foreground-muted); + line-height: 1.25; +} + +.s2v-sidebar-github__arrow { + width: 0.65rem; + height: 0.65rem; + flex: 0 0 auto; + opacity: 0.3; + transition: opacity 0.15s ease, transform 0.15s ease; +} + +.s2v-sidebar-github__main:hover .s2v-sidebar-github__arrow { + opacity: 0.8; + color: var(--color-brand-primary); + transform: translate(1px, -1px); +} + +@media (max-width: 63em) { + .s2v-sidebar-github { + margin-top: 0.75rem; + } +} + +dl.py.class, +dl.py.function { + border: 1px solid var(--color-background-border); + border-left: 3px solid var(--color-brand-primary); + border-radius: 0.5rem; + background: var(--color-background-secondary); + margin: 1.75rem 0; + padding: 0; + overflow: hidden; +} + +dl.py.class > dt.sig, +dl.py.function > dt.sig { + background: var(--color-background-hover); + border-bottom: 1px solid var(--color-background-border); + padding: 0.6rem 1rem 0.6rem 2.5rem; + margin: 0; +} + +dl.py.class > dd, +dl.py.function > dd { + padding: 0.75rem 1.25rem; + margin: 0; +} + +dl.py.class > dd > dl.py { + border: none; + border-left: 2px solid var(--color-background-border); + background: transparent; + margin: 0.75rem 0; + padding: 0 0 0 0.875rem; +} + +dl.py.class > 
dd > dl.py > dt.sig { + background: transparent; + border-bottom: none; + padding: 0.2rem 0; + margin: 0; +} + +dl.py.class > dd > dl.py > dd { + padding: 0.25rem 0; + margin: 0; +} + diff --git a/docs/_templates/.gitkeep b/docs/_templates/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/docs/_templates/.gitkeep @@ -0,0 +1 @@ + diff --git a/docs/_templates/base.html b/docs/_templates/base.html new file mode 100644 index 0000000..00ce484 --- /dev/null +++ b/docs/_templates/base.html @@ -0,0 +1,9 @@ +{% extends "!base.html" %} + +{# Default to light mode instead of auto (system preference) #} +{% block body %} + +{% endblock %} + diff --git a/docs/_templates/layout.html b/docs/_templates/layout.html new file mode 100644 index 0000000..521dfd5 --- /dev/null +++ b/docs/_templates/layout.html @@ -0,0 +1,11 @@ +{% extends "!layout.html" %} + +{% block body %} + +{{ super() }} +{% endblock %} + diff --git a/docs/_templates/page.html b/docs/_templates/page.html new file mode 100644 index 0000000..a600c69 --- /dev/null +++ b/docs/_templates/page.html @@ -0,0 +1,57 @@ +{% extends "!page.html" %} + +{% block footer %} + +
+
+ {%- if show_copyright %} + + {%- endif %} + {%- if last_updated -%} +
+ {% trans last_updated=last_updated|e -%} + Last updated on {{ last_updated }} + {%- endtrans -%} +
+ {%- endif %} +
+
+{% endblock footer %} + diff --git a/docs/_templates/sidebar/github.html b/docs/_templates/sidebar/github.html new file mode 100644 index 0000000..d50ae3b --- /dev/null +++ b/docs/_templates/sidebar/github.html @@ -0,0 +1,15 @@ + + diff --git a/docs/cli.md b/docs/cli.md index a7759c5..7677f80 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -36,7 +36,7 @@ This command: The manifest must use the hs2p schema. `mask_path` and `spacing_at_level_0` are optional. -```csv +```text sample_id,image_path,mask_path,spacing_at_level_0 slide-1,/path/to/slide-1.svs,/path/to/mask-1.png,0.25 slide-2,/path/to/slide-2.svs,, diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..1674b75 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,68 @@ +"""Sphinx configuration for slide2vec documentation.""" + +from __future__ import annotations + +import sys +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT)) + +project = "slide2vec" +author = "Clément Grisi" +copyright = "2026, Clément Grisi" +release = "4.2.0" + +extensions = [ + "myst_parser", + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.napoleon", + "sphinx.ext.intersphinx", + "sphinx_copybutton", + "sphinx_autodoc_typehints", +] + +templates_path = ["_templates"] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] +autosummary_generate = True +autodoc_member_order = "bysource" +autodoc_default_options = { + "members": False, + "undoc-members": False, + "show-inheritance": True, +} +napoleon_google_docstring = True +napoleon_numpy_docstring = False +always_use_bars_union = True + +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), + "torch": ("https://pytorch.org/docs/stable", None), + "numpy": ("https://numpy.org/doc/stable", None), + "pandas": ("https://pandas.pydata.org/docs", None), +} + +myst_enable_extensions = ["colon_fence", "deflist"] +myst_heading_anchors = 3 + +html_theme = "furo" +html_static_path = ["_static"] 
+html_css_files = ["sidebar.css"] +html_title = "slide2vec" +html_show_sourcelink = False +_sidebar = [ + "sidebar/brand.html", + "sidebar/search.html", + "sidebar/scroll-start.html", + "sidebar/github.html", + "sidebar/navigation.html", + "sidebar/ethical-ads.html", + "sidebar/scroll-end.html", +] +html_sidebars = {"**": _sidebar} +html_theme_options = { + "source_repository": "https://github.com/clemsgrs/slide2vec", + "source_branch": "main", + "source_directory": "docs/", +} diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..04250ab --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,44 @@ +slide2vec +========== + +``slide2vec`` is a Python package for encoding whole-slide images with foundation models. + +It builds on ``hs2p`` for fast preprocessing and exposes a focused public API around +``Model``, ``Pipeline``, and registry-backed encoder classes. + +Start here: + +.. list-table:: + :header-rows: 1 + + * - Page + - What it covers + * - :doc:`python-api` + - Interactive embedding, preprocessing, execution options, and patient workflows + * - :doc:`cli` + - Manifest-driven batch runs and config overrides + * - :doc:`models` + - Shipped presets and the custom wrapper pattern for new encoders + * - :doc:`reference` + - Compact index of the public API and encoder registry + * - :doc:`benchmarking` + - Throughput and performance workflows + +The docs site is organized around the main ways people use the package: + +- interactive embedding with the Python API +- manifest-driven batch processing with the CLI +- model presets and custom registry-backed encoders +- a compact reference for the public surface + +.. 
toctree:: + :maxdepth: 1 + + python-api + cli + models + benchmarking + gpu-throughput-optimization-protocol + optimize-throughput/h0-mini-single-gpu + reference + documentation diff --git a/docs/models.md b/docs/models.md index 1bf9943..47462d6 100644 --- a/docs/models.md +++ b/docs/models.md @@ -52,7 +52,7 @@ Patient-level models aggregate multiple slide embeddings for the same patient in Add a `patient_id` column to the standard manifest CSV to group slides by patient: -```csv +```text sample_id,image_path,patient_id slide_1a,/data/slide_1a.svs,patient_1 slide_1b,/data/slide_1b.svs,patient_1 @@ -64,3 +64,181 @@ slide_2a,/data/slide_2a.svs,patient_2 ### Per-slide embeddings When running a patient-level model via `Pipeline`, the intermediate per-slide MOOZY embeddings can be saved alongside the patient embeddings by setting `save_slide_embeddings: true` in config (or `ExecutionOptions(save_slide_embeddings=True)` in the Python API). Saved slide embeddings are written to `slide_embeddings/` in the output directory. + +## Custom registry-backed encoders + +If you want to use a model that is not shipped with `slide2vec`, the recommended path is to wrap it in a normal encoder class and register that class under a new preset name. 
+ +The key pieces are: + +- subclass the appropriate base class from `slide2vec.encoders` +- implement the required methods for that level +- declare registry metadata with `@register_encoder(...)` +- import the module once so the registration side effect runs before `Model.from_preset(...)` + +### Tile encoder example + +```python +import torch +from torch import Tensor + +from slide2vec.encoders import TileEncoder, register_encoder, resolve_requested_output_variant + + +@register_encoder( + "my-tile-model", + output_variants={"default": {"encode_dim": 768}}, + default_output_variant="default", + input_size=224, + supported_spacing_um=0.5, + precision="fp16", + source="my-org/my-tile-model", +) +class MyTileModel(TileEncoder): + def __init__(self, *, output_variant: str | None = None): + self._output_variant = resolve_requested_output_variant(output_variant) + self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self._model = self._load_model().eval() + + def _load_model(self): + ... + + def get_transform(self): + ... 
+ + def encode_tiles(self, batch: Tensor) -> Tensor: + return self._model(batch) + + @property + def encode_dim(self) -> int: + return 768 + + @property + def device(self) -> torch.device: + return self._device + + def to(self, device: torch.device | str): + self._device = torch.device(device) + self._model = self._model.to(self._device) + return self +``` + +### Slide encoder example + +```python +import torch +from torch import Tensor + +from slide2vec.encoders import SlideEncoder, register_encoder, resolve_requested_output_variant + + +@register_encoder( + "my-slide-model", + level="slide", + tile_encoder="my-tile-model", + tile_encoder_output_variant="default", + output_variants={"default": {"encode_dim": 512}}, + default_output_variant="default", + supported_spacing_um=0.5, + precision="fp16", + source="my-org/my-slide-model", +) +class MySlideModel(SlideEncoder): + def __init__(self, *, output_variant: str | None = None): + self._output_variant = resolve_requested_output_variant(output_variant) + self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self._model = self._load_model().eval() + + def _load_model(self): + ... 
+ + @property + def encode_dim(self) -> int: + return 512 + + @property + def device(self) -> torch.device: + return self._device + + def to(self, device: torch.device | str): + self._device = torch.device(device) + self._model = self._model.to(self._device) + return self + + def encode_slide( + self, + tile_features: Tensor, + coordinates: Tensor | None = None, + *, + tile_size_lv0: int | None = None, + ) -> Tensor: + return self._model(tile_features) +``` + +### Patient encoder example + +```python +import torch +from torch import Tensor + +from slide2vec.encoders import PatientEncoder, register_encoder, resolve_requested_output_variant + + +@register_encoder( + "my-patient-model", + level="patient", + tile_encoder="my-tile-model", + tile_encoder_output_variant="default", + output_variants={"default": {"encode_dim": 256}}, + default_output_variant="default", + supported_spacing_um=0.5, + precision="fp16", + source="my-org/my-patient-model", +) +class MyPatientModel(PatientEncoder): + def __init__(self, *, output_variant: str | None = None): + self._output_variant = resolve_requested_output_variant(output_variant) + self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self._slide_model = self._load_slide_model().eval() + self._patient_model = self._load_patient_model().eval() + + def _load_slide_model(self): + ... + + def _load_patient_model(self): + ... 
+ + @property + def encode_dim(self) -> int: + return 256 + + @property + def device(self) -> torch.device: + return self._device + + def to(self, device: torch.device | str): + self._device = torch.device(device) + self._slide_model = self._slide_model.to(self._device) + self._patient_model = self._patient_model.to(self._device) + return self + + def encode_slide( + self, + tile_features: Tensor, + coordinates: Tensor | None = None, + *, + tile_size_lv0: int | None = None, + ) -> Tensor: + return self._slide_model(tile_features) + + def encode_patient(self, slide_embeddings: Tensor) -> Tensor: + return self._patient_model(slide_embeddings) +``` + +Once the module is imported, the preset is available through the existing API: + +```python +from slide2vec import Model + +model = Model.from_preset("my-tile-model") +``` diff --git a/docs/python-api.md b/docs/python-api.md index 7f911e3..10c81bd 100644 --- a/docs/python-api.md +++ b/docs/python-api.md @@ -37,6 +37,19 @@ y = embedded.y The encoder level is inferred from the preset, so callers do not need to configure it directly. Tile-focused presets and slide-native presets are selected automatically by name. +To inspect the shipped preset names programmatically, call `list_models()`: + +```python +from slide2vec import list_models + +models = list_models() +tile_models = list_models("tile") +slide_models = list_models("slide") +patient_models = list_models("patient") +``` + +`patient` currently returns only `moozy`. + When you call the direct API from an interactive terminal or a Jupyter notebook, `slide2vec` shows live progress by default. If you already installed a custom reporter with `slide2vec.progress.activate_progress_reporter(...)`, the API leaves it in place. 
## `PreprocessingConfig` diff --git a/docs/reference.rst b/docs/reference.rst new file mode 100644 index 0000000..00d7ce7 --- /dev/null +++ b/docs/reference.rst @@ -0,0 +1,170 @@ +Compact Reference +================= + +This page is a concise index of the public API and encoder registry. Use the +guide pages for workflow details and the docstrings for the exact contracts. + +Main entry points +----------------- + +.. list-table:: + :header-rows: 1 + + * - Symbol + - Description + * - ``Model`` + - Direct in-memory embedding API for slide, tile, and patient workflows + * - ``Pipeline`` + - Manifest-driven batch processing and artifact writing + * - ``list_models`` + - Return the registered preset names, optionally filtered by level + * - ``PreprocessingConfig`` + - Whole-slide tiling, read-back, and spacing configuration + * - ``ExecutionOptions`` + - Runtime settings for batch size, precision, outputs, and workers + * - ``EmbeddedSlide`` + - In-memory result from Model.embed_slide(...) / Model.embed_slides(...) + * - ``EmbeddedPatient`` + - In-memory result from Model.embed_patient(...) / Model.embed_patients(...) + +Encoder contract +---------------- + +.. list-table:: + :header-rows: 1 + + * - Symbol + - Description + * - ``TileEncoder`` + - Base class for encoders that consume tiles directly + * - ``SlideEncoder`` + - Base class for encoders that pool tile features into slide features + * - ``PatientEncoder`` + - Base class for encoders that pool slide embeddings into patient embeddings + * - ``register_encoder`` + - Decorator used to register a custom encoder class and metadata + +Configuration dataclasses +------------------------- + +.. 
list-table:: + :header-rows: 1 + + * - Config + - Main fields + - Purpose + * - ``PreprocessingConfig`` + - ``backend``, ``requested_spacing_um``, ``requested_tile_size_px``, ``requested_region_size_px``, ``region_tile_multiple``, ``tolerance``, ``overlap``, ``tissue_threshold``, ``read_coordinates_from``, ``read_tiles_from``, ``on_the_fly``, ``gpu_decode``, ``adaptive_batching``, ``use_supertiles``, ``jpeg_backend``, ``num_cucim_workers``, ``resume``, ``segmentation``, ``filtering``, ``preview`` + - Whole-slide segmentation, read strategy, and tiling geometry + * - ``ExecutionOptions`` + - ``output_dir``, ``output_format``, ``batch_size``, ``num_workers``, ``num_preprocessing_workers``, ``num_gpus``, ``precision``, ``prefetch_factor``, ``persistent_workers``, ``save_tile_embeddings``, ``save_slide_embeddings``, ``save_latents`` + - Runtime behavior and persisted output controls + * - ``RunResult`` + - ``tile_artifacts``, ``hierarchical_artifacts``, ``slide_artifacts``, ``patient_artifacts``, ``process_list_path`` + - Summary of a manifest-driven pipeline run + +Registered presets +------------------ + +.. 
list-table:: + :header-rows: 1 + + * - Name + - Class + - Constructor knobs + - Notes + * - ``conch`` + - ``CONCH`` + - ``output_variant`` + - level=tile; default=default; spacing=0.5 + * - ``conchv15`` + - ``CONCHv15`` + - ``output_variant`` + - level=tile; default=default; spacing=0.5 + * - ``gigapath`` + - ``GigaPath`` + - ``output_variant`` + - level=tile; default=default; spacing=0.5 + * - ``gigapath-slide`` + - ``GigaPathSlideEncoder`` + - ``output_variant`` + - level=slide; default=default; spacing=0.5 + * - ``h-optimus-0`` + - ``HOptimus0`` + - ``output_variant`` + - level=tile; default=default; spacing=0.5 + * - ``h-optimus-1`` + - ``HOptimus1`` + - ``output_variant`` + - level=tile; default=default; spacing=0.5 + * - ``h0-mini`` + - ``H0Mini`` + - ``output_variant`` + - level=tile; default=cls_patch_mean; spacing=0.5 + * - ``hibou-b`` + - ``HibouB`` + - ``output_variant`` + - level=tile; default=default; spacing=0.5 + * - ``hibou-l`` + - ``HibouL`` + - ``output_variant`` + - level=tile; default=default; spacing=0.5 + * - ``lunit`` + - ``LunitTileEncoder`` + - ``output_variant`` + - level=tile; default=default; spacing=0.5 + * - ``midnight`` + - ``Midnight`` + - ``output_variant`` + - level=tile; default=default; spacing=[0.25, 0.5, 1.0, 2.0] + * - ``moozy`` + - ``MOOZYPatientEncoder`` + - ``output_variant`` + - level=patient; default=default; spacing=0.5 + * - ``moozy-slide`` + - ``MOOZYSlideEncoder`` + - ``output_variant`` + - level=slide; default=default; spacing=0.5 + * - ``musk`` + - ``MUSK`` + - ``output_variant`` + - level=tile; default=ms_aug; spacing=[0.25, 0.5, 1.0] + * - ``phikon`` + - ``Phikon`` + - ``output_variant`` + - level=tile; default=default; spacing=0.5 + * - ``phikonv2`` + - ``PhikonV2`` + - ``output_variant`` + - level=tile; default=default; spacing=0.5 + * - ``prism`` + - ``PrismSlideEncoder`` + - ``output_variant`` + - level=slide; default=default; spacing=0.5 + * - ``prost40m`` + - ``Prost40M`` + - ``output_variant`` + - 
level=tile; default=default; spacing=0.5 + * - ``titan`` + - ``TitanSlideEncoder`` + - ``output_variant`` + - level=slide; default=default; spacing=0.5 + * - ``uni`` + - ``UNI`` + - ``output_variant`` + - level=tile; default=default; spacing=0.5 + * - ``uni2`` + - ``UNI2`` + - ``output_variant`` + - level=tile; default=default; spacing=0.5 + * - ``virchow`` + - ``Virchow`` + - ``output_variant`` + - level=tile; default=cls_patch_mean; spacing=0.5 + * - ``virchow2`` + - ``Virchow2`` + - ``output_variant`` + - level=tile; default=cls_patch_mean; spacing=[0.5, 1.0, 2.0] + +Use this page as a concise index. Use the guide pages for workflow and the +docstrings for the exact API contract. diff --git a/pyproject.toml b/pyproject.toml index f75bdab..bf09b18 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -113,6 +113,13 @@ fm = [ "ninja==1.11.1.1", "psutil<6", ] +docs = [ + "sphinx>=8.1", + "furo", + "myst-parser", + "sphinx-copybutton", + "sphinx-autodoc-typehints", +] testing = [ "pytest>=6.0", "pytest-cov>=2.0", diff --git a/slide2vec/__init__.py b/slide2vec/__init__.py index 74b611e..5cb2ce4 100644 --- a/slide2vec/__init__.py +++ b/slide2vec/__init__.py @@ -1,4 +1,12 @@ -from slide2vec.api import EmbeddedSlide, ExecutionOptions, Model, Pipeline, PreprocessingConfig, RunResult +from slide2vec.api import ( + EmbeddedSlide, + ExecutionOptions, + Model, + Pipeline, + PreprocessingConfig, + RunResult, + list_models, +) from slide2vec.artifacts import HierarchicalEmbeddingArtifact, SlideEmbeddingArtifact, TileEmbeddingArtifact @@ -6,6 +14,7 @@ __all__ = [ "Model", + "list_models", "Pipeline", "PreprocessingConfig", "ExecutionOptions", diff --git a/slide2vec/api.py b/slide2vec/api.py index d7ef7bf..a40f5b7 100644 --- a/slide2vec/api.py +++ b/slide2vec/api.py @@ -444,6 +444,27 @@ def _load_backend(self) -> LoadedModel: return self._backend +def list_models(level: str | None = None) -> list[str]: + """Return the available preset model names in a stable order. 
+ + Args: + level: Optional model level filter. Supported values are ``"tile"``, + ``"slide"``, and ``"patient"``. + """ + if level is None: + return sorted(encoder_registry.names()) + + normalized_level = str(level).strip().lower() + if normalized_level not in {"tile", "slide", "patient"}: + raise ValueError("list_models(level=...) must be one of: tile, slide, patient") + + return sorted( + name + for name in encoder_registry.names() + if encoder_registry.info(name)["level"] == normalized_level + ) + + class Pipeline: def __init__( self, diff --git a/slide2vec/distributed/direct_embed_worker.py b/slide2vec/distributed/direct_embed_worker.py index c986061..7328608 100644 --- a/slide2vec/distributed/direct_embed_worker.py +++ b/slide2vec/distributed/direct_embed_worker.py @@ -49,6 +49,7 @@ def main(argv=None) -> int: model_spec["name"], device=f"cuda:{local_rank}", output_variant=model_spec.get("output_variant"), + allow_non_recommended_settings=bool(model_spec["allow_non_recommended_settings"]), ) preprocessing = deserialize_preprocessing(request["preprocessing"]) execution = deserialize_execution(request["execution"]) diff --git a/slide2vec/distributed/pipeline_worker.py b/slide2vec/distributed/pipeline_worker.py index ce014fb..9b752ed 100644 --- a/slide2vec/distributed/pipeline_worker.py +++ b/slide2vec/distributed/pipeline_worker.py @@ -43,6 +43,7 @@ def main(argv=None) -> int: model_spec["name"], device=f"cuda:{local_rank}", output_variant=model_spec.get("output_variant"), + allow_non_recommended_settings=bool(model_spec["allow_non_recommended_settings"]), ) preprocessing = deserialize_preprocessing(request["preprocessing"]) execution = deserialize_execution(request["execution"]) diff --git a/slide2vec/inference.py b/slide2vec/inference.py index c98827c..8ae0f83 100644 --- a/slide2vec/inference.py +++ b/slide2vec/inference.py @@ -1,5 +1,6 @@ import json import importlib +import heapq import os import shutil import subprocess @@ -291,6 +292,7 @@ def load_model( 
name: str, device: str = "auto", output_variant: str | None = None, + allow_non_recommended_settings: bool = False, token: str | None = None, ) -> LoadedModel: name = canonicalize_model_name(name) @@ -391,6 +393,12 @@ def embed_slides( output_format=execution.output_format, ) emit_progress("embedding.started", slide_count=len(embeddable_slides)) + if execution.num_gpus > 1 and len(embeddable_slides) > 1: + emit_progress( + "embedding.assignment.started", + slide_count=len(embeddable_slides), + num_gpus=execution.num_gpus, + ) local_persist_callback = None if execution.output_dir is not None and execution.num_gpus <= 1: local_persist_callback, _, _ = _build_incremental_persist_callback( @@ -408,6 +416,12 @@ def embed_slides( work_dir=work_dir, on_embedded_slide=local_persist_callback, ) + if execution.num_gpus > 1 and len(embeddable_slides) > 1: + emit_progress( + "embedding.assignment.finished", + slide_count=len(embeddable_slides), + num_gpus=execution.num_gpus, + ) if execution.output_dir is not None and execution.num_gpus > 1: tile_artifacts: list[TileEmbeddingArtifact] = [] hierarchical_artifacts: list[HierarchicalEmbeddingArtifact] = [] @@ -3011,6 +3025,11 @@ def _run_distributed_embedding_stage( progress_events_path=progress_events_path, ) request_path.write_text(json.dumps(request_payload, indent=2, sort_keys=True), encoding="utf-8") + emit_progress( + "embedding.assignment.started", + slide_count=len(successful_slides), + num_gpus=execution.num_gpus, + ) _run_torchrun_worker( module="slide2vec.distributed.pipeline_worker", execution=execution, @@ -3019,6 +3038,11 @@ def _run_distributed_embedding_stage( failure_title="Distributed feature extraction failed", progress_events_path=progress_events_path, ) + emit_progress( + "embedding.assignment.finished", + slide_count=len(successful_slides), + num_gpus=execution.num_gpus, + ) def _embed_single_slide_distributed( @@ -3310,14 +3334,15 @@ def _assign_slides_to_ranks( num_gpus: int, ) -> dict[int, list[str]]: 
assignments: dict[int, list[str]] = {rank: [] for rank in range(num_gpus)} - assigned_tiles = [0] * num_gpus + assigned_ranks = [(0, rank) for rank in range(num_gpus)] + heapq.heapify(assigned_ranks) sortable = [] for slide, tiling_result in zip(slide_records, tiling_results): sortable.append((slide.sample_id, _num_tiles(tiling_result))) for sample_id, num_tiles in sorted(sortable, key=lambda item: (-item[1], item[0])): - rank = min(range(num_gpus), key=lambda idx: (assigned_tiles[idx], idx)) + assigned_tiles, rank = heapq.heappop(assigned_ranks) assignments[rank].append(sample_id) - assigned_tiles[rank] += int(num_tiles) + heapq.heappush(assigned_ranks, (assigned_tiles + int(num_tiles), rank)) return assignments @@ -3385,6 +3410,9 @@ def _serialize_model(model) -> dict[str, Any]: return { "name": model.name, "output_variant": model._output_variant if hasattr(model, "_output_variant") else None, + "allow_non_recommended_settings": bool( + getattr(model, "allow_non_recommended_settings", False) + ), } diff --git a/slide2vec/progress.py b/slide2vec/progress.py index cbb5e7d..21caba7 100644 --- a/slide2vec/progress.py +++ b/slide2vec/progress.py @@ -124,6 +124,13 @@ def _format_line(self, kind: str, payload: dict[str, Any]) -> str | None: return f"Model {payload['model_name']} ready on {payload['device']}" if kind == "embedding.started": return f"Embedding slides ({payload['slide_count']} total)..." + if kind == "embedding.assignment.started": + return f"Assigning slides across {payload['num_gpus']} GPU(s)..." + if kind == "embedding.assignment.finished": + return ( + f"Slide assignment complete: {payload['slide_count']} slide(s) across " + f"{payload['num_gpus']} GPU(s)" + ) if kind == "embedding.slide.started": return f"Embedding {_progress_subject(payload)} ({payload['total_tiles']} tiles)..." 
if kind == "embedding.tile.progress": @@ -268,6 +275,21 @@ def emit(self, event: ProgressEvent) -> None: if kind == "embedding.started": self._task_ids["embedding"] = self.progress.add_task("Embedding slides", total=payload["slide_count"]) return + if kind == "embedding.assignment.started": + self._task_ids["embedding_assignment"] = self.progress.add_task( + f"Assigning slides across {payload['num_gpus']} GPUs", + total=None, + ) + self.console.print(f"Assigning slides across {payload['num_gpus']} GPUs...") + return + if kind == "embedding.assignment.finished": + task_id = self._task_ids.pop("embedding_assignment", None) + if task_id is not None: + self.progress.remove_task(task_id) + self.console.print( + f"Slide assignment complete: {payload['slide_count']} slides across {payload['num_gpus']} GPUs" + ) + return if kind == "embedding.slide.started": tile_task_key = _progress_task_key("tiles", payload) tile_task = self._task_ids.get(tile_task_key) diff --git a/tasks/lessons.md b/tasks/lessons.md index 7b516a2..6a19a66 100644 --- a/tasks/lessons.md +++ b/tasks/lessons.md @@ -13,6 +13,10 @@ - In this environment, never route `apply_patch` through `exec_command`; use the dedicated `apply_patch` tool directly for file edits. +## 2026-04-17 + +- When the workspace contains both `/data/pathology/projects/clement/code/slide2vec` and `/tmp/slide2vec`, treat the `/data/...` checkout as the source of truth for edits and reporting unless the user explicitly says otherwise. + ## 2026-04-01 - When the user says backward compatibility is not needed, do not add compatibility-preserving behavior or messaging; implement the simpler direct behavior instead. 
diff --git a/tests/test_docs.py b/tests/test_docs.py new file mode 100644 index 0000000..0f6733e --- /dev/null +++ b/tests/test_docs.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +import importlib.util +from pathlib import Path + +import pytest + + +def _load_reference_generator(): + docs_dir = Path(__file__).resolve().parents[1] / "docs" + module_path = docs_dir / "_generate_reference.py" + spec = importlib.util.spec_from_file_location("_generate_reference", module_path) + if spec is None or spec.loader is None: + raise RuntimeError(f"Unable to load {module_path}") + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module, docs_dir + + +def test_reference_generator_matches_checked_in_file() -> None: + generator, docs_dir = _load_reference_generator() + generated = generator.build_reference_rst().strip() + checked_in = (docs_dir / "reference.rst").read_text(encoding="utf-8").strip() + + assert generated == checked_in + assert "Compact Reference" in generated + assert "Main entry points" in generated + assert "Registered presets" in generated + + +def test_sphinx_docs_build(tmp_path: Path) -> None: + pytest.importorskip("sphinx") + from sphinx.cmd.build import build_main + + docs_dir = Path(__file__).resolve().parents[1] / "docs" + out_dir = tmp_path / "html" + status = build_main(["-W", "-b", "html", str(docs_dir), str(out_dir)]) + + assert status == 0 + index_html = (out_dir / "index.html").read_text(encoding="utf-8") + assert "Made with" not in index_html + assert "@pradyunsg" not in index_html + assert (out_dir / "index.html").exists() + assert (out_dir / "reference.html").exists() + diff --git a/tests/test_progress.py b/tests/test_progress.py index 00868db..cd4941f 100644 --- a/tests/test_progress.py +++ b/tests/test_progress.py @@ -245,6 +245,101 @@ def test_run_pipeline_emits_local_progress_events_in_order(monkeypatch, tmp_path ] +def 
test_run_pipeline_emits_assignment_progress_for_multi_gpu_embedding(monkeypatch, tmp_path: Path): + import slide2vec.inference as inference + import slide2vec.progress as progress + + reporter = RecordingReporter() + + slide_a = SimpleNamespace( + sample_id="slide-a", + image_path=Path("/tmp/slide-a.svs"), + mask_path=None, + spacing_at_level_0=None, + ) + slide_b = SimpleNamespace( + sample_id="slide-b", + image_path=Path("/tmp/slide-b.svs"), + mask_path=None, + spacing_at_level_0=None, + ) + tiling_a = SimpleNamespace(x=np.array([0, 1]), y=np.array([0, 1]), tile_size_lv0=224) + tiling_b = SimpleNamespace(x=np.array([0, 1, 2]), y=np.array([0, 1, 2]), tile_size_lv0=224) + embedded_a = SimpleNamespace(sample_id="slide-a") + embedded_b = SimpleNamespace(sample_id="slide-b") + + monkeypatch.setattr( + inference, + "_prepare_tiled_slides", + lambda *args, **kwargs: ([slide_a, slide_b], [tiling_a, tiling_b], tmp_path / "process_list.csv"), + ) + monkeypatch.setattr( + inference, + "_select_embedding_path", + lambda *args, **kwargs: [embedded_a, embedded_b], + ) + monkeypatch.setattr(inference, "_persist_embedded_slide", lambda *args, **kwargs: (None, None)) + monkeypatch.setattr(inference, "_run_torchrun_worker", lambda *args, **kwargs: None) + monkeypatch.setattr( + inference, + "_collect_pipeline_artifacts", + lambda *args, **kwargs: (["tile-artifact"], [], ["slide-artifact"]), + ) + monkeypatch.setattr(inference, "_update_process_list_after_embedding", lambda *args, **kwargs: None) + monkeypatch.setattr(inference, "_validate_multi_gpu_execution", lambda *args, **kwargs: None) + + model = SimpleNamespace( + name="prism", + level="slide", + _requested_device="cuda:0", + _load_backend=lambda: SimpleNamespace(), + ) + + with progress.activate_progress_reporter(reporter): + result = inference.run_pipeline( + model, + slides=[slide_a, slide_b], + preprocessing=DEFAULT_PREPROCESSING, + execution=inference.ExecutionOptions(output_dir=tmp_path, num_gpus=2, 
save_tile_embeddings=True), + ) + + kinds = [event.kind for event in reporter.events] + + assert result.tile_artifacts == ["tile-artifact"] + assert result.slide_artifacts == ["slide-artifact"] + assert kinds == [ + "run.started", + "tiling.started", + "tiling.finished", + "embedding.started", + "embedding.assignment.started", + "embedding.assignment.finished", + "embedding.finished", + "run.finished", + ] + + +def test_plain_text_reporter_formats_assignment_progress(): + import slide2vec.progress as progress + + reporter = progress.PlainTextCliProgressReporter(stream=io.StringIO()) + + assert ( + reporter._format_line( + "embedding.assignment.started", + {"slide_count": 10, "num_gpus": 4}, + ) + == "Assigning slides across 4 GPU(s)..." + ) + assert ( + reporter._format_line( + "embedding.assignment.finished", + {"slide_count": 10, "num_gpus": 4}, + ) + == "Slide assignment complete: 10 slide(s) across 4 GPU(s)" + ) + + def test_run_forward_pass_reports_processed_tile_counts(): torch = pytest.importorskip("torch") import slide2vec.inference as inference @@ -395,7 +490,12 @@ def test_build_direct_embed_worker_request_payload_includes_progress_events_path import slide2vec.inference as inference payload = inference._build_direct_embed_worker_request_payload( - model=SimpleNamespace(name="virchow2", level="tile", _model_kwargs={}), + model=SimpleNamespace( + name="virchow2", + level="tile", + _output_variant="cls", + allow_non_recommended_settings=True, + ), preprocessing=DEFAULT_PREPROCESSING, execution=inference.ExecutionOptions(output_dir=tmp_path), coordination_dir=tmp_path / "coord", @@ -406,6 +506,7 @@ def test_build_direct_embed_worker_request_payload_includes_progress_events_path ) assert payload["progress_events_path"] == str(tmp_path / "logs" / "direct.progress.jsonl") + assert payload["model"]["allow_non_recommended_settings"] is True def test_run_torchrun_worker_streams_progress_events_before_process_exit(monkeypatch, tmp_path: Path): diff --git 
a/tests/test_regression_core.py b/tests/test_regression_core.py index 62f583b..2c49168 100644 --- a/tests/test_regression_core.py +++ b/tests/test_regression_core.py @@ -12,6 +12,7 @@ Pipeline, PreprocessingConfig, ) +from slide2vec.encoders.registry import encoder_registry from slide2vec.artifacts import ( load_array, load_metadata, @@ -94,6 +95,57 @@ def test_get_cfg_from_args_rejects_models_with_ambiguous_spacing_defaults(tmp_pa get_cfg_from_args(args) +def test_list_models_is_public_and_returns_all_registered_models(): + from slide2vec import list_models + + models = list_models() + + assert models == sorted(models) + assert models == sorted(encoder_registry.names()) + assert "virchow2" in models + assert "moozy" in models + assert "prism" in models + + +def test_list_models_can_filter_by_level(): + from slide2vec import list_models + + assert list_models("tile") == [ + "conch", + "conchv15", + "gigapath", + "h-optimus-0", + "h-optimus-1", + "h0-mini", + "hibou-b", + "hibou-l", + "lunit", + "midnight", + "musk", + "phikon", + "phikonv2", + "prost40m", + "uni", + "uni2", + "virchow", + "virchow2", + ] + assert list_models("slide") == [ + "gigapath-slide", + "moozy-slide", + "prism", + "titan", + ] + assert list_models("patient") == ["moozy"] + + +def test_list_models_rejects_unknown_level(): + from slide2vec import list_models + + with pytest.raises(ValueError, match="tile, slide, patient"): + list_models("tiles") + + def test_npz_artifacts_round_trip(tmp_path: Path): features = np.arange(12, dtype=np.float32).reshape(3, 4) artifact = write_tile_embeddings( diff --git a/tests/test_regression_inference.py b/tests/test_regression_inference.py index 02ea628..5f1982e 100644 --- a/tests/test_regression_inference.py +++ b/tests/test_regression_inference.py @@ -1918,6 +1918,34 @@ def test_assign_slides_to_ranks_balances_by_tile_count(): 1: ["slide-b", "slide-c"], } + +def test_assign_slides_to_ranks_tiebreaks_by_rank_deterministically(): + import slide2vec.inference as 
inference + + slides = [ + make_slide("slide-a"), + make_slide("slide-b"), + make_slide("slide-c"), + make_slide("slide-d"), + make_slide("slide-e"), + ] + tiling_results = [ + SimpleNamespace(x=np.arange(10), y=np.arange(10), tile_size_lv0=224), + SimpleNamespace(x=np.arange(10), y=np.arange(10), tile_size_lv0=224), + SimpleNamespace(x=np.arange(10), y=np.arange(10), tile_size_lv0=224), + SimpleNamespace(x=np.arange(1), y=np.arange(1), tile_size_lv0=224), + SimpleNamespace(x=np.arange(1), y=np.arange(1), tile_size_lv0=224), + ] + + assignments = inference._assign_slides_to_ranks(slides, tiling_results, num_gpus=3) + + assert assignments == { + 0: ["slide-a", "slide-d"], + 1: ["slide-b", "slide-e"], + 2: ["slide-c"], + } + + def test_merge_tile_embedding_shards_restores_original_tile_order(): import slide2vec.inference as inference @@ -3120,6 +3148,42 @@ def to(self, device): assert loaded.device == torch.device("cuda") +def test_load_model_accepts_allow_non_recommended_settings_without_forwarding(monkeypatch): + import slide2vec.inference as inference + + captured = {} + + class DummyEncoder: + def __init__(self, *, output_variant=None): + captured["output_variant"] = output_variant + self.device = "cpu" + self.encode_dim = 8 + + def get_transform(self): + return SimpleNamespace() + + def to(self, device): + self.device = device + return self + + monkeypatch.setattr(inference, "canonicalize_model_name", lambda name: name) + monkeypatch.setattr( + inference.encoder_registry, + "info", + lambda name: {"level": "tile", "precision": "fp32"}, + ) + monkeypatch.setattr(inference.encoder_registry, "require", lambda name: DummyEncoder) + monkeypatch.delenv("HF_TOKEN", raising=False) + + loaded = inference.load_model( + name="dummy-model", + allow_non_recommended_settings=True, + ) + + assert loaded.name == "dummy-model" + assert captured["output_variant"] is None + + def test_scale_coordinates_scales_down(): from slide2vec.inference import _scale_coordinates From 
2d929ed4dbc4a9a824bb02e2f51e6543c2c35dc5 Mon Sep 17 00:00:00 2001 From: clemsgrs Date: Fri, 17 Apr 2026 10:37:58 +0000 Subject: [PATCH 2/3] clean up docs --- README.md | 1 + docs/_static/sidebar.css | 246 +++++++++++++++++- docs/benchmarking.rst | 3 + docs/cli.rst | 3 + docs/conf.py | 13 +- docs/documentation.rst | 3 + docs/gpu-throughput-optimization-protocol.rst | 3 + docs/index.rst | 99 +++++-- docs/models.rst | 3 + .../h0-mini-single-gpu.rst | 3 + docs/python-api.rst | 3 + 11 files changed, 358 insertions(+), 22 deletions(-) create mode 100644 docs/benchmarking.rst create mode 100644 docs/cli.rst create mode 100644 docs/documentation.rst create mode 100644 docs/gpu-throughput-optimization-protocol.rst create mode 100644 docs/models.rst create mode 100644 docs/optimize-throughput/h0-mini-single-gpu.rst create mode 100644 docs/python-api.rst diff --git a/README.md b/README.md index a8b4d88..9025d00 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # slide2vec [![PyPI version](https://img.shields.io/pypi/v/slide2vec?label=pypi&logo=pypi&color=3776AB)](https://pypi.org/project/slide2vec/) +[![Docs](https://img.shields.io/badge/docs-website-blue)](https://clemsgrs.github.io/slide2vec/) `slide2vec` is a Python package for efficient encoding of whole-slide images using publicly available foundation models. It builds on [`hs2p`](https://pypi.org/project/hs2p/) for fast preprocessing and exposes a focused surface around `Model`, `Pipeline`, and `ExecutionOptions`. 
diff --git a/docs/_static/sidebar.css b/docs/_static/sidebar.css index 9c91dbe..a2a1033 100644 --- a/docs/_static/sidebar.css +++ b/docs/_static/sidebar.css @@ -2,6 +2,241 @@ margin: 1rem 1rem 1.25rem 0; } +:root { + --s2v-hero-bg: linear-gradient(135deg, rgba(38, 82, 170, 0.12), rgba(88, 156, 255, 0.05) 40%, rgba(14, 165, 233, 0.09)); + --s2v-hero-border: rgba(88, 156, 255, 0.22); + --s2v-card-bg: color-mix(in srgb, var(--color-background-primary) 88%, var(--color-brand-primary) 12%); + --s2v-card-border: var(--color-background-border); + --s2v-card-shadow: 0 12px 35px rgba(15, 23, 42, 0.08); +} + +.s2v-hero { + display: grid; + grid-template-columns: minmax(0, 1.4fr) minmax(280px, 0.9fr); + gap: 1.25rem; + padding: 2rem; + margin: 0.5rem 0 2rem; + border: 1px solid var(--s2v-hero-border); + border-radius: 1.25rem; + background: + radial-gradient(circle at top right, rgba(59, 130, 246, 0.18), transparent 36%), + radial-gradient(circle at bottom left, rgba(14, 165, 233, 0.12), transparent 32%), + var(--s2v-hero-bg); + box-shadow: var(--s2v-card-shadow); +} + +.s2v-hero__content { + display: flex; + flex-direction: column; + gap: 1rem; +} + +.s2v-hero__eyebrow { + font-size: 0.78rem; + font-weight: 700; + letter-spacing: 0.12em; + text-transform: uppercase; + color: var(--color-brand-primary); +} + +.s2v-hero h1 { + margin: 0; + font-size: clamp(2.6rem, 6vw, 4.4rem); + line-height: 0.95; + letter-spacing: -0.05em; +} + +.s2v-hero__lede { + margin: 0; + max-width: 42rem; + font-size: 1.08rem; + line-height: 1.65; + color: var(--color-foreground-muted); +} + +.s2v-hero__actions { + display: flex; + flex-wrap: wrap; + gap: 0.75rem; + margin-top: 0.25rem; +} + +.s2v-button { + display: inline-flex; + align-items: center; + justify-content: center; + min-height: 2.75rem; + padding: 0.75rem 1rem; + border-radius: 999px; + font-weight: 700; + text-decoration: none; + border: 1px solid transparent; + transition: transform 0.15s ease, box-shadow 0.15s ease, background 0.15s 
ease; +} + +.s2v-button:hover { + transform: translateY(-1px); + text-decoration: none; +} + +.s2v-button--primary { + color: white; + background: linear-gradient(135deg, var(--color-brand-primary), color-mix(in srgb, var(--color-brand-primary) 68%, black 32%)); + box-shadow: 0 10px 24px rgba(59, 130, 246, 0.22); +} + +.s2v-button--secondary { + color: var(--color-foreground-primary); + background: color-mix(in srgb, var(--color-background-primary) 85%, var(--color-brand-primary) 15%); + border-color: var(--s2v-hero-border); +} + +.s2v-hero__meta { + display: flex; + flex-wrap: wrap; + gap: 0.5rem; +} + +.s2v-hero__meta span { + display: inline-flex; + align-items: center; + padding: 0.42rem 0.7rem; + border-radius: 999px; + background: rgba(255, 255, 255, 0.48); + border: 1px solid var(--s2v-hero-border); + font-size: 0.82rem; + color: var(--color-foreground-secondary); +} + +.s2v-hero__panel { + display: flex; + flex-direction: column; + justify-content: space-between; + gap: 1rem; + padding: 1.25rem; + border-radius: 1rem; + background: color-mix(in srgb, var(--color-background-primary) 84%, var(--color-brand-primary) 16%); + border: 1px solid var(--s2v-card-border); +} + +.s2v-hero__panel-label { + font-size: 0.76rem; + font-weight: 800; + letter-spacing: 0.08em; + text-transform: uppercase; + color: var(--color-foreground-muted); +} + +.s2v-steps { + margin: 0; + padding-left: 1.2rem; + display: grid; + gap: 0.75rem; +} + +.s2v-steps li { + line-height: 1.5; + color: var(--color-foreground-primary); +} + +.s2v-section { + margin: 2rem 0 1rem; +} + +.s2v-section__heading { + display: flex; + flex-direction: column; + gap: 0.35rem; + margin-bottom: 1rem; +} + +.s2v-section__heading h2 { + margin: 0; + font-size: 1.5rem; + letter-spacing: -0.02em; +} + +.s2v-section__heading p { + margin: 0; + color: var(--color-foreground-muted); + max-width: 55rem; +} + +.s2v-card-grid { + display: grid; + grid-template-columns: repeat(2, minmax(0, 1fr)); + gap: 1rem; +} + 
+.s2v-card { + display: block; + padding: 1.15rem 1.15rem 1.25rem; + border: 1px solid var(--s2v-card-border); + border-radius: 1rem; + background: var(--s2v-card-bg); + box-shadow: var(--s2v-card-shadow); + color: inherit; + text-decoration: none; + transition: transform 0.16s ease, border-color 0.16s ease, box-shadow 0.16s ease; +} + +.s2v-card:hover { + transform: translateY(-2px); + border-color: var(--color-brand-primary); + box-shadow: 0 18px 40px rgba(15, 23, 42, 0.12); + text-decoration: none; +} + +.s2v-card__kicker { + margin-bottom: 0.55rem; + font-size: 0.76rem; + font-weight: 800; + letter-spacing: 0.08em; + text-transform: uppercase; + color: var(--color-brand-primary); +} + +.s2v-card h3 { + margin: 0 0 0.5rem; + font-size: 1.05rem; +} + +.s2v-card p { + margin: 0; + color: var(--color-foreground-muted); + line-height: 1.55; +} + +.s2v-section--split { + display: grid; + grid-template-columns: minmax(0, 0.8fr) minmax(0, 1.2fr); + gap: 1rem 1.25rem; + align-items: start; +} + +.s2v-bullets { + display: grid; + gap: 0.85rem; +} + +.s2v-bullet { + padding: 1rem 1rem 1.05rem; + border: 1px solid var(--s2v-card-border); + border-radius: 0.9rem; + background: var(--color-background-primary); +} + +.s2v-bullet h3 { + margin: 0 0 0.35rem; + font-size: 1rem; +} + +.s2v-bullet p { + margin: 0; + color: var(--color-foreground-muted); + line-height: 1.55; +} + .s2v-sidebar-github__main { display: flex; align-items: center; @@ -70,6 +305,16 @@ .s2v-sidebar-github { margin-top: 0.75rem; } + + .s2v-hero, + .s2v-section--split, + .s2v-card-grid { + grid-template-columns: 1fr; + } + + .s2v-hero { + padding: 1.35rem; + } } dl.py.class, @@ -116,4 +361,3 @@ dl.py.class > dd > dl.py > dd { padding: 0.25rem 0; margin: 0; } - diff --git a/docs/benchmarking.rst b/docs/benchmarking.rst new file mode 100644 index 0000000..7f6face --- /dev/null +++ b/docs/benchmarking.rst @@ -0,0 +1,3 @@ +.. 
include:: benchmarking.md + :parser: myst_parser.sphinx_ + diff --git a/docs/cli.rst b/docs/cli.rst new file mode 100644 index 0000000..3f28145 --- /dev/null +++ b/docs/cli.rst @@ -0,0 +1,3 @@ +.. include:: cli.md + :parser: myst_parser.sphinx_ + diff --git a/docs/conf.py b/docs/conf.py index 1674b75..d3ce9f9 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -24,7 +24,18 @@ ] templates_path = ["_templates"] -exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] +exclude_patterns = [ + "_build", + "Thumbs.db", + ".DS_Store", + "benchmarking.md", + "cli.md", + "documentation.md", + "gpu-throughput-optimization-protocol.md", + "models.md", + "python-api.md", + "optimize-throughput/h0-mini-single-gpu.md", +] autosummary_generate = True autodoc_member_order = "bysource" autodoc_default_options = { diff --git a/docs/documentation.rst b/docs/documentation.rst new file mode 100644 index 0000000..d85870f --- /dev/null +++ b/docs/documentation.rst @@ -0,0 +1,3 @@ +.. include:: documentation.md + :parser: myst_parser.sphinx_ + diff --git a/docs/gpu-throughput-optimization-protocol.rst b/docs/gpu-throughput-optimization-protocol.rst new file mode 100644 index 0000000..a211de9 --- /dev/null +++ b/docs/gpu-throughput-optimization-protocol.rst @@ -0,0 +1,3 @@ +.. include:: gpu-throughput-optimization-protocol.md + :parser: myst_parser.sphinx_ + diff --git a/docs/index.rst b/docs/index.rst index 04250ab..54869ea 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,28 +1,87 @@ slide2vec ========== -``slide2vec`` is a Python package for encoding whole-slide images with foundation models. +.. raw:: html -It builds on ``hs2p`` for fast preprocessing and exposes a focused public API around -``Model``, ``Pipeline``, and registry-backed encoder classes. +
+
+
Whole-slide embeddings
+

slide2vec

+

+ Encode whole-slide images with foundation models, keep preprocessing + reproducible, and move from a single slide to a batch pipeline without + changing the mental model. +

+ +
+ Model + Pipeline + CLI + Registry-backed custom encoders +
+
+
+
Typical flow
+
    +
  1. Pick a preset or register your own encoder.
  2. +
  3. Resolve spacing, tile size, and output variant.
  4. +
  5. Run a slide, a manifest, or an entire batch.
  6. +
+
+
-Start here: +
+
+

Start here

+

Jump into the docs by task instead of paging through a directory listing.

+
+ +
-.. list-table:: - :header-rows: 1 - - * - Page - - What it covers - * - :doc:`python-api` - - Interactive embedding, preprocessing, execution options, and patient workflows - * - :doc:`cli` - - Manifest-driven batch runs and config overrides - * - :doc:`models` - - Shipped presets and the custom wrapper pattern for new encoders - * - :doc:`reference` - - Compact index of the public API and encoder registry - * - :doc:`benchmarking` - - Throughput and performance workflows +
+
+

Why this site

+

The site is arranged around the decisions that matter when embedding slides at scale.

+
+
+
+

Preprocessing first

+

Spacing, tile size, and backend choice are validated from the selected preset.

+
+
+

Registry-driven models

+

Use the built-in presets or register a wrapper class with the same contract.

+
+
+

One mental model

+

The same API covers direct embedding, manifest runs, and patient-level aggregation.

+
+
+
The docs site is organized around the main ways people use the package: @@ -37,8 +96,8 @@ The docs site is organized around the main ways people use the package: python-api cli models + documentation benchmarking gpu-throughput-optimization-protocol optimize-throughput/h0-mini-single-gpu reference - documentation diff --git a/docs/models.rst b/docs/models.rst new file mode 100644 index 0000000..9e29a06 --- /dev/null +++ b/docs/models.rst @@ -0,0 +1,3 @@ +.. include:: models.md + :parser: myst_parser.sphinx_ + diff --git a/docs/optimize-throughput/h0-mini-single-gpu.rst b/docs/optimize-throughput/h0-mini-single-gpu.rst new file mode 100644 index 0000000..7793b31 --- /dev/null +++ b/docs/optimize-throughput/h0-mini-single-gpu.rst @@ -0,0 +1,3 @@ +.. include:: h0-mini-single-gpu.md + :parser: myst_parser.sphinx_ + diff --git a/docs/python-api.rst b/docs/python-api.rst new file mode 100644 index 0000000..675927d --- /dev/null +++ b/docs/python-api.rst @@ -0,0 +1,3 @@ +.. include:: python-api.md + :parser: myst_parser.sphinx_ + From 4a7ac687924191f500f6f02002485f074a93e507 Mon Sep 17 00:00:00 2001 From: clemsgrs Date: Fri, 17 Apr 2026 11:12:20 +0000 Subject: [PATCH 3/3] fix CI build test --- .gitignore | 3 +- docs/documentation.md | 120 ++++++++++++++++++ docs/index.rst | 4 +- .../optimize-throughput/h0-mini-single-gpu.md | 116 +++++++++++++++++ 4 files changed, 239 insertions(+), 4 deletions(-) create mode 100644 docs/documentation.md create mode 100644 docs/optimize-throughput/h0-mini-single-gpu.md diff --git a/.gitignore b/.gitignore index 639bf73..267edc4 100644 --- a/.gitignore +++ b/.gitignore @@ -166,6 +166,5 @@ output/ outputs/ archive/ tasks/ -docs/documentation.md docs/20*-*.md -data/ \ No newline at end of file +data/ diff --git a/docs/documentation.md b/docs/documentation.md new file mode 100644 index 0000000..a9abd34 --- /dev/null +++ b/docs/documentation.md @@ -0,0 +1,120 @@ +# Documentation Log + +## 2026-04-17 + +- Reworked the docs landing page into a 
product-style hero with action buttons, feature cards, and a summary panel to make the site feel less like a flat index. + +## 2026-04-17 + +- Added Sphinx `.rst` wrapper pages for the main guides so the visible docs site now has a more uniform navigation layer while keeping the existing Markdown sources as the underlying content. + +## 2026-04-17 + +- Refined the docs landing page with a clearer entry-point table and updated `README.md` to point readers at the new documentation website. + +## 2026-04-17 + +- Added a Sphinx-based documentation website scaffold for `slide2vec`, including a landing page, generated reference page, custom theme overrides, a docs smoke test, and a GitHub Pages workflow. + +## 2026-04-17 + +- Added a short `docs/models.md` section showing the recommended registry-backed custom encoder pattern, with examples for tile, slide, and patient encoders. + +## 2026-04-17 + +- Added a visible progress step for multi-GPU slide assignment so distributed embedding runs show when the scheduler is balancing slides before workers start processing them. + +## 2026-04-17 + +- Replaced the linear scan in slide-to-GPU assignment with a heap-backed greedy selector, preserving the tile-count balancing semantics while lowering the rank-selection cost for large multi-GPU runs. + +## 2026-04-17 + +- Added a public `list_models()` helper to expose the registered preset names from `from slide2vec import list_models`. +- Extended `list_models()` with optional level filtering for `tile`, `slide`, and `patient` presets. +- Added a README note showing `list_models()` and the `tile` / `slide` / `patient` filters. 
+ +## 2026-04-12 + +- Quieted the common stdout noise during CLI runs by skipping redundant Hugging Face `login()` calls when `HF_TOKEN` is already set, moving the on-the-fly worker-count note to a single run-level INFO line, and surfacing backend selection from slide2vec's tiling path as a plain progress line so the useful cuCIM detail stays visible without glitching the progress bar. + +## 2026-04-08 + +- Split the process-list reader helpers into tiling-only and embedding-only paths so slide2vec no longer relies on a single conditional schema loader for both workflows. +- `process_list.csv` now keeps the hs2p backend provenance columns (`requested_backend`, `backend`) end to end instead of padding older manifests, so slide2vec reads the stricter manifest contract directly. + +## 2026-04-07 + +- Hierarchical preprocessing now treats `hierarchical_embeddings/` as the persisted feature artifact directory during collection and resume checks, so the pipeline no longer looks for missing `tile_embeddings/` sidecars when a tile-level model runs in hierarchical mode. +- `process_list.csv` now carries a `feature_path` column next to `feature_status`, populated with the persisted tile or hierarchical embedding path when the feature stage writes an artifact. +- The direct `Model.embed_slide(...)` path now records `feature_path` from the slide embedding output when tile features are not persisted, so the process list reflects the actual artifact written by the API call. +- When a slide-level run also saves tile features, `feature_path` now points at the slide embedding artifact instead of the tile artifact, so the process list tracks the requested feature output. +- Slide2vec-written artifact paths in `process_list.csv` are now resolved to absolute paths before being recorded. + +## 2026-04-06 + +- Moved the CUDA 12.8 torch constraint used by the Docker builds into inline Dockerfile generation so the build no longer depends on a checked-in `constraints-cu128.txt` file. 
+ +## 2026-04-04 + +- Added a backend-only `all` extra to `pyproject.toml` that installs ASAP, cuCIM, OpenSlide, and pyvips support through `hs2p[asap,cucim,openslide,vips]` without pulling in any model-specific dependencies. +- Added a README installation example for `pip install "slide2vec[all]"` to match the new backend-only aggregate extra. + +## 2026-04-03 + +- Zero-tile slides now keep the tile-side metadata sidecar but skip empty embedding tensors on disk, and embedding summaries now count only slides with at least one tile. +- Trimmed additional smoke checks from the core suite, including config-comment hygiene, notebook progress UI checks, and a couple of HS2P cutover smoke assertions. +- Pruned benchmark, release, import-surface, and dependency smoke tests from the default suite so CI now focuses on core runtime and workflow coverage. +- Trimmed the dependency-split regression tests down to the stable packaging checks and removed the stale `slide2vec[models]` README install example. +- Pinned `transformers` to `<5.0.0` in `pyproject.toml` so the repo stays compatible with the currently supported `huggingface-hub` line and avoids the `is_offline_mode` import crash seen with `transformers 5.5.0`. +- `slide2vec.inference._build_batch_preprocessor()` now falls back to per-item preprocessing when the loaded transform stack cannot be lowered into the batched fast path, instead of aborting distributed embedding runs. +- The per-item embedding fallback now applies the model's transform pipeline to each image before `encode_tiles()`, so unsupported stacks no longer forward raw `uint8` tensors into mixed-precision model weights. +- Simplified the embedding preprocessing path to CPU-only for now. +- `PreprocessingConfig.requested_spacing_um` and `PreprocessingConfig.requested_tile_size_px` are now required fields, and model-aware config loading fills them only when a preset exposes one unambiguous recommended spacing. 
+- Direct API preprocessing inference now fails fast for preset models with multiple supported spacings instead of guessing a spacing. +- Passing `PreprocessingConfig(backend="asap")` through the public model and pipeline APIs now fills missing spacing and tile-size values from the model's recommended preset when they are omitted. +- ASAP tiling now preloads `wholeslidedata` under C-stderr suppression so its eager CuCIM accessory import no longer leaks `cuInit` / `cuFile` noise when the runtime still uses the ASAP backend. + +## 2026-03-20 + +- Aligned slide2vec with the HS2P contract split introduced after `2.3.0`. +- `read_coordinates_from` now refers only to HS2P coordinate sidecars for legacy WSI-based tiling reuse. +- `read_tiles_from` is now slide2vec-specific and points at per-slide `.tiles.tar` tile stores for embedding reuse. +- The embedding path auto-detects `/tiles/.tiles.tar` when no explicit tile-store root is configured. +- Bumped the minimum supported HS2P version to `2.4.0`. +- slide2vec now writes tile stores during tiling unless `read_tiles_from` explicitly points at an external existing tile-store root to reuse. +- Removed implicit tile-store auto-discovery from the embedding path; external store reuse is explicit-only. + +## 2026-03-22 + +- Added per-batch reader timing to tar, WSD, and cuCIM collators: `worker_batch_ms`, `reader_open_ms`, and `reader_read_ms`. +- `embedding.batch.timing` events now include a `gpu_busy_fraction` proxy derived from non-loader batch time. +- Benchmark scripts now preserve and aggregate reader timing fields plus `gpu_busy_fraction` so read-strategy runs can compare reader cost and GPU feed quality directly. +- On-the-fly embedding now caps auto-derived DataLoader workers to the SLURM CPU allocation when `SLURM_CPUS_PER_TASK` or `SLURM_JOB_CPUS_PER_NODE` is present, instead of blindly using `os.cpu_count()`. 
+- The read-strategy benchmark now supports `--batch-sizes` sweeps, groups summaries by `(mode, batch_size)`, and writes a throughput-vs-batch-size curve plot plus per-batch-size strategy/timing plots. +- Added a dedicated `benchmark_end_to_end_paths.py` runner for full-pipeline tar-vs-on-the-fly comparisons from raw slides to final embedding artifacts. + +## 2026-03-23 + +- Renamed the public model factory from `Model.from_pretrained(...)` to `Model.from_preset(...)` and updated the CLI, docs, notebooks, scripts, and regression tests to use the new preset-centric terminology. +- Extended `benchmark_end_to_end_paths.py` with an extra `wsd_single` mode so end-to-end runs can compare the previous on-the-fly ASAP single-tile baseline against `cucim_supertiles`, while keeping the tar path as reference. +- Added embedding subpath accounting to `benchmark_end_to_end_paths.py`, including total timed seconds and fractions for data-pipeline work versus model forward, plus an `embedding_subpath_breakdown.png` chart. +- `benchmark_end_to_end_paths.py` now clears each per-trial run directory before rerunning a mode/repetition so stale `progress.jsonl`, metrics, and logs from previous runs cannot contaminate new summaries. +- Moved the canonical config location for `num_cucim_workers` to `speed.num_cucim_workers`; config readers still accept the legacy `tiling.num_cucim_workers` as a fallback for backward compatibility. +- The embedding path now supports `tiling.backend: "auto"` properly by resolving the per-slide backend from the `TilingResult` metadata, so on-the-fly embedding can dispatch to cuCIM or WSD using the backend that hs2p actually used during tiling. +- The default slide-reading backend is now `auto` in both the Python API and YAML defaults, so new runs prefer hs2p's per-slide backend resolution instead of defaulting to ASAP. 
+- Added strict recommended model-setting validation for pretrained models: by default, mismatched input size or target spacing now raises during merged config loading and public API embedding/pipeline calls. +- Added `model.allow_non_recommended_settings` / `allow_non_recommended_settings=True` as an explicit opt-out that downgrades those mismatches to warnings instead of silently continuing. +- Aligned the packaged `uni` region preset with the new validation by pinning its encoder `model.input_size` to `224`, and added a regression test that loads every packaged model preset through the merged config path. +- Trimmed comments out of every packaged model config except `default.yaml`, and added a regression test that asserts non-default packaged model presets remain comment-free. +- Added CONCHv1.5 support as preset `conchv15` with aliases including `conchv1.5`, and wired it through the TITAN `return_conch()` loading path with regression coverage for canonicalization, preset loading, and model-factory registration. +- Replaced the old boolean mixed-precision controls with explicit `speed.precision` / `ExecutionOptions.precision` values (`fp32`, `fp16`, `bf16`), and extended recommended pretrained-model validation to include precision mismatches. +- Normalized packaged model presets to declare their recommended precision explicitly, using TRIDENT precision recommendations for the overlapping models. +- Aligned two model defaults with the intended upstream behavior: MUSK now runs with `ms_aug=False`, and Virchow / Virchow2 now default to concatenated `CLS + mean(patches)` embeddings while still honoring explicit `mode=` overrides. +- Consolidated `docs/models.md` into a single preset table that records encoder level, supported spacing, and the explicit request string to use for each shipped model entry, and updated the default config comment to point there instead of duplicating a model-name list. 
+- `EmbeddedSlide` now carries `num_tiles`, `mask_preview_path`, and `tiling_preview_path` from the tiling result so downstream code can retain those artifacts alongside coordinates and embeddings. +- Added a regression test covering `_make_embedded_slide()` pass-through for the new tiling artifact fields. +- Tiling runs now persist preview artifact paths into `process_list.csv` under `mask_preview_path` and `tiling_preview_path` by reusing the returned HS2P artifact objects, and the tiling-result loader restores those paths explicitly for later notebook and pipeline use. +- Restored the slide2vec tiling-artifact wiring after an accidental rollback, while intentionally leaving HS2P mask-preview rendering on the previous resize behavior. +- The public Python API now auto-installs a live progress reporter for interactive terminals and Jupyter notebooks when no reporter is already active, so `Model.embed_slide(...)`, `Model.embed_slides(...)`, and related entrypoints show tiling, embedding, and aggregation progress by default. +- Direct Python embedding APIs now accept omitted `preprocessing=` and infer a model-aware `PreprocessingConfig` automatically, using the model's recommended tile size and selecting `requested_spacing_um=0.5` when supported, otherwise the smallest supported spacing with a warning for multi-spacing models. 
diff --git a/docs/index.rst b/docs/index.rst
index 54869ea..ee6ba44 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -96,8 +96,8 @@ The docs site is organized around the main ways people use the package:
    python-api
    cli
    models
-   documentation
+   Documentation <documentation>
    benchmarking
    gpu-throughput-optimization-protocol
-   optimize-throughput/h0-mini-single-gpu
+   h0-mini single GPU <optimize-throughput/h0-mini-single-gpu>
    reference
diff --git a/docs/optimize-throughput/h0-mini-single-gpu.md b/docs/optimize-throughput/h0-mini-single-gpu.md
new file mode 100644
index 0000000..9c377e6
--- /dev/null
+++ b/docs/optimize-throughput/h0-mini-single-gpu.md
@@ -0,0 +1,116 @@
+# h0-mini Single-GPU Throughput Tuning
+
+## Summary
+
+Retained config changes for `slide2vec/configs/models/h0-mini.yaml`:
+
+- `model.batch_size: 64`
+- `speed.num_workers_embedding: 32`
+- `speed.prefetch_factor_embedding: 8`
+- `speed.persistent_workers_embedding: true`
+
+The default embedding backend remains ASAP. `speed.embedding_backend: cucim` did not improve throughput on the tuned slice.
+
+## Baseline
+
+The requested full sweep was started on the full `debug-histai-local.csv` manifest and stopped after the first `bs=32, workers=4` slice showed the run would be multi-hour and dominated by loader stalls.
+
+Live full-manifest warmup signal:
+
+- `mean_loader_wait_ms ~= 648.3`
+- `mean_ready_wait_ms ~= 0.09`
+- `mean_forward_ms ~= 20.3`
+- `loader_wait_fraction ~= 96.83%`
+
+Interpretation: the pipeline was reader-bound, not compute-bound.
+ +## Controlled Slice Results + +Primary one-slide slice: + +- Manifest: `output/benchmark-slices/h0-mini-one-slide.csv` +- Slide: `case_06258_slide_ER-(6F11)_0` +- Tile count: `4,759` + +Baseline on the one-slide slice: + +- `batch_size=32`, `embedding_workers=4` +- `35.6 tiles/s` +- `mean_loader_wait_ms=734.1` +- `mean_ready_wait_ms=0.1` +- `loader_wait_fraction=96.92%` + +### Worker sweep at `batch_size=32` + +- `workers=8`: `57.3 tiles/s` (`+60.9%`), `mean_loader_wait_ms=392.9` (`-341.1 ms`) +- `workers=16`: `80.9 tiles/s` (`+127.0%`), `mean_loader_wait_ms=221.9` (`-512.1 ms`) +- `workers=32`: `101.1 tiles/s` (`+183.7%`), `mean_loader_wait_ms=142.0` (`-592.1 ms`) + +Interpretation: still reader-bound, but much less severely. Keep `num_workers_embedding=32`. + +### Batch sweep at `embedding_workers=32` + +- `batch_size=32`: `97.8 tiles/s`, `mean_loader_wait_ms=151.1`, `loader_wait_fraction=86.95%` +- `batch_size=64`: `102.5 tiles/s`, `mean_loader_wait_ms=283.5`, `loader_wait_fraction=88.11%` +- `batch_size=128`: `93.2 tiles/s` +- `batch_size=256`: `83.5 tiles/s` +- `batch_size=512`: `62.8 tiles/s` + +Interpretation: `batch_size=64` improved throughput by `+4.8%` versus `32`, even though wait metrics worsened slightly. Larger batches became counterproductive. + +### Backend sweep at `batch_size=64`, `embedding_workers=32` + +- ASAP: `102.8 tiles/s`, `mean_loader_wait_ms=272.7`, `loader_wait_fraction=87.19%` +- cuCIM: `101.2 tiles/s`, `mean_loader_wait_ms=278.6`, `loader_wait_fraction=86.94%` + +Interpretation: cuCIM did not improve throughput and did not materially reduce wait. Keep ASAP. 
+
+### Prefetch sweep at `batch_size=64`, `embedding_workers=32`, ASAP
+
+- `prefetch=2`: `99.7 tiles/s`, `mean_loader_wait_ms=299.0`
+- `prefetch=4`: `101.6 tiles/s`, `mean_loader_wait_ms=275.3`
+- `prefetch=8`: `103.6 tiles/s`, `mean_loader_wait_ms=265.9`
+- `prefetch=16`: `99.8 tiles/s`, `mean_loader_wait_ms=279.4`
+
+Interpretation: `prefetch_factor_embedding=8` improved throughput and slightly reduced loader/ready wait versus the default `4`. Keep `8`.
+
+### Persistent worker check on a two-slide slice
+
+Persistence only matters across slide boundaries, so this comparison used:
+
+- Manifest: `output/benchmark-slices/h0-mini-loader-slice.csv`
+- Tile count: `12,510`
+- Tuned runtime settings except for `persistent_workers_embedding`
+
+Results:
+
+- `persistent_workers=true`: `148.1 tiles/s`, `mean_loader_wait_ms=265.5`, `loader_wait_fraction=88.44%`
+- `persistent_workers=false`: `143.3 tiles/s`, `mean_loader_wait_ms=283.6`, `loader_wait_fraction=89.26%`
+
+Interpretation: persistence improved throughput and loader wait on a slice where worker reuse can matter. Keep `true`.
+
+## Correctness Verification
+
+Verification slide:
+
+- Manifest: `output/benchmark-slices/h0-mini-verify-small.csv`
+- Slide: `case_06258_slide_Ki67-(MM1)_1`
+- Tile count: `841`
+
+Baseline config versus tuned config:
+
+- Tile embedding shape unchanged: `(841, 768)` in both runs
+- Coordinate arrays unchanged
+- Tile metadata unchanged except for output-path fields
+- Embedding similarity preserved:
+  - `mean cosine similarity = 0.99999994`
+  - `min cosine similarity = 0.99999917`
+
+## Retained Conclusion
+
+For single-GPU h0-mini embedding on this host, the dominant bottleneck is slide reading.
The retained config reduces reader starvation substantially without changing output shape or metadata contracts: + +- Baseline one-slide slice: `35.6 tiles/s`, `loader_wait_fraction=96.92%` +- Tuned one-slide slice (`batch_size=64`, `workers=32`, `prefetch=8`): `103.6 tiles/s`, `loader_wait_fraction=86.85%` + +That is a `~2.9x` throughput improvement on the fixed slice while keeping correctness intact.