clemsgrs · clemsgrs · Apr 17, 2026 · Apr 17, 2026 · Apr 17, 2026 · Apr 17, 2026
diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
@@ -0,0 +1,64 @@
+name: Documentation
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+  push:
+    branches: [main]
+  workflow_dispatch:
+
+concurrency:
+  # Scope the group per ref: this workflow runs for pull requests as well as
+  # pushes to main, and a single shared group would let a PR build cancel an
+  # in-flight build/deployment from main.
+  group: docs-${{ github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+  pages: write
+  id-token: write
+
+jobs:
+  build:
+    name: Build docs
+    runs-on: ubuntu-latest
+    timeout-minutes: 60
+    env:
+      FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
+
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v5
+
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: "3.11"
+
+      - name: Install docs dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e ".[testing,docs]"
+
+      - name: Run docs smoke test
+        run: python -m pytest -q -o addopts= -p no:cov tests/test_docs.py
+
+      - name: Build Sphinx site
+        run: python -m sphinx -W -b html docs docs/_build/html
+
+      - name: Upload Pages artifact
+        uses: actions/upload-pages-artifact@v4
+        with:
+          path: docs/_build/html
+
+  deploy:
+    name: Publish docs
+    if: github.event_name == 'workflow_dispatch' || (github.event_name == 'push' && github.ref == 'refs/heads/main')
+    needs: build
+    runs-on: ubuntu-latest
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+
+    steps:
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
diff --git a/.gitignore b/.gitignore
@@ -166,6 +166,5 @@ output/
 outputs/
 archive/
 tasks/
-docs/documentation.md
 docs/20*-*.md
-data/
+data/
diff --git a/README.md b/README.md
@@ -1,9 +1,12 @@
 # slide2vec
 
 [![PyPI version](https://img.shields.io/pypi/v/slide2vec?label=pypi&logo=pypi&color=3776AB)](https://pypi.org/project/slide2vec/)
+[![Docs](https://img.shields.io/badge/docs-website-blue)](https://clemsgrs.github.io/slide2vec/)
 
 `slide2vec` is a Python package for efficient encoding of whole-slide images using publicly available foundation models. It builds on [`hs2p`](https://pypi.org/project/hs2p/) for fast preprocessing and exposes a focused surface around `Model`, `Pipeline`, and `ExecutionOptions`.
 
+Documentation site: [https://clemsgrs.github.io/slide2vec/](https://clemsgrs.github.io/slide2vec/)
+
 ## Installation
 
 ```shell
@@ -37,6 +40,17 @@ x = embedded.x
 y = embedded.y
 ```
 
+Use `list_models()` when you want to inspect the shipped presets programmatically:
+
+```python
+from slide2vec import list_models
+
+all_models = list_models()
+tile_models = list_models("tile")
+slide_models = list_models("slide")
+patient_models = list_models("patient")
+```
+
 Use `Pipeline(...)` for manifest-driven batch processing when you want artifacts written to disk instead of only in-memory outputs:
 
 ```python
@@ -135,7 +149,8 @@ docker run --rm -it \
 
 ## Documentation
 
-- [`docs/cli.md`](docs/cli.md) for the config-driven CLI guide
+- [Documentation website](https://clemsgrs.github.io/slide2vec/) for the polished docs site
 - [`docs/python-api.md`](docs/python-api.md) for the detailed API reference
-- [`tutorials/api_walkthrough.ipynb`](tutorials/api_walkthrough.ipynb) for a notebook walkthrough of the API
+- [`docs/cli.md`](docs/cli.md) for the config-driven CLI guide
 - [`docs/models.md`](docs/models.md) for the full supported-model catalog
+- [`tutorials/api_walkthrough.ipynb`](tutorials/api_walkthrough.ipynb) for a notebook walkthrough of the API
diff --git a/docs/_generate_reference.py b/docs/_generate_reference.py
@@ -0,0 +1,196 @@
+"""Generate the compact Sphinx reference page from public slide2vec metadata."""
+
+from __future__ import annotations
+
+from dataclasses import fields, is_dataclass
+from inspect import signature
+from pathlib import Path
+from textwrap import dedent
+import sys
+
+# Put the repository root (the parent of this docs/ directory) at the front of
+# sys.path so the slide2vec imports below can be resolved from the checkout.
+ROOT = Path(__file__).resolve().parents[1]
+sys.path.insert(0, str(ROOT))
+
+from slide2vec.api import (  # noqa: E402
+    EmbeddedPatient,
+    EmbeddedSlide,
+    ExecutionOptions,
+    Model,
+    Pipeline,
+    PreprocessingConfig,
+    RunResult,
+    list_models,
+)
+from slide2vec.encoders import (  # noqa: E402
+    PatientEncoder,
+    SlideEncoder,
+    TileEncoder,
+    encoder_registry,
+    register_encoder,
+)
+
+
+def _field_names(cls: type) -> str:
+    """Return the dataclass field names of ``cls`` as a comma-separated string.
+
+    Each name is wrapped in double backticks so it renders as an inline
+    literal in reStructuredText. Raises ``TypeError`` if ``cls`` is not a
+    dataclass.
+    """
+    if not is_dataclass(cls):
+        raise TypeError(f"{cls!r} is not a dataclass")
+    return ", ".join(f"``{field.name}``" for field in fields(cls))
+
+
+def _constructor_knobs(cls: type) -> str:
+    """Return the parameter names of ``cls.__init__`` as a comma-separated string.
+
+    ``self`` is skipped and every remaining name is wrapped in double
+    backticks (reStructuredText inline literal). Only the bare parameter names
+    are emitted; defaults and annotations are not included.
+    """
+    params = [
+        f"``{param.name}``"
+        for param in signature(cls.__init__).parameters.values()
+        if param.name != "self"
+    ]
+    return ", ".join(params)
+
+
+def _list_table(rows: list[tuple[str, str, str, str]]) -> str:
+    """Render the preset rows as a reStructuredText ``list-table`` directive.
+
+    The table has one header row (Name, Class, Constructor knobs, Notes)
+    followed by one row per ``(name, cls_name, knobs, notes)`` tuple. The name
+    and class name are wrapped in double backticks; knobs and notes are
+    inserted as given. The result is newline-joined with no trailing newline.
+    """
+    # Directive line, its option, then the blank line that precedes the body.
+    lines = [".. list-table::", "   :header-rows: 1", ""]
+    lines.extend(
+        [
+            "   * - Name",
+            "     - Class",
+            "     - Constructor knobs",
+            "     - Notes",
+        ]
+    )
+    for name, cls_name, knobs, notes in rows:
+        lines.extend(
+            [
+                f"   * - ``{name}``",
+                f"     - ``{cls_name}``",
+                f"     - {knobs}",
+                f"     - {notes}",
+            ]
+        )
+    return "\n".join(lines)
+
+
+def _api_table(rows: list[tuple[str, str]]) -> str:
+    """Render symbol/description pairs as a reStructuredText ``list-table``.
+
+    The table has one header row (Symbol, Description) followed by one row per
+    ``(symbol, desc)`` tuple. The symbol is wrapped in double backticks; the
+    description is inserted as given. The result is newline-joined with no
+    trailing newline.
+    """
+    # Directive line, its option, then the blank line that precedes the body.
+    lines = [".. list-table::", "   :header-rows: 1", ""]
+    lines.extend(
+        [
+            "   * - Symbol",
+            "     - Description",
+        ]
+    )
+    for symbol, desc in rows:
+        lines.extend(
+            [
+                f"   * - ``{symbol}``",
+                f"     - {desc}",
+            ]
+        )
+    return "\n".join(lines)
+
+
+def _config_table(rows: list[tuple[str, str, str]]) -> str:
+    """Render configuration rows as a reStructuredText ``list-table``.
+
+    The table has one header row (Config, Main fields, Purpose) followed by
+    one row per ``(name, fields_text, purpose)`` tuple. The name is wrapped in
+    double backticks; the field list and purpose are inserted as given. The
+    result is newline-joined with no trailing newline.
+    """
+    # Directive line, its option, then the blank line that precedes the body.
+    lines = [".. list-table::", "   :header-rows: 1", ""]
+    lines.extend(
+        [
+            "   * - Config",
+            "     - Main fields",
+            "     - Purpose",
+        ]
+    )
+    for name, fields_text, purpose in rows:
+        lines.extend(
+            [
+                f"   * - ``{name}``",
+                f"     - {fields_text}",
+                f"     - {purpose}",
+            ]
+        )
+    return "\n".join(lines)
+
+
+def build_reference_rst() -> str:
+    """Return the full compact reference page as reStructuredText.
+
+    The page is assembled in this order: a title and intro paragraph, a
+    "Main entry points" table, an "Encoder contract" table, a
+    "Configuration dataclasses" table whose field lists are read from the
+    dataclasses themselves, a "Registered presets" table with one row per name
+    returned by ``list_models()``, and a closing note.
+    """
+
+    # Hand-written descriptions of the public entry points.
+    api_rows = [
+        ("Model", "Direct in-memory embedding API for slide, tile, and patient workflows"),
+        ("Pipeline", "Manifest-driven batch processing and artifact writing"),
+        ("list_models", "Return the registered preset names, optionally filtered by level"),
+        ("PreprocessingConfig", "Whole-slide tiling, read-back, and spacing configuration"),
+        ("ExecutionOptions", "Runtime settings for batch size, precision, outputs, and workers"),
+        ("EmbeddedSlide", "In-memory result from Model.embed_slide(...) / Model.embed_slides(...)"),
+        ("EmbeddedPatient", "In-memory result from Model.embed_patient(...) / Model.embed_patients(...)"),
+    ]
+
+    # Field lists are introspected, so this column follows the dataclass
+    # definitions; only the purpose text is hand-written.
+    config_rows = [
+        (
+            "PreprocessingConfig",
+            _field_names(PreprocessingConfig),
+            "Whole-slide segmentation, read strategy, and tiling geometry",
+        ),
+        (
+            "ExecutionOptions",
+            _field_names(ExecutionOptions),
+            "Runtime behavior and persisted output controls",
+        ),
+        (
+            "RunResult",
+            _field_names(RunResult),
+            "Summary of a manifest-driven pipeline run",
+        ),
+    ]
+
+    # One row per registered preset, in sorted name order.
+    preset_rows = []
+    for name in sorted(list_models()):
+        # NOTE(review): assumes info(name) returns a mapping of preset metadata
+        # with a "level" key and require(name) returns the encoder class;
+        # confirm against slide2vec.encoders.
+        info = encoder_registry.info(name)
+        cls = encoder_registry.require(name)
+        notes = []
+        level = str(info["level"])
+        notes.append(f"level={level}")
+        # These two metadata keys are only reported when present in ``info``.
+        if "default_output_variant" in info:
+            notes.append(f"default={info['default_output_variant']}")
+        if "supported_spacing_um" in info:
+            notes.append(f"spacing={info['supported_spacing_um']}")
+        preset_rows.append((name, cls.__name__, _constructor_knobs(cls), "; ".join(notes)))
+
+    # The backslash after the opening quotes suppresses a leading blank line;
+    # dedent() strips the common indentation of the literal.
+    body = dedent(
+        """\
+        Compact Reference
+        =================
+
+        This page is a concise index of the public API and encoder registry. Use the
+        guide pages for workflow details and the docstrings for the exact contracts.
+
+        Main entry points
+        -----------------
+
+        """
+    )
+    body += _api_table(api_rows)
+    # Each section heading is underlined with dashes of the same length.
+    body += "\n\nEncoder contract\n----------------\n\n"
+    body += _api_table(
+        [
+            ("TileEncoder", "Base class for encoders that consume tiles directly"),
+            ("SlideEncoder", "Base class for encoders that pool tile features into slide features"),
+            ("PatientEncoder", "Base class for encoders that pool slide embeddings into patient embeddings"),
+            ("register_encoder", "Decorator used to register a custom encoder class and metadata"),
+        ]
+    )
+    body += "\n\nConfiguration dataclasses\n-------------------------\n\n"
+    body += _config_table(config_rows)
+    body += "\n\nRegistered presets\n------------------\n\n"
+    body += _list_table(preset_rows)
+    body += "\n\nUse this page as a concise index. Use the guide pages for workflow and the\ndocstrings for the exact API contract.\n"
+    return body
+
+
+def write_reference_rst(path: str | Path | None = None) -> Path:
+    """Write the generated reference page to disk.
+
+    ``path`` is the destination file. When it is ``None`` the page is written
+    to ``reference.rst`` in the same directory as this script. The text comes
+    from ``build_reference_rst()`` and is written as UTF-8. Returns the path
+    that was written.
+    """
+
+    target = Path(path) if path is not None else Path(__file__).with_name("reference.rst")
+    target.write_text(build_reference_rst(), encoding="utf-8")
+    return target
+
+
+def main() -> None:
+    """Script entry point: write the reference page to its default location."""
+    write_reference_rst()
+
+
+# Regenerate the page only when run as a script, not when imported.
+if __name__ == "__main__":
+    main()
diff --git a/docs/_static/.gitkeep b/docs/_static/.gitkeep
@@ -0,0 +1 @@
+