Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions .github/workflows/docs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
name: Documentation

on:
pull_request:
types: [opened, synchronize, reopened]
push:
branches: [main]
workflow_dispatch:

concurrency:
group: docs
cancel-in-progress: true

permissions:
contents: read
pages: write
id-token: write

jobs:
build:
name: Build docs
runs-on: ubuntu-latest
timeout-minutes: 60
env:
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true

steps:
- name: Check out repository
uses: actions/checkout@v5

- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: "3.11"

- name: Install docs dependencies
run: |
python -m pip install --upgrade pip
pip install -e ".[testing,docs]"

- name: Run docs smoke test
run: python -m pytest -q -o addopts= -p no:cov tests/test_docs.py

- name: Build Sphinx site
run: python -m sphinx -W -b html docs docs/_build/html

- name: Upload Pages artifact
uses: actions/[email protected]
with:
path: docs/_build/html

deploy:
name: Publish docs
if: github.event_name == 'workflow_dispatch' || (github.event_name == 'push' && github.ref == 'refs/heads/main')
needs: build
runs-on: ubuntu-latest
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}

steps:
- name: Deploy to GitHub Pages
id: deployment
uses: actions/deploy-pages@v4
3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,5 @@ output/
outputs/
archive/
tasks/
docs/documentation.md
docs/20*-*.md
data/
data/
19 changes: 17 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
# slide2vec

[![PyPI version](https://img.shields.io/pypi/v/slide2vec?label=pypi&logo=pypi&color=3776AB)](https://pypi.org/project/slide2vec/)
[![Docs](https://img.shields.io/badge/docs-website-blue)](https://clemsgrs.github.io/slide2vec/)

`slide2vec` is a Python package for efficient encoding of whole-slide images using publicly available foundation models. It builds on [`hs2p`](https://pypi.org/project/hs2p/) for fast preprocessing and exposes a focused surface around `Model`, `Pipeline`, and `ExecutionOptions`.

Documentation site: [https://clemsgrs.github.io/slide2vec/](https://clemsgrs.github.io/slide2vec/)

## Installation

```shell
Expand Down Expand Up @@ -37,6 +40,17 @@ x = embedded.x
y = embedded.y
```

Use `list_models()` when you want to inspect the shipped presets programmatically:

```python
from slide2vec import list_models

all_models = list_models()
tile_models = list_models("tile")
slide_models = list_models("slide")
patient_models = list_models("patient")
```

Use `Pipeline(...)` for manifest-driven batch processing when you want artifacts written to disk instead of only in-memory outputs:

```python
Expand Down Expand Up @@ -135,7 +149,8 @@ docker run --rm -it \

## Documentation

- [`docs/cli.md`](docs/cli.md) for the config-driven CLI guide
- [Documentation website](https://clemsgrs.github.io/slide2vec/) for the polished docs site
- [`docs/python-api.md`](docs/python-api.md) for the detailed API reference
- [`tutorials/api_walkthrough.ipynb`](tutorials/api_walkthrough.ipynb) for a notebook walkthrough of the API
- [`docs/cli.md`](docs/cli.md) for the config-driven CLI guide
- [`docs/models.md`](docs/models.md) for the full supported-model catalog
- [`tutorials/api_walkthrough.ipynb`](tutorials/api_walkthrough.ipynb) for a notebook walkthrough of the API
196 changes: 196 additions & 0 deletions docs/_generate_reference.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
"""Generate the compact Sphinx reference page from public slide2vec metadata."""

from __future__ import annotations

from dataclasses import fields, is_dataclass
from inspect import signature
from pathlib import Path
from textwrap import dedent
import sys

ROOT = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(ROOT))

from slide2vec.api import ( # noqa: E402
EmbeddedPatient,
EmbeddedSlide,
ExecutionOptions,
Model,
Pipeline,
PreprocessingConfig,
RunResult,
list_models,
)
from slide2vec.encoders import ( # noqa: E402
PatientEncoder,
SlideEncoder,
TileEncoder,
encoder_registry,
register_encoder,
)


def _field_names(cls: type) -> str:
if not is_dataclass(cls):
raise TypeError(f"{cls!r} is not a dataclass")
return ", ".join(f"``{field.name}``" for field in fields(cls))


def _constructor_knobs(cls: type) -> str:
params = [
f"``{param.name}``"
for param in signature(cls.__init__).parameters.values()
if param.name != "self"
]
return ", ".join(params)


def _list_table(rows: list[tuple[str, str, str, str]]) -> str:
lines = [".. list-table::", " :header-rows: 1", ""]
lines.extend(
[
" * - Name",
" - Class",
" - Constructor knobs",
" - Notes",
]
)
for name, cls_name, knobs, notes in rows:
lines.extend(
[
f" * - ``{name}``",
f" - ``{cls_name}``",
f" - {knobs}",
f" - {notes}",
]
)
return "\n".join(lines)


def _api_table(rows: list[tuple[str, str]]) -> str:
lines = [".. list-table::", " :header-rows: 1", ""]
lines.extend(
[
" * - Symbol",
" - Description",
]
)
for symbol, desc in rows:
lines.extend(
[
f" * - ``{symbol}``",
f" - {desc}",
]
)
return "\n".join(lines)


def _config_table(rows: list[tuple[str, str, str]]) -> str:
lines = [".. list-table::", " :header-rows: 1", ""]
lines.extend(
[
" * - Config",
" - Main fields",
" - Purpose",
]
)
for name, fields_text, purpose in rows:
lines.extend(
[
f" * - ``{name}``",
f" - {fields_text}",
f" - {purpose}",
]
)
return "\n".join(lines)


def build_reference_rst() -> str:
    """Assemble the compact reference page and return it as reStructuredText.

    The page consists of, in order: a fixed title and introduction, the main
    entry-point table, the encoder base-class table, the configuration
    dataclass table (field names are read from the dataclasses), one row per
    preset name returned by ``list_models()``, and a fixed closing paragraph.
    """

    entry_points = [
        ("Model", "Direct in-memory embedding API for slide, tile, and patient workflows"),
        ("Pipeline", "Manifest-driven batch processing and artifact writing"),
        ("list_models", "Return the registered preset names, optionally filtered by level"),
        ("PreprocessingConfig", "Whole-slide tiling, read-back, and spacing configuration"),
        ("ExecutionOptions", "Runtime settings for batch size, precision, outputs, and workers"),
        ("EmbeddedSlide", "In-memory result from Model.embed_slide(...) / Model.embed_slides(...)"),
        ("EmbeddedPatient", "In-memory result from Model.embed_patient(...) / Model.embed_patients(...)"),
    ]

    dataclass_rows = [
        (
            "PreprocessingConfig",
            _field_names(PreprocessingConfig),
            "Whole-slide segmentation, read strategy, and tiling geometry",
        ),
        (
            "ExecutionOptions",
            _field_names(ExecutionOptions),
            "Runtime behavior and persisted output controls",
        ),
        (
            "RunResult",
            _field_names(RunResult),
            "Summary of a manifest-driven pipeline run",
        ),
    ]

    # Optional registry metadata keys and the label each one gets in the
    # "Notes" column; a key is only reported when the registry provides it.
    optional_notes = (
        ("default_output_variant", "default"),
        ("supported_spacing_um", "spacing"),
    )
    presets = []
    for preset in sorted(list_models()):
        meta = encoder_registry.info(preset)
        encoder_cls = encoder_registry.require(preset)
        remarks = [f"level={meta['level']!s}"]
        for key, label in optional_notes:
            if key in meta:
                remarks.append(f"{label}={meta[key]}")
        presets.append(
            (
                preset,
                encoder_cls.__name__,
                _constructor_knobs(encoder_cls),
                "; ".join(remarks),
            )
        )

    encoder_rows = [
        ("TileEncoder", "Base class for encoders that consume tiles directly"),
        ("SlideEncoder", "Base class for encoders that pool tile features into slide features"),
        ("PatientEncoder", "Base class for encoders that pool slide embeddings into patient embeddings"),
        ("register_encoder", "Decorator used to register a custom encoder class and metadata"),
    ]

    intro = (
        "Compact Reference\n"
        "=================\n"
        "\n"
        "This page is a concise index of the public API and encoder registry. Use the\n"
        "guide pages for workflow details and the docstrings for the exact contracts.\n"
        "\n"
        "Main entry points\n"
        "-----------------\n"
        "\n"
    )
    outro = (
        "\n\nUse this page as a concise index. Use the guide pages for workflow and the\n"
        "docstrings for the exact API contract.\n"
    )

    # Section headings carry their own surrounding blank lines, so the
    # pieces are concatenated without any extra separator.
    sections = [
        intro,
        _api_table(entry_points),
        "\n\nEncoder contract\n----------------\n\n",
        _api_table(encoder_rows),
        "\n\nConfiguration dataclasses\n-------------------------\n\n",
        _config_table(dataclass_rows),
        "\n\nRegistered presets\n------------------\n\n",
        _list_table(presets),
        outro,
    ]
    return "".join(sections)


def write_reference_rst(path: str | Path | None = None) -> Path:
    """Generate the reference page and write it to disk as UTF-8.

    Args:
        path: Destination file. When ``None``, the page is written to
            ``reference.rst`` in the same directory as this script.

    Returns:
        The path that was written.
    """

    if path is None:
        destination = Path(__file__).with_name("reference.rst")
    else:
        destination = Path(path)
    destination.write_text(build_reference_rst(), encoding="utf-8")
    return destination


def main() -> None:
    """Script entry point: write the reference page to its default location."""
    write_reference_rst()


# Regenerate the page only when this file is executed as a script, so that
# importing the module has no file-writing side effect.
if __name__ == "__main__":
    main()
1 change: 1 addition & 0 deletions docs/_static/.gitkeep
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

Loading
Loading