diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index e20c1c3..c8c18fa 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -112,7 +112,7 @@ jobs: --repo '${{ github.repository }}' publish-docker: - name: Build & push Docker image to Docker Hub and GHCR + name: Build & push Docker image (${{ matrix.variant }}) # Runs on real releases, and on manual dispatch with `test_docker=true` # for verifying registry credentials before the first release. if: github.event_name == 'release' || (github.event_name == 'workflow_dispatch' && inputs.test_docker) @@ -123,6 +123,17 @@ jobs: permissions: contents: read packages: write + strategy: + fail-fast: false + matrix: + include: + # slim (default) — LiteLLM-only, ~300 MB. Publishes as `:latest`. + - variant: slim + install_spec: /ccc-src + # full — bundles sentence-transformers + torch + baked model, + # ~2 GB. Publishes as `:full`. + - variant: full + install_spec: /ccc-src[full] steps: - uses: actions/checkout@v4 @@ -151,25 +162,43 @@ jobs: - name: Compute image tags id: tags - # Real releases: push `:latest` and `:<version>` to both registries. - # Manual dispatches: push only `:test` so we don't clobber `:latest`. + # Tag scheme: + # slim on release: :latest, :<version> + # full on release: :full, :<version>-full + # slim on dispatch: :test + # full on dispatch: :test-full + # Dispatched tags stay out of the `:latest` / `:<version>` namespace + # so manual test runs don't clobber what users pull. 
run: | + variant="${{ matrix.variant }}" + if [ "$variant" = "slim" ]; then + slim_suffix="" + else + slim_suffix="-$variant" + fi if [ "${{ github.event_name }}" = "release" ]; then - { - echo "tags<<EOF" - echo "cocoindex/cocoindex-code:latest" - echo "cocoindex/cocoindex-code:${{ github.ref_name }}" - echo "ghcr.io/${{ github.repository }}:latest" - echo "ghcr.io/${{ github.repository }}:${{ github.ref_name }}" - echo "EOF" - } >> "$GITHUB_OUTPUT" + version="${{ github.ref_name }}" + if [ "$variant" = "slim" ]; then + latest_tag="latest" + else + latest_tag="$variant" + fi + { + echo "tags<<EOF" + echo "cocoindex/cocoindex-code:${latest_tag}" + echo "cocoindex/cocoindex-code:${version}${slim_suffix}" + echo "ghcr.io/${{ github.repository }}:${latest_tag}" + echo "ghcr.io/${{ github.repository }}:${version}${slim_suffix}" + echo "EOF" + } >> "$GITHUB_OUTPUT" else - { - echo "tags<<EOF" - echo "cocoindex/cocoindex-code:test" - echo "ghcr.io/${{ github.repository }}:test" - echo "EOF" - } >> "$GITHUB_OUTPUT" + test_tag="test${slim_suffix}" + { + echo "tags<<EOF" + echo "cocoindex/cocoindex-code:${test_tag}" + echo "ghcr.io/${{ github.repository }}:${test_tag}" + echo "EOF" + } >> "$GITHUB_OUTPUT" fi - name: Build and push to both registries @@ -186,5 +215,11 @@ jobs: # PyPI's CDN yet (which happened on v0.2.24 release), and ensures # the image matches the tagged commit byte-for-byte. build-args: | - CCC_INSTALL_SPEC=/ccc-src[default] + CCC_VARIANT=${{ matrix.variant }} + CCC_INSTALL_SPEC=${{ matrix.install_spec }} tags: ${{ steps.tags.outputs.tags }} + # Per-variant BuildKit cache so slim and full don't evict each + # other's layers. The heavy `deps` layer (torch + friends for + # full; empty for slim) reuses across releases. + cache-from: type=gha,scope=${{ matrix.variant }} + cache-to: type=gha,mode=max,scope=${{ matrix.variant }} diff --git a/README.md b/README.md index ab47ce6..79ba957 100644 --- a/README.md +++ b/README.md @@ -46,18 +46,18 @@ A lightweight, effective **(AST-based)** semantic code search tool for your code Using [pipx](https://pipx.pypa.io/stable/installation/): ```bash -pipx install 'cocoindex-code[default]' # batteries included (local embeddings) +pipx install 'cocoindex-code[full]' # batteries included (local embeddings) pipx upgrade cocoindex-code # upgrade ``` Using [uv](https://docs.astral.sh/uv/getting-started/installation/): ```bash -uv tool install --upgrade 'cocoindex-code[default]' --prerelease explicit --with "cocoindex>=1.0.0a24" +uv tool install --upgrade 'cocoindex-code[full]' --prerelease explicit --with "cocoindex>=1.0.0a24" ``` -Two install styles: -- `cocoindex-code[default]` — batteries-included. 
Pulls in `sentence-transformers` so local embeddings (no API key required) work out of the box. The `ccc init` interactive prompt defaults to [Snowflake/snowflake-arctic-embed-xs](https://huggingface.co/Snowflake/snowflake-arctic-embed-xs). -- `cocoindex-code` — slim. LiteLLM-only; requires a cloud embedding provider and API key. Use when you don't want the local-embedding deps (~1 GB of torch + transformers). +Two install styles — they mirror the Docker image variants of the same names: +- `cocoindex-code[full]` — batteries-included. Pulls in `sentence-transformers` so local embeddings (no API key required) work out of the box. The `ccc init` interactive prompt defaults to [Snowflake/snowflake-arctic-embed-xs](https://huggingface.co/Snowflake/snowflake-arctic-embed-xs). +- `cocoindex-code` (slim) — LiteLLM-only; requires a cloud embedding provider and API key. Use when you don't want the local-embedding deps (~1 GB of torch + transformers). Next, set up your [coding agent integration](#coding-agent-integration) — or jump to [Manual CLI Usage](#manual-cli-usage) if you prefer direct control. @@ -198,6 +198,25 @@ The recommended approach is a **persistent container**: start it once, and use `docker exec` to run CLI commands or connect MCP sessions to it. The daemon inside stays warm across sessions, so the embedding model is loaded only once. +### Choosing an image + +Two variants are published from each release: + +| Tag | Size | Embedding backends | When to pick | +|---|---|---|---| +| `cocoindex/cocoindex-code:latest` (slim, default) | ~450 MB | LiteLLM (cloud: OpenAI, Voyage, Gemini, Ollama, …) | Most users. Cloud-backed embeddings, smaller image, fast pulls. | +| `cocoindex/cocoindex-code:full` | ~5 GB | sentence-transformers (local) + LiteLLM | When you want local embeddings without an API key, or an offline-ready container. Heavier because of torch + transformers. 
| + +The rest of this section uses `:latest` — substitute `:full` in the `image:` / +`docker run` commands if you want the full variant. + +> **Mac users running the `:full` variant:** local embedding inference is +> CPU-only inside Docker, because Docker on macOS can't access Apple's Metal +> (MPS) GPU. If you want local embeddings and fast inference, install +> natively instead: `pipx install 'cocoindex-code[full]'`. The `:latest` +> (slim) variant is unaffected — LiteLLM runs the model on the provider's +> side, so Docker vs. native makes no difference. + ### Quick start — `docker compose up -d` Grab [`docker/docker-compose.yml`](./docker/docker-compose.yml) from this repo and run: @@ -352,7 +371,7 @@ docker build -t cocoindex-code:local -f docker/Dockerfile . - **Ultra Performant**: ⚡ Built on top of ultra performant [Rust indexing engine](https://github.com/cocoindex-io/cocoindex). Only re-indexes changed files for fast updates. - **Multi-Language Support**: Python, JavaScript/TypeScript, Rust, Go, Java, C/C++, C#, SQL, Shell, and more. - **Embedded**: Portable and just works, no database setup required! -- **Flexible Embeddings**: Local SentenceTransformers via the `[default]` extra (free, no API key!) or 100+ cloud providers via LiteLLM. +- **Flexible Embeddings**: Local SentenceTransformers via the `[full]` extra (free, no API key!) or 100+ cloud providers via LiteLLM. ## Configuration @@ -439,7 +458,7 @@ See [`src/cocoindex_code/chunking.py`](./src/cocoindex_code/chunking.py) for the ## Embedding Models -With the `[default]` extra installed, `ccc init` defaults to a local SentenceTransformers model ([Snowflake/snowflake-arctic-embed-xs](https://huggingface.co/Snowflake/snowflake-arctic-embed-xs)) — no API key required. To use a different model, edit `~/.cocoindex_code/global_settings.yml`. 
+With the `[full]` extra installed, `ccc init` defaults to a local SentenceTransformers model ([Snowflake/snowflake-arctic-embed-xs](https://huggingface.co/Snowflake/snowflake-arctic-embed-xs)) — no API key required. To use a different model, edit `~/.cocoindex_code/global_settings.yml`. > The `envs` entries below are only needed if the key isn't already in your shell environment — the daemon inherits your environment automatically. diff --git a/docker/Dockerfile b/docker/Dockerfile index 899e6f1..d01620f 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,37 +1,85 @@ -# ─── Stage 1: install dependencies ─────────────────────────────────────────── +# ─── Stage 1: heavy stable dependencies (variant-aware) ────────────────────── +# Two image variants are published from this Dockerfile: +# - slim (default, `:latest`) — ~450 MB. cocoindex-code + LiteLLM only. +# For users who'll point the embedding at a cloud provider (OpenAI, +# Voyage, Gemini, …). +# - full (`:full`) — ~5 GB. Also bundles sentence-transformers +# + torch + a pre-baked default model. For users who want offline-ready +# local embeddings without an API key. +# +# This stage installs only the big, slow-changing deps that are shared across +# releases: +# - full: `sentence-transformers` (pulls torch + transformers + tokenizers +# transitively, ~1 GB of wheels). +# - slim: nothing — cocoindex-code's LiteLLM deps get installed in stage 2. +# +# The cache key is the RUN command string, which changes with CCC_VARIANT, so +# BuildKit keeps separate cache entries per variant and reuses each across +# releases until we bump the deps. +# +# `cocoindex` and `cocoindex-code` are deliberately NOT installed here — +# they bump often, so pinning them at this layer would invalidate the heavy +# cache on every release. Stage 2 installs them on top; transitive deps are +# already satisfied, so uv only fetches the two packages themselves. 
+# # Use slim (glibc-based) — cocoindex ships pre-built Rust wheels that need glibc. # Alpine / musl-libc would require building from source. -FROM python:3.12-slim AS builder +# +# `--system` tells uv to install into the base Python at +# /usr/local/lib/python3.12/... since there's no virtualenv in the image. +FROM python:3.12-slim AS deps RUN pip install --quiet uv +ARG CCC_VARIANT=slim +RUN if [ "$CCC_VARIANT" = "full" ]; then \ + uv pip install --system --prerelease=allow sentence-transformers; \ + fi + +# ─── Stage 2: install cocoindex + cocoindex-code (per release) ─────────────── +# Cheap relative to stage 1: transitive deps like torch are already in place +# for the full variant; for slim there are no heavy deps to pull. uv only +# needs to fetch the cocoindex + cocoindex-code wheels themselves. +FROM deps AS builder WORKDIR /build +ARG CCC_VARIANT=slim -# Default: install the released cocoindex-code from PyPI (release flow). -# Tests/local dev override with: -# --build-arg CCC_INSTALL_SPEC=/ccc-src[default] -# which installs from the copied-in source tree instead. The COPY always runs; -# with .dockerignore trimming build artifacts it adds ~nothing. -ARG CCC_INSTALL_SPEC="cocoindex-code[default]" +# Default behaviour: install cocoindex-code from PyPI, picking the extras +# that match CCC_VARIANT. +# Release workflow / local tests override with (respectively): +# --build-arg CCC_INSTALL_SPEC=/ccc-src +# --build-arg CCC_INSTALL_SPEC=/ccc-src[full] +ARG CCC_INSTALL_SPEC="" COPY . 
/ccc-src +RUN if [ -z "$CCC_INSTALL_SPEC" ]; then \ + if [ "$CCC_VARIANT" = "full" ]; then \ + CCC_INSTALL_SPEC="cocoindex-code[full]"; \ + else \ + CCC_INSTALL_SPEC="cocoindex-code"; \ + fi; \ + fi; \ + uv pip install --system --prerelease=allow \ + "cocoindex>=1.0.0a33" \ + "${CCC_INSTALL_SPEC}" -RUN uv pip install --system --prerelease=allow \ - "cocoindex>=1.0.0a33" \ - "${CCC_INSTALL_SPEC}" - -# ─── Stage 2: pre-bake the default embedding model ──────────────────────────── -# Bakes Snowflake/snowflake-arctic-embed-xs into the merged data directory at -# /var/cocoindex/cache/..., so on first run Docker's volume copy-up populates -# the cocoindex-data volume with the model — no network fetch needed. +# ─── Stage 3: pre-bake the default embedding model (full only) ─────────────── +# For the full variant, bakes Snowflake/snowflake-arctic-embed-xs into +# /var/cocoindex/cache/... so Docker's first-mount copy-up populates the +# cocoindex-data volume with the model — no network fetch on first start. +# For slim, just creates empty cache dirs so the runtime stage's COPY works +# regardless of variant. 
FROM builder AS model_cache +ARG CCC_VARIANT=slim ENV HF_HOME=/var/cocoindex/cache/huggingface \ SENTENCE_TRANSFORMERS_HOME=/var/cocoindex/cache/sentence-transformers RUN mkdir -p /var/cocoindex/cache/huggingface /var/cocoindex/cache/sentence-transformers \ - && python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('Snowflake/snowflake-arctic-embed-xs'); print('Model cached.')" + && if [ "$CCC_VARIANT" = "full" ]; then \ + python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('Snowflake/snowflake-arctic-embed-xs'); print('Model cached.')"; \ + fi -# ─── Stage 3: runtime ───────────────────────────────────────────────────────── +# ─── Stage 4: runtime ───────────────────────────────────────────────────────── FROM python:3.12-slim AS runtime # gosu for privilege-drop (PUID/PGID pattern); create non-root coco user. diff --git a/pyproject.toml b/pyproject.toml index 964a8cc..e7e88c5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,10 +36,20 @@ dependencies = [ ] [project.optional-dependencies] +# `embeddings-local` is the primary feature extra: it pulls in +# `sentence-transformers` (via cocoindex) so local embeddings work without +# an API key. embeddings-local = [ "cocoindex[sentence-transformers]==1.0.0a43", ] -default = [ +# `full` is the umbrella "batteries-included" alias. Today it's just +# `embeddings-local`, but we expect to bundle more optional niceties under +# it over time — users who want everything can keep using `[full]` and pick +# up the additions automatically. The name also matches the Docker +# `:full` image variant for consistency across install paths. Contents are +# inlined rather than self-referencing `cocoindex-code[embeddings-local]` +# to avoid resolver edge cases with older pip. 
+full = [ "cocoindex[sentence-transformers]==1.0.0a43", ] dev = [ diff --git a/skills/ccc/references/management.md b/skills/ccc/references/management.md index e8c05e5..75441bb 100644 --- a/skills/ccc/references/management.md +++ b/skills/ccc/references/management.md @@ -5,11 +5,11 @@ Install CocoIndex Code via pipx. Two install styles: ```bash -pipx install 'cocoindex-code[default]' # batteries included (local embeddings via sentence-transformers) +pipx install 'cocoindex-code[full]' # batteries included (local embeddings via sentence-transformers) pipx install cocoindex-code # slim (LiteLLM-only; requires a cloud embedding provider + API key) ``` -The `[default]` extra pulls in `sentence-transformers` so the first-run default (local embeddings, no API key) works out of the box. The slim install is for environments where you don't want the torch/transformers deps and plan to use a LiteLLM-supported cloud provider instead. +The `[full]` extra pulls in `sentence-transformers` so the first-run default (local embeddings, no API key) works out of the box. The slim install is for environments where you don't want the torch/transformers deps and plan to use a LiteLLM-supported cloud provider instead. 
To upgrade to the latest version: diff --git a/src/cocoindex_code/cli.py b/src/cocoindex_code/cli.py index de8d4ba..5f4073e 100644 --- a/src/cocoindex_code/cli.py +++ b/src/cocoindex_code/cli.py @@ -327,7 +327,7 @@ def _resolve_embedding_choice( return EmbeddingSettings(provider="sentence-transformers", model=DEFAULT_ST_MODEL) _typer.echo( "Error: sentence-transformers is not installed and stdin is not a TTY.\n" - "Either install the extra (`pip install cocoindex-code[embeddings-local]`)\n" + "Either install the extra (`pip install 'cocoindex-code[embeddings-local]'`)\n" "or pass `--litellm-model MODEL` to select a LiteLLM model.", err=True, ) diff --git a/tests/e2e_docker/conftest.py b/tests/e2e_docker/conftest.py index 11549b1..21fd5d3 100644 --- a/tests/e2e_docker/conftest.py +++ b/tests/e2e_docker/conftest.py @@ -26,6 +26,9 @@ def docker_image() -> str: """Build the image once per test session, installing cocoindex-code from the local source tree (not PyPI) so tests exercise the current changes. Returns the tag. """ + # Tests exercise the `full` variant so `ccc init -f` in non-TTY mode can + # fall back to sentence-transformers (the slim variant requires + # `--litellm-model`, which would add setup boilerplate to every test). 
tag = "cocoindex-code:pytest" subprocess.run( [ @@ -34,7 +37,9 @@ def docker_image() -> str: "-f", str(DOCKERFILE), "--build-arg", - "CCC_INSTALL_SPEC=/ccc-src[default]", + "CCC_VARIANT=full", + "--build-arg", + "CCC_INSTALL_SPEC=/ccc-src[full]", "-t", tag, str(REPO_ROOT), diff --git a/tests/test_e2e.py b/tests/test_e2e.py index 71180e6..b6226b5 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -838,12 +838,16 @@ async def embed(self, text: str) -> object: # noqa: ARG002 # --------------------------------------------------------------------------- -def test_dockerfile_install_line_uses_default_extra() -> None: - """Dockerfile should install via `cocoindex-code[default]`, no separate ST pin.""" +def test_dockerfile_install_line_uses_full_extra() -> None: + """Dockerfile should install via `cocoindex-code[full]` (not the old + `[default]` alias) and should not hard-pin sentence-transformers. + """ repo_root = Path(__file__).resolve().parent.parent content = (repo_root / "docker" / "Dockerfile").read_text() - assert "cocoindex-code[default]" in content + assert "cocoindex-code[full]" in content + assert "cocoindex-code[default]" not in content assert "sentence-transformers>=" not in content + assert "sentence-transformers==" not in content # --------------------------------------------------------------------------- diff --git a/uv.lock b/uv.lock index ad21522..b34f232 100644 --- a/uv.lock +++ b/uv.lock @@ -390,9 +390,6 @@ dependencies = [ ] [package.optional-dependencies] -default = [ - { name = "cocoindex", extra = ["sentence-transformers"] }, -] dev = [ { name = "cocoindex", extra = ["sentence-transformers"] }, { name = "mypy" }, @@ -405,6 +402,9 @@ dev = [ embeddings-local = [ { name = "cocoindex", extra = ["sentence-transformers"] }, ] +full = [ + { name = "cocoindex", extra = ["sentence-transformers"] }, +] [package.dev-dependencies] dev = [ @@ -421,9 +421,9 @@ dev = [ [package.metadata] requires-dist = [ { name = "cocoindex", extras = ["litellm"], 
specifier = "==1.0.0a43" }, - { name = "cocoindex", extras = ["sentence-transformers"], marker = "extra == 'default'", specifier = "==1.0.0a43" }, { name = "cocoindex", extras = ["sentence-transformers"], marker = "extra == 'dev'", specifier = "==1.0.0a43" }, { name = "cocoindex", extras = ["sentence-transformers"], marker = "extra == 'embeddings-local'", specifier = "==1.0.0a43" }, + { name = "cocoindex", extras = ["sentence-transformers"], marker = "extra == 'full'", specifier = "==1.0.0a43" }, { name = "einops", specifier = ">=0.8.2" }, { name = "mcp", specifier = ">=1.0.0" }, { name = "msgspec", specifier = ">=0.19.0" }, @@ -441,7 +441,7 @@ requires-dist = [ { name = "sqlite-vec", specifier = ">=0.1.0" }, { name = "typer", specifier = ">=0.9.0" }, ] -provides-extras = ["default", "dev", "embeddings-local"] +provides-extras = ["dev", "embeddings-local", "full"] [package.metadata.requires-dev] dev = [