diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 6616b66..1c3da44 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -8,10 +8,10 @@ "name": "llmock", "source": { "source": "npm", - "package": "@copilotkit/llmock", - "version": "^1.5.0" + "package": "@copilotkit/aimock", + "version": "^1.7.0" }, - "description": "Fixture authoring skill for @copilotkit/llmock — match fields, response types, embeddings, structured output, sequential responses, streaming physics, agent loop patterns, gotchas, and debugging" + "description": "Fixture authoring skill for @copilotkit/aimock — match fields, response types, embeddings, structured output, sequential responses, streaming physics, agent loop patterns, gotchas, and debugging" } ] } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index cd8e5ae..150c26f 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "llmock", - "version": "1.5.0", - "description": "Fixture authoring guidance for @copilotkit/llmock", + "version": "1.7.0", + "description": "Fixture authoring guidance for @copilotkit/aimock", "author": { "name": "CopilotKit" }, diff --git a/.github/workflows/fix-drift.yml b/.github/workflows/fix-drift.yml index 1e44b97..4d0da8f 100644 --- a/.github/workflows/fix-drift.yml +++ b/.github/workflows/fix-drift.yml @@ -33,7 +33,7 @@ jobs: # Step 0: Configure git identity and create fix branch - name: Configure git run: | - git config user.name "llmock-drift-bot" + git config user.name "aimock-drift-bot" git config user.email "drift-bot@copilotkit.ai" git checkout -B fix/drift-$(date +%Y-%m-%d)-${{ github.run_id }} diff --git a/.github/workflows/publish-docker.yml b/.github/workflows/publish-docker.yml index 3b40eab..2a75812 100644 --- a/.github/workflows/publish-docker.yml +++ b/.github/workflows/publish-docker.yml @@ -10,7 +10,8 @@ on: env: REGISTRY: ghcr.io - IMAGE_NAME: ${{ github.repository 
}} + PRIMARY_IMAGE: ghcr.io/copilotkit/aimock + COMPAT_IMAGE: ghcr.io/copilotkit/llmock jobs: build-and-push: @@ -37,11 +38,20 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Extract metadata - id: meta + - name: Extract metadata (primary — aimock) + id: meta-primary uses: docker/metadata-action@v5 with: - images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + images: ${{ env.PRIMARY_IMAGE }} + tags: | + type=semver,pattern={{version}} + type=raw,value=latest,enable=${{ startsWith(github.ref, 'refs/tags/') }} + + - name: Extract metadata (compat — llmock) + id: meta-compat + uses: docker/metadata-action@v5 + with: + images: ${{ env.COMPAT_IMAGE }} tags: | type=semver,pattern={{version}} type=raw,value=latest,enable=${{ startsWith(github.ref, 'refs/tags/') }} @@ -52,7 +62,9 @@ jobs: context: . platforms: linux/amd64,linux/arm64 push: ${{ github.event_name != 'pull_request' }} - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} + tags: | + ${{ steps.meta-primary.outputs.tags }} + ${{ steps.meta-compat.outputs.tags }} + labels: ${{ steps.meta-primary.outputs.labels }} cache-from: type=gha cache-to: type=gha,mode=max diff --git a/.github/workflows/publish-pytest.yml b/.github/workflows/publish-pytest.yml new file mode 100644 index 0000000..9f59aed --- /dev/null +++ b/.github/workflows/publish-pytest.yml @@ -0,0 +1,42 @@ +name: Publish aimock-pytest +on: + push: + branches: [main] + paths: + - "packages/aimock-pytest/**" + workflow_dispatch: +jobs: + publish: + runs-on: ubuntu-latest + environment: pypi + permissions: + id-token: write + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install build tools + run: pip install hatch + + - name: Check if version is already published + id: check + run: | + VERSION=$(python -c "import tomllib; print(tomllib.load(open('packages/aimock-pytest/pyproject.toml', 'rb'))['project']['version'])") + echo 
"version=$VERSION" >> "$GITHUB_OUTPUT" + if pip install "aimock-pytest==$VERSION" --dry-run --no-deps 2>/dev/null; then + echo "published=true" >> "$GITHUB_OUTPUT" + else + echo "published=false" >> "$GITHUB_OUTPUT" + fi + + - name: Build + if: steps.check.outputs.published == 'false' + run: cd packages/aimock-pytest && hatch build + + - name: Publish to PyPI + if: steps.check.outputs.published == 'false' + uses: pypa/gh-action-pypi-publish@release/v1 + with: + packages-dir: packages/aimock-pytest/dist/ diff --git a/.github/workflows/test-pytest.yml b/.github/workflows/test-pytest.yml new file mode 100644 index 0000000..35e7360 --- /dev/null +++ b/.github/workflows/test-pytest.yml @@ -0,0 +1,44 @@ +name: Python Tests +on: + push: + branches: [main] + paths: + - "packages/aimock-pytest/**" + - "src/**" + - "dist/**" + pull_request: + branches: [main] + paths: + - "packages/aimock-pytest/**" + - "src/**" +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10", "3.11", "3.12", "3.13"] + node-version: [20, 22] + steps: + - uses: actions/checkout@v4 + - uses: pnpm/action-setup@v4 + - uses: actions/setup-node@v4 + with: + node-version: ${{ matrix.node-version }} + cache: pnpm + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + # Build the TS package first + - run: pnpm install --frozen-lockfile + - run: pnpm run build + + # Set CLI path to local build + - name: Set AIMOCK_CLI_PATH + run: echo "AIMOCK_CLI_PATH=$PWD/dist/cli.js" >> $GITHUB_ENV + + # Install and test Python package + - name: Install aimock-pytest + run: pip install ./packages/aimock-pytest[test] + - name: Run Python tests + run: cd packages/aimock-pytest && pytest tests/ -v diff --git a/.gitignore b/.gitignore index cf9381d..fb39fce 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ dist/ *.tsbuildinfo .worktrees/ .superpowers/ +coverage/ +**/__pycache__/ diff --git a/.prettierignore b/.prettierignore index 52af816..b9f7ab6 
100644 --- a/.prettierignore +++ b/.prettierignore @@ -2,3 +2,5 @@ dist/ node_modules/ pnpm-lock.yaml charts/ +coverage/ +.remember/ diff --git a/.remember/remember.md b/.remember/remember.md new file mode 100644 index 0000000..9b68f88 --- /dev/null +++ b/.remember/remember.md @@ -0,0 +1,30 @@ +# Handoff + +## State + +aimock rebrand COMPLETE on `feat/aimock` in `/Users/jpr5/proj/cpk/llmock-v1.7.0-sp1`. PR #68 on CopilotKit/llmock. Package renamed to `@copilotkit/aimock`. 1989 tests, 55 files. All docs/source/Docker/Helm/CI/skills/README rebranded. 6 migration pages, aimock-pytest, 2 converters, control API, MCP/A2A/Vector metrics. 8 blog posts on Notion. + +**aimock-pytest CI + local dev path** added: + +- `AIMOCK_CLI_PATH` env var support in `_node_manager.py` (ensure_installed) and `_server.py` (start) — bypasses npm tarball download, points directly at a local `cli.js` +- `tests/conftest.py` auto-detects `../../dist/cli.js` for local development +- `.github/workflows/test-pytest.yml` — Python 3.10-3.13 x Node 20/22 matrix, builds TS first, sets AIMOCK_CLI_PATH +- `.github/workflows/publish-pytest.yml` — publishes to PyPI on main push when version bumped (needs `PYPI_TOKEN` secret) +- `pyproject.toml` — added `[test]` optional dependency group (pytest, requests) +- `README.md` — added Development section with local test instructions and CI explanation + +## Next + +1. **Merge PR #68** → triggers npm publish + Docker push +2. **GitHub repo rename**: CopilotKit/llmock → CopilotKit/aimock (Settings → General) +3. **CNAME**: aimock.copilotkit.dev, update docs/CNAME, redirect llmock.copilotkit.dev +4. **Deprecate @copilotkit/llmock**: final version re-exporting @copilotkit/aimock +5. **Clean **pycache**** from aimock-pytest commit +6. 
**Add `PYPI_TOKEN` secret** to CopilotKit/llmock (or aimock) GitHub repo for publish-pytest workflow + +## Context + +- Branch `feat/aimock`, worktree `/Users/jpr5/proj/cpk/llmock-v1.7.0-sp1` +- Notion: Content (3353aa38-1852-81fb), Website (3353aa38-1852-811d), Conversion (3353aa38-1852-816d) +- PRs #62 (reasoning) and #63 (requestTransform) awaiting contributor fixes +- `npx aimock` always, `aimock` lowercase, `LLMock` class stays diff --git a/CHANGELOG.md b/CHANGELOG.md index f684458..9f47f65 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -# @copilotkit/llmock +# @copilotkit/aimock ## 1.6.1 diff --git a/Dockerfile b/Dockerfile index 09b9811..1f7e0ec 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,6 +19,10 @@ RUN pnpm run build # --- Production stage --- FROM node:22-alpine +LABEL org.opencontainers.image.title="aimock" +LABEL org.opencontainers.image.description="Mock infrastructure for AI application testing" +LABEL org.opencontainers.image.source="https://github.com/CopilotKit/llmock" + WORKDIR /app # No runtime dependencies — all imports are node:* built-ins diff --git a/README.md b/README.md index 2b3448b..1612eaa 100644 --- a/README.md +++ b/README.md @@ -1,98 +1,81 @@ -# @copilotkit/llmock [![Unit Tests](https://github.com/CopilotKit/llmock/actions/workflows/test-unit.yml/badge.svg)](https://github.com/CopilotKit/llmock/actions/workflows/test-unit.yml) [![Drift Tests](https://github.com/CopilotKit/llmock/actions/workflows/test-drift.yml/badge.svg)](https://github.com/CopilotKit/llmock/actions/workflows/test-drift.yml) [![npm version](https://img.shields.io/npm/v/@copilotkit/llmock)](https://www.npmjs.com/package/@copilotkit/llmock) +# aimock [![Unit Tests](https://github.com/CopilotKit/llmock/actions/workflows/test-unit.yml/badge.svg)](https://github.com/CopilotKit/llmock/actions/workflows/test-unit.yml) [![Drift 
Tests](https://github.com/CopilotKit/llmock/actions/workflows/test-drift.yml/badge.svg)](https://github.com/CopilotKit/llmock/actions/workflows/test-drift.yml) [![npm version](https://img.shields.io/npm/v/@copilotkit/aimock)](https://www.npmjs.com/package/@copilotkit/aimock) -Deterministic mock LLM server for testing. A real HTTP server on a real port — not an in-process interceptor — so every process in your stack (Playwright, Next.js, agent workers, microservices) can point at it via `OPENAI_BASE_URL` / `ANTHROPIC_BASE_URL` and get reproducible, instant responses. Streams SSE in real OpenAI, Claude, Gemini, Bedrock, Azure, Vertex AI, Ollama, and Cohere API formats, driven entirely by fixtures. Zero runtime dependencies. +Mock infrastructure for AI application testing — LLM APIs, MCP tools, A2A agents, vector databases, search, rerank, and moderation. One package, one port, zero dependencies. ## Quick Start ```bash -npm install @copilotkit/llmock +npm install @copilotkit/aimock ``` ```typescript -import { LLMock } from "@copilotkit/llmock"; - -const mock = new LLMock({ port: 5555 }); +import { LLMock } from "@copilotkit/aimock"; +const mock = new LLMock({ port: 0 }); mock.onMessage("hello", { content: "Hi there!" }); +await mock.start(); -const url = await mock.start(); -// Point your OpenAI client at `url` instead of https://api.openai.com +process.env.OPENAI_BASE_URL = `${mock.url}/v1`; // ... run your tests ... 
await mock.stop(); ``` -## Features +## The aimock Suite -- **[Multi-provider support](https://llmock.copilotkit.dev/compatible-providers.html)** — [OpenAI Chat Completions](https://llmock.copilotkit.dev/chat-completions.html), [OpenAI Responses](https://llmock.copilotkit.dev/responses-api.html), [Anthropic Claude](https://llmock.copilotkit.dev/claude-messages.html), [Google Gemini](https://llmock.copilotkit.dev/gemini.html), [AWS Bedrock](https://llmock.copilotkit.dev/aws-bedrock.html) (streaming + Converse), [Azure OpenAI](https://llmock.copilotkit.dev/azure-openai.html), [Vertex AI](https://llmock.copilotkit.dev/vertex-ai.html), [Ollama](https://llmock.copilotkit.dev/ollama.html), [Cohere](https://llmock.copilotkit.dev/cohere.html) -- **[Embeddings API](https://llmock.copilotkit.dev/embeddings.html)** — OpenAI-compatible embedding responses with configurable dimensions -- **[Structured output / JSON mode](https://llmock.copilotkit.dev/structured-output.html)** — `response_format`, `json_schema`, and function calling -- **[Sequential responses](https://llmock.copilotkit.dev/sequential-responses.html)** — Stateful multi-turn fixtures that return different responses on each call -- **[Streaming physics](https://llmock.copilotkit.dev/streaming-physics.html)** — Configurable `ttft`, `tps`, and `jitter` for realistic timing -- **[WebSocket APIs](https://llmock.copilotkit.dev/websocket.html)** — OpenAI Responses WS, Realtime API, and Gemini Live -- **[Error injection](https://llmock.copilotkit.dev/error-injection.html)** — One-shot errors, rate limiting, and provider-specific error formats -- **[Chaos testing](https://llmock.copilotkit.dev/chaos-testing.html)** — Probabilistic failure injection: 500 errors, malformed JSON, mid-stream disconnects -- **[Prometheus metrics](https://llmock.copilotkit.dev/metrics.html)** — Request counts, latencies, and fixture match rates at `/metrics` -- **[Request journal](https://llmock.copilotkit.dev/docs.html)** — Record, inspect, and 
assert on every request -- **[Fixture validation](https://llmock.copilotkit.dev/fixtures.html)** — Schema validation at load time with `--validate-on-load` -- **CLI with hot-reload** — Standalone server with `--watch` for live fixture editing -- **[Docker + Helm](https://llmock.copilotkit.dev/docker.html)** — Container image and Helm chart for CI/CD pipelines -- **Record-and-replay** — VCR-style proxy-on-miss records real API responses as fixtures for deterministic replay -- **[Drift detection](https://llmock.copilotkit.dev/drift-detection.html)** — Daily CI runs against real APIs to catch response format changes -- **Claude Code integration** — `/write-fixtures` skill teaches your AI assistant how to write fixtures correctly - -## CLI Quick Reference +aimock mocks everything your AI app talks to: -```bash -llmock [options] -``` +| Tool | What it mocks | Docs | +| -------------- | ----------------------------------------------------------------- | -------------------------------------------------------- | +| **LLMock** | OpenAI, Claude, Gemini, Bedrock, Azure, Vertex AI, Ollama, Cohere | [Providers](https://aimock.copilotkit.dev/docs.html) | +| **MCPMock** | MCP tools, resources, prompts with session management | [MCP](https://aimock.copilotkit.dev/mcp-mock.html) | +| **A2AMock** | Agent-to-agent protocol with SSE streaming | [A2A](https://aimock.copilotkit.dev/a2a-mock.html) | +| **VectorMock** | Pinecone, Qdrant, ChromaDB compatible endpoints | [Vector](https://aimock.copilotkit.dev/vector-mock.html) | +| **Services** | Tavily search, Cohere rerank, OpenAI moderation | [Services](https://aimock.copilotkit.dev/services.html) | -| Option | Short | Default | Description | -| -------------------- | ----- | ------------ | ------------------------------------------- | -| `--port` | `-p` | `4010` | Port to listen on | -| `--host` | `-h` | `127.0.0.1` | Host to bind to | -| `--fixtures` | `-f` | `./fixtures` | Path to fixtures directory or file | -| `--latency` | `-l` | 
`0` | Latency between SSE chunks (ms) | -| `--chunk-size` | `-c` | `20` | Characters per SSE chunk | -| `--watch` | `-w` | | Watch fixture path for changes and reload | -| `--log-level` | | `info` | Log verbosity: `silent`, `info`, `debug` | -| `--validate-on-load` | | | Validate fixture schemas at startup | -| `--chaos-drop` | | `0` | Chaos: probability of 500 errors (0-1) | -| `--chaos-malformed` | | `0` | Chaos: probability of malformed JSON (0-1) | -| `--chaos-disconnect` | | `0` | Chaos: probability of disconnect (0-1) | -| `--metrics` | | | Enable Prometheus metrics at /metrics | -| `--record` | | | Record mode: proxy unmatched to real APIs | -| `--strict` | | | Strict mode: fail on unmatched requests | -| `--provider-*` | | | Upstream URL per provider (with `--record`) | -| `--help` | | | Show help | +Run them all on one port with `npx aimock --config aimock.json`, or use the programmatic API to compose exactly what you need. -```bash -# Start with bundled example fixtures -llmock +## Features -# Custom fixtures on a specific port -llmock -p 8080 -f ./my-fixtures +- **[Record & Replay](https://aimock.copilotkit.dev/record-replay.html)** — Proxy real APIs, save as fixtures, replay deterministically forever +- **[11 LLM Providers](https://aimock.copilotkit.dev/docs.html)** — OpenAI, Claude, Gemini, Bedrock, Azure, Vertex AI, Ollama, Cohere — full streaming support +- **[MCP / A2A / Vector](https://aimock.copilotkit.dev/mcp-mock.html)** — Mock every protocol your AI agents use +- **[Chaos Testing](https://aimock.copilotkit.dev/chaos-testing.html)** — 500 errors, malformed JSON, mid-stream disconnects at any probability +- **[Drift Detection](https://aimock.copilotkit.dev/drift-detection.html)** — Daily CI validation against real APIs +- **[Streaming Physics](https://aimock.copilotkit.dev/streaming-physics.html)** — Configurable `ttft`, `tps`, and `jitter` +- **[WebSocket APIs](https://aimock.copilotkit.dev/websocket.html)** — OpenAI Realtime, Responses WS, 
Gemini Live +- **[Prometheus Metrics](https://aimock.copilotkit.dev/metrics.html)** — Request counts, latencies, fixture match rates +- **[Docker + Helm](https://aimock.copilotkit.dev/docker.html)** — Container image and Helm chart for CI/CD +- **Zero dependencies** — Everything from Node.js builtins -# Simulate slow responses -llmock --latency 100 --chunk-size 5 +## CLI -# Record mode: proxy unmatched requests to real APIs and save as fixtures -llmock --record --provider-openai https://api.openai.com --provider-anthropic https://api.anthropic.com +```bash +# LLM mocking only +npx aimock -p 4010 -f ./fixtures + +# Full suite from config +npx aimock --config aimock.json + +# Record mode: proxy to real APIs, save fixtures +npx aimock --record --provider-openai https://api.openai.com -# Strict mode in CI: fail if any request doesn't match a fixture -llmock --strict -f ./fixtures +# Docker +docker run -d -p 4010:4010 -v ./fixtures:/fixtures ghcr.io/copilotkit/aimock -f /fixtures ``` -## Documentation +## Switching from other tools? + +Step-by-step migration guides: [MSW](https://aimock.copilotkit.dev/migrate-from-msw.html) · [VidaiMock](https://aimock.copilotkit.dev/migrate-from-vidaimock.html) · [mock-llm](https://aimock.copilotkit.dev/migrate-from-mock-llm.html) · [Python mocks](https://aimock.copilotkit.dev/migrate-from-python-mocks.html) · [Mokksy](https://aimock.copilotkit.dev/migrate-from-mokksy.html) -Full API reference, fixture format, E2E patterns, and provider-specific guides: +## Documentation -**[https://llmock.copilotkit.dev/docs.html](https://llmock.copilotkit.dev/docs.html)** +**[https://aimock.copilotkit.dev](https://aimock.copilotkit.dev)** ## Real-World Usage -[CopilotKit](https://github.com/CopilotKit/CopilotKit) uses llmock across its test suite to verify AI agent behavior across multiple LLM providers without hitting real APIs. 
+[AG-UI](https://github.com/ag-ui-protocol/ag-ui) uses aimock for its [end-to-end test suite](https://github.com/ag-ui-protocol/ag-ui/tree/main/apps/dojo/e2e), verifying AI agent behavior across LLM providers with [fixture-driven responses](https://github.com/ag-ui-protocol/ag-ui/tree/main/apps/dojo/e2e/fixtures/openai). ## License diff --git a/charts/aimock/Chart.yaml b/charts/aimock/Chart.yaml new file mode 100644 index 0000000..6d23526 --- /dev/null +++ b/charts/aimock/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +name: aimock +description: Mock infrastructure for AI application testing (OpenAI, Anthropic, Gemini, MCP, A2A, vector) +type: application +version: 0.1.0 +appVersion: "1.7.0" diff --git a/charts/llmock/templates/_helpers.tpl b/charts/aimock/templates/_helpers.tpl similarity index 79% rename from charts/llmock/templates/_helpers.tpl rename to charts/aimock/templates/_helpers.tpl index 896b8d6..b852baa 100644 --- a/charts/llmock/templates/_helpers.tpl +++ b/charts/aimock/templates/_helpers.tpl @@ -1,14 +1,14 @@ {{/* Expand the name of the chart. */}} -{{- define "llmock.name" -}} +{{- define "aimock.name" -}} {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} {{- end }} {{/* Create a default fully qualified app name. */}} -{{- define "llmock.fullname" -}} +{{- define "aimock.fullname" -}} {{- if .Values.fullnameOverride }} {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} {{- else }} @@ -24,9 +24,9 @@ Create a default fully qualified app name. {{/* Common labels */}} -{{- define "llmock.labels" -}} +{{- define "aimock.labels" -}} helm.sh/chart: {{ printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} -{{ include "llmock.selectorLabels" . }} +{{ include "aimock.selectorLabels" . 
}} app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} app.kubernetes.io/managed-by: {{ .Release.Service }} {{- end }} @@ -34,7 +34,7 @@ app.kubernetes.io/managed-by: {{ .Release.Service }} {{/* Selector labels */}} -{{- define "llmock.selectorLabels" -}} -app.kubernetes.io/name: {{ include "llmock.name" . }} +{{- define "aimock.selectorLabels" -}} +app.kubernetes.io/name: {{ include "aimock.name" . }} app.kubernetes.io/instance: {{ .Release.Name }} {{- end }} diff --git a/charts/llmock/templates/deployment.yaml b/charts/aimock/templates/deployment.yaml similarity index 88% rename from charts/llmock/templates/deployment.yaml rename to charts/aimock/templates/deployment.yaml index 22534ca..61541f6 100644 --- a/charts/llmock/templates/deployment.yaml +++ b/charts/aimock/templates/deployment.yaml @@ -1,18 +1,18 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: {{ include "llmock.fullname" . }} + name: {{ include "aimock.fullname" . }} labels: - {{- include "llmock.labels" . | nindent 4 }} + {{- include "aimock.labels" . | nindent 4 }} spec: replicas: {{ .Values.replicaCount }} selector: matchLabels: - {{- include "llmock.selectorLabels" . | nindent 6 }} + {{- include "aimock.selectorLabels" . | nindent 6 }} template: metadata: labels: - {{- include "llmock.selectorLabels" . | nindent 8 }} + {{- include "aimock.selectorLabels" . | nindent 8 }} spec: {{- with .Values.nodeSelector }} nodeSelector: @@ -27,7 +27,7 @@ spec: {{- toYaml . 
| nindent 8 }} {{- end }} containers: - - name: llmock + - name: aimock image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" imagePullPolicy: {{ .Values.image.pullPolicy }} args: diff --git a/charts/llmock/templates/service.yaml b/charts/aimock/templates/service.yaml similarity index 58% rename from charts/llmock/templates/service.yaml rename to charts/aimock/templates/service.yaml index 894b443..abd3742 100644 --- a/charts/llmock/templates/service.yaml +++ b/charts/aimock/templates/service.yaml @@ -1,9 +1,9 @@ apiVersion: v1 kind: Service metadata: - name: {{ include "llmock.fullname" . }} + name: {{ include "aimock.fullname" . }} labels: - {{- include "llmock.labels" . | nindent 4 }} + {{- include "aimock.labels" . | nindent 4 }} spec: type: {{ .Values.service.type }} ports: @@ -12,4 +12,4 @@ spec: protocol: TCP name: http selector: - {{- include "llmock.selectorLabels" . | nindent 4 }} + {{- include "aimock.selectorLabels" . | nindent 4 }} diff --git a/charts/llmock/values.yaml b/charts/aimock/values.yaml similarity index 92% rename from charts/llmock/values.yaml rename to charts/aimock/values.yaml index c33a2ea..52cfc9d 100644 --- a/charts/llmock/values.yaml +++ b/charts/aimock/values.yaml @@ -4,7 +4,7 @@ fullnameOverride: "" replicaCount: 1 image: - repository: ghcr.io/copilotkit/llmock + repository: ghcr.io/copilotkit/aimock tag: "" pullPolicy: IfNotPresent diff --git a/charts/llmock/Chart.yaml b/charts/llmock/Chart.yaml deleted file mode 100644 index 5603860..0000000 --- a/charts/llmock/Chart.yaml +++ /dev/null @@ -1,6 +0,0 @@ -apiVersion: v2 -name: llmock -description: Deterministic mock LLM server for testing (OpenAI, Anthropic, Gemini) -type: application -version: 0.1.0 -appVersion: "1.6.0" diff --git a/docs/a2a-mock.html b/docs/a2a-mock.html new file mode 100644 index 0000000..7104e38 --- /dev/null +++ b/docs/a2a-mock.html @@ -0,0 +1,243 @@ + + + + + + A2AMock — aimock + + + + + + + + + +
+ +
+ + +
+

A2AMock

+

+ Mock A2A (Agent-to-Agent) protocol server for testing multi-agent systems. Implements the + A2A JSON-RPC protocol with agent card discovery, message routing, task management, and SSE + streaming. +

+ +

Quick Start

+
+
+ Standalone mode typescript +
+
import { A2AMock } from "@copilotkit/aimock";
+
+const a2a = new A2AMock();
+
+a2a.registerAgent({
+  name: "translator",
+  description: "Translates text between languages",
+  skills: [{ id: "translate", name: "Translate" }],
+});
+
+a2a.onMessage("translator", "translate", [{ text: "Translated text" }]);
+
+const url = await a2a.start();
+// Agent card at: ${url}/.well-known/agent-card.json
+// JSON-RPC at: ${url}/
+
+ +

Mounted Mode

+

+ Mount A2AMock onto an LLMock server to share a single port with LLM mocking and other + services: +

+
+
+ Mount on LLMock typescript +
+
import { LLMock, A2AMock } from "@copilotkit/aimock";
+
+const llm = new LLMock({ port: 5555 });
+const a2a = new A2AMock();
+
+a2a.registerAgent({ name: "assistant" });
+a2a.onMessage("assistant", "hello", [{ text: "Hi!" }]);
+
+llm.mount("/a2a", a2a);
+await llm.start();
+// A2A available at http://127.0.0.1:5555/a2a
+
+ +

Subpath Import

+

A2AMock is also available via a dedicated subpath import for tree-shaking:

+
+
+ Subpath import typescript +
+
import { A2AMock } from "@copilotkit/aimock/a2a";
+
+ +

Agent Registration

+

Register agents with skills and capabilities:

+
+
+ Register agents typescript +
+
a2a.registerAgent({
+  name: "researcher",
+  description: "Research assistant",
+  version: "1.0.0",
+  skills: [
+    { id: "search", name: "Web Search", tags: ["research"] },
+    { id: "summarize", name: "Summarize" },
+  ],
+  capabilities: { streaming: true },
+});
+
+ +

Message Patterns

+

Route messages to responses using string or RegExp patterns:

+
+
+ Message patterns typescript +
+
// String substring match
+a2a.onMessage("agent", "hello", [{ text: "Hi there!" }]);
+
+// RegExp match
+a2a.onMessage("agent", /^translate\s+(.+)/i, [{ text: "Translation result" }]);
+
+// Task with artifacts
+a2a.onTask("agent", "compute", [
+  { parts: [{ text: "42" }], name: "result" },
+]);
+
+ +

Streaming Tasks

+

Simulate streaming responses with SSE events:

+
+
Streaming typescript
+
a2a.onStreamingTask("agent", "long-task", [
+  { type: "status", state: "TASK_STATE_WORKING" },
+  { type: "artifact", parts: [{ text: "partial result" }], name: "output" },
+  { type: "artifact", parts: [{ text: "final result" }], lastChunk: true, name: "output" },
+], 50); // 50ms delay between events
+
+ +

Config File

+

A2AMock can be configured via the aimock JSON config file:

+
+
aimock.json json
+
{
+  "a2a": {
+    "path": "/a2a",
+    "agents": [
+      {
+        "name": "assistant",
+        "description": "A helpful assistant",
+        "skills": [{ "id": "chat", "name": "Chat" }],
+        "messages": [
+          { "pattern": "hello", "parts": [{ "text": "Hi there!" }] }
+        ]
+      }
+    ]
+  }
+}
+
+ +

JSON-RPC Methods

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
MethodDescription
SendMessageSend a message, get a synchronous response
SendStreamingMessageSend a message, get an SSE stream of events
GetTaskRetrieve a task by ID
ListTasksList tasks, optionally filtered by contextId
CancelTaskCancel a non-terminal task
+ +

Agent Card

+

+ The agent card is served at GET /.well-known/agent-card.json and includes all + registered agents' skills and capabilities. The A2A-Version: 1.0 header is + included on all responses. +

+ +

Inspection

+
+
+ Inspection API typescript +
+
a2a.health();  // { status: "ok", agents: 2, tasks: 5 }
+a2a.reset();   // Clears all agents and tasks
+
+
+
+ + + + + diff --git a/docs/aimock-cli.html b/docs/aimock-cli.html new file mode 100644 index 0000000..04de8db --- /dev/null +++ b/docs/aimock-cli.html @@ -0,0 +1,323 @@ + + + + + + aimock CLI — aimock + + + + + + + + + +
+ +
+ + +
+

aimock CLI

+

+ aimock is the full-stack mock orchestrator. Where llmock serves + LLM endpoints only, aimock reads a JSON config file and serves LLM mocks + alongside additional mock services (MCP, A2A, vector stores) on a single port. +

+ +

llmock vs aimock

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Capabilityllmock CLIaimock CLI
LLM mock endpointsYesYes
Additional mock servicesNoYes (via mount)
Config fileCLI flags onlyJSON config file
Single-port routingLLM paths onlyAll services on one port
+ +

Quick Start

+ +
+
+
+
Run aimock shell
+
$ npx aimock --config aimock.json --port 4010
+
+
+
+
+
Run aimock shell
+
$ docker run -d -p 4010:4010 \
+  -v ./aimock.json:/config.json \
+  -v ./fixtures:/fixtures \
+  ghcr.io/copilotkit/aimock \
+  npx aimock --config /config.json --port 4010
+
+
+
+ +

Config File Format

+

+ The config file is a JSON object describing which services to run and how to configure + them. The llm section configures the core LLMock server. Additional services + are mounted at path prefixes. +

+ +
+
aimock.json json
+
{
+  "llm": {
+    "fixtures": "./fixtures",
+    "latency": 0,
+    "chunkSize": 20,
+    "logLevel": "info",
+    "validateOnLoad": true,
+    "metrics": true,
+    "strict": false
+  },
+  "services": {
+    "/mcp": {
+      "type": "mcp",
+      "tools": "./mcp-tools.json"
+    },
+    "/a2a": {
+      "type": "a2a",
+      "agents": "./a2a-agents.json"
+    }
+  }
+}
+
+ +

Config Fields

+ + + + + + + + + + + + + + + + + + + + +
FieldTypeDescription
llmobject + LLMock configuration. Accepts fixtures, latency, + chunkSize, logLevel, validateOnLoad, + metrics, strict, chaos, + streamingProfile. +
servicesobject + Map of mount paths to service configs. Each key is a URL path prefix (e.g. + /mcp), each value describes the service type and its options. +
+ +

CLI Flags

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
OptionDefaultDescription
--configaimock.jsonPath to JSON config file
--port4010Port to listen on (overrides config)
--host127.0.0.1Host to bind to (overrides config)
--helpShow help
+ +

Single-Port Routing

+

+ All services share one port. Requests are routed by path prefix. LLM endpoints live at the + root, mounted services at their configured prefix: +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PathService
/v1/chat/completionsLLMock (OpenAI Chat Completions)
/v1/messagesLLMock (Anthropic Claude)
/v1/embeddingsLLMock (Embeddings)
/mcp/*MCP mock service
/a2a/*A2A mock service
/healthUnified health check (all services)
/metricsPrometheus metrics (if enabled)
+ +

+ Path stripping is automatic — a request to /mcp/tools/list arrives at + the MCP service as /tools/list. +

+ +

Docker Usage

+ +
+
+
+
+ Run with config shell +
+
$ npx aimock --config aimock.json --host 0.0.0.0
+
+
+
+
+
+ Docker run with config shell +
+
# Mount config and fixtures into the container
+$ docker run -p 4010:4010 \
+  -v ./aimock.json:/config.json \
+  -v ./fixtures:/fixtures \
+  ghcr.io/copilotkit/aimock \
+  npx aimock --config /config.json --host 0.0.0.0
+
+
+
+ +

Docker Compose

+ +
+
docker-compose.yml yaml
+
services:
+  aimock:
+    image: ghcr.io/copilotkit/aimock:latest
+    command: aimock --config /app/aimock.json --host 0.0.0.0
+    ports:
+      - "4010:4010"
+    volumes:
+      - ./aimock.json:/app/aimock.json:ro
+      - ./fixtures:/app/fixtures:ro
+
+  app:
+    build: .
+    environment:
+      OPENAI_BASE_URL: http://aimock:4010/v1
+      MCP_SERVER_URL: http://aimock:4010/mcp
+    depends_on:
+      - aimock
+
+
+
+ + + + + diff --git a/docs/aws-bedrock.html b/docs/aws-bedrock.html index 09cf238..f64f2f2 100644 --- a/docs/aws-bedrock.html +++ b/docs/aws-bedrock.html @@ -3,7 +3,7 @@ - AWS Bedrock — llmock + AWS Bedrock — aimock @@ -24,7 +24,7 @@ > ☰ - $ llmock + $ aimock + +
+
- +

AWS Bedrock

- llmock supports the AWS Bedrock Claude invoke and Converse API endpoints — both - streaming and non-streaming. Point the AWS SDK at your llmock instance and fixtures match + aimock supports the AWS Bedrock Claude invoke and Converse API endpoints — both + streaming and non-streaming. Point the AWS SDK at your aimock instance and fixtures match against the Bedrock-format requests, returning responses in the authentic Bedrock format including AWS Event Stream binary framing for streaming.

@@ -96,13 +67,13 @@

How It Works

model field in the body (the model is in the URL).

- llmock detects the Bedrock URL pattern, extracts the model ID, translates the request to + aimock detects the Bedrock URL pattern, extracts the model ID, translates the request to the internal fixture-matching format, and returns the response in the Anthropic Messages API format — which is identical to the Bedrock Claude response format. For streaming, responses use the AWS Event Stream binary framing protocol.

- llmock also supports the Converse API (Converse API (/model/{modelId}/converse and /model/{modelId}/converse-stream), which uses a different @@ -205,7 +176,7 @@

Model Resolution

SDK Configuration

-

To point the AWS SDK Bedrock Runtime client at llmock, configure the endpoint URL:

+

To point the AWS SDK Bedrock Runtime client at aimock, configure the endpoint URL:

bedrock-sdk.ts ts
@@ -213,7 +184,7 @@

SDK Configuration

const client = new BedrockRuntimeClient({ region: "us-east-1", - endpoint: "http://localhost:4005", // llmock URL + endpoint: "http://localhost:4005", // aimock URL credentials: { accessKeyId: "mock", secretAccessKey: "mock" }, }); @@ -256,7 +227,7 @@

Fixture Examples

Fixtures are shared across all providers. The same fixture file works for OpenAI, Claude - Messages, Gemini, Azure, and Bedrock endpoints — llmock translates each provider's + Messages, Gemini, Azure, and Bedrock endpoints — aimock translates each provider's request format to a common internal format before matching.

@@ -264,7 +235,7 @@

Fixture Examples

Streaming (invoke-with-response-stream)

The invoke-with-response-stream endpoint returns responses using the - AWS Event Stream binary protocol. llmock implements this protocol + AWS Event Stream binary protocol. aimock implements this protocol natively — each response chunk is encoded as a binary frame with CRC32 checksums, headers, and a JSON payload, exactly as the real Bedrock service sends them.

@@ -322,7 +293,7 @@

AWS Event Stream Binary Format

[message_crc32: 4B CRC32 of entire frame minus last 4 bytes]

- llmock encodes these frames with proper CRC32 checksums, so the AWS SDK can decode them + aimock encodes these frames with proper CRC32 checksums, so the AWS SDK can decode them natively. The :event-type header in each frame carries the event name (e.g. chunk), and the :content-type header is set to application/json. @@ -332,7 +303,7 @@

Converse API

The Converse API is AWS Bedrock's provider-agnostic conversation interface. It uses camelCase field names and a different request structure than the Claude-native invoke - endpoints. llmock supports both /model/{modelId}/converse (non-streaming) and + endpoints. aimock supports both /model/{modelId}/converse (non-streaming) and /model/{modelId}/converse-stream (streaming via Event Stream binary).

@@ -369,21 +340,23 @@

Converse API

The Converse API also supports tool calls via toolUse and toolResult content blocks, and tool definitions via the - toolConfig field. llmock translates all of these to the unified internal + toolConfig field. aimock translates all of these to the unified internal format for fixture matching.

+ + diff --git a/docs/azure-openai.html b/docs/azure-openai.html index c17a494..3c3ad24 100644 --- a/docs/azure-openai.html +++ b/docs/azure-openai.html @@ -3,7 +3,7 @@ - Azure OpenAI — llmock + Azure OpenAI — aimock @@ -24,7 +24,7 @@ > ☰ - $ llmock + $ aimock + +
+
- +

Azure OpenAI

- llmock routes Azure OpenAI deployment-based URLs to the existing chat completions and - embeddings handlers. Point the Azure OpenAI SDK at your llmock instance and fixtures work + aimock routes Azure OpenAI deployment-based URLs to the existing chat completions and + embeddings handlers. Point the Azure OpenAI SDK at your aimock instance and fixtures work exactly as they do with the standard OpenAI endpoints.

@@ -93,7 +64,7 @@

How It Works

api-version query parameter.

- llmock detects these Azure-style URLs and rewrites them to the standard paths before + aimock detects these Azure-style URLs and rewrites them to the standard paths before routing to the existing handlers. The deployment ID is extracted and used as a model fallback when the request body omits the model field (which Azure requests commonly do, since the model is implied by the deployment). @@ -121,7 +92,7 @@

URL Pattern Mapping

Model Resolution

- When a request arrives via an Azure deployment URL, llmock resolves the model name using + When a request arrives via an Azure deployment URL, aimock resolves the model name using these rules:

    @@ -153,7 +124,7 @@

    Model Resolution

    Authentication

    - llmock does not validate authentication tokens, but it accepts both Azure-style and + aimock does not validate authentication tokens, but it accepts both Azure-style and standard auth headers without rejecting the request:

      @@ -162,14 +133,14 @@

      Authentication

    SDK Configuration

    -

    To point the Azure OpenAI Node.js SDK at llmock, set the endpoint to your llmock URL:

    +

    To point the Azure OpenAI Node.js SDK at aimock, set the endpoint to your aimock URL:

    azure-openai-sdk.ts ts
    import { AzureOpenAI } from "openai";
     
     const client = new AzureOpenAI({
    -  endpoint: "http://localhost:4005",  // llmock URL
    +  endpoint: "http://localhost:4005",  // aimock URL
       apiKey: "mock-key",
       apiVersion: "2024-10-21",
       deployment: "my-gpt4-deployment",
    @@ -188,14 +159,14 @@ 

    Environment Variables

    .env sh
    -
    # Point Azure SDK at llmock
    +          
    # Point Azure SDK at aimock
     AZURE_OPENAI_ENDPOINT=http://localhost:4005
     AZURE_OPENAI_API_KEY=mock-key

    - The api-version query parameter is accepted but ignored — llmock + The api-version query parameter is accepted but ignored — aimock responds identically regardless of which API version is requested. This means you can test against any API version without changing fixtures.

    @@ -204,14 +175,16 @@

    Environment Variables

    + + diff --git a/docs/chaos-testing.html b/docs/chaos-testing.html index e0dfc67..b821d01 100644 --- a/docs/chaos-testing.html +++ b/docs/chaos-testing.html @@ -3,7 +3,7 @@ - Chaos Testing — llmock + Chaos Testing — aimock @@ -24,7 +24,7 @@ > ☰ - $ llmock + $ aimock
+ +
+
- +

Chaos Testing

- llmock provides probabilistic failure injection to test how your application handles + aimock provides probabilistic failure injection to test how your application handles unreliable LLM APIs. Three failure modes can be configured at the server, fixture, or per-request level.

@@ -138,7 +109,7 @@

Quick Start

chaos-quick-start.ts ts
-
import { LLMock } from "@copilotkit/llmock";
+          
import { LLMock } from "@copilotkit/aimock";
 
 const mock = new LLMock();
 mock.onMessage("hello", { content: "Hi!" });
@@ -210,15 +181,15 @@ 

Per-Request Headers

- x-llmock-chaos-drop + x-aimock-chaos-drop Drop rate (0–1) - x-llmock-chaos-malformed + x-aimock-chaos-malformed Malformed rate (0–1) - x-llmock-chaos-disconnect + x-aimock-chaos-disconnect Disconnect rate (0–1) @@ -233,7 +204,7 @@

Per-Request Headers

method: "POST", headers: { "Content-Type": "application/json", - "x-llmock-chaos-disconnect": "1.0", + "x-aimock-chaos-disconnect": "1.0", }, body: JSON.stringify({ model: "gpt-4", messages: [...] }), });
@@ -242,12 +213,32 @@

Per-Request Headers

CLI Flags

Set server-level chaos from the command line:

-
-
CLI chaos flags bash
-
npx llmock --fixtures ./fixtures \
+        
+
+
+
+ CLI chaos flags shell +
+
$ npx aimock --fixtures ./fixtures \
+  --chaos-drop 0.1 \
+  --chaos-malformed 0.05 \
+  --chaos-disconnect 0.02
+
+
+
+
+
+ Docker chaos flags shell +
+
$ docker run -d -p 4010:4010 \
+  -v ./fixtures:/fixtures \
+  ghcr.io/copilotkit/aimock \
+  npx aimock --fixtures /fixtures \
   --chaos-drop 0.1 \
   --chaos-malformed 0.05 \
   --chaos-disconnect 0.02
+
+

Journal Tracking

@@ -279,28 +270,30 @@

Journal Tracking

Prometheus Metrics

When metrics are enabled (--metrics), each chaos trigger increments the - llmock_chaos_triggered_total counter with an action label: + aimock_chaos_triggered_total counter with an action label:

Metrics output text
-
# TYPE llmock_chaos_triggered_total counter
-llmock_chaos_triggered_total{action="drop"} 3
-llmock_chaos_triggered_total{action="malformed"} 1
-llmock_chaos_triggered_total{action="disconnect"} 2
+
# TYPE aimock_chaos_triggered_total counter
+aimock_chaos_triggered_total{action="drop"} 3
+aimock_chaos_triggered_total{action="malformed"} 1
+aimock_chaos_triggered_total{action="disconnect"} 2
+ + diff --git a/docs/chat-completions.html b/docs/chat-completions.html index 353d4f2..1c8af81 100644 --- a/docs/chat-completions.html +++ b/docs/chat-completions.html @@ -3,7 +3,7 @@ - Chat Completions — llmock + Chat Completions — aimock @@ -24,7 +24,7 @@ > ☰ - $ llmock + $ aimock