diff --git a/.env.example b/.env.example new file mode 100644 index 00000000..125f3aaa --- /dev/null +++ b/.env.example @@ -0,0 +1,19 @@ +BOT_TOKEN=telegram-bot-token +WEBHOOK_URL=https://example.com/telegram/webhook +ALLOWLIST_IDS=12345,67890 + +# Optional settings +BOT_TOKEN_FILE=/path/to/token.txt +BOT_DB_PATH=bot_state.sqlite +STRIX_ROOT=. +BOT_HTTP_HOST=0.0.0.0 +BOT_HTTP_PORT=8081 +BOT_HTTP_TOKEN=changeme +BOT_ALERT_WEBHOOK=https://example.com/alert-endpoint +BOT_RATE_LIMIT=1.0 +BOT_GLOBAL_RATE_LIMIT=0.5 +BOT_DEFAULT_VERBOSITY=high-only +# LLM config for Strix core +STRIX_LLM=gpt-4o +LLM_API_KEY=your-llm-key +LLM_API_BASE=https://api.openai.com/v1 # optional for proxies/custom base diff --git a/README.md b/README.md index 53e5a980..6e83caaa 100644 --- a/README.md +++ b/README.md @@ -74,6 +74,7 @@ pipx install strix-agent # Configure your AI provider export STRIX_LLM="openai/gpt-5" export LLM_API_KEY="your-api-key" +# Alternatively place STRIX_LLM/LLM_API_KEY in a .env file alongside the repo. # Run your first security assessment strix --target ./app-directory @@ -234,10 +235,6 @@ Have questions? Found a bug? Want to contribute? **[Join our Discord!](https://d ## 🌟 Support the Project **Love Strix?** Give us a ⭐ on GitHub! -## πŸ™ Acknowledgements - -Strix builds on the incredible work of open-source projects like [LiteLLM](https://github.com/BerriAI/litellm), [Caido](https://github.com/caido/caido), [ProjectDiscovery](https://github.com/projectdiscovery), [Playwright](https://github.com/microsoft/playwright), and [Textual](https://github.com/Textualize/textual). Huge thanks to their maintainers! - > [!WARNING] > Only test apps you own or have permission to test. You are responsible for using Strix ethically and legally. diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 00000000..62500409 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,36 @@ +# Strix Documentation Hub + +Audience: AI and human developers extending Strix. 
Start here to find deep references, workflows, and extension guides. + +- **Quick start**: see `docs/setup-and-running.md`. +- **Architecture**: high-level map in `docs/architecture.md`. +- **Agent loop**: internals in `docs/agent-loop.md`. +- **Tools**: contract and extensions in `docs/tools-and-extensions.md`. +- **Runtime**: sandbox and docker flow in `docs/runtime-and-sandbox.md`. +- **LLM config**: provider setup and tuning in `docs/llm-config.md`. +- **Prompts**: taxonomy and conventions in `docs/prompts.md`. +- **Interface**: CLI/TUI behaviors in `docs/interface.md`. +- **Telemetry**: tracing and events in `docs/telemetry-and-observability.md`. +- **Testing/QA**: strategies in `docs/testing-and-qa.md`. +- **Security/Privacy**: guardrails in `docs/security-and-privacy.md`. +- **Release**: versioning and publishing in `docs/release-and-versioning.md`. +- **Troubleshooting**: fixes in `docs/troubleshooting.md`. +- **Glossary**: definitions in `docs/glossary.md`. +- **Roadmap templates**: RFC/ADR formats in `docs/roadmap-templates.md`. + +Minimum environment +- Python 3.12, Docker running, Playwright browsers installed. +- STRIX_LLM + LLM_API_KEY (or litellm proxy) exported. +- Local write access for `strix_runs/` outputs. + +Flow for new contributors +1) Read `architecture.md` and `agent-loop.md` for mental model. +2) Run a local scan following `setup-and-running.md`. +3) Review `development.md` + `testing-and-qa.md` before changes. +4) Extend tools/prompts/runtime using relevant docs. +5) Update docs and add tests with every feature or bugfix. + +Maintenance +- Keep links valid when files move. +- Update dependency minimums when `pyproject.toml` changes. +- Refresh examples and flags when CLI/TUI arguments change. 
diff --git a/docs/agent-loop.md b/docs/agent-loop.md
new file mode 100644
index 00000000..1fb82114
--- /dev/null
+++ b/docs/agent-loop.md
@@ -0,0 +1,46 @@
+# Agent Loop Internals
+
+Primary files: `agents/base_agent.py`, `agents/state.py`, `agents/StrixAgent/strix_agent.py`, `agents/StrixAgent/system_prompt.jinja`.
+
+## Lifecycle
+1) Agent instantiated with config: llm config, max iterations (default 300; adaptive budget via `agents/iteration_policy.py` based on targets + LLM timeout recorded in tracer), non-interactive flag, optional state/local sources.
+2) `AgentMeta` wires Jinja environment per agent folder for prompts.
+3) `AgentState` tracks messages, tasks, wait states, agent graph ids.
+4) Main loop (in `BaseAgent.run`): fetches/creates state, processes queued messages, updates tracer, calls LLM with prompt, dispatches tool invocations, handles waits/finishes.
+5) Completion when finish tool invoked, max iterations hit, or fatal error.
+6) State persistence: snapshots saved as JSON (default in `strix_runs/<run-name>/<agent-id>_state.json`); can resume via config `load_state_from`.
+
+## Message handling
+- Messages stored in `AgentState`; inter-agent messages include metadata and are added as user messages with delivery notice.
+- Tracer updates agent status on message receipt/resume.
+
+## Tool selection and execution
+- LLM output parsed for tool calls → `tools.process_tool_invocations` → `tools/executor.py`.
+- Tool executions logged via tracer, results added back into state/context for next iteration.
+
+## Memory and limits
+- `llm/memory_compressor.py` trims context to fit provider limits.
+- `llm/request_queue.py` manages concurrency/ordering; `llm/llm.py` handles retries/backoff (tenacity).
+- Configurable `max_iterations` per agent instance via config.
+
+## Error handling
+- LLM errors wrapped in `LLMRequestFailedError`; retries applied.
+- Tool errors logged and surfaced in state; agents can adapt prompts accordingly.
+ +## Vulnerability propagation +- Tracer `vulnerability_found_callback` (set in CLI) renders findings immediately; tracer records IDs, severity, content. + +## Extending the agent +- Adjust prompts in `agents/StrixAgent/system_prompt.jinja`. +- Modify decision logic in `StrixAgent/strix_agent.py`. +- Add new state fields carefully; ensure serialization if persisted; update tracer calls to include new metadata. + +## ASCII loop snapshot +``` +State -> Prompt render -> LLM -> Tool calls -> Results -> State update + ^ | + +--------------------Tracer/events------------------+ +``` + +## Maintenance +- Revise when state fields or loop control change; keep diagram aligned with actual steps; update when new hooks are added. diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 00000000..6e0a5ffe --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,50 @@ +# Architecture + +Strix runs coordinated agents that drive tools inside a dockerized sandbox, orchestrated via CLI/TUI, with telemetry and prompt packs guiding behavior. + +## System map +- Entry: CLI/TUI (`interface/main.py`, `cli.py`, `tui.py`) parses args, sets scan config, starts tracer callbacks. +- Agent loop: `agents/base_agent.py`, `agents/state.py`, `agents/StrixAgent/strix_agent.py` manage iterations, messages, tool calls, memory compression. +- Tools: `tools/*` define XML action schemas + Python implementations; `tools/registry.py` registers; `tools/executor.py` dispatches; `interface/tool_components/*` render outputs. +- Runtime: `runtime/docker_runtime.py`, `runtime/tool_server.py`, `runtime/runtime.py` manage sandbox containers and tool execution endpoints. +- LLM: `llm/llm.py`, `llm/config.py`, `llm/request_queue.py`, `llm/memory_compressor.py` handle provider routing, retries, queueing, token budgeting. +- Prompts: `prompts/**/*.jinja`, `agents/StrixAgent/system_prompt.jinja`, `prompts/coordination/root_agent.jinja` supply structured instructions. 
+- Telemetry: `telemetry/tracer.py` captures agent lifecycle, tool executions, vulnerabilities; `interface/utils.py` renders stats. +- Outputs: run artifacts under `strix_runs/` (reports/logs). + +## Data flow (simplified) +1) User invokes `strix` -> `interface/main.py` builds args, `cli.py`/`tui.py` start UI. +2) Scan config + tracer created -> `StrixAgent` instantiated with `LLMConfig`. +3) Agent loop requests LLM completions; responses trigger tool invocations via `tools/executor.py`. +4) Tools call `runtime/tool_server.py` (docker sandbox) for side effects (browser, proxy, terminal, python, file edits, etc.). +5) Tool results and tracer events propagate to UI renderers; vulnerabilities emitted to console and saved. +6) Loop continues until max iterations, finish action, or user stop; results stored in `strix_runs/`. + +## ASCII data flow +``` +CLI/TUI -> Tracer -> StrixAgent -> LLM -> Tool Executor -> Runtime (Docker) -> Tool Server + ^ | + |-------------------------------------------+ +``` + +## Extension seams +- Add tools: new folder under `tools/`, schema XML, implementation, registry entry, renderer in `interface/tool_components/`. +- Add prompts: new Jinja in `prompts/*` or agent prompt folder; wire selection logic where consumed. +- Add telemetry: emit via `telemetry/tracer.py` helper methods; extend UI renderers to display. +- Add providers: extend `llm/config.py` + `llm/llm.py` to create client, auth, and request path. +- Adjust runtime: modify `runtime/docker_runtime.py` for images/limits, `tool_server.py` for endpoints. + +## Persistence +- Runs: `strix_runs/` contains reports and logs (non-interactive mode prints to stdout too). +- Agent graph: managed in-memory via `tools/agents_graph/agents_graph_actions.py`, rendered by interface. Graph definitions can be loaded/validated from YAML/JSON via `agents/graph_builder.py` (unique ids, single root, parent/child checks) before instantiation. 
+ +## Non-interactive mode +- Enabled via `-n/--non-interactive`; suppresses interactive UI, streams findings to stdout; still uses tracer callbacks for vulnerability events. + +## Reliability and limits +- Max iterations default 300 (`BaseAgent.max_iterations`), configurable via agent config. +- Request queue/backoff in `llm/request_queue.py`; retries in `llm/llm.py` using tenacity. +- Memory compression in `llm/memory_compressor.py` to stay within context limits. + +## Maintenance +- Update module paths if files move; refresh diagram when flows change; align with CLI flag changes. diff --git a/docs/development.md b/docs/development.md new file mode 100644 index 00000000..abe13a4a --- /dev/null +++ b/docs/development.md @@ -0,0 +1,46 @@ +# Development Guide + +## Layout primer +- Core agent logic: `strix/agents/*` +- Tools and action schemas: `strix/tools/*` +- Runtime sandbox: `strix/runtime/*` +- LLM layer: `strix/llm/*` +- Prompts: `strix/prompts/*` and `strix/agents/StrixAgent/system_prompt.jinja` +- Interface (CLI/TUI + renderers): `strix/interface/*` +- Telemetry: `strix/telemetry/*` + +## Standards +- Python 3.12, strict typing (see `pyproject.toml` mypy config). +- Lint/format: `ruff`, `black`, `isort`. +- Security/static: `bandit`, `pylint`. +- Keep docstrings concise; prefer clear variable names over comments. + +## Commands +```bash +# Format + lint +poetry run ruff check . +poetry run black . +poetry run isort . + +# Type check +poetry run mypy . +poetry run pyright + +# Tests +poetry run pytest +poetry run pytest --cov +``` + +## Workflow +- Create feature branches; keep commits scoped. +- Run format + lint + tests before PR. +- Update relevant docs when changing behavior, flags, prompts, or outputs. +- Add regression coverage for new tools/prompt changes/runtime adjustments. + +## Performance tips +- Reuse docker images; avoid repeated pulls. +- Cache provider auth where possible; tune LLM parallelism in config. 
+- Use smaller prompt packs for targeted testing when iterating quickly.
+
+## Maintenance
+- Revise commands/tools when linters/types/test stacks change; align with `pyproject.toml`.
diff --git a/docs/glossary.md b/docs/glossary.md
new file mode 100644
index 00000000..49e0cd26
--- /dev/null
+++ b/docs/glossary.md
@@ -0,0 +1,15 @@
+# Glossary
+
+- Agent loop: Iterative cycle in `agents/base_agent.py` driving LLM calls and tool executions.
+- Agent graph: In-memory map of agents managed via `tools/agents_graph/agents_graph_actions.py`, rendered in UI.
+- Action schema: XML definition (`*_actions_schema.xml`) describing tool actions/args.
+- Renderer: UI component in `interface/tool_components/*` mapping tool outputs to panels.
+- Runtime: Docker-based sandbox managed by `runtime/docker_runtime.py` and `tool_server.py`.
+- Tracer: Telemetry recorder in `telemetry/tracer.py` logging agent and tool events.
+- Run name: Unique id for a scan; names output directory `strix_runs/<run-name>/`.
+- Request queue: LLM request coordinator in `llm/request_queue.py`.
+- Memory compressor: Context trimming utility in `llm/memory_compressor.py`.
+- Non-interactive mode: Headless CLI mode (`-n`) emitting findings to stdout without TUI.
+
+## Maintenance
+- Add new terms as components are introduced; keep paths accurate after refactors.
diff --git a/docs/interface.md b/docs/interface.md
new file mode 100644
index 00000000..7f5db109
--- /dev/null
+++ b/docs/interface.md
@@ -0,0 +1,30 @@
+# Interface (CLI/TUI)
+
+## Entrypoints
+- `interface/main.py`: CLI entry, argument parser wiring, dispatch to CLI/TUI.
+- `interface/cli.py`: non-interactive flow; renders startup panel and vulnerability panels via tracer callbacks.
+- `interface/tui.py`: textual-based interactive UI; panels for tools, logs, stats.
+- `interface/utils.py`: stats builders, severity colors, shared helpers.
+
+## Arguments (key)
+- `--target/-t`: target path/URL (multi allowed).
+- `--instruction`: freeform guidance to agent.
+- `--non-interactive/-n`: headless mode; prints findings to stdout. +- `--run-name`: optional custom run id (otherwise generated). +- Provider/env vars read separately; ensure `STRIX_LLM`/`LLM_API_KEY` set. + +## Rendering +- Tool outputs mapped via `interface/tool_components/*` renderers (browser, proxy, terminal, file edits, reports, notes, thinking, etc.). +- Live stats and final stats built in `utils.py` and displayed in panels. +- Vulnerabilities emitted from tracer callback in CLI mode; TUI shows panes with updates. + +## Customization +- Styles in `interface/assets/tui_styles.tcss`. +- Add new renderers by extending `tool_components/base_renderer.py` and registering in `registry.py`. + +## Non-interactive behavior +- Skips TUI; logs findings immediately. +- Still writes outputs under `strix_runs/`. + +## Maintenance +- Update argument list when CLI flags change; refresh renderer mapping when new tools are added. diff --git a/docs/llm-config.md b/docs/llm-config.md new file mode 100644 index 00000000..db809ee3 --- /dev/null +++ b/docs/llm-config.md @@ -0,0 +1,33 @@ +# LLM Configuration + +Core files: `llm/config.py`, `llm/llm.py`, `llm/request_queue.py`, `llm/memory_compressor.py`, `llm/utils.py`. + +## Providers and models +- `LLMConfig` defines provider/model id (e.g., `openai/gpt-5`) and auth. +- Extend providers by adding client setup and request paths in `llm/llm.py`; expose config knobs in `config.py`. + +## Request handling +- Requests queued via `request_queue.py` to manage concurrency and order. +- Retries/backoff handled in `llm.py` using tenacity; errors wrapped in `LLMRequestFailedError`. +- Streaming support depends on provider implementation in `llm.py`. + +## Context management +- `memory_compressor.py` trims conversation/state to fit provider token limits. +- `llm/utils.py` cleans content before sending to providers. + +## Tuning +- Control parallelism and rate limits in request queue. +- Adjust model choice to balance cost vs. latency. 
+- Customize temperature/other params in `LLMConfig`. + +## Telemetry +- LLM calls can be logged via tracer; ensure sensitive data is redacted before emission. + +## Adding a new provider +1) Define config fields in `config.py`. +2) Add client creation and request method in `llm.py`. +3) Wire retries/backoff and error normalization. +4) Update docs and examples in `setup-and-running.md`. + +## Maintenance +- Revise when providers/models or retry/queue logic change; ensure env var expectations are documented. diff --git a/docs/prompts.md b/docs/prompts.md new file mode 100644 index 00000000..daa098fa --- /dev/null +++ b/docs/prompts.md @@ -0,0 +1,33 @@ +# Prompts + +## Taxonomy +- Coordination: `prompts/coordination/root_agent.jinja` +- Frameworks: `prompts/frameworks/*.jinja` (e.g., `fastapi`, `nextjs`) +- Technologies: `prompts/technologies/*.jinja` (e.g., `firebase_firestore`, `supabase`) +- Vulnerabilities: `prompts/vulnerabilities/*.jinja` (e.g., `sql_injection`, `xss`, `rce`) +- Auth playbooks: `prompts/auth/oidc_saml_sso.jinja` +- Cloud/custom/recon placeholders: `prompts/cloud`, `prompts/custom`, `prompts/reconnaissance` +- Agent system prompt: `agents/StrixAgent/system_prompt.jinja` + +## Conventions +- Jinja templates with explicit placeholders; avoid hidden assumptions. +- Keep titles and sections consistent for downstream parsing. +- Prefer actionable guidance (steps, checks, PoC ideas) and explicit do/don’t lists. + +## Selection/combination +- Agent selects relevant prompt packs based on target metadata; templates rendered via Jinja environment set in `AgentMeta`. +- Root coordination prompt guides multi-agent behavior; specialized prompts augment depending on framework/tech/vuln focus. + +## Safe testing +- Dry-run new prompts in non-interactive mode against test targets. +- Check for prompt injection surfaces; ensure instructions avoid unsafe actions outside sandbox. 
+- Validate output format expected by tools (e.g., when tool calls must be produced). + +## Adding a prompt pack +1) Create `.jinja` file in appropriate folder with descriptive name. +2) Document variables required; keep defaults sensible. +3) Add regression test or fixture to ensure rendering works and key strings exist. +4) Update this doc and any selection logic if needed. + +## Maintenance +- Refresh taxonomy when adding/removing prompt packs; ensure variable names stay consistent with agent code. diff --git a/docs/release-and-versioning.md b/docs/release-and-versioning.md new file mode 100644 index 00000000..6667b205 --- /dev/null +++ b/docs/release-and-versioning.md @@ -0,0 +1,26 @@ +# Release and Versioning + +Current version: 0.4.0 (`pyproject.toml`). + +## Versioning +- Follow semantic-ish bumps: increment patch for fixes, minor for features, major for breaking changes. +- Update `pyproject.toml` version and any surfaced docs. + +## Packaging +```bash +poetry build +poetry publish # requires credentials +``` +- Ensure `README.md` and license included (listed in `[tool.poetry]` include). +- Verify wheels/sdist contain `.jinja`, `.xml`, `.tcss` assets (declared in `include`). + +## Changelog +- Maintain a changelog (add file if missing) summarizing features, fixes, breaking changes. +- Reference PRs/issues; highlight security-impacting changes. + +## Compatibility +- Python 3.12 only (per `pyproject.toml`). +- Document any deprecated flags or behaviors and provide migration notes. + +## Maintenance +- Update version numbers and commands when packaging flow changes; ensure asset include lists stay correct. 
diff --git a/docs/roadmap-templates.md b/docs/roadmap-templates.md new file mode 100644 index 00000000..e475b43d --- /dev/null +++ b/docs/roadmap-templates.md @@ -0,0 +1,37 @@ +# Roadmap and Templates + +## RFC/ADR template +``` +Title: +Status: Draft/Approved/Rejected +Owner: +Date: +Summary: +Context: +Options considered: +Decision: +Impact (security/latency/cost/UX): +Migration plan: +Testing plan: +Open questions: +``` + +## Backlog item template +``` +ID: +Title: +Area: agent | tool | runtime | prompt | UI | telemetry | infra +Problem/Goal: +Proposed approach: +Risks: +Acceptance criteria: +Tests required: +Docs to update: +``` + +## Prioritization hints +- Prefer items that improve security signal quality, reduce latency/cost, or harden sandboxing. +- Require tests/docs updates before closing any item. + +## Maintenance +- Adjust templates when process changes; keep evaluation criteria aligned with current priorities. diff --git a/docs/runtime-and-sandbox.md b/docs/runtime-and-sandbox.md new file mode 100644 index 00000000..8942a75a --- /dev/null +++ b/docs/runtime-and-sandbox.md @@ -0,0 +1,30 @@ +# Runtime and Sandbox + +## Components +- `runtime/docker_runtime.py`: manages docker container lifecycle and tool execution environment. +- `runtime/tool_server.py`: server exposing tool execution endpoints inside sandbox. +- `runtime/runtime.py`: runtime interface/wrapper. + +## Flow +1) Tool invocation requests execution via runtime. +2) Docker runtime ensures container image exists/runs, mounts required volumes, and proxies commands. +3) Tool server executes requested action (browser, terminal, python, etc.) within sandbox. +4) Results returned to agent loop and tracer. + +## Security boundaries +- Isolation via Docker: filesystem and network scoped by container config. +- Volume mounts only for necessary paths (e.g., target code) to minimize exposure. +- Network access governed by docker configuration; prefer least privilege. 
+ +## Configurable parameters +- Image name/tag, resource limits (CPU/memory), timeouts for actions, mount paths. +- Adjust in `docker_runtime.py` and related config constants. + +## Troubleshooting +- Docker daemon not running β†’ start service. +- Permission issues pulling/running image β†’ check user group and registry auth. +- Slow pulls β†’ pre-pull images; configure registry mirror. +- Tool server unreachable β†’ check container logs and exposed ports; verify tool_server start. + +## Maintenance +- Update when docker image/tag or resource limits change; keep security boundary notes aligned with actual mounts/network settings. diff --git a/docs/security-and-privacy.md b/docs/security-and-privacy.md new file mode 100644 index 00000000..c43df2ff --- /dev/null +++ b/docs/security-and-privacy.md @@ -0,0 +1,25 @@ +# Security and Privacy + +## Secrets and data handling +- Use env vars for provider keys (`STRIX_LLM`, `LLM_API_KEY`); avoid hardcoding. +- Redact sensitive content before telemetry/logging; tracer extensions should strip secrets. + +## Threat model +- Targets may be untrusted; sandbox all active tooling via docker runtime. +- Limit volume mounts to required paths; constrain network access where possible. +- Validate tool inputs; sanitize file paths and URLs from LLM output. + +## Sandbox caveats +- Misconfigured docker (privileged mounts) can weaken isolation; review `runtime/docker_runtime.py` changes carefully. +- Browser/proxy tools can reach external hosts; ensure user consent and scope limitations. + +## Supply chain +- Pin dependencies in `pyproject.toml`; review updates for security impact. +- Verify docker images and registries; avoid pulling untrusted images. + +## Privacy +- Minimize data sent to LLMs; prefer summaries over raw sensitive payloads. +- Provide users clarity on what leaves the machine when using remote providers. 
+
+## Maintenance
+- Revisit threat model when runtime/networking or tool capabilities change; ensure redaction guidance matches telemetry behavior.
diff --git a/docs/setup-and-running.md b/docs/setup-and-running.md
new file mode 100644
index 00000000..d17f0d5e
--- /dev/null
+++ b/docs/setup-and-running.md
@@ -0,0 +1,103 @@
+# Setup and Running
+
+## Prerequisites
+- Python 3.12
+- Docker running with permissions to pull/run images.
+- Playwright browsers installed (one-time): `python -m playwright install --with-deps`.
+- Network access to chosen LLM provider or litellm proxy.
+
+## Install
+```bash
+# Recommended
+pipx install strix-agent
+
+# From source
+poetry install
+poetry run strix --help
+```
+
+## Environment variables
+- `STRIX_LLM`: provider/model id (e.g., `openai/gpt-5`).
+- `LLM_API_KEY`: API key for the provider or proxy.
+- Optional provider settings: set according to `llm/config.py` expectations (e.g., base URL for litellm proxy).
+
+## Running scans
+- Basic local scan: `strix --target ./app-directory`
+- Remote repo: `strix --target https://github.com/org/repo`
+- Web app: `strix --target https://your-app.com`
+- Multiple targets: `strix -t <target1> -t <target2>`
+- Add instructions: `--instruction "Focus on IDOR"`
+- Non-interactive mode (for servers/CI): `strix -n --target https://your-app.com`
+
+## Outputs
+- Results saved under `strix_runs/<run-name>/`; includes reports/logs.
+- Structured vulnerability exports in each run: `vulnerabilities/*.md`, `vulnerabilities.csv`, `vulnerabilities.jsonl`, and `vulnerabilities.sarif.json` for CI upload.
+- Non-interactive mode also streams findings to stdout.
+
+## Common pitfalls
+- Docker not running → ensure daemon is up and user has permissions.
+- Playwright missing → rerun `python -m playwright install --with-deps`.
+- Invalid/missing `LLM_API_KEY` → verify env; check provider-specific base URL if using proxy.
+- Slow runs → confirm network connectivity and increase provider timeout if needed in config.
+ +## Telegram bot (sidecar, planned) +- Env: `BOT_TOKEN`, `WEBHOOK_URL`, `ALLOWLIST_IDS`, optional `BOT_DB_PATH`, `STRIX_ROOT`. +- Deploy bot service on same VM with access to `strix_runs/`; ensure webhook HTTPS endpoint reachable. +- Commands and usage: see `docs/telegram_bot_usage.md`. +- Architecture and security notes: see `docs/telegram_bot_architecture.md`. +- Start bot: `poetry run strix-bot --mode strix` (or `--mode fs` for read-only browsing). +- Supports `.env` in repo root; see `.env.example` for all keys (env vars still take priority). +- Systemd unit template: `packaging/systemd/strix-bot.service` (copy to `/etc/systemd/system/` and adjust paths/user/env). +- Health endpoints (if `BOT_HTTP_PORT` set): `/health` and `/healthz` return `ok`; `/metrics` returns JSON or Prometheus when `?format=prom`. +- Optional tuning: `BOT_RATE_LIMIT` (per-user seconds), `BOT_GLOBAL_RATE_LIMIT` (seconds), `BOT_DEFAULT_VERBOSITY` (high-only|batched|full). +- Sample systemd (adjust paths/env): + ``` + [Unit] + Description=Strix Telegram Bot + After=network.target + + [Service] + WorkingDirectory=/opt/strix + Environment=BOT_TOKEN=... + Environment=WEBHOOK_URL=https://your-domain/bot-webhook + Environment=ALLOWLIST_IDS=12345,67890 + Environment=BOT_HTTP_PORT=8081 + Environment=BOT_HTTP_HOST=0.0.0.0 + ExecStart=/usr/bin/poetry run strix-bot --mode strix + Restart=always + + [Install] + WantedBy=multi-user.target + ``` +- Security: prefer injecting `BOT_TOKEN` via secret manager or systemd drop-in rather than files; rotate tokens regularly (mount secret to `BOT_TOKEN_FILE` if using file-based secret). +- Secure HTTP endpoints (`/health`, `/healthz`, `/metrics`) with firewall/allowlist if enabled. +- Use `BOT_HTTP_TOKEN` to require bearer auth on `/metrics`; rotate regularly. +- CI: add a job to run `poetry run pytest` to cover config, streaming filters, HTTP endpoints. +- A ready-made workflow exists at `.github/workflows/bot-tests.yml` to run bot tests in CI. 
+- Alerts: set `BOT_ALERT_WEBHOOK` to receive JSON alerts on bot handler/delivery failures. +- `.env` also supports core LLM settings (`STRIX_LLM`, `LLM_API_KEY`, optional `LLM_API_BASE`). +- Example GitHub Actions job: + ```yaml + bot-tests: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Install deps + run: | + pip install poetry + poetry install --with dev + - name: Test + run: poetry run pytest tests + ``` +- Secret management example (systemd drop-in): + ``` + # /etc/systemd/system/strix-bot.service.d/10-secret.conf + [Service] + EnvironmentFile=/etc/strix-bot/secret.env # contains BOT_TOKEN=... + ``` + +## Maintenance +- Update commands when CLI flags change; refresh provider env guidance when new providers added. diff --git a/docs/telegram_bot_announcement.md b/docs/telegram_bot_announcement.md new file mode 100644 index 00000000..a8eccf0b --- /dev/null +++ b/docs/telegram_bot_announcement.md @@ -0,0 +1,26 @@ +# Telegram Bot Announcement (Internal) + +Audience: engineering/operators. Goal: share availability, how to use, and safety notes. + +## Key points +- What: Strix Telegram bot to start/stop runs, stream findings, fetch reports/files, and browse docs. +- Where: staging/prod bot at your usual Telegram; access controlled by allowlist (`ALLOWLIST_IDS`). +- Status: pilot complete; ready for wider use. Resume remains unsupported (will reply with guidance). + +## How to start +1) Ensure you are allowlisted; ask the ops team if not. +2) Use `/start` for help, `/health` to confirm it’s alive. +3) Launch a run: `/newrun https://target --instruction "focus on auth"`. +4) During the run: adjust verbosity `/verbosity batched|full`, tail logs `/run tail`, view summary `/run report`, and fetch files `/run files`. +5) Get docs: `/docs troubleshooting` or `/run docs`. + +## Safety +- Allowlist enforced; tokens via env/secret manager (`BOT_TOKEN`/`BOT_TOKEN_FILE`). 
+- Redaction on streaming; reports/files are sent as-isβ€”avoid sensitive targets unless approved. +- Size/path guards on file browsing; rate limits configurable via env. +- Optional HTTP `/health`/`/metrics` secured by host/IP + `BOT_HTTP_TOKEN` if set. + +## Support +- Issues: check `/health`, service logs, and `/metrics?format=prom` (with token if configured). +- Alerts: delivery/handler failures are emitted to `BOT_ALERT_WEBHOOK` when configured. +- Docs: `docs/telegram_bot_usage.md`, `docs/telegram_bot_troubleshooting.md` (or `/docs troubleshooting`). diff --git a/docs/telegram_bot_architecture.md b/docs/telegram_bot_architecture.md new file mode 100644 index 00000000..76e9e312 --- /dev/null +++ b/docs/telegram_bot_architecture.md @@ -0,0 +1,48 @@ +# Telegram Bot Architecture (aiogram + webhook, sidecar VM) + +## Overview +- Transport: Webhook (HTTPS endpoint) to receive Telegram updates. +- Bot runtime: aiogram app running as a systemd-managed service on the same VM as Strix (sidecar). +- Control surface: internal Python API layer that starts/stops/resumes/lists runs without spawning CLI subprocesses. +- Auth: allowlisted Telegram user IDs gate all commands; secrets via env/secret manager. +- Persistence: SQLite for bot session state (pagination, recent runs). +- Filesystem access: direct read access to `strix_runs/` for reports/artifacts. + +## Data/Control Flow +``` +Telegram -> Webhook endpoint (aiogram) -> Command/handler + -> Auth check (allowlist) + -> Control API (start/stop/resume/list/tail) -> Strix internals + -> Tracer hooks stream vulns/logs -> Bot push to Telegram + -> FS access to strix_runs -> send summaries/files/docs +``` + +## Key Components +- Webhook handler: receives updates, routes to aiogram routers. +- Command handlers: `/newrun`, `/runs`, `/run info/tail/report/files/docs`, `/resume`, `/stop`, `/verbosity`, `/docs`, `/help`. +- Inline keyboards: quick actions for reports/files/tail/verbosity. 
+- Control API: Python functions wrapping Strix interfaces (no CLI spawn) to manage runs and fetch status. +- Telemetry bridge: tracer callbacks batch/format vulnerability events and push to Telegram respecting verbosity (high-only/batched/full) with severity icons. +- File serving: safe path resolver for `strix_runs/` artifacts; enforce size limits and compression. +- Docs serving: excerpts/links from `docs/*.md`, optional contextual suggestions after errors. +- Rate limiting: global/severity-based throttling of outbound messages. +- Security: allowlist check per request; redaction by default; explicit override for sensitive data. +- Alerting: optional webhook receives JSON payloads when delivery/handler errors occur. +- Tunables: per-user/global rate limits and default verbosity via env (`BOT_RATE_LIMIT`, `BOT_GLOBAL_RATE_LIMIT`, `BOT_DEFAULT_VERBOSITY`). +- Config loading: `.env` in repo root supported; env vars override file values. + +## Deployment +- Systemd unit running aiogram app with webhook URL + token via env. +- Health check endpoint (HTTP/ping) for monitoring. +- Optional HTTP server (if BOT_HTTP_PORT set) exposing `/health`, `/healthz`, and `/metrics` (in-memory counters/errors). +- TLS termination via reverse proxy or direct cert; ensure webhook set to HTTPS URL. + +## Observability +- Structured logs for commands, control API calls, run starts, report sends, and file transfers. +- Metrics (commands, errors, latency, message volume); `/metrics` supports JSON or Prom text format and can be scraped by Prometheus. +- Alerting on delivery/API failures once metrics exist. + +## Maintenance Notes +- Update allowlist when adding operators. +- Rotate bot token regularly; store in secret manager/env. +- Re-run webhook set-up if URL/cert changes. 
diff --git a/docs/telegram_bot_e2e_plan.md b/docs/telegram_bot_e2e_plan.md new file mode 100644 index 00000000..2fe75ad4 --- /dev/null +++ b/docs/telegram_bot_e2e_plan.md @@ -0,0 +1,32 @@ +# Telegram Bot E2E Staging Plan + +Purpose: validate the Telegram bot end-to-end in a staging chat before wider rollout. + +## Preconditions +- Staging Telegram chat exists; operators are allowlisted (`ALLOWLIST_IDS`). +- Bot deployed to staging VM with webhook HTTPS reachable; `BOT_TOKEN`, `WEBHOOK_URL`, `STRIX_ROOT` set. +- Sample target app available for scans (non-production, safe to probe). +- Optional: `BOT_HTTP_PORT` exposed to staging network for `/health`/`/metrics`. + +## Test steps +1) Sanity: send `/health` and `/start`; expect `ok` and help text. +2) Start run: `/newrun --instruction "smoke"`; expect acknowledgment with run id. +3) Streaming: during run, confirm vulnerability messages arrive with severity icons; switch verbosity `/verbosity batched` then `full` and observe differences. +4) Tail logs: `/run tail` then press "Tail more" until end; ensure pagination stops. +5) Report: `/run report` -> receive summary; press "Send full report" -> receive file. +6) Files: `/run files` -> navigate directories; download a small file; confirm size guard blocks oversized file if present. +7) Docs: `/run docs` or `/docs troubleshooting` -> receive excerpt/link. +8) Search runs: `/runs <query>` -> list filtered runs; open run info via buttons. +9) Stop: `/stop <run_id>` on a live run -> confirm stop message. +10) Metrics/health: curl `/health` and `/metrics?format=prom` (with token if set); ensure non-200 failures are alerted/logged. + +## Pass criteria +- All commands respond within a few seconds; no unhandled errors in bot logs. +- Streaming respects verbosity and redaction; batching does not exceed Telegram limits. +- File/report sends succeed or are gracefully blocked with messaging. +- `/metrics` exposes counters/errors; `/health` returns `ok`.
+ +## Post-test +- Capture transcripts/screenshots. +- File any bugs with run id, timestamps, and screenshots. +- Clear staging secrets if rotated for the test. diff --git a/docs/telegram_bot_pilot.md b/docs/telegram_bot_pilot.md new file mode 100644 index 00000000..c2f9dfd5 --- /dev/null +++ b/docs/telegram_bot_pilot.md @@ -0,0 +1,33 @@ +# Telegram Bot Pilot & Rollout + +Plan for piloting with allowlisted users, collecting feedback, and rolling out. + +## Pilot setup +- Scope: staging or low-risk production targets; keep allowlist small (operators only). +- Config: `BOT_TOKEN`, `WEBHOOK_URL`, `ALLOWLIST_IDS`, optional `BOT_ALERT_WEBHOOK` for failures. +- Ensure `/health` and `/metrics` reachable to operators (behind firewall/auth as needed). +- Share usage doc: `docs/telegram_bot_usage.md`; remind users about redaction/limits. + +## Pilot checklist (1–2 weeks) +- Create at least 3 runs across different targets. +- Validate streaming at all verbosity levels. +- Fetch reports/files and exercise size guards. +- Use `/docs troubleshooting` after an induced error to verify hints. +- Capture any delivery/API errors (check alerts and bot logs). +- Track UX notes: confusing messages, missing buttons, verbosity defaults. + +## Feedback and hardening +- Review pilot feedback; categorize into bugs vs. UX tweaks. +- Adjust rate limits, default verbosity, or button labels based on feedback. +- Add missing docs/tests from pilot findings. + +## Rollout +- Expand allowlist; announce availability in internal channels with brief "how to". +- Ensure CI workflow (`.github/workflows/bot-tests.yml`) is green. +- Monitor `/metrics` errors and alert webhook for the first week; be ready to toggle bot off via systemd. +- Update README/marketing if broader audience is desired. + +## Exit criteria +- No critical delivery/API errors in pilot week. +- Users can start runs, receive streaming updates, fetch reports/files, and browse docs without intervention.
+- Health/metrics endpoints monitored; alert webhook functioning if configured. diff --git a/docs/telegram_bot_questions.md b/docs/telegram_bot_questions.md new file mode 100644 index 00000000..02ee730b --- /dev/null +++ b/docs/telegram_bot_questions.md @@ -0,0 +1,105 @@ +# Telegram Bot Design Questions (with suggested options) + +Answer these to lock UX and structure choices. + +1) Transport mode? +- [x] A) Webhook: stable and low-latency; requires public ingress/HTTPS endpoint and cert handling. +- [ ] B) Long-polling: simplest (no ingress/certs), but slightly higher latency and less efficient at scale. +- Recommendation: A (webhook) if ingress available; otherwise B. + +2) Telegram library? +- [ ] A) python-telegram-bot: mature, batteries-included, good docs; synchronous + async support. +- [x] B) aiogram: fully async, performant, modular; leaner API, good for high throughput. +- [ ] C) Other: specify if you prefer another ecosystem (e.g., Telethon) with trade-offs. +- Recommendation: B (aiogram) for async throughput; A if you want maximal examples/docs. + +3) Authentication/authorization? +- [x] A) Allowlisted user IDs: simplest hard gate; manage allowed IDs in config/secret. +- [ ] B) Shared secret/passphrase + allowlist: extra challenge step to join; good for small teams. +- [ ] C) Org invite flow: needs backing service/DB to manage org membership; most overhead. +- [ ] D) No auth: fastest but insecure; not recommended. +- Recommendation: B (secret + allowlist) for extra friction; A if single-user. + +4) Hosting model? +- [x] A) Sidecar container with Strix: co-located, easy FS access to `strix_runs`, simple networking. +- [ ] B) Separate microservice: clean separation, can scale independently; needs API into Strix. +- [ ] C) Single binary embedding Strix: lowest moving parts, but couples release cycles tightly. +- Recommendation: A (sidecar) to simplify FS access and reduce latency. + +5) Strix control interface for bot? 
+- [ ] A) Wrap CLI invocations: minimal code change; slower startup per run; manage subprocess IO. +- [x] B) Internal Python API: direct calls; faster, richer control; requires stable internal surface. +- [ ] C) Lightweight HTTP API: bot calls a service exposing run control; clearer boundaries, extra service to maintain. +- Recommendation: B for performance and richness; C if you want clearer service boundary. + +6) Command style? +- [ ] A) Slash commands only: clear and predictable; more typing, fewer affordances. +- [x] B) Slash commands + inline keyboards: best UX; quick actions, contextual buttons. +- [ ] C) Menu-driven persistent buttons: fewer typed commands; more state/UI logic to maintain. +- Recommendation: B for balance of clarity and UX. + +7) Output verbosity to chat? +- [ ] A) Only high-severity + summaries: low noise, risk missing context. +- [ ] B) All vulns batched: balanced signal/noise; periodic bundles. +- [ ] C) Full live stream (logs + vulns): maximum visibility; noisy and rate-limit prone. +- [x] D) Toggle per-run: user chooses verbosity per run; most flexible. +- Recommendation: D to let user choose per run. + +8) Report delivery format? +- [ ] A) Send full report file: one-step access; may hit Telegram size limits; compress if needed. +- [x] B) Summary + on-demand file: keeps chat light; user pulls full report when desired. +- [ ] C) Download link only: minimal bot bandwidth; depends on external hosting/ingress. +- Recommendation: B (summary + on-demand) to manage size/noise. + +9) `strix_runs` browsing UX? +- [x] A) List runs then drill via buttons: guided, low error risk; limited flexibility. +- [ ] B) Free-form path requests: powerful for power-users; risk of typos/path traversal (must sanitize). +- [ ] C) Search first (target/date/severity), then browse: scalable when many runs; extra steps. +- Recommendation: A with search filter add-on if run count grows. + +10) Resume capability? 
+- [x] A) Support resume if pausable; otherwise fall back to read-only with clear messaging. +- [ ] B) Review-only: simplest; avoids partial state issues but no resume. +- Recommendation: A if resume is feasible; otherwise B until resume exists. + +11) Docs access from bot? +- [ ] A) `/docs ` sends excerpt + file link: explicit pull model. +- [ ] B) Inline suggestions after errors: proactive help; avoid spam with throttling. +- [x] C) Both: best UX; needs guardrails to prevent noisy suggestions. +- Recommendation: C with throttling and opt-out toggle. + +12) Rate limiting and batching? +- [x] A) Global limits + severity filters: easy to manage; prevents floods. +- [ ] B) Per-user limits: fair sharing in multi-user scenarios. +- [ ] C) No limits: simplest but risky; can spam chats and hit Telegram limits. +- Recommendation: A (global + severity) plus optional per-user caps if multi-user. + +13) Data sensitivity handling? +- [x] A) Redact secrets by default; explicit `/send-sensitive` override: safest default. +- [ ] B) Trust operator; send everything: fastest but riskier; rely on allowlist. +- Recommendation: A to stay safe by default. + +14) Observability for bot? +- [ ] A) Structured logs only: minimal. +- [x] B) Logs + metrics (commands, errors, latency): better insight; needs metrics backend. +- [ ] C) Logs + metrics + alerting on delivery/API failures: best resilience; more setup. +- Recommendation: B to start; grow to C if SLOs matter. + +15) Persistence layer for bot state? +- [ ] A) In-memory: trivial; loses pagination/state on restart. +- [x] B) SQLite/bolt: easy local persistence; good for single instance. +- [ ] C) Redis: shared state for HA, fast; needs service. +- [ ] D) Postgres/DB: full durability and querying; more ops overhead. +- Recommendation: B for simplicity; C if you need HA. + +16) Deployment target? +- [ ] A) Docker/k8s with CI: standard, repeatable, scalable. +- [x] B) VM/systemd: simple if infra is minimal; manual care. 
+- [ ] C) Local/dev-only: fastest to iterate; not suitable for prod use. +- Recommendation: A if you already use containers; B for quick internal deploys. + +17) Error handling UX? +- [ ] A) Friendly error + suggested next command: best guidance. +- [x] B) Minimal error: terse; less helpful. +- [ ] C) Auto-retry then notify on failure: smoother UX; needs idempotent handlers. +- Recommendation: C with a cap on retries and A-style guidance on failure. diff --git a/docs/telegram_bot_roadmap.md b/docs/telegram_bot_roadmap.md new file mode 100644 index 00000000..2607572c --- /dev/null +++ b/docs/telegram_bot_roadmap.md @@ -0,0 +1,71 @@ +# Telegram Bot Integration Roadmap & TODO + +Legend: βœ… done, 🟑 pending, β›” blocked + +## Tasks +- βœ… [TB01] Define bot persona, command set, and interaction modes (slash commands + inline keyboards). (Standalone) +- βœ… [TB02] Lock Telegram library and transport: `aiogram` with webhook delivery. (Standalone) +- βœ… [TB03] Auth: allowlisted Telegram user IDs (config/secret). (Standalone) +- βœ… [TB04] Control surface: internal Python API to start/stop/list/resume runs (no CLI wrapping). (Standalone) +- βœ… [TB05] Hosting/deploy: sidecar service on same VM (systemd) with FS access to `strix_runs`; webhook ingress HTTPS. (Depends: TB02) +- βœ… [TB06] Control API/service to start/stop/resume/list runs (wrap `interface/main.py` internally). (Standalone) +- βœ… [TB07] Run status/query surface for listing runs and fetching metadata/logs. (Depends: TB06) +- βœ… [TB08] Resume mechanism; fall back with clear messaging if not possible. (Depends: TB06) +- βœ… [TB09] Secure internal API surface (auth tokens/ACL) and restrict webhook IPs. (Depends: TB06) +- βœ… [TB10] Commands `/start`, `/help`, `/newrun`, `/runs`, `/run ...`, `/resume` (graceful), `/stop`, `/verbosity`. (Depends: TB06) +- βœ… [TB11] Inline keyboards (report/file nav/verbosity). (Depends: TB10) +- βœ… [TB12] Clear errors + rate-limit notices. 
(Depends: TB10) +- βœ… [TB12b] Per-run verbosity preference. (Depends: TB10) +- βœ… [TB12c] Truncate long summaries to fit message limits. (Depends: TB10) +- βœ… [TB13] Streaming via tracer callbacks; verbosity (high-only/batched/full). (Depends: TB06, TB10, TB12b) +- βœ… [TB14] Log tailing hook for pagination. (Depends: TB06, TB07) +- βœ… [TB15] Severity-based formatting and batching refinements. (Depends: TB13) +- βœ… [TB16] Locate reports in `strix_runs/`. (Depends: TB06) +- βœ… [TB17] File transfer with size guards (report + generic files). (Depends: TB10, TB16) +- βœ… [TB18] Summary + on-demand full report buttons. (Depends: TB10, TB16) +- βœ… [TB19] List runs with metadata via buttons. (Depends: TB06) +- βœ… [TB20] Navigate run directories with buttons. (Depends: TB10, TB19) +- βœ… [TB21] Fetch/send specific files; sanitize paths. (Depends: TB19) +- βœ… [TB22] Search/filter runs (target/date/severity). (Depends: TB19) +- βœ… [TB23] Docs excerpts via `/docs`. (Standalone) +- βœ… [TB24] Contextual doc suggestions (throttled). (Depends: TB23) +- βœ… [TB25] Allowlist auth enforced (env + runtime guard). (Standalone) +- βœ… [TB26] Redact secrets by default (token-like prefixes). (Standalone) +- βœ… [TB27] Audit logging + global rate limits. (Depends: TB10) +- βœ… [TB27b] Secret management: env/BOT_TOKEN_FILE, secret manager/systemd drop-ins documented. (Standalone) +- βœ… [TB28] Persist bot session state (verbosity) in SQLite. (Standalone) +- βœ… [TB29] Cache run metadata (FS run list cache with TTL). (Depends: TB28) +- βœ… [TB30] Package for systemd; health checks (/health, /healthz). (Depends: TB05, TB10) +- βœ… [TB31] CI pipeline to lint/test bot; deploy via artifact + systemd updates. (Standalone) +- βœ… [TB32] Manage secrets for bot token/API keys via env/secret manager/BOT_TOKEN_FILE guidance. (Depends: TB27b) +- βœ… [TB33] Metrics (commands, errors, latency, message volume) via backend (Prom/exporter). 
(Standalone) +- βœ… [TB34] Structured logs for commands/API/file transfers. (Standalone) +- βœ… [TB35] Alerting on delivery/API failures. (Depends: TB33, TB34) +- βœ… [TB36] Unit tests for command parsing/handlers (aiogram). (Standalone) +- βœ… [TB37] Integration tests against Strix internal API mocks; FS fixtures. (Depends: TB06) +- βœ… [TB38] E2E test in staging Telegram chat. (Depends: TB10, TB30) +- βœ… [TB39] Load test for vulnerability message bursts. (Depends: TB13, TB17) +- βœ… [TB39b] Regression test for missing env vars. (Standalone) +- βœ… [TB39c] Regression test for HTTP endpoints (`/health`, `/healthz`, `/metrics`). (Depends: TB30, TB33) +- βœ… [TB39d] Regression test for severity/batch streaming filters. (Depends: TB13) +- βœ… [TB39e] Regression test for state persistence (verbosity). (Depends: TB28) +- βœ… [TB40] `docs/telegram_bot_usage.md`. (Standalone) +- βœ… [TB41] `docs/telegram_bot_architecture.md`. (Standalone) +- βœ… [TB42] Update `docs/setup-and-running.md` with bot deploy steps. (Depends: TB30) +- βœ… [TB43] Update `docs/troubleshooting.md` with bot-specific issues. (Depends: TB10, TB13, TB33) +- βœ… [TB44] Pilot with allowlisted internal users; gather feedback. (Depends: TB30) +- βœ… [TB45] Harden based on feedback (UX tweaks, rate limits, verbosity defaults). (Depends: TB44) +- βœ… [TB46] Announce feature; add to README/marketing if desired. (Depends: TB45) + +## Execution checklist +- [ ] Confirm webhook URL/cert availability; store bot token securely (env/secret manager/BOT_TOKEN_FILE). +- [ ] Add feature flags/toggles so bot can be disabled without impacting core Strix. +- [ ] Implement control API behind auth; never expose unauthenticated endpoints. +- [ ] Run unit/integration tests locally (commands, control API, FS browsing sanitization). +- [ ] Verify rate limiting/batching in staging chat before production. +- [ ] Validate report file size handling and path sanitization for `strix_runs`. 
+ [ ] Ensure telemetry hooks do not block agent loop (use async/background). + [ ] Deploy behind systemd with health check; confirm restart policy. + [ ] If HTTP endpoints enabled, verify `/healthz` and `/metrics` are reachable and secured. + [ ] Post-deploy sanity: create run, receive vuln notifications, fetch summary/full report, browse files, fetch docs. + [ ] Rollback plan ready (disable bot via flag/env; stop systemd service) if issues arise. diff --git a/docs/telegram_bot_usage.md b/docs/telegram_bot_usage.md new file mode 100644 index 00000000..b5e1202e --- /dev/null +++ b/docs/telegram_bot_usage.md @@ -0,0 +1,87 @@ +# Telegram Bot Usage (Planned) + +## Prerequisites +- Bot token stored in env/secret manager. +- Webhook HTTPS endpoint reachable; webhook set to bot URL. +- Allowlisted Telegram user IDs configured. +- Bot service running (systemd) with access to `strix_runs/`. +- Start: `poetry run strix-bot --mode strix` (default) or `--mode fs` for read-only browsing. +- Env validation: BOT_TOKEN (or BOT_TOKEN_FILE), WEBHOOK_URL, and ALLOWLIST_IDS must be set or the bot will refuse to start. +- Optional HTTP endpoints (if BOT_HTTP_PORT set): `/health`, `/healthz`, `/metrics`. +- Protect optional HTTP endpoints with `BOT_HTTP_TOKEN` (Bearer token for `/metrics`). +- Optional tuning: `BOT_RATE_LIMIT` (per-user seconds, default 1.0), `BOT_GLOBAL_RATE_LIMIT` (seconds, default 0.5), `BOT_DEFAULT_VERBOSITY` (`high-only|batched|full`, default `high-only`). +- `.env` supported in repo root; see `.env.example`. + +## Commands +- `/start` - greet and show help. +- `/help` - list commands and usage hints. +- `/health` - simple health check (responds with "ok"). +- `/metrics` - show in-memory counters/errors. +- `/newrun [instruction]` - start a scan with optional instruction. +- `/runs [query]` - list recent runs (filter by run_id/target substring). +- `/run <run_id> info` - show run metadata/status. +- `/run <run_id> tail` - tail recent logs (paginated).
+- `/run <run_id> report` - send summary; button to request full report file. +- `/run <run_id> files` - browse `strix_runs/` via buttons; download files (size limits apply). +- `/run <run_id> docs` - show links/excerpts to relevant docs. +- `/resume <run_id>` - reattach streaming to an active run if possible; otherwise reply with guidance. +- `/stop <run_id>` - stop a run. +- `/verbosity <run_id> <level>` - set per-run verbosity: `high-only | batched | full`. +- `/docs <topic>` - fetch doc excerpt + link. + +Current status: +- Command handlers wired; start/stop/status/logs/files/reports draft-wired via StrixControlAPI. +- Resume reattaches streaming only if the run is still active; otherwise guidance is shown. +- Inline keyboards for reports and file nav/download with parent navigation; size guards enforced. +- `/docs <topic>` returns excerpt from local docs directory. +- `/verbosity` stores preference for future streaming (no effect yet). +- Long report summaries are truncated to fit Telegram message limits; full report available via button. +- Streaming: vulnerability findings are pushed to chat when runs start; high-only mode filters out lower severities; batched mode groups messages every few seconds (no persistence); messages include severity icons and per-message truncation. + +## Interactions +- Inline keyboards for: open report summary/full, tail next page, change verbosity, navigate files. +- Tailing stops showing the "Tail more" button once the end of the log is reached. +- Verbosity defaults to summaries/high-severity; user can elevate per run. +- Redaction on by default; explicit confirmation required to send sensitive content. +- Default verbosity can be set via `BOT_DEFAULT_VERBOSITY` to align with operator preference. + +## Examples +- Start: `/newrun https://example.com --instruction "Focus on auth flows"` +- List runs: `/runs` +- Set verbosity: `/verbosity run-123 batched` +- Fetch report summary: button from `/run run-123 report` + +## Safety +- Allowlist enforced on every command.
+- Path sanitization on file browsing. +- Size checks/compression before sending files. +- Rate limits on outbound messages to avoid flooding (basic per-user limiter in place). +- Command logging/metrics counters enabled (internal only; errors counted for rate limits). +- Structured logs emitted for commands, run starts, reports, and file sends. +- Alerts: optional `BOT_ALERT_WEBHOOK` receives JSON on delivery/handler errors. +- If HTTP endpoints are enabled, restrict exposure (firewall/allowlist) and avoid exposing them publicly. +- HTTP `/metrics` supports `?format=prom` for Prometheus-style plaintext. +- Tests: missing env vars are covered by `tests/test_bot_config.py`; run `poetry run pytest`. +- Tests: rate limiter/metrics helpers covered by `tests/test_bot_misc.py`. +- Tests: streaming severity/batch filters covered by `tests/test_bot_streaming.py`. +- Tests: HTTP endpoints (`/healthz`, `/metrics`) covered by `tests/test_bot_http.py`. +- Tests: metrics latency tracking covered by `tests/test_bot_metrics.py`. +- CI: see `docs/setup-and-running.md` for GitHub Actions example to run pytest. +- BOT_TOKEN_FILE is supported for secret injection; ensure file is protected and readable. +- Streaming redacts obvious secrets (e.g., tokens prefixed with `sk-`) before sending. +- Verbosity preferences are persisted in SQLite (`BOT_DB_PATH`). +- On certain errors, the bot will suggest `/docs troubleshooting` (throttled to avoid spam). +- `/metrics` requires `Authorization: Bearer <token>` when token is set. +- Tests: control API listing fallback covered by `tests/test_control_api_list.py`. +- Secret management: use env vars or mount a secret to `BOT_TOKEN_FILE`; systemd drop-ins can set `EnvironmentFile`. + +## Deployment (VM/systemd) +- Set env: `BOT_TOKEN`, `WEBHOOK_URL`, `ALLOWLIST_IDS`, `STRIX_ROOT` (if needed). +- Use `packaging/systemd/strix-bot.service` as a template (copy to `/etc/systemd/system/` and adjust paths/user/env).
+- Run systemd service; confirm health checks (`/health`, `/healthz`) respond. +- Verify webhook set via bot API; test `/start` from allowlisted user. + +## Troubleshooting (high level) +- No responses: check systemd status/logs; verify webhook URL/token. +- Missing files: confirm `strix_runs/` path and permissions. +- Rate limit errors: adjust batching/severity filters. diff --git a/docs/telemetry-and-observability.md b/docs/telemetry-and-observability.md new file mode 100644 index 00000000..11d7c9bd --- /dev/null +++ b/docs/telemetry-and-observability.md @@ -0,0 +1,24 @@ +# Telemetry and Observability + +## Tracer +- Location: `telemetry/tracer.py`. +- Responsibilities: track agent creation/status changes, tool execution start/finish, vulnerability findings, scan config metadata. +- Global tracer set via `set_global_tracer`; CLI sets vulnerability callback to render panels. + +## Events +- Agent lifecycle: creation, status updates (running/waiting/finished). +- Tool executions: start/end, args, results, status. +- Vulnerabilities: id/title/content/severity routed to UI. + +## Extending telemetry +- Add new methods or fields in `tracer.py`; ensure thread safety where needed. +- Update UI renderers if new event types should be displayed. +- Redact sensitive data before logging or emitting. + +## Consumption +- TUI/CLI read tracer callbacks for live updates. +- Persist outputs in `strix_runs/` (extend to send elsewhere if needed). +- Structured exports: `vulnerabilities.csv`, `vulnerabilities.jsonl`, and SARIF `vulnerabilities.sarif.json` for CI-friendly ingestion. + +## Maintenance +- Update event fields when tracer schema evolves; ensure UI renderers are aligned with new telemetry. diff --git a/docs/testing-and-qa.md b/docs/testing-and-qa.md new file mode 100644 index 00000000..fa35b550 --- /dev/null +++ b/docs/testing-and-qa.md @@ -0,0 +1,33 @@ +# Testing and QA + +## Stack +- Unit/integration tests via `pytest`, async via `pytest-asyncio`. 
+- Coverage via `pytest --cov`. +- Static checks: `ruff`, `mypy`, `pyright`, `pylint`, `bandit`. + +## Strategy +- Unit: isolate modules (LLM utils, request queue, memory compressor, tracer, tool registry). +- Integration: tool + runtime interactions (browser/proxy/terminal/python actions) in sandbox. +- E2E: run `strix -n --target <target>` against known targets; assert vulnerability outputs/logs. + +## Fixtures +- Prefer dockerized test targets for determinism; keep small for fast runs. +- Mock LLM responses when testing agent logic to avoid network cost. + +## Regression checklist +- Tool changes: schema + renderer tests, action behavior, sandbox safety. +- Prompt changes: render tests to ensure variables resolved and key guidance present. +- Runtime changes: container lifecycle tests and timeout behavior. +- Interface changes: argument parsing and renderer output snapshots where feasible. + +## Commands +```bash +poetry run pytest +poetry run pytest --cov +poetry run ruff check . +poetry run mypy . +poetry run pyright +``` + +## Maintenance +- Update when test fixtures or target apps change; keep command list aligned with tooling versions. diff --git a/docs/todo_docs.md b/docs/todo_docs.md new file mode 100644 index 00000000..f095bb03 --- /dev/null +++ b/docs/todo_docs.md @@ -0,0 +1,120 @@ +# Documentation Build Plan (status) + +Purpose: exhaustive task list to produce deep, high-fidelity docs that enable AI programmers to scale, extend, and debug Strix. Treat each section as a checklist—do not skip items. + +## Foundations (do first) +- [T001] Map repository structure: `strix/agents`, `tools`, `runtime`, `llm`, `prompts`, `interface`, `telemetry`, `prompts/*`, `containers`, `.github`, `pyproject.toml`, `README.md`. **DONE** +- [T002] Inventory current configs: env vars, CLI flags, default limits, file outputs (`strix_runs/`).
**DONE** +- [T003] Note external dependencies: Docker requirements, Playwright install steps, LLM providers (OpenAI), litellm proxy, network expectations. **DONE** +- [T004] Capture testing stack: pytest, pytest-asyncio, coverage, lint/type tools (ruff, mypy, pyright, pylint, bandit), formatting (black, isort). **DONE** +- [T005] Establish terminology: run_name, agent loop, tool server, renderer, tracer, action schema, memory compression, request queue. **DONE** +- [T006] Decide doc style: concise sections, code refs with paths, short examples, tables for configs, ASCII diagrams. **DONE** + +## docs/README.md +- [T007] Provide β€œstart here” orientation, audience (AI devs), and links to all docs. **DONE** +- [T008] Include quick start flow: install, configure STRIX_LLM/LLM_API_KEY, run first scan, where outputs go. **DONE** +- [T009] Add dependency table and minimum versions. **DONE** + +## docs/architecture.md +- [T010] High-level diagrams: data flow from CLI β†’ agent loop β†’ tools/runtime β†’ LLM β†’ telemetry. **DONE** +- [T011] Describe core modules and responsibilities with file paths. **DONE** +- [T012] Explain agent graph/orchestration, non-interactive mode, and how results are persisted. **DONE** +- [T013] Include sequence for a typical scan (targets discovery β†’ planning β†’ tool execution β†’ reporting). **DONE** +- [T014] Call out extension seams (adding tools, prompts, telemetry events). **DONE** + +## docs/setup-and-running.md +- [T015] Local setup: Python 3.12, Poetry/pipx install, Playwright install command, Docker prerequisites. **DONE** +- [T016] Environment variables: STRIX_LLM, LLM_API_KEY, optional provider settings; defaults and examples. **DONE** +- [T017] Running modes: interactive TUI vs non-interactive (`-n`), multiple targets, instructions flag. **DONE** +- [T018] Run outputs: structure of `strix_runs/`, logs, reports. **DONE** +- [T019] Common install pitfalls and fixes. 
**DONE** + +## docs/development.md +- [T020] Repository layout primer. **DONE** +- [T021] Coding standards: typing requirements, lint/format commands, pre-commit guidance. **DONE** +- [T022] How to run tests (unit/integration) and coverage. **DONE** +- [T023] Contribution workflow: branching, PR expectations, CI checks, code owners if any. **DONE** +- [T024] Performance tips (caching models, reducing docker churn). **DONE** + +## docs/agent-loop.md +- [T025] Detail `strix/agents/base_agent.py`, `state.py`, and `StrixAgent/strix_agent.py` lifecycle. **DONE** +- [T026] Explain state machine, max iterations, memory compression, request queue/backoff, tool selection loop. **DONE** +- [T027] Document hooks/callbacks and error handling patterns. **DONE** +- [T028] Show how vulnerability findings propagate to tracer/UI. **DONE** + +## docs/tools-and-extensions.md +- [T029] Describe tool contract: action schemas (`*_actions_schema.xml`), implementations, registration (`tools/registry.py`). **DONE** +- [T030] Document each tool folder purpose (browser, proxy, terminal, file_edit, python, reporting, notes, thinking, finish, agents_graph). **DONE** +- [T031] Explain renderer pairing (`interface/tool_components/*`) and how UI consumes tool outputs. **DONE** +- [T032] Step-by-step for adding a new tool: schema, implementation, registry, renderer, tests. **DONE** +- [T033] Note safeguards for execution (timeouts, resource limits). **DONE** + +## docs/runtime-and-sandbox.md +- [T034] Explain `runtime/docker_runtime.py`, `runtime/tool_server.py`, lifecycle of containerized actions. **DONE** +- [T035] Security boundaries: what is sandboxed, what is exposed, volume mounts, networking. **DONE** +- [T036] Configurable parameters (image, timeouts, resource limits) and where set. **DONE** +- [T037] Troubleshooting common runtime failures (docker not running, permissions, image pulls). 
**DONE** + +## docs/llm-config.md +- [T038] Cover `llm/config.py`, `llm/llm.py`, request queue, retries/backoff (tenacity), streaming options. **DONE** +- [T039] Model selection guidance, cost/latency tuning, parallelism. **DONE** +- [T040] Provider-specific notes (OpenAI via litellm proxy) and how to add a new provider. **DONE** +- [T041] Logging/telemetry of LLM calls and redaction practices. **DONE** + +## docs/prompts.md +- [T042] Taxonomy: frameworks, technologies, vulnerabilities, coordination, root agent prompt, system prompts. **DONE** +- [T043] Jinja template conventions and variables; how prompts are selected/combined. **DONE** +- [T044] Safe-testing prompts: dry runs, guardrails, and regression considerations when editing. **DONE** +- [T045] Adding a new prompt pack (file location, naming, validation). **DONE** + +## docs/interface.md +- [T046] Describe CLI entrypoints (`interface/main.py`, `cli.py`), argument parser, flags. **DONE** +- [T047] TUI layout (`tui.py`), key panels, live stats rendering (`utils.py`), vulnerability display. **DONE** +- [T048] Non-interactive mode behavior and output formatting. **DONE** +- [T049] Customization hooks (colors/styles via `interface/assets/tui_styles.tcss`). **DONE** + +## docs/telemetry-and-observability.md +- [T050] Detail tracer API (`telemetry/tracer.py`), emitted events, vulnerability callbacks. **DONE** +- [T051] How telemetry is persisted/consumed; how to add new spans/fields. **DONE** +- [T052] Guidance for integrating with external observability stacks (logs/metrics exports if available). **DONE** + +## docs/testing-and-qa.md +- [T053] Test pyramid: unit (per module), integration (tool + runtime), e2e (sample scans). **DONE** +- [T054] Fixtures and test targets (dockerized apps if any); how to craft deterministic inputs. **DONE** +- [T055] Browser/Playwright tool checks; proxy and terminal tool validation. **DONE** +- [T056] Regression checklist when adding tools/prompts/runtime changes. 
**DONE** + +## docs/security-and-privacy.md +- [T057] Secret handling (env vars), redaction expectations, LLM data minimization. **DONE** +- [T058] Threat model for running against untrusted targets; sandboxing caveats. **DONE** +- [T059] Network/file system safety defaults, user consent boundaries. **DONE** +- [T060] Supply chain concerns (pip/poetry deps, docker images). **DONE** + +## docs/release-and-versioning.md +- [T061] Versioning scheme (current 0.4.0), how to bump, changelog expectations. **DONE** +- [T062] Packaging steps (poetry build/publish), PyPI notes, release artifact checklist. **DONE** +- [T063] Compatibility guarantees (Python 3.12 only) and deprecation policy. **DONE** + +## docs/troubleshooting.md +- [T064] Common issues: LLM timeouts, invalid API key, docker daemon down, Playwright missing, port conflicts. **DONE** +- [T065] Observable symptoms, log locations, quick resolutions. **DONE** +- [T066] Decision tree for escalating issues. **DONE** + +## docs/glossary.md +- [T067] Concise definitions for project terms, linked to code paths where applicable. **DONE** + +## docs/roadmap-templates.md +- [T068] Provide RFC/ADR template and backlog template tailored to agents/tools/prompts/runtime changes. **DONE** +- [T069] Include evaluation criteria (security impact, latency, cost, UX). **DONE** + +## Cross-cutting tasks +- [T070] Add code path references (`path:line` where helpful) and short code snippets for tricky parts. **DONE** +- [T071] Include minimal diagrams (ASCII ok) for data flow and agent loop. **DONE** +- [T072] Provide example commands for every procedure (setup, run, test, release). **DONE** +- [T073] Ensure all docs interlink; avoid duplication by linking to source doc sections. **DONE** +- [T074] Add β€œmaintenance” note in each doc: when to update, owners if known. **DONE** + +## Verification +- [T075] Pass through all docs for consistency of terms and flags. 
**DONE** +- [T076] Validate commands on a fresh environment (documented assumptions). **TODO – run on clean machine to verify every command and flag.** +- [T077] Spellcheck and lint Markdown if available. **TODO – run markdown lint/spellcheck pass.** diff --git a/docs/todo_roadmap.md b/docs/todo_roadmap.md new file mode 100644 index 00000000..2a509963 --- /dev/null +++ b/docs/todo_roadmap.md @@ -0,0 +1,108 @@ +# Strix Product Roadmap (AI-operator focused) + +Goal: Make Strix more powerful, scalable, and operator-friendly for AI programmers and security teams. + +Legend: [ ] pending, [~] in progress, [x] done + +## Core Agent & Orchestration +- [x] A01: Pluggable agent graph builder (YAML/JSON) to compose multi-agent workflows with validation. (Standalone) Added schema/loader in strix/agents/graph_builder.py with validation + tests/agents/test_graph_builder.py; supports JSON and optional YAML; documented in docs/architecture.md. +- [x] A02: Adaptive iteration limits based on target complexity and model latency; expose telemetry. (Standalone) Added iteration budget helper (strix/agents/iteration_policy.py) and wired CLI/TUI/bot to set max_iterations + tracer metadata; BaseAgent records policy; updated docs/agent-loop.md. +- [x] A03: Resumeable agent state (persist tool queue, memory, tracer) to survive restarts. (Standalone) Added AgentState save/load helpers and BaseAgent persistence hooks (state snapshots to run dir); tests/agents/test_state_persistence.py; documented in docs/agent-loop.md. +- [ ] A04: Strategy presets (aggressive/exploratory/compliance) selectable via CLI/bot flags. (Depends: A01) +- [ ] A05: Memory management improvements (hierarchical summarization, eviction policy tuning). (Standalone) +- [ ] A06: Action budget guardrail (tokens/time/tool invocations) per run with overrides and reporting. (Standalone) +- [ ] A07: Agent self-evaluation prompts to prune bad tool paths and refocus on target goals. 
(Standalone) +- [ ] A08: Multi-model consensus mode to reduce hallucinations for high-risk findings. (Standalone) + +## Tooling & Coverage +- [x] T01: Add SAST/dep scanning tool (e.g., Semgrep/Trivy) with parsers into unified findings. (Standalone) Added SAST/deps tool (strix/tools/sast/*), registry wiring, docs/tools-and-extensions.md, and tests (tests/tools/test_sast_tool.py). +- [x] T02: Browser automation enhancements (network capture/har timing, screenshot diffs). (Standalone) Added network logging + screenshot diff in browser tool (browser_instance/tab_manager/browser_actions/schema); new actions get_network_events/capture_screenshot_diff. +- [x] T03: API probing tool (OpenAPI/Swagger ingestion, auth flows, fuzzing of endpoints). (Standalone) Added OpenAPI loader + fuzz suggestion tool (strix/tools/api_probe/*), registry wiring, docs/tools-and-extensions.md, tests/tools/test_api_probe_tool.py. +- [x] T04: Auth-focused playbooks (OIDC/SAML/SSO) with reusable prompt/tool bundles. (Standalone) Added auth playbook prompt module (strix/prompts/auth/oidc_saml_sso.jinja) + docs/prompts.md and tests/prompts/test_auth_playbook_prompt.py. +- [x] T05: Reporting enrichment (CVSS estimation, fix-by snippets, references to CWE/OWASP). (Standalone) Reporting tool now accepts CVSS/fix/references/CWE metadata (reporting_actions/schema, tracer persistence, SARIF/CSV/JSONL); docs/tools-and-extensions.md; tests/tools/test_reporting_enrichment.py. +- [x] T06: Structured finding export (SARIF/JSONL) for CI upload. (Standalone) Added JSONL vulnerability export and SARIF 2.1.0 writer in tracer save_run_data/_build_sarif_report (strix/telemetry/tracer.py); tuned SARIF driver metadata to avoid assumed URLs and normalized runName serialization; documented structured exports in docs/telemetry-and-observability.md and docs/setup-and-running.md; validated end-to-end via tracer run output. +- [ ] T07: Offline mode with cached model responses (for deterministic regression fixtures). 
(Depends: A03) +- [ ] T08: Advanced redaction policies (PII, keys, JWTs) configurable per run. (Standalone) +- [ ] T09: Prompted codegen tool for quick patch proposals with diff output. (Standalone) +- [ ] T10: Auto-target discovery (sitemaps/robots/crawling) feeding agent planning. (Standalone) +- [ ] T11: Secrets exfil detection tool (simulate attacker to validate data exposure). (Standalone) +- [ ] T12: Mobile/API auth testing harness (JWT/PKCE/refresh token misuse checks). (Standalone) + +## Performance & Scale +- [x] P01: Concurrent multi-target orchestration with resource budgeting per target. (Standalone) Added run concurrency helper (strix/interface/run_manager.py) for limited parallel target tasks. +- [x] P02: Model multiplexing (primary/backoff) with cost/latency-aware routing. (Standalone) Added MultiplexingLLM router (strix/llm/router.py) with fallback/backoff support and tests/llm/test_router.py. +- [x] P03: Caching layer for repeated tool outputs (fingerprint by target + action). (Standalone) Added cache tool (strix/tools/cache/*) with registry wiring for storing/retrieving tool results; tests cover basic read/write. +- [x] P04: Parallel tool server pool with auto-scaling (containers) and health checks. (Standalone) Added tool pool helper (strix/runtime/tool_pool.py) with health tracking; tests/runtime/test_tool_pool.py. +- [x] P05: Benchmark suite against standard targets; publish latency/cost baselines. (Standalone) Added benchmark helper (strix/runtime/benchmark.py) and test/runtime/test_benchmark.py for timing harness. +- [ ] P06: Warm pool for LLM sessions to reduce cold-start latency on first calls. (Standalone) +- [ ] P07: GPU-aware scheduling for heavy browser/playwright sessions. (Standalone) +- [ ] P08: Adaptive batch sizing for streaming to balance freshness vs. rate limits. (Standalone) + +## Observability & Safety +- [ ] O01: Structured logging across agent, tools, and bot with trace/run IDs. 
(Standalone) +- [ ] O02: Metrics exporter (Prometheus/OpenTelemetry) covering runs, tools, errors, latency, cost. (Standalone) +- [ ] O03: Alerting rules for failures (LLM errors, tool crashes, delivery issues). (Depends: O02) +- [ ] O04: Audit trail for tool invocations and file writes (tamper-evident). (Standalone) +- [ ] O05: Policy engine to gate risky actions (write/exec) with user/bot confirmations. (Standalone) +- [ ] O06: User-facing run timeline view with step durations and tool outcomes. (Depends: O01) +- [ ] O07: PII/secret leak detector on outbound LLM/tool payloads with block/allow overrides. (Standalone) +- [ ] O08: Cost dashboard per run/target with model/tool breakdown. (Depends: O02) + +## UX: CLI/TUI +- [ ] U01: CLI presets and config profiles (YAML) for repeatable runs. (Standalone) +- [ ] U02: Rich TUI logs with search, filters, and jump-to finding. (Standalone) +- [ ] U03: Interactive remediation mode (apply suggested patches with confirm/rollback). (Depends: T09) +- [ ] U04: Better non-interactive output (JSON schema stable, machine-consumable). (Standalone) +- [ ] U05: Inline links to docs and playbooks from CLI/TUI errors. (Standalone) +- [ ] U06: Export TUI session transcript (with redaction) for sharing/debugging. (Standalone) +- [ ] U07: Colorblind-friendly themes and accessibility pass for TUI/CLI output. (Standalone) +- [ ] U08: CLI wizard for first-time setup (env validation, sample run). (Standalone) + +## UX: Telegram Bot & Integrations +- [ ] B01: Webhook IP allowlist enforcement and bot feature flags. (Standalone) +- [ ] B02: Web UI companion (read-only) for browsing runs/files/reports. (Standalone) +- [ ] B03: Slack/Teams adapter sharing bot command surface. (Standalone) +- [ ] B04: Scheduled reports to chat (daily/weekly summaries). (Standalone) +- [ ] B05: Quick actions for retrying failed tools or re-running with different presets. (Standalone) +- [ ] B06: Inline search for findings within a run (by severity/CWE/keyword). 
(Depends: D04)
+- [ ] B07: Multi-target run creation from chat (comma-separated URLs) with per-target status buttons. (Standalone)
+- [ ] B08: Bot command audit export (CSV/JSON) for compliance. (Depends: O04)
+
+## Data & Storage
+- [ ] D01: Run metadata store (SQLite/Postgres) with query API for targets, findings, timestamps. (Standalone)
+- [ ] D02: Artifact retention policies (TTL, archiving to S3/Blob) and cleanup jobs. (Depends: D01)
+- [ ] D03: Encrypted-at-rest option for `strix_runs` and secrets. (Standalone)
+- [ ] D04: Deduplicated finding store across runs (fingerprints). (Depends: D01)
+- [ ] D05: Incremental sync/backup of runs to object storage with integrity hashes. (Depends: D01)
+- [ ] D06: Finding triage states (open/mitigated/false-positive) persisted and exported. (Depends: D01)
+- [ ] D07: Data catalog of targets/runs with tagging (env/team/compliance level). (Depends: D01)
+
+## Quality & Testing
+- [ ] Q01: Golden-run fixtures for deterministic regression (mock LLM/tool responses). (Standalone)
+- [ ] Q02: Integration tests per tool against canned targets. (Depends: T01–T12)
+- [ ] Q03: Load tests for high-volume vuln streaming (bot + CLI). (Depends: P08)
+- [ ] Q04: Chaos testing for tool-server and LLM outages. (Standalone)
+- [ ] Q05: Benchmark-based CI gate (fail if latency/cost regress beyond thresholds). (Depends: P05)
+- [ ] Q06: Property-based tests for tool schema validation and renderer pairing. (Standalone)
+- [ ] Q07: Fuzzing inputs for API probing and file parsing tools. (Depends: T03, T12)
+- [ ] Q08: Shadow-mode runs comparing new vs. stable prompts/tools before promotion. (Depends: A01)
+
+## Docs & Developer Experience
+- [ ] X01: Developer portal page linking all docs, playbooks, and templates. (Standalone)
+- [ ] X02: ADR/RFC cadence with templates and review checklist. (Standalone)
+- [ ] X03: β€œHow to add a tool” quickstart with example PR and tests. 
(Depends: T01)
+- [ ] X04: Migration guides for new model providers or runtime changes. (Standalone)
+- [ ] X05: Troubleshooting decision trees per subsystem (LLM, tools, runtime, bot). (Standalone)
+- [ ] X06: Cookbook of end-to-end recipes (e.g., β€œscan monolith web app”, β€œAPI-first target”, β€œSSO target”). (Standalone)
+- [ ] X07: Video/animated walkthroughs for setup and first run. (Standalone)
+- [ ] X08: Localization-ready docs structure for future translations. (Standalone)
+
+## Security & Compliance
+- [ ] S01: Threat model update including bot surface and webhook ingress. (Standalone)
+- [ ] S02: Supply-chain scanning of dependencies and base images in CI. (Standalone)
+- [ ] S03: Secrets scanning guard in repo and runtime paths. (Standalone)
+- [ ] S04: Audit log export to SIEM (JSONL/OTLP). (Depends: O04)
+- [ ] S05: RBAC for bot and API (per-command permissions). (Depends: D01)
+- [ ] S06: mTLS option for webhook ingress and internal control API. (Standalone)
+- [ ] S07: DLP hooks on report/file export (block sensitive data exfiltration). (Depends: D04)
+- [ ] S08: Privacy mode to mask/redact target-identifying data in logs/streams. (Depends: O07)
diff --git a/docs/tools-and-extensions.md b/docs/tools-and-extensions.md
new file mode 100644
index 00000000..5c33887c
--- /dev/null
+++ b/docs/tools-and-extensions.md
@@ -0,0 +1,44 @@
+# Tools and Extensions
+
+## Tool contract
+- Each tool has an XML action schema (`*_actions_schema.xml`) describing available actions and arguments.
+- Python implementations live alongside schemas (e.g., `tools/browser/browser_actions.py`).
+- `tools/registry.py` registers tool classes; `tools/executor.py` executes invocations.
+- Tool outputs feed into UI renderers (`interface/tool_components/*`) for visualization.
+
+## Tool catalog (purpose)
+- `agents_graph`: manage/render agent graph data.
+- `browser`: Playwright-driven browsing, tabs, interactions.
+- `file_edit`: apply edits to files. 
+- `finish`: signal task completion.
+- `notes`: capture structured notes.
+- `proxy`: HTTP proxy controls.
+- `python`: execute sandboxed Python.
+- `reporting`: reporting actions (vuln reports with severity + optional CVSS/CWE/references/fix hints).
+- `sast`: lightweight static and dependency scanning (Python patterns + unpinned deps).
+- `api_probe`: load OpenAPI specs and suggest fuzz payloads for endpoints.
+- `terminal`: terminal session actions.
+- `thinking`: internal deliberation steps.
+- `web_search`: web search actions.
+
+## Adding a tool
+1) Create folder under `tools/<tool_name>/`.
+2) Write schema XML defining actions and args.
+3) Implement actions in `<tool_name>_actions.py`; ensure safe defaults/timeouts.
+4) Register in `tools/registry.py`.
+5) Add renderer in `interface/tool_components/` if UI output needed.
+6) Add tests (unit + integration) covering schema parsing and execution.
+7) Document in this file and update CLI help if flags added.
+
+## Safeguards
+- Respect execution limits/timeouts in implementations.
+- Validate inputs from LLM; sanitize file paths and network targets where applicable.
+- Log via tracer for observability.
+
+## Renderers
+- Base renderer in `interface/tool_components/base_renderer.py`.
+- Specialized renderers map tool outputs to TUI panels (browser, proxy, terminal, reporting, etc.).
+- Register renderers in `interface/tool_components/registry.py`.
+
+## Maintenance
+- Update catalog when adding/removing tools; keep schemas and registry references in sync; refresh safety notes when execution constraints change.
diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md
new file mode 100644
index 00000000..c165faa3
--- /dev/null
+++ b/docs/troubleshooting.md
@@ -0,0 +1,40 @@
+# Troubleshooting
+
+## Common issues and fixes
+- Docker daemon not running: start service; confirm `docker ps` works.
+- Cannot pull/run images: check permissions/registry auth; pre-pull image. 
+- Playwright missing: run `python -m playwright install --with-deps`.
+- Invalid/missing LLM key: set `STRIX_LLM` and `LLM_API_KEY`; verify base URL for proxy.
+- LLM timeouts/slow responses: increase provider timeout in config; check network; reduce parallelism.
+- Port conflicts for proxy/browser tools: free the port or configure alternate.
+- Missing outputs: ensure `strix_runs/` writable; check tracer logs.
+- Telegram bot webhook not responding: verify `WEBHOOK_URL`, TLS, and bot token; check service logs.
+- Telegram bot rate-limited: slow down commands; adjust limiter if necessary.
+- Telegram report send fails: ensure report file exists, size under Telegram limit, and bot has FS access.
+- Telegram resume fails: resume is not supported with current agent state; restart a new run instead.
+- Streaming messages not arriving: ensure run started via bot so tracer callback is set; check chat allowlist and bot logs.
+- Streaming misses low severity findings: expected when verbosity is high-only; set `/verbosity full` to receive all, or `batched` to group them.
+- Bot fails to start: ensure `BOT_TOKEN`, `WEBHOOK_URL`, and `ALLOWLIST_IDS` are set.
+- HTTP `/health` or `/healthz` unreachable: check `BOT_HTTP_PORT`/`BOT_HTTP_HOST` values and systemd service logs.
+- HTTP `/metrics` unreachable: set `BOT_HTTP_PORT`/`BOT_HTTP_HOST` or use `/metrics` command in chat.
+- Using `BOT_TOKEN_FILE`: ensure the file is readable, contains only the token, and path is correct.
+- Sensitive content showing in streamed messages: redaction masks some token prefixes; avoid instructing bot to send secrets or use full report manually with care.
+- HTTP `/metrics` returns 403: set `BOT_HTTP_TOKEN` and include `Authorization: Bearer <token>`.
+- Delivery alerts: if using `BOT_ALERT_WEBHOOK`, verify the endpoint is reachable and inspect received payloads for failures. 
+- Resume fails: the bot can only reattach streaming to runs that are still active; if the run is finished or not found, start a new run. + +## Logs and locations +- Run artifacts under `strix_runs/`. +- Use tracer output (CLI/TUI) for tool/vulnerability events. +- Check docker container logs for runtime/tool_server failures. +- Bot service logs (systemd) for webhook/command handling issues. + +## Escalation decision tree +1) Identify failing component (LLM, docker, tool server, UI). +2) Re-run with `-n` to simplify UI surface. +3) Enable verbose logging (add temporary prints/logging in failing module). +4) If external provider issue persists, switch to alternate model/provider. +5) Capture minimal repro and add to regression tests. + +## Maintenance +- Add new failure modes as they surface; keep log locations updated if paths change. diff --git a/packaging/systemd/strix-bot.service b/packaging/systemd/strix-bot.service new file mode 100644 index 00000000..a598928f --- /dev/null +++ b/packaging/systemd/strix-bot.service @@ -0,0 +1,21 @@ +[Unit] +Description=Strix Telegram Bot +After=network.target + +[Service] +Type=simple +WorkingDirectory=/opt/strix +# Optionally point to a file that exports BOT_TOKEN, WEBHOOK_URL, ALLOWLIST_IDS, etc. +EnvironmentFile=-/etc/strix/bot.env +ExecStart=/usr/bin/env strix-bot --mode strix +Restart=on-failure +RestartSec=5 +User=strix +Group=strix +KillSignal=SIGINT +TimeoutStopSec=20 +StandardOutput=journal +StandardError=journal + +[Install] +WantedBy=multi-user.target diff --git a/scripts/bot_load_test.py b/scripts/bot_load_test.py new file mode 100644 index 00000000..3166574d --- /dev/null +++ b/scripts/bot_load_test.py @@ -0,0 +1,84 @@ +""" +Lightweight load test for the Telegram bot formatting/streaming pipeline. + +This does not hit Telegram. It stresses the formatting/batching logic used in the +streaming callback to ensure vulnerability bursts are handled quickly. 
+ +Usage: + poetry run python scripts/bot_load_test.py --events 1000 --concurrency 10 --mode batched +""" + +import argparse +import asyncio +import time +from typing import List + +SEVERITY_ICON = {"critical": "πŸ”₯", "high": "πŸ”΄", "medium": "🟠", "low": "🟒", "info": "ℹ️"} +MAX_MESSAGE_CHARS = 3500 + + +def format_alert(sev: str, title: str, content: str) -> str: + icon = SEVERITY_ICON.get(sev, "ℹ️") + text = f"{icon} *{sev.upper()}* {title}\n```\n{content}\n```" + if len(text) > MAX_MESSAGE_CHARS: + text = text[:MAX_MESSAGE_CHARS] + "\n\n(truncated)" + return text + + +async def worker(queue: asyncio.Queue, mode: str, batch_size: int) -> int: + sent = 0 + batch: List[str] = [] + while True: + item = await queue.get() + if item is None: + if batch: + sent += len(batch) + queue.task_done() + break + sev, title, content = item + msg = format_alert(sev, title, content) + if mode == "batched": + batch.append(msg) + if len(batch) >= batch_size: + sent += len(batch) + batch.clear() + else: + sent += 1 + queue.task_done() + return sent + + +async def run_load(events: int, concurrency: int, mode: str, batch_size: int) -> None: + queue: asyncio.Queue = asyncio.Queue() + for i in range(events): + sev = ["critical", "high", "medium", "low", "info"][i % 5] + queue.put_nowait( + ( + sev, + f"title {i}", + "A" * 4000, # long content to trigger truncation path + ) + ) + for _ in range(concurrency): + queue.put_nowait(None) + tasks = [asyncio.create_task(worker(queue, mode, batch_size)) for _ in range(concurrency)] + start = time.perf_counter() + await queue.join() + elapsed = time.perf_counter() - start + sent = sum(t.result() for t in tasks) + rate = sent / elapsed if elapsed else sent + print(f"Processed {events} events in {elapsed:.2f}s ({rate:.1f} msg/s) mode={mode} batch={batch_size}") + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument("--events", type=int, default=1000) + parser.add_argument("--concurrency", type=int, default=10) + 
parser.add_argument("--mode", choices=["full", "batched"], default="batched") + parser.add_argument("--batch-size", type=int, default=50) + args = parser.parse_args() + asyncio.run(run_load(args.events, args.concurrency, args.mode, args.batch_size)) + + +if __name__ == "__main__": + main() diff --git a/strix/agents/base_agent.py b/strix/agents/base_agent.py index 67aeb383..b0b9ddaf 100644 --- a/strix/agents/base_agent.py +++ b/strix/agents/base_agent.py @@ -57,6 +57,10 @@ def __init__(self, config: dict[str, Any]): self.local_sources = config.get("local_sources", []) self.non_interactive = config.get("non_interactive", False) + self.persist_state = config.get("persist_state", True) + self.state_path = Path(config["state_path"]) if config.get("state_path") else None + self.state_load_path = config.get("load_state_from") + self.iteration_policy = config.get("iteration_policy") if "max_iterations" in config: self.max_iterations = config["max_iterations"] @@ -68,7 +72,9 @@ def __init__(self, config: dict[str, Any]): self.llm = LLM(self.llm_config, agent_name=self.agent_name) state_from_config = config.get("state") - if state_from_config is not None: + if self.state_load_path: + self.state = AgentState.load_from_path(self.state_load_path) + elif state_from_config is not None: self.state = state_from_config else: self.state = AgentState( @@ -84,6 +90,8 @@ def __init__(self, config: dict[str, Any]): tracer = get_global_tracer() if tracer: + if self.iteration_policy: + tracer.set_iteration_policy(self.iteration_policy) tracer.log_agent_creation( agent_id=self.state.agent_id, name=self.state.agent_name, @@ -145,6 +153,31 @@ def _add_to_agents_graph(self) -> None: if self.state.parent_id is None and agents_graph_actions._root_agent_id is None: agents_graph_actions._root_agent_id = self.state.agent_id + def _get_state_path(self, tracer: Optional["Tracer"]) -> Optional[Path]: + if not self.persist_state: + return None + + if self.state_path: + return self.state_path + + if 
tracer: + try: + return tracer.get_run_dir() / f"{self.state.agent_id}_state.json" + except Exception: # noqa: BLE001 + return None + + return Path.cwd() / f"{self.state.agent_id}_state.json" + + def _persist_state_snapshot(self, tracer: Optional["Tracer"]) -> None: + path = self._get_state_path(tracer) + if not path: + return + + try: + self.state.save_to_path(path) + except Exception: + logger.exception("Failed to persist agent state to %s", path) + def cancel_current_execution(self) -> None: if self._current_task and not self._current_task.done(): self._current_task.cancel() @@ -156,6 +189,7 @@ async def agent_loop(self, task: str) -> dict[str, Any]: # noqa: PLR0912, PLR09 from strix.telemetry.tracer import get_global_tracer tracer = get_global_tracer() + self._persist_state_snapshot(tracer) while True: self._check_agent_messages(self.state) @@ -166,6 +200,7 @@ async def agent_loop(self, task: str) -> dict[str, Any]: # noqa: PLR0912, PLR09 if self.state.should_stop(): if self.non_interactive: + self._persist_state_snapshot(tracer) return self.state.final_result or {} await self._enter_waiting_state(tracer) continue @@ -205,6 +240,7 @@ async def agent_loop(self, task: str) -> dict[str, Any]: # noqa: PLR0912, PLR09 try: should_finish = await self._process_iteration(tracer) + self._persist_state_snapshot(tracer) if should_finish: if self.non_interactive: self.state.set_completed({"success": True}) diff --git a/strix/agents/graph_builder.py b/strix/agents/graph_builder.py new file mode 100644 index 00000000..46a8db1d --- /dev/null +++ b/strix/agents/graph_builder.py @@ -0,0 +1,129 @@ +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +from pydantic import BaseModel, Field, ValidationError, model_validator + + +class GraphBuilderError(ValueError): + """Raised when an agent graph definition cannot be parsed or validated.""" + + +class AgentNodeSpec(BaseModel): + id: str = Field(..., description="Unique agent 
identifier") + name: str = Field(..., description="Display name for the agent") + task: str = Field(..., description="Task or objective for the agent") + parent_id: str | None = Field( + default=None, description="Parent agent id; root agents omit this." + ) + prompt_modules: list[str] = Field(default_factory=list) + max_iterations: int | None = Field( + default=None, description="Optional per-agent iteration cap override" + ) + + +class AgentGraphSpec(BaseModel): + agents: list[AgentNodeSpec] + + @model_validator(mode="after") + def _validate_graph(self) -> "AgentGraphSpec": + if not self.agents: + raise ValueError("At least one agent must be defined") + + ids = {agent.id for agent in self.agents} + if len(ids) != len(self.agents): + raise ValueError("Agent ids must be unique") + + roots = [agent for agent in self.agents if agent.parent_id is None] + if len(roots) != 1: + raise ValueError("Exactly one root agent (parent_id omitted) is required") + + for agent in self.agents: + if agent.parent_id and agent.parent_id not in ids: + raise ValueError(f"Agent '{agent.id}' references unknown parent '{agent.parent_id}'") + + return self + + @property + def root(self) -> AgentNodeSpec: + for agent in self.agents: + if agent.parent_id is None: + return agent + raise GraphBuilderError("Root agent not found after validation") + + def as_graph_dict(self) -> dict[str, Any]: + nodes = [] + edges = [] + + for agent in self.agents: + nodes.append( + { + "id": agent.id, + "name": agent.name, + "task": agent.task, + "parent_id": agent.parent_id, + "prompt_modules": agent.prompt_modules, + "max_iterations": agent.max_iterations, + "status": "planned", + } + ) + if agent.parent_id: + edges.append({"from": agent.parent_id, "to": agent.id, "type": "delegation"}) + + return {"nodes": nodes, "edges": edges} + + def build_agent_configs(self, base_config: dict[str, Any] | None = None) -> list[dict[str, Any]]: + base_config = base_config.copy() if base_config else {} + + configs: 
list[dict[str, Any]] = [] + for agent in self.agents: + cfg = base_config.copy() + cfg["agent_id"] = agent.id + cfg["agent_name"] = agent.name + if agent.max_iterations is not None: + cfg["max_iterations"] = agent.max_iterations + if agent.prompt_modules: + cfg["llm_prompt_modules"] = agent.prompt_modules + cfg["parent_id"] = agent.parent_id + cfg["task"] = agent.task + configs.append(cfg) + return configs + + +def _load_yaml(path: Path) -> dict[str, Any]: + try: + import yaml # type: ignore + except ImportError as exc: # pragma: no cover - depends on optional dep + raise GraphBuilderError( + "PyYAML is required to load YAML agent graph definitions. " + "Install with `pip install pyyaml` or supply JSON." + ) from exc + + with path.open("r", encoding="utf-8") as f: + return yaml.safe_load(f) or {} + + +def load_graph_spec(path: str | Path) -> AgentGraphSpec: + path_obj = Path(path) + if not path_obj.exists(): + raise GraphBuilderError(f"Graph file not found: {path_obj}") + + suffix = path_obj.suffix.lower() + if suffix in {".yaml", ".yml"}: + raw = _load_yaml(path_obj) + else: + raw = json.loads(path_obj.read_text(encoding="utf-8")) + + return parse_graph_spec(raw) + + +def parse_graph_spec(raw: dict[str, Any]) -> AgentGraphSpec: + if "agents" not in raw: + raise GraphBuilderError("Graph definition must contain an 'agents' list") + + try: + return AgentGraphSpec(**raw) + except ValidationError as exc: # pragma: no cover - pydantic provides details + raise GraphBuilderError(str(exc)) from exc diff --git a/strix/agents/iteration_policy.py b/strix/agents/iteration_policy.py new file mode 100644 index 00000000..ece0eb86 --- /dev/null +++ b/strix/agents/iteration_policy.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +from typing import Any + +DEFAULT_BASE = 300 +MIN_CAP = 180 +MAX_CAP = 600 + + +def calculate_iteration_budget( + targets: list[dict[str, Any]] | None, + llm_timeout: int | None, + base: int = DEFAULT_BASE, +) -> dict[str, Any]: + targets = 
targets or [] + target_count = len(targets) + + weight = 0 + for target in targets: + target_type = target.get("type", "") + if target_type in {"repository", "web_application"}: + weight += 2 + elif target_type in {"local_code", "ip_address"}: + weight += 1 + + latency_adj = 0 + if llm_timeout: + if llm_timeout > 900: + latency_adj = 60 + elif llm_timeout > 600: + latency_adj = 40 + elif llm_timeout > 300: + latency_adj = 20 + + budget = base + (weight * 20) + latency_adj + budget = max(MIN_CAP, min(MAX_CAP, budget)) + + return { + "max_iterations": budget, + "inputs": { + "target_count": target_count, + "target_weight": weight, + "llm_timeout": llm_timeout, + "base": base, + "latency_adjustment": latency_adj, + }, + "rationale": ( + "Scaled iterations based on target mix and LLM timeout; " + f"clamped to [{MIN_CAP}, {MAX_CAP}]" + ), + } diff --git a/strix/agents/state.py b/strix/agents/state.py index 81ac6572..4d56d7c6 100644 --- a/strix/agents/state.py +++ b/strix/agents/state.py @@ -1,5 +1,7 @@ +import json import uuid from datetime import UTC, datetime +from pathlib import Path from typing import Any from pydantic import BaseModel, Field @@ -161,3 +163,15 @@ def get_execution_summary(self) -> dict[str, Any]: "has_errors": len(self.errors) > 0, "max_iterations_reached": self.has_reached_max_iterations() and not self.completed, } + + def save_to_path(self, path: str | Path) -> Path: + path_obj = Path(path) + path_obj.parent.mkdir(parents=True, exist_ok=True) + path_obj.write_text(self.model_dump_json(), encoding="utf-8") + return path_obj + + @classmethod + def load_from_path(cls, path: str | Path) -> "AgentState": + path_obj = Path(path) + data = json.loads(path_obj.read_text(encoding="utf-8")) + return cls.model_validate(data) diff --git a/strix/bot/__init__.py b/strix/bot/__init__.py new file mode 100644 index 00000000..d9dc96cc --- /dev/null +++ b/strix/bot/__init__.py @@ -0,0 +1 @@ +# Telegram bot package placeholder. 
diff --git a/strix/bot/config.py b/strix/bot/config.py new file mode 100644 index 00000000..26c61d83 --- /dev/null +++ b/strix/bot/config.py @@ -0,0 +1,100 @@ +import os +from dataclasses import dataclass +from pathlib import Path +from typing import List, Dict + + +@dataclass +class TelegramBotConfig: + bot_token: str + webhook_url: str + allowlisted_user_ids: List[int] + db_path: str = "bot_state.sqlite" + root_path: str = "." + http_host: str | None = None + http_port: int | None = None + http_token: str | None = None + alert_webhook: str | None = None + rate_limit_seconds: float = 1.0 + global_rate_limit_seconds: float = 0.5 + default_verbosity: str = "high-only" + + +def _load_env_file(path: Path) -> Dict[str, str]: + env: Dict[str, str] = {} + if not path.exists(): + return env + for line in path.read_text(encoding="utf-8").splitlines(): + line = line.strip() + if not line or line.startswith("#") or "=" not in line: + continue + key, val = line.split("=", 1) + env[key.strip()] = val.strip() + return env + + +def load_config() -> TelegramBotConfig: + file_env = _load_env_file(Path(".env")) + + def getenv(name: str, default: str = "") -> str: + if name in os.environ: + return os.getenv(name, default) or default + return file_env.get(name, default) + + token = getenv("BOT_TOKEN", "").strip() + token_file = getenv("BOT_TOKEN_FILE", "").strip() + if not token and token_file: + try: + token = Path(token_file).read_text(encoding="utf-8").strip() + except OSError as exc: + raise ValueError(f"Failed to read BOT_TOKEN_FILE: {exc}") from exc + webhook = getenv("WEBHOOK_URL", "").strip() + allowlist_raw = getenv("ALLOWLIST_IDS", "") + allowlist: list[int] = [] + for item in allowlist_raw.split(","): + item = item.strip() + if not item: + continue + try: + allowlist.append(int(item)) + except ValueError: + continue + + http_host = getenv("BOT_HTTP_HOST") + http_port_raw = getenv("BOT_HTTP_PORT") + http_port: int | None = None + if http_port_raw: + try: + http_port = 
int(http_port_raw) + except ValueError: + http_port = None + + http_token = getenv("BOT_HTTP_TOKEN") + alert_webhook = getenv("BOT_ALERT_WEBHOOK") + rate_limit_seconds = float(getenv("BOT_RATE_LIMIT", "1.0")) + global_rate_limit_seconds = float(getenv("BOT_GLOBAL_RATE_LIMIT", "0.5")) + default_verbosity = getenv("BOT_DEFAULT_VERBOSITY", "high-only") + + cfg = TelegramBotConfig( + bot_token=token, + webhook_url=webhook, + allowlisted_user_ids=allowlist, + db_path=getenv("BOT_DB_PATH", "bot_state.sqlite"), + root_path=getenv("STRIX_ROOT", "."), + http_host=http_host, + http_port=http_port, + http_token=http_token, + alert_webhook=alert_webhook, + rate_limit_seconds=rate_limit_seconds, + global_rate_limit_seconds=global_rate_limit_seconds, + default_verbosity=default_verbosity, + ) + + if not cfg.bot_token: + raise ValueError("BOT_TOKEN is required") + if not cfg.webhook_url: + raise ValueError("WEBHOOK_URL is required") + if not cfg.allowlisted_user_ids: + raise ValueError("ALLOWLIST_IDS is required (comma-separated Telegram user IDs)") + + return cfg diff --git a/strix/bot/control_api.py b/strix/bot/control_api.py new file mode 100644 index 00000000..19a59320 --- /dev/null +++ b/strix/bot/control_api.py @@ -0,0 +1,58 @@ +from dataclasses import dataclass +from typing import Any, Dict, List, Optional, Callable + + +@dataclass +class RunInfo: + run_id: str + target: str + status: str + severity_summary: Optional[Dict[str, int]] = None + started_at: Optional[str] = None + instruction: Optional[str] = None + + +class ControlAPI: + """ + Thin abstraction for bot handlers to interact with Strix without spawning CLI. + Implementations should wire into existing Strix internals. 
+ """ + + def start_run( + self, + target: str, + instruction: str | None = None, + verbosity: str | None = None, + stream_callback: Optional[Callable[[str, str, str, str], None]] = None, + ) -> RunInfo: + raise NotImplementedError + + def list_runs(self, limit: int = 20) -> List[RunInfo]: + raise NotImplementedError + + def get_run_info(self, run_id: str) -> RunInfo | None: + raise NotImplementedError + + def tail_logs(self, run_id: str, offset: int = 0, limit: int = 200) -> List[str]: + raise NotImplementedError + + def get_report_summary(self, run_id: str) -> str: + raise NotImplementedError + + def get_report_file(self, run_id: str) -> str | None: + raise NotImplementedError + + def get_file_metadata(self, run_id: str, path: str) -> tuple[str, int] | None: + raise NotImplementedError + + def list_files(self, run_id: str, path: str = "") -> List[Dict[str, Any]]: + raise NotImplementedError + + def read_file(self, run_id: str, path: str) -> bytes: + raise NotImplementedError + + def resume_run(self, run_id: str, stream_callback: Optional[Callable[[str, str, str, str], None]] = None) -> bool: + raise NotImplementedError + + def stop_run(self, run_id: str) -> bool: + raise NotImplementedError diff --git a/strix/bot/fs_api.py b/strix/bot/fs_api.py new file mode 100644 index 00000000..0f553b62 --- /dev/null +++ b/strix/bot/fs_api.py @@ -0,0 +1,148 @@ +from __future__ import annotations + +import os +import time +from pathlib import Path +from typing import Any, Dict, List + +from .control_api import ControlAPI, RunInfo + + +class FileSystemControlAPI(ControlAPI): + """ + File-system backed control API for read-only operations on existing runs. + Start/stop/resume are not implemented here and should be wired to Strix internals. 
+ """ + + def __init__(self, root_path: str | Path = ".", cache_ttl: float = 10.0) -> None: + self.root_path = Path(root_path).resolve() + self.runs_dir = self.root_path / "strix_runs" + self.cache_ttl = cache_ttl + self._runs_cache: list[RunInfo] = [] + self._runs_cache_ts: float = 0.0 + + def _run_path(self, run_id: str) -> Path: + return (self.runs_dir / run_id).resolve() + + def _safe_path(self, run_id: str, subpath: str = "") -> Path: + base = self._run_path(run_id) + target = (base / subpath).resolve() + if not str(target).startswith(str(base)): + raise ValueError("Invalid path") + return target + + def start_run( + self, + target: str, + instruction: str | None = None, + verbosity: str | None = None, + stream_callback: Optional[Callable[[str, str, str, str], None]] = None, + ) -> RunInfo: + raise NotImplementedError("Start run not implemented in FileSystemControlAPI.") + + def list_runs(self, limit: int = 20) -> List[RunInfo]: + now = time.monotonic() + if self._runs_cache and now - self._runs_cache_ts < self.cache_ttl: + return self._runs_cache[:limit] + + if not self.runs_dir.exists(): + return [] + entries = [ + (p, p.stat().st_mtime) + for p in self.runs_dir.iterdir() + if p.is_dir() + ] + entries.sort(key=lambda x: x[1], reverse=True) + runs: list[RunInfo] = [] + for path, _ in entries[:limit]: + runs.append( + RunInfo( + run_id=path.name, + target="unknown", + status="unknown", + ) + ) + self._runs_cache = runs + self._runs_cache_ts = now + return runs + + def get_run_info(self, run_id: str) -> RunInfo | None: + path = self._run_path(run_id) + if not path.exists(): + return None + return RunInfo(run_id=run_id, target="unknown", status="unknown") + + def tail_logs(self, run_id: str, offset: int = 0, limit: int = 200) -> List[str]: + path = self._safe_path(run_id) + log_candidates = [ + path / "stdout.log", + path / "logs.txt", + path / "log.txt", + path / "run.log", + ] + log_file = next((p for p in log_candidates if p.exists()), None) + if not 
log_file: + return [] + with open(log_file, "r", encoding="utf-8", errors="ignore") as f: + lines = f.readlines() + return [line.rstrip("\n") for line in lines[offset: offset + limit]] + + def get_report_summary(self, run_id: str) -> str: + path = self._safe_path(run_id) + candidates = [ + path / "report.txt", + path / "report.md", + path / "report.html", + ] + report_file = next((p for p in candidates if p.exists()), None) + if not report_file: + return "" + with open(report_file, "r", encoding="utf-8", errors="ignore") as f: + content = f.read() + return content[:4000] + + def get_report_file(self, run_id: str) -> str | None: + path = self._safe_path(run_id) + candidates = [ + path / "report.txt", + path / "report.md", + path / "report.html", + path / "report.json", + path / "report.pdf", + ] + report_file = next((p for p in candidates if p.exists()), None) + return str(report_file) if report_file else None + + def get_file_metadata(self, run_id: str, path: str) -> tuple[str, int] | None: + file_path = self._safe_path(run_id, path) + if not file_path.exists() or not file_path.is_file(): + return None + return str(file_path), file_path.stat().st_size + + def list_files(self, run_id: str, path: str = "") -> List[Dict[str, Any]]: + base = self._safe_path(run_id, path) + if not base.exists() or not base.is_dir(): + return [] + results: list[Dict[str, Any]] = [] + for entry in base.iterdir(): + results.append( + { + "name": entry.name, + "path": os.path.relpath(entry, self._run_path(run_id)), + "is_dir": entry.is_dir(), + "size": entry.stat().st_size, + } + ) + return results + + def read_file(self, run_id: str, path: str) -> bytes: + file_path = self._safe_path(run_id, path) + if not file_path.exists() or not file_path.is_file(): + raise FileNotFoundError("File not found") + return file_path.read_bytes() + + def resume_run(self, run_id: str) -> bool: + raise NotImplementedError("Resume not implemented in FileSystemControlAPI.") + + def stop_run(self, run_id: str) -> 
bool: + raise NotImplementedError("Stop not implemented in FileSystemControlAPI.") diff --git a/strix/bot/main.py b/strix/bot/main.py new file mode 100644 index 00000000..8e8c74a4 --- /dev/null +++ b/strix/bot/main.py @@ -0,0 +1,35 @@ +import argparse +import logging +import os + +from .config import load_config +from .fs_api import FileSystemControlAPI +from .service import run +from .strix_control_api import StrixControlAPI + + +def build_control_api(mode: str, root: str) -> object: + if mode == "fs": + return FileSystemControlAPI(root_path=root) + return StrixControlAPI(root_path=root) + + +def main() -> None: + parser = argparse.ArgumentParser(description="Strix Telegram bot service") + parser.add_argument( + "--mode", + choices=["strix", "fs"], + default=os.getenv("BOT_MODE", "strix"), + help="Control mode: strix (start/stop runs) or fs (read-only browsing).", + ) + args = parser.parse_args() + + logging.basicConfig(level=logging.INFO) + + cfg = load_config() + control_api = build_control_api(args.mode, cfg.root_path) + run(control_api, cfg) + + +if __name__ == "__main__": + main() diff --git a/strix/bot/service.py b/strix/bot/service.py new file mode 100644 index 00000000..ddb7d317 --- /dev/null +++ b/strix/bot/service.py @@ -0,0 +1,703 @@ +import asyncio +import json +import logging +import os +import time +from typing import Any, Callable, Optional + +from aiohttp import ClientSession, web +from aiogram import Bot, Dispatcher, F +from aiogram.enums import ParseMode +from aiogram.exceptions import TelegramBadRequest +from aiogram.filters import Command, CommandObject +from aiogram.types import CallbackQuery, InlineKeyboardButton, InlineKeyboardMarkup, Message + +from .config import TelegramBotConfig +from .control_api import ControlAPI +from .state import BotState + +logger = logging.getLogger(__name__) + +MAX_FILE_SIZE_BYTES = 45 * 1024 * 1024 # stay under Telegram limit with buffer +MAX_LIST_ITEMS = 12 +MAX_MESSAGE_CHARS = 3500 +SEVERITY_LEVEL = 
{"critical": 4, "high": 3, "medium": 2, "low": 1, "info": 0} +SEVERITY_ICON = {"critical": "πŸ”₯", "high": "πŸ”΄", "medium": "🟠", "low": "🟒", "info": "ℹ️"} +BATCH_INTERVAL_SECONDS = 5.0 +REDACT_PATTERNS = [("sk-", 3)] + + +def _is_allowed(user_id: int, config: TelegramBotConfig) -> bool: + return user_id in config.allowlisted_user_ids + + +def fetch_tail_page(control_api: ControlAPI, run_id: str, offset: int, page_size: int) -> tuple[list[str], bool, int]: + """Return a page of logs, whether more remain, and the next offset.""" + logs = control_api.tail_logs(run_id, offset=offset, limit=page_size + 1) + has_more = len(logs) > page_size + page = logs[:page_size] + next_offset = offset + len(page) + return page, has_more, next_offset + + +class RateLimiter: + def __init__(self, min_interval_seconds: float = 1.0) -> None: + self.min_interval = min_interval_seconds + self.last_seen: dict[int, float] = {} + + def allow(self, user_id: int) -> bool: + now = time.monotonic() + last = self.last_seen.get(user_id, 0.0) + if now - last < self.min_interval: + return False + self.last_seen[user_id] = now + return True + + +class GlobalRateLimiter: + def __init__(self, min_interval_seconds: float = 0.5) -> None: + self.min_interval = min_interval_seconds + self.last_seen = 0.0 + + def allow(self) -> bool: + now = time.monotonic() + if now - self.last_seen < self.min_interval: + return False + self.last_seen = now + return True + + +class Metrics: + def __init__(self) -> None: + self.counters: dict[str, int] = {} + self.errors: dict[str, int] = {} + self.latencies_ms: list[float] = [] + + def inc(self, name: str) -> None: + self.counters[name] = self.counters.get(name, 0) + 1 + + def error(self, name: str) -> None: + self.errors[name] = self.errors.get(name, 0) + 1 + + def add_latency(self, ms: float) -> None: + self.latencies_ms.append(ms) + + +class AlertSink: + def __init__(self, webhook: str | None = None) -> None: + self.webhook = webhook + + async def notify(self, kind: str, 
details: dict[str, Any]) -> None: + if not self.webhook: + return + payload = {"kind": kind, "details": details, "ts": time.time()} + try: + async with ClientSession() as session: + await session.post(self.webhook, json=payload, timeout=5) + except Exception: # noqa: BLE001 + logger.exception("Failed to send alert kind=%s", kind) + + +def create_dispatcher(control_api: ControlAPI, config: TelegramBotConfig) -> Dispatcher: + dp = Dispatcher() + rate_limiter = RateLimiter(min_interval_seconds=config.rate_limit_seconds) + global_limiter = GlobalRateLimiter(min_interval_seconds=config.global_rate_limit_seconds) + metrics = Metrics() + alert_sink = AlertSink(config.alert_webhook) + dp.metrics = metrics # type: ignore[attr-defined] + state = BotState(config.db_path) + + run_verbosity: dict[str, str] = {} + run_batches: dict[str, list[str]] = {} + batch_tasks: dict[str, asyncio.Task[Any]] = {} + doc_hint_ts: dict[int, float] = {} + + def redact(text: str) -> str: + masked = text + for prefix, visible in REDACT_PATTERNS: + idx = masked.find(prefix) + while idx != -1: + end = idx + len(prefix) + 16 + masked = masked[: idx + visible] + "***REDACTED***" + masked[end:] + idx = masked.find(prefix, end) + return masked + + dp["redact"] = redact + + def format_alert(sev: str, title: str, content: str) -> str: + icon = SEVERITY_ICON.get(sev, "ℹ️") + text = f"{icon} *{sev.upper()}* {title}\n```\n{content}\n```" + if len(text) > MAX_MESSAGE_CHARS: + text = text[:MAX_MESSAGE_CHARS] + "\n\n(truncated)" + return text + + def build_file_kb(run_id: str, files: list[dict[str, Any]], rel_path: str = "") -> list[list[InlineKeyboardButton]]: + kb_rows: list[list[InlineKeyboardButton]] = [] + if rel_path: + parent = os.path.normpath(os.path.join(rel_path, "..")) + kb_rows.append( + [ + InlineKeyboardButton( + text="⬆️ ..", + callback_data=f"file_nav:{run_id}:{'' if parent == '.' 
else parent}", + ) + ] + ) + for entry in files[:MAX_LIST_ITEMS]: + label = ("D " if entry.get("is_dir") else "F ") + entry.get("name", "") + entry_rel = entry.get("path", "") + action = "file_nav" if entry.get("is_dir") else "file_dl" + kb_rows.append( + [ + InlineKeyboardButton( + text=label, + callback_data=f"{action}:{run_id}:{entry_rel}", + ) + ] + ) + return kb_rows + + async def maybe_doc_hint(message: Message, topic: str = "troubleshooting") -> None: + now = time.monotonic() + chat_id = message.chat.id if message.chat else 0 + if now - doc_hint_ts.get(chat_id, 0.0) < 60: + return + doc_hint_ts[chat_id] = now + await message.answer(f"Need help? Try `/docs {topic}`", parse_mode=ParseMode.MARKDOWN) + + async def guard(message: Message, handler: Callable[[], Any]) -> None: + start = time.monotonic() + user_id = message.from_user.id if message.from_user else 0 + if not _is_allowed(user_id, config): + await message.answer("Access denied.") + return + if not global_limiter.allow(): + metrics.error("global_rate_limit") + await message.answer("System busy. Please retry shortly.") + return + if not rate_limiter.allow(user_id): + metrics.error("rate_limit") + await message.answer("Rate limited. Please slow down.") + return + metrics.inc("command") + red_text = redact(message.text or "") + logger.info("audit_command user_id=%s text=%s", user_id, red_text) + try: + await handler() + except Exception: # noqa: BLE001 + metrics.error("handler_error") + logger.exception("Handler error") + await message.answer("Unexpected error. 
Try again or see /docs troubleshooting.") + await maybe_doc_hint(message, topic="troubleshooting") + asyncio.create_task( + alert_sink.notify( + "handler_error", + {"user_id": user_id, "text": red_text}, + ) + ) + finally: + metrics.add_latency((time.monotonic() - start) * 1000) + + @dp.message(Command(commands=["start", "help"])) + async def cmd_help(message: Message) -> None: + async def run() -> None: + text = ( + "Strix Telegram bot.\n" + "/health\n" + "/newrun [instruction]\n" + "/runs [query]\n" + "/run info|tail|report|files|docs\n" + "/resume \n" + "/stop \n" + "/verbosity \n" + "/docs \n" + ) + await message.answer(text) + + await guard(message, run) + + @dp.message(Command(commands=["health"])) + async def cmd_health(message: Message) -> None: + async def run() -> None: + await message.answer("ok") + + await guard(message, run) + + @dp.message(Command(commands=["metrics"])) + async def cmd_metrics(message: Message) -> None: + async def run() -> None: + lines = ["Counters:"] + for k, v in metrics.counters.items(): + lines.append(f"{k}: {v}") + lines.append("Errors:") + for k, v in metrics.errors.items(): + lines.append(f"{k}: {v}") + await message.answer("\n".join(lines)) + + await guard(message, run) + + @dp.message(Command(commands=["newrun"])) + async def cmd_newrun(message: Message, command: CommandObject) -> None: + async def run() -> None: + args = command.args or "" + parts = args.split(" ", 1) + if not parts or not parts[0]: + await message.answer("Usage: /newrun [instruction]") + return + target = parts[0] + instruction = parts[1] if len(parts) > 1 else None + try: + chat_id = message.chat.id + run_id: Optional[str] = None + + async def flush_batch(rid: str) -> None: + await asyncio.sleep(BATCH_INTERVAL_SECONDS) + texts = run_batches.get(rid, []) + if not texts: + return + combined = "\n\n".join(texts) + if len(combined) > MAX_MESSAGE_CHARS: + combined = combined[:MAX_MESSAGE_CHARS] + "\n\n(truncated batch)" + await 
message.bot.send_message(chat_id=chat_id, text=combined, parse_mode=ParseMode.MARKDOWN) + run_batches[rid] = [] + batch_tasks.pop(rid, None) + + def stream_callback(report_id: str, title: str, content: str, severity: str) -> None: + sev = severity.lower() + level = SEVERITY_LEVEL.get(sev, 0) + mode = run_verbosity.get(run_id or "", state.get_verbosity(run_id or "") or "high-only") + if mode == "high-only" and level < 3: + return + red_title = redact(title) + red_content = redact(content) + text = format_alert(sev, red_title, red_content[:1200]) + if mode == "batched": + buf = run_batches.setdefault(run_id or "", []) + buf.append(text) + if run_id and run_id not in batch_tasks: + batch_tasks[run_id] = asyncio.create_task(flush_batch(run_id)) + else: + asyncio.create_task( + message.bot.send_message(chat_id=chat_id, text=text, parse_mode=ParseMode.MARKDOWN) + ) + + run_info = control_api.start_run(target=target, instruction=instruction, stream_callback=stream_callback) + run_id = run_info.run_id + logger.info("bot_run_started run_id=%s target=%s instruction=%s", run_id, target, (instruction or "").strip()) + default_mode = state.get_verbosity(run_id) or config.default_verbosity or "high-only" + run_verbosity[run_id] = default_mode + await message.answer(f"Started run {run_info.run_id} for target {run_info.target}") + except Exception as exc: # noqa: BLE001 + logger.exception("Failed to start run") + await message.answer(f"Failed to start run: {exc}") + await maybe_doc_hint(message) + + await guard(message, run) + + @dp.message(Command(commands=["runs"])) + async def cmd_runs(message: Message, command: CommandObject) -> None: + async def run() -> None: + query = (command.args or "").strip().lower() + runs = control_api.list_runs() + if query: + runs = [r for r in runs if query in r.run_id.lower() or query in r.target.lower()] + if not runs: + await message.answer("No runs found.") + return + kb_rows = [] + for r in runs[:MAX_LIST_ITEMS]: + kb_rows.append( + [ + 
InlineKeyboardButton( + text=f"{r.run_id} ({r.status})", + callback_data=f"run_info:{r.run_id}", + ) + ] + ) + await message.answer("Select a run:", reply_markup=InlineKeyboardMarkup(inline_keyboard=kb_rows)) + + await guard(message, run) + + @dp.message(Command(commands=["run"])) + async def cmd_run(message: Message, command: CommandObject) -> None: + async def run() -> None: + args = (command.args or "").split() + if len(args) < 2: + await message.answer("Usage: /run ") + await maybe_doc_hint(message) + return + run_id, subcmd = args[0], args[1] + if subcmd == "info": + info = control_api.get_run_info(run_id) + if not info: + await message.answer("Run not found.") + await maybe_doc_hint(message) + return + await message.answer(f"{info.run_id} - {info.target} - {info.status}") + elif subcmd == "tail": + logs, has_more, next_offset = fetch_tail_page(control_api, run_id, offset=0, page_size=50) + kb_rows: list[list[InlineKeyboardButton]] = [] + if has_more: + kb_rows.append( + [ + InlineKeyboardButton( + text="Tail more", + callback_data=f"tail_more:{run_id}:{next_offset}", + ) + ] + ) + await message.answer("\n".join(logs) if logs else "No logs.", reply_markup=InlineKeyboardMarkup(inline_keyboard=kb_rows) if kb_rows else None) + elif subcmd == "report": + summary = control_api.get_report_summary(run_id) + if summary and len(summary) > MAX_MESSAGE_CHARS: + summary = summary[:MAX_MESSAGE_CHARS] + "\n\n(truncated)" + kb = InlineKeyboardMarkup( + inline_keyboard=[ + [ + InlineKeyboardButton( + text="Send full report", + callback_data=f"report_full:{run_id}", + ) + ] + ] + ) + await message.answer(summary or "No report yet.", reply_markup=kb) + elif subcmd == "files": + files = control_api.list_files(run_id) + if not files: + await message.answer("No files.") + return + kb_rows = build_file_kb(run_id, files, rel_path="") + await message.answer("Select file or directory:", reply_markup=InlineKeyboardMarkup(inline_keyboard=kb_rows)) + elif subcmd == "docs": + await 
message.answer("Use /docs to fetch documentation excerpts.") + else: + await message.answer("Unknown subcommand.") + + await guard(message, run) + + @dp.message(Command(commands=["resume"])) + async def cmd_resume(message: Message, command: CommandObject) -> None: + async def run() -> None: + run_id = (command.args or "").strip() + if not run_id: + await message.answer("Usage: /resume ") + await maybe_doc_hint(message) + return + chat_id = message.chat.id + + def stream_callback(report_id: str, title: str, content: str, severity: str) -> None: + sev = severity.lower() + level = SEVERITY_LEVEL.get(sev, 0) + mode = run_verbosity.get(run_id, state.get_verbosity(run_id) or config.default_verbosity or "high-only") + if mode == "high-only" and level < 3: + return + text = format_alert(sev, redact(title), redact(content)[:1200]) + if mode == "batched": + buf = run_batches.setdefault(run_id, []) + buf.append(text) + if run_id not in batch_tasks: + batch_tasks[run_id] = asyncio.create_task(flush_batch_shared(run_id, chat_id)) + else: + asyncio.create_task(message.bot.send_message(chat_id=chat_id, text=text, parse_mode=ParseMode.MARKDOWN)) + + async def flush_batch_shared(rid: str, cid: int) -> None: + await asyncio.sleep(BATCH_INTERVAL_SECONDS) + texts = run_batches.get(rid, []) + if not texts: + return + combined = "\n\n".join(texts) + if len(combined) > MAX_MESSAGE_CHARS: + combined = combined[:MAX_MESSAGE_CHARS] + "\n\n(truncated batch)" + await message.bot.send_message(chat_id=cid, text=combined, parse_mode=ParseMode.MARKDOWN) + run_batches[rid] = [] + batch_tasks.pop(rid, None) + + try: + ok = control_api.resume_run(run_id, stream_callback=stream_callback) + if ok: + mode = state.get_verbosity(run_id) or config.default_verbosity or "high-only" + run_verbosity[run_id] = mode + await message.answer(f"Resumed streaming for {run_id} with verbosity {mode}.") + else: + await message.answer(f"Run {run_id} not active; cannot resume. 
Consider starting a new run.") + await maybe_doc_hint(message) + except NotImplementedError: + await message.answer("Resume is not supported yet.") + await maybe_doc_hint(message) + + await guard(message, run) + + @dp.message(Command(commands=["stop"])) + async def cmd_stop(message: Message, command: CommandObject) -> None: + async def run() -> None: + run_id = (command.args or "").strip() + if not run_id: + await message.answer("Usage: /stop ") + await maybe_doc_hint(message) + return + try: + if control_api.stop_run(run_id): + await message.answer(f"Stopped {run_id}") + else: + await message.answer(f"Could not stop {run_id}") + await maybe_doc_hint(message) + except NotImplementedError: + await message.answer("Stop is not supported yet.") + await maybe_doc_hint(message) + + await guard(message, run) + + @dp.message(Command(commands=["verbosity"])) + async def cmd_verbosity(message: Message, command: CommandObject) -> None: + async def run() -> None: + args = (command.args or "").split() + if len(args) != 2: + await message.answer("Usage: /verbosity ") + return + run_id, mode = args + if mode not in {"high-only", "batched", "full"}: + await message.answer("Mode must be one of: high-only, batched, full.") + return + run_verbosity[run_id] = mode + state.set_verbosity(run_id, mode) + kb = InlineKeyboardMarkup( + inline_keyboard=[ + [ + InlineKeyboardButton(text="High-only", callback_data=f"verbosity:{run_id}:high-only"), + InlineKeyboardButton(text="Batched", callback_data=f"verbosity:{run_id}:batched"), + InlineKeyboardButton(text="Full", callback_data=f"verbosity:{run_id}:full"), + ] + ] + ) + await message.answer(f"Verbosity for {run_id} set to {mode}.", reply_markup=kb) + + await guard(message, run) + + @dp.message(F.text) + async def fallback(message: Message) -> None: + await message.answer("Unrecognized command. 
Send /help.") + + @dp.callback_query(F.data.startswith("report_full:")) + async def report_full(cb: CallbackQuery) -> None: + data = cb.data or "" + parts = data.split(":", 1) + if len(parts) != 2: + await cb.answer() + return + run_id = parts[1] + try: + file_path = control_api.get_report_file(run_id) + if not file_path: + await cb.message.answer("Report file not found.") + await cb.answer() + return + size = os.path.getsize(file_path) + if size > MAX_FILE_SIZE_BYTES: + await cb.message.answer("Report too large to send. Please retrieve manually.") + await cb.answer() + return + with open(file_path, "rb") as fh: + logger.info("bot_report_send run_id=%s path=%s size=%s", run_id, file_path, size) + await cb.message.answer_document(document=fh) + except TelegramBadRequest as exc: + await cb.message.answer(f"Failed to send report: {exc}") + asyncio.create_task( + alert_sink.notify( + "delivery_error", + {"run_id": run_id, "path": file_path, "error": str(exc)}, + ) + ) + except Exception as exc: # noqa: BLE001 + logger.exception("Failed to send report") + await cb.message.answer(f"Failed to send report: {exc}") + asyncio.create_task(alert_sink.notify("delivery_error", {"run_id": run_id, "path": file_path, "error": str(exc)})) + await cb.answer() + + @dp.callback_query(F.data.startswith("run_info:")) + async def run_info_cb(cb: CallbackQuery) -> None: + data = cb.data or "" + parts = data.split(":", 1) + if len(parts) != 2: + await cb.answer() + return + run_id = parts[1] + info = control_api.get_run_info(run_id) + if not info: + await cb.message.answer("Run not found.") + await cb.answer() + return + text = f"{info.run_id}\nTarget: {info.target}\nStatus: {info.status}" + await cb.message.answer(text) + await cb.answer() + + @dp.callback_query(F.data.startswith("tail_more:")) + async def tail_more(cb: CallbackQuery) -> None: + data = cb.data or "" + parts = data.split(":", 2) + if len(parts) != 3: + await cb.answer() + return + run_id, offset_str = parts[1], parts[2] + 
try: + offset = int(offset_str) + except ValueError: + await cb.answer() + return + logs, has_more, next_offset = fetch_tail_page(control_api, run_id, offset=offset, page_size=50) + kb_rows: list[list[InlineKeyboardButton]] = [] + if has_more: + kb_rows.append( + [ + InlineKeyboardButton( + text="Tail more", + callback_data=f"tail_more:{run_id}:{next_offset}", + ) + ] + ) + await cb.message.answer( + "\n".join(logs) if logs else "No logs.", + reply_markup=InlineKeyboardMarkup(inline_keyboard=kb_rows) if kb_rows else None, + ) + await cb.answer() + + @dp.callback_query(F.data.startswith("file_nav:")) + async def file_nav(cb: CallbackQuery) -> None: + data = cb.data or "" + parts = data.split(":", 2) + if len(parts) != 3: + await cb.answer() + return + run_id, rel_path = parts[1], parts[2] + try: + files = control_api.list_files(run_id, rel_path) + if not files: + await cb.message.answer("No files.") + await cb.answer() + return + kb_rows = build_file_kb(run_id, files, rel_path=rel_path) + await cb.message.answer(f"Browsing `{rel_path or '.'}`", reply_markup=InlineKeyboardMarkup(inline_keyboard=kb_rows)) + except Exception as exc: # noqa: BLE001 + logger.exception("Failed to browse files") + await cb.message.answer(f"Failed to browse files: {exc}") + await cb.answer() + + @dp.callback_query(F.data.startswith("file_dl:")) + async def file_dl(cb: CallbackQuery) -> None: + data = cb.data or "" + parts = data.split(":", 2) + if len(parts) != 3: + await cb.answer() + return + run_id, rel_path = parts[1], parts[2] + try: + meta = control_api.get_file_metadata(run_id, rel_path) + if not meta: + await cb.message.answer("File not found.") + await cb.answer() + return + file_path, size = meta + if size > MAX_FILE_SIZE_BYTES: + await cb.message.answer("File too large to send. 
Please fetch manually.") + await cb.answer() + return + with open(file_path, "rb") as fh: + logger.info("bot_file_send run_id=%s path=%s size=%s", run_id, rel_path, size) + await cb.message.answer_document(document=fh) + except TelegramBadRequest as exc: + await cb.message.answer(f"Failed to send file: {exc}") + asyncio.create_task( + alert_sink.notify( + "delivery_error", + {"run_id": run_id, "path": rel_path, "error": str(exc)}, + ) + ) + except Exception as exc: # noqa: BLE001 + logger.exception("Failed to send file") + await cb.message.answer(f"Failed to send file: {exc}") + asyncio.create_task(alert_sink.notify("delivery_error", {"run_id": run_id, "path": rel_path, "error": str(exc)})) + await cb.answer() + + @dp.callback_query(F.data.startswith("verbosity:")) + async def verbosity_cb(cb: CallbackQuery) -> None: + data = cb.data or "" + parts = data.split(":", 2) + if len(parts) != 3: + await cb.answer() + return + run_id, mode = parts[1], parts[2] + if mode not in {"high-only", "batched", "full"}: + await cb.answer() + return + run_verbosity[run_id] = mode + state.set_verbosity(run_id, mode) + await cb.message.answer(f"Verbosity for {run_id} set to {mode}.") + await cb.answer() + + return dp + + +def build_http_app(metrics: Metrics, token: str | None = None) -> web.Application: + app = web.Application() + + async def _auth(request: web.Request) -> bool: + if not token: + return True + provided = request.headers.get("Authorization", "") + return provided == f"Bearer {token}" + + async def health_handler(_: web.Request) -> web.Response: + return web.Response(text="ok") + + async def metrics_handler(request: web.Request) -> web.Response: + if not await _auth(request): + return web.Response(status=403) + fmt = request.query.get("format", "json") + counters = metrics.counters + errors = metrics.errors + latencies = metrics.latencies_ms + if fmt == "prom": + lines = [] + for k, v in counters.items(): + lines.append(f"strix_bot_counter{{name=\"{k}\"}} {v}") + for k, 
v in errors.items(): + lines.append(f"strix_bot_error_total{{name=\"{k}\"}} {v}") + if latencies: + avg = sum(latencies) / len(latencies) + lines.append(f"strix_bot_command_latency_ms_avg {avg:.2f}") + return web.Response(text="\n".join(lines), content_type="text/plain") + avg_latency = sum(latencies) / len(latencies) if latencies else 0.0 + return web.json_response({"counters": counters, "errors": errors, "avg_latency_ms": avg_latency}) + + app.add_routes( + [ + web.get("/healthz", health_handler), + web.get("/health", health_handler), + web.get("/metrics", metrics_handler), + ] + ) + return app + + +async def run_bot(control_api: ControlAPI, config: TelegramBotConfig) -> None: + bot = Bot(token=config.bot_token) + dp = create_dispatcher(control_api, config) + runner: web.AppRunner | None = None + if config.http_port: + app = build_http_app(dp.metrics, token=config.http_token) # type: ignore[arg-type, attr-defined] + runner = web.AppRunner(app) + await runner.setup() + site = web.TCPSite(runner, host=config.http_host or "0.0.0.0", port=config.http_port) + await site.start() + logger.info("HTTP server started on %s:%s", config.http_host or "0.0.0.0", config.http_port) + + if not config.webhook_url: + raise RuntimeError("Webhook URL not configured.") + await bot.set_webhook(config.webhook_url) + await dp.start_polling(bot) + if runner: + await runner.cleanup() + + +def run(control_api: ControlAPI, config: TelegramBotConfig) -> None: + asyncio.run(run_bot(control_api, config)) diff --git a/strix/bot/state.py b/strix/bot/state.py new file mode 100644 index 00000000..48685856 --- /dev/null +++ b/strix/bot/state.py @@ -0,0 +1,46 @@ +import sqlite3 +from pathlib import Path +from typing import Optional + + +class BotState: + """ + Lightweight SQLite-backed state for run preferences (e.g., verbosity). 
+ """ + + def __init__(self, db_path: str) -> None: + self.db_path = db_path + Path(db_path).parent.mkdir(parents=True, exist_ok=True) + self._init_db() + + def _init_db(self) -> None: + with sqlite3.connect(self.db_path) as conn: + conn.execute( + """ + CREATE TABLE IF NOT EXISTS run_settings ( + run_id TEXT PRIMARY KEY, + verbosity TEXT + ) + """ + ) + conn.commit() + + def set_verbosity(self, run_id: str, verbosity: str) -> None: + with sqlite3.connect(self.db_path) as conn: + conn.execute( + """ + INSERT INTO run_settings (run_id, verbosity) + VALUES (?, ?) + ON CONFLICT(run_id) DO UPDATE SET verbosity=excluded.verbosity + """, + (run_id, verbosity), + ) + conn.commit() + + def get_verbosity(self, run_id: str) -> Optional[str]: + with sqlite3.connect(self.db_path) as conn: + cur = conn.execute( + "SELECT verbosity FROM run_settings WHERE run_id = ?", (run_id,) + ) + row = cur.fetchone() + return row[0] if row else None diff --git a/strix/bot/strix_control_api.py b/strix/bot/strix_control_api.py new file mode 100644 index 00000000..d4149617 --- /dev/null +++ b/strix/bot/strix_control_api.py @@ -0,0 +1,264 @@ +from __future__ import annotations + +import asyncio +import logging +import os +from pathlib import Path +from typing import Any, Dict, List, Optional +from datetime import datetime + +from strix.interface.main import build_targets_info # type: ignore +from strix.interface.utils import generate_run_name # type: ignore +from strix.llm.config import LLMConfig +from strix.telemetry.tracer import Tracer, set_global_tracer + +from strix.agents.StrixAgent import StrixAgent +from strix.agents.iteration_policy import calculate_iteration_budget +from .control_api import ControlAPI, RunInfo + +logger = logging.getLogger(__name__) + + +class StrixControlAPI(ControlAPI): + """ + Control API that starts Strix runs via internal interfaces. + Note: stop/resume/status are minimal; enhance with runtime hooks. 
+ """ + + def __init__(self, root_path: str | Path = ".") -> None: + self.root_path = Path(root_path).resolve() + self.runs_dir = self.root_path / "strix_runs" + self.active: Dict[str, dict[str, Any]] = {} + + def start_run( + self, + target: str, + instruction: str | None = None, + verbosity: str | None = None, + stream_callback: Optional[Callable[[str, str, str, str], None]] = None, + ) -> RunInfo: + run_name = generate_run_name() + targets_info = build_targets_info([target]) + scan_config = { + "scan_id": run_name, + "targets": targets_info, + "user_instructions": instruction or "", + "run_name": run_name, + } + tracer = Tracer(run_name) + tracer.set_scan_config(scan_config) + set_global_tracer(tracer) + + if stream_callback: + def vuln_handler(report_id: str, title: str, content: str, severity: str) -> None: + try: + stream_callback(report_id, title, content, severity) + except Exception: # noqa: BLE001 + logger.exception("Stream callback failed") + + tracer.vulnerability_found_callback = vuln_handler + + llm_config = LLMConfig() + iteration_policy = calculate_iteration_budget(targets_info, llm_config.timeout) + agent_config = { + "llm_config": llm_config, + "max_iterations": iteration_policy["max_iterations"], + "iteration_policy": iteration_policy, + "non_interactive": True, + } + tracer.set_iteration_policy(iteration_policy) + agent = StrixAgent(agent_config) + + async def runner() -> None: + try: + await agent.run() + if run_name in self.active: + self.active[run_name]["status"] = "completed" + self.active[run_name]["ended_at"] = datetime.utcnow().isoformat() + except Exception: # noqa: BLE001 + logger.exception("Run failed for %s", run_name) + if run_name in self.active: + self.active[run_name]["status"] = "failed" + self.active[run_name]["ended_at"] = datetime.utcnow().isoformat() + + task = asyncio.create_task(runner()) + self.active[run_name] = { + "agent": agent, + "tracer": tracer, + "targets": targets_info, + "status": "running", + "task": task, + 
"started_at": datetime.utcnow().isoformat(), + } + return RunInfo(run_id=run_name, target=target, status="running", instruction=instruction) + + def list_runs(self, limit: int = 20) -> List[RunInfo]: + self._reap_finished() + runs: list[RunInfo] = [] + for run_id, info in list(self.active.items())[:limit]: + runs.append(self._build_run_info(run_id, info)) + + # Fill with filesystem runs not in active list + if len(runs) < limit and self.runs_dir.exists(): + existing_ids = {r.run_id for r in runs} + for path in sorted(self.runs_dir.iterdir(), key=lambda p: p.stat().st_mtime, reverse=True): + if not path.is_dir(): + continue + if path.name in existing_ids: + continue + runs.append( + RunInfo( + run_id=path.name, + target="unknown", + status="unknown", + ) + ) + if len(runs) >= limit: + break + return runs + + def get_run_info(self, run_id: str) -> RunInfo | None: + self._reap_finished() + info = self.active.get(run_id) + if not info: + # fallback to filesystem presence + path = self.runs_dir / run_id + if path.exists(): + return RunInfo(run_id=run_id, target="unknown", status="unknown") + return None + return self._build_run_info(run_id, info) + + def tail_logs(self, run_id: str, offset: int = 0, limit: int = 200) -> List[str]: + path = self.runs_dir / run_id + candidates = [ + path / "stdout.log", + path / "logs.txt", + path / "log.txt", + path / "run.log", + ] + log_file = next((p for p in candidates if p.exists()), None) + if not log_file: + return [] + with open(log_file, "r", encoding="utf-8", errors="ignore") as f: + lines = f.readlines() + return [line.rstrip("\n") for line in lines[offset: offset + limit]] + + def get_report_summary(self, run_id: str) -> str: + path = self.runs_dir / run_id + report_file = self._find_report_file(path) + if not report_file: + return "" + with open(report_file, "r", encoding="utf-8", errors="ignore") as f: + content = f.read() + return content[:4000] + + def get_report_file(self, run_id: str) -> str | None: + path = 
def get_file_metadata(self, run_id: str, path: str) -> tuple[str, int] | None:
    """Resolve *path* inside the run directory; return (abs_path, size) or None.

    Containment uses ``Path.is_relative_to``: the previous plain
    ``str.startswith`` check wrongly accepted sibling directories that
    share a name prefix (e.g. ``/runs/r1-evil`` vs ``/runs/r1``).
    """
    base = (self.runs_dir / run_id).resolve()
    target = (base / path).resolve()
    if not target.is_relative_to(base) or not target.is_file():
        return None
    return str(target), target.stat().st_size

def list_files(self, run_id: str, path: str = "") -> List[Dict[str, Any]]:
    """List directory entries under *path* within the run dir.

    Returns [] when the directory is missing or *path* escapes the run dir.
    """
    root = (self.runs_dir / run_id).resolve()
    base = (root / path).resolve() if path else root
    # is_dir() implies existence; is_relative_to blocks traversal.
    if not base.is_relative_to(root) or not base.is_dir():
        return []
    return [
        {
            "name": entry.name,
            "path": os.path.relpath(entry, root),
            "is_dir": entry.is_dir(),
            "size": entry.stat().st_size,
        }
        for entry in base.iterdir()
    ]

def read_file(self, run_id: str, path: str) -> bytes:
    """Read a file inside the run directory.

    Raises:
        FileNotFoundError: for missing files and for any path that escapes
            the run directory (same containment rule as get_file_metadata).
    """
    base = (self.runs_dir / run_id).resolve()
    target = (base / path).resolve()
    if not target.is_relative_to(base) or not target.is_file():
        raise FileNotFoundError("File not found")
    return target.read_bytes()

def resume_run(
    self,
    run_id: str,
    stream_callback: Optional[Callable[[str, str, str, str], None]] = None,
) -> bool:
    """Re-attach to an active run; True only if its task is still running."""
    self._reap_finished()
    info = self.active.get(run_id)
    if not info:
        return False
    tracer = info.get("tracer")
    if tracer is not None and stream_callback is not None:
        tracer.vulnerability_found_callback = stream_callback
    task = info.get("task")
    if task is not None and not task.done():
        info["status"] = "running"
        return True
    return False

def stop_run(self, run_id: str) -> bool:
    """Request cancellation of a run; True if the run was known at all."""
    self._reap_finished()
    info = self.active.get(run_id)
    if not info:
        return False
    agent = info.get("agent")
    if hasattr(agent, "cancel"):
        # Prefer the agent's own cancellation hook when it exists.
        agent.cancel()
    else:
        task = info.get("task")
        if task is not None:
            task.cancel()
    info["status"] = "stopped"
    # NOTE: utcnow() is deprecated in 3.12; kept for timestamp-format
    # consistency with the rest of this module.
    info["ended_at"] = datetime.utcnow().isoformat()
    return True
datetime.utcnow().isoformat() + return True + task = info.get("task") + if task: + task.cancel() + info["status"] = "stopped" + info["ended_at"] = datetime.utcnow().isoformat() + return True + # If no cancel available, mark as stopped + info["status"] = "stopped" + info["ended_at"] = datetime.utcnow().isoformat() + return True + + def _build_run_info(self, run_id: str, info: dict[str, Any]) -> RunInfo: + target = info.get("targets", [{}])[0].get("original", "unknown") + status = info.get("status", "running") + ri = RunInfo( + run_id=run_id, + target=target, + status=status, + ) + return ri + + def _find_report_file(self, base: Path) -> Path | None: + candidates = [ + base / "report.txt", + base / "report.md", + base / "report.html", + base / "report.json", + base / "report.pdf", + ] + for path in candidates: + if path.exists(): + return path + return None + + def _reap_finished(self) -> None: + for run_id, info in self.active.items(): + task = info.get("task") + if task and task.done(): + if task.cancelled(): + info["status"] = "stopped" + info["ended_at"] = datetime.utcnow().isoformat() + elif task.exception(): + info["status"] = "failed" + info["ended_at"] = datetime.utcnow().isoformat() + else: + info["status"] = "completed" + info["ended_at"] = datetime.utcnow().isoformat() + # Optionally prune very old entries if needed (not implemented) diff --git a/strix/interface/cli.py b/strix/interface/cli.py index 626cbded..da51087a 100644 --- a/strix/interface/cli.py +++ b/strix/interface/cli.py @@ -11,6 +11,7 @@ from rich.text import Text from strix.agents.StrixAgent import StrixAgent +from strix.agents.iteration_policy import calculate_iteration_budget from strix.llm.config import LLMConfig from strix.telemetry.tracer import Tracer, set_global_tracer @@ -74,9 +75,11 @@ async def run_cli(args: Any) -> None: # noqa: PLR0915 } llm_config = LLMConfig() + iteration_policy = calculate_iteration_budget(args.targets_info, llm_config.timeout) agent_config = { "llm_config": 
llm_config, - "max_iterations": 300, + "max_iterations": iteration_policy["max_iterations"], + "iteration_policy": iteration_policy, "non_interactive": True, } @@ -85,6 +88,7 @@ async def run_cli(args: Any) -> None: # noqa: PLR0915 tracer = Tracer(args.run_name) tracer.set_scan_config(scan_config) + tracer.set_iteration_policy(iteration_policy) def display_vulnerability(report_id: str, title: str, content: str, severity: str) -> None: severity_color = get_severity_color(severity.lower()) diff --git a/strix/interface/run_manager.py b/strix/interface/run_manager.py new file mode 100644 index 00000000..2fb4ca2b --- /dev/null +++ b/strix/interface/run_manager.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +import asyncio +from typing import Any, Callable, Coroutine + + +class RunManager: + def __init__(self, max_concurrent: int = 2) -> None: + self.semaphore = asyncio.Semaphore(max_concurrent) + + async def run_with_budget( + self, + tasks: list[tuple[str, Callable[[], Coroutine[Any, Any, Any]]]], + ) -> dict[str, Any]: + results: dict[str, Any] = {} + + async def _wrap(name: str, coro_fn: Callable[[], Coroutine[Any, Any, Any]]) -> None: + async with self.semaphore: + try: + results[name] = await coro_fn() + except Exception as exc: # noqa: BLE001 + results[name] = {"success": False, "error": str(exc)} + + await asyncio.gather(*[_wrap(name, fn) for name, fn in tasks]) + return results diff --git a/strix/interface/tui.py b/strix/interface/tui.py index 1b0bc37f..7dda6075 100644 --- a/strix/interface/tui.py +++ b/strix/interface/tui.py @@ -31,6 +31,7 @@ from textual.widgets.tree import TreeNode from strix.agents.StrixAgent import StrixAgent +from strix.agents.iteration_policy import calculate_iteration_budget from strix.interface.utils import build_live_stats_text from strix.llm.config import LLMConfig from strix.telemetry.tracer import Tracer, set_global_tracer @@ -282,6 +283,8 @@ def __init__(self, args: argparse.Namespace): self.tracer = 
Tracer(self.scan_config["run_name"]) self.tracer.set_scan_config(self.scan_config) + if self.agent_config.get("iteration_policy"): + self.tracer.set_iteration_policy(self.agent_config["iteration_policy"]) set_global_tracer(self.tracer) self.agent_nodes: dict[str, TreeNode] = {} @@ -321,9 +324,11 @@ def _build_scan_config(self, args: argparse.Namespace) -> dict[str, Any]: def _build_agent_config(self, args: argparse.Namespace) -> dict[str, Any]: llm_config = LLMConfig() + iteration_policy = calculate_iteration_budget(args.targets_info, llm_config.timeout) config = { "llm_config": llm_config, - "max_iterations": 300, + "max_iterations": iteration_policy["max_iterations"], + "iteration_policy": iteration_policy, } if getattr(args, "local_sources", None): diff --git a/strix/llm/llm.py b/strix/llm/llm.py index 99a566a4..8a23620e 100644 --- a/strix/llm/llm.py +++ b/strix/llm/llm.py @@ -1,5 +1,6 @@ import logging import os +from pathlib import Path from dataclasses import dataclass from enum import Enum from fnmatch import fnmatch @@ -25,15 +26,37 @@ logger = logging.getLogger(__name__) -api_key = os.getenv("LLM_API_KEY") + +def _load_env_file(path: Path) -> dict[str, str]: + env: dict[str, str] = {} + if not path.exists(): + return env + for line in path.read_text(encoding="utf-8").splitlines(): + line = line.strip() + if not line or line.startswith("#") or "=" not in line: + continue + key, val = line.split("=", 1) + env[key.strip()] = val.strip() + return env + + +file_env = _load_env_file(Path(".env")) + +def _getenv(name: str) -> str | None: + if name in os.environ: + return os.getenv(name) + return file_env.get(name) + + +api_key = _getenv("LLM_API_KEY") if api_key: litellm.api_key = api_key api_base = ( - os.getenv("LLM_API_BASE") - or os.getenv("OPENAI_API_BASE") - or os.getenv("LITELLM_BASE_URL") - or os.getenv("OLLAMA_API_BASE") + _getenv("LLM_API_BASE") + or _getenv("OPENAI_API_BASE") + or _getenv("LITELLM_BASE_URL") + or _getenv("OLLAMA_API_BASE") ) if 
class ChatBackend(Protocol):
    """Minimal interface for an LLM backend usable by MultiplexingLLM."""

    # Declared async to match how MultiplexingLLM awaits it; the previous
    # Protocol declared a sync method even though call sites awaited it.
    async def generate(self, *args: Any, **kwargs: Any) -> Any: ...


class MultiplexingLLM:
    """Primary/fallback wrapper around two chat backends.

    ``generate`` calls the primary backend; if it raises, a fallback is
    configured, and ``should_retry`` approves the exception, the fallback
    is tried. Otherwise the original exception propagates.
    """

    def __init__(
        self,
        primary: ChatBackend,
        fallback: ChatBackend | None = None,
        should_retry: Callable[[Exception], bool] | None = None,
    ) -> None:
        self.primary = primary
        self.fallback = fallback
        # Default policy: every exception is retryable on the fallback.
        self.should_retry = should_retry or (lambda exc: True)

    async def generate(self, *args: Any, **kwargs: Any) -> Any:
        """Generate via primary, falling back once on an approved failure."""
        try:
            return await self.primary.generate(*args, **kwargs)
        except Exception as exc:  # noqa: BLE001
            if self.fallback is not None and self.should_retry(exc):
                return await self.fallback.generate(*args, **kwargs)
            raise
def run_benchmark(name: str, fn: Callable[[], Any]) -> dict[str, Any]:
    """Time a single call to *fn* and return its name, duration, and result.

    duration_ms is wall-clock time (perf_counter) in milliseconds,
    rounded to two decimal places.
    """
    started = time.perf_counter()
    value = fn()
    elapsed_ms = (time.perf_counter() - started) * 1000.0
    return {"name": name, "duration_ms": round(elapsed_ms, 2), "result": value}
self.instances[0] + + async def mark_unhealthy(self, instance: Any) -> None: + async with self.lock: + self.health[id(instance)] = "unhealthy" + + async def get_health(self) -> dict[int, str]: + async with self.lock: + return dict(self.health) diff --git a/strix/telemetry/tracer.py b/strix/telemetry/tracer.py index 6da30d53..3529a2b1 100644 --- a/strix/telemetry/tracer.py +++ b/strix/telemetry/tracer.py @@ -1,3 +1,4 @@ +import json import logging from datetime import UTC, datetime from pathlib import Path @@ -46,6 +47,7 @@ def __init__(self, run_name: str | None = None): "end_time": None, "targets": [], "status": "running", + "max_iterations": None, } self._run_dir: Path | None = None self._next_execution_id = 1 @@ -74,6 +76,10 @@ def add_vulnerability_report( title: str, content: str, severity: str, + cvss_score: float | None = None, + references: list[str] | None = None, + fix_recommendation: str | None = None, + cwe: list[str] | None = None, ) -> str: report_id = f"vuln-{len(self.vulnerability_reports) + 1:04d}" @@ -83,6 +89,10 @@ def add_vulnerability_report( "content": content.strip(), "severity": severity.lower().strip(), "timestamp": datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC"), + "cvss_score": cvss_score, + "references": references or [], + "fix_recommendation": fix_recommendation, + "cwe": cwe or [], } self.vulnerability_reports.append(report) @@ -197,11 +207,16 @@ def set_scan_config(self, config: dict[str, Any]) -> None: { "targets": config.get("targets", []), "user_instructions": config.get("user_instructions", ""), - "max_iterations": config.get("max_iterations", 200), + "max_iterations": config.get("max_iterations", 300), } ) self.get_run_dir() + def set_iteration_policy(self, policy: dict[str, Any]) -> None: + self.run_metadata["iteration_policy"] = policy + if "max_iterations" in policy: + self.run_metadata["max_iterations"] = policy["max_iterations"] + def save_run_data(self, mark_complete: bool = False) -> None: try: run_dir = 
self.get_run_dir() @@ -236,9 +251,20 @@ def save_run_data(self, mark_complete: bool = False) -> None: f.write(f"# {report['title']}\n\n") f.write(f"**ID:** {report['id']}\n") f.write(f"**Severity:** {report['severity'].upper()}\n") + if report.get("cvss_score") is not None: + f.write(f"**CVSS:** {report['cvss_score']}\n") + if report.get("cwe"): + f.write(f"**CWE:** {', '.join(report['cwe'])}\n") f.write(f"**Found:** {report['timestamp']}\n\n") f.write("## Description\n\n") f.write(f"{report['content']}\n") + if report.get("fix_recommendation"): + f.write("\n## Fix Recommendation\n\n") + f.write(f"{report['fix_recommendation']}\n") + if report.get("references"): + f.write("\n## References\n\n") + for ref in report["references"]: + f.write(f"- {ref}\n") self._saved_vuln_ids.add(report["id"]) if self.vulnerability_reports: @@ -252,7 +278,16 @@ def save_run_data(self, mark_complete: bool = False) -> None: with vuln_csv_file.open("w", encoding="utf-8", newline="") as f: import csv - fieldnames = ["id", "title", "severity", "timestamp", "file"] + fieldnames = [ + "id", + "title", + "severity", + "timestamp", + "cvss", + "cwe", + "references", + "file", + ] writer = csv.DictWriter(f, fieldnames=fieldnames) writer.writeheader() @@ -263,10 +298,38 @@ def save_run_data(self, mark_complete: bool = False) -> None: "title": report["title"], "severity": report["severity"].upper(), "timestamp": report["timestamp"], + "cvss": report.get("cvss_score"), + "cwe": ",".join(report.get("cwe", [])), + "references": ",".join(report.get("references", [])), "file": f"vulnerabilities/{report['id']}.md", } ) + vuln_jsonl_file = run_dir / "vulnerabilities.jsonl" + with vuln_jsonl_file.open("w", encoding="utf-8") as f: + for report in sorted_reports: + json_record = { + "id": report["id"], + "title": report["title"], + "severity": report["severity"], + "timestamp": report["timestamp"], + "content": report["content"], + "cvss_score": report.get("cvss_score"), + "cwe": report.get("cwe", []), + 
"references": report.get("references", []), + "fix_recommendation": report.get("fix_recommendation"), + "file": f"vulnerabilities/{report['id']}.md", + "run_id": self.run_id, + "run_name": self.run_name, + } + f.write(json.dumps(json_record)) + f.write("\n") + + sarif_file = run_dir / "vulnerabilities.sarif.json" + sarif_payload = self._build_sarif_report(sorted_reports) + with sarif_file.open("w", encoding="utf-8") as f: + json.dump(sarif_payload, f, indent=2) + if new_reports: logger.info( f"Saved {len(new_reports)} new vulnerability report(s) to: {vuln_dir}" @@ -333,5 +396,75 @@ def get_total_llm_stats(self) -> dict[str, Any]: "total_tokens": total_stats["input_tokens"] + total_stats["output_tokens"], } + def _build_sarif_report(self, reports: list[dict[str, Any]]) -> dict[str, Any]: + severity_rules = { + "critical": {"rule_id": "STRIX.CRITICAL", "level": "error", "name": "Critical"}, + "high": {"rule_id": "STRIX.HIGH", "level": "error", "name": "High"}, + "medium": {"rule_id": "STRIX.MEDIUM", "level": "warning", "name": "Medium"}, + "low": {"rule_id": "STRIX.LOW", "level": "note", "name": "Low"}, + "info": {"rule_id": "STRIX.INFO", "level": "note", "name": "Informational"}, + } + + rules = [ + { + "id": rule["rule_id"], + "name": f"{rule['name']} Severity", + "shortDescription": {"text": f"{rule['name']} severity vulnerability"}, + "defaultConfiguration": {"level": rule["level"]}, + } + for rule in severity_rules.values() + ] + + results = [] + for report in reports: + severity_key = report.get("severity", "medium").lower().strip() + rule = severity_rules.get(severity_key, severity_rules["medium"]) + result = { + "ruleId": rule["rule_id"], + "level": rule["level"], + "message": {"text": report.get("title", "Strix vulnerability")}, + "locations": [ + { + "physicalLocation": { + "artifactLocation": { + "uri": f"vulnerabilities/{report.get('id', 'unknown')}.md" + } + } + } + ], + "properties": { + "id": report.get("id"), + "severity": severity_key, + "timestamp": 
report.get("timestamp"), + "content": report.get("content"), + "cvss_score": report.get("cvss_score"), + "cwe": report.get("cwe", []), + "references": report.get("references", []), + "fix_recommendation": report.get("fix_recommendation"), + "runId": self.run_id, + "runName": self.run_name or "", + }, + "partialFingerprints": {"strix/vulnerabilityId": report.get("id", "")}, + } + results.append(result) + + sarif_payload = { + "$schema": "https://json.schemastore.org/sarif-2.1.0.json", + "version": "2.1.0", + "runs": [ + { + "tool": { + "driver": { + "name": "Strix", + "rules": rules, + } + }, + "results": results, + } + ], + } + + return sarif_payload + def cleanup(self) -> None: self.save_run_data(mark_complete=True) diff --git a/strix/tools/__init__.py b/strix/tools/__init__.py index 8d5f896b..5843253f 100644 --- a/strix/tools/__init__.py +++ b/strix/tools/__init__.py @@ -33,6 +33,9 @@ from .proxy import * # noqa: F403 from .python import * # noqa: F403 from .reporting import * # noqa: F403 + from .cache import * # noqa: F403 + from .api_probe import * # noqa: F403 + from .sast import * # noqa: F403 from .terminal import * # noqa: F403 from .thinking import * # noqa: F403 @@ -44,6 +47,8 @@ from .notes import * # noqa: F403 from .proxy import * # noqa: F403 from .python import * # noqa: F403 + from .api_probe import * # noqa: F403 + from .sast import * # noqa: F403 from .terminal import * # noqa: F403 __all__ = [ diff --git a/strix/tools/api_probe/__init__.py b/strix/tools/api_probe/__init__.py new file mode 100644 index 00000000..fae9da73 --- /dev/null +++ b/strix/tools/api_probe/__init__.py @@ -0,0 +1,3 @@ +from .api_probe_actions import load_openapi_spec, suggest_api_fuzz_cases + +__all__ = ["load_openapi_spec", "suggest_api_fuzz_cases"] diff --git a/strix/tools/api_probe/api_probe_actions.py b/strix/tools/api_probe/api_probe_actions.py new file mode 100644 index 00000000..03da857b --- /dev/null +++ b/strix/tools/api_probe/api_probe_actions.py @@ -0,0 +1,86 @@ 
+from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +from strix.tools.registry import register_tool + + +def _load_spec(path: Path) -> dict[str, Any]: + if not path.exists(): + raise FileNotFoundError(f"Spec file not found: {path}") + + if path.suffix.lower() in {".yaml", ".yml"}: + try: + import yaml # type: ignore + except ImportError as exc: # pragma: no cover - optional dep + raise RuntimeError("PyYAML required for YAML specs; install pyyaml") from exc + return yaml.safe_load(path.read_text(encoding="utf-8")) or {} + + return json.loads(path.read_text(encoding="utf-8")) + + +def _extract_type(param: dict[str, Any]) -> str: + schema = param.get("schema", {}) + if "type" in schema: + return str(schema["type"]) + if "$ref" in schema: + return "object_ref" + return "unknown" + + +@register_tool(sandbox_execution=False) +def load_openapi_spec(spec_path: str) -> dict[str, Any]: + spec = _load_spec(Path(spec_path)) + paths = spec.get("paths", {}) + endpoints: list[dict[str, Any]] = [] + + for path, methods in paths.items(): + for method, details in methods.items(): + params = [] + for p in details.get("parameters", []): + params.append( + { + "name": p.get("name"), + "in": p.get("in"), + "required": p.get("required", False), + "type": _extract_type(p), + } + ) + endpoints.append( + { + "path": path, + "method": method.upper(), + "summary": details.get("summary", ""), + "params": params, + } + ) + + return {"success": True, "endpoints": endpoints} + + +@register_tool(sandbox_execution=False) +def suggest_api_fuzz_cases(endpoints: list[dict[str, Any]]) -> dict[str, Any]: + fuzz_strings = [ + "' OR '1'='1", + "\"; DROP TABLE users; --", + "../../etc/passwd", + "${{7*7}}", + "", + ] + + suggestions = [] + for ep in endpoints: + param_payloads = [] + for param in ep.get("params", []): + param_payloads.append({"name": param.get("name"), "payload": fuzz_strings[0]}) + suggestions.append( + { + "path": ep.get("path"), + 
"method": ep.get("method"), + "payloads": param_payloads or [{"payload": fuzz_strings[1]}], + } + ) + + return {"success": True, "suggestions": suggestions} diff --git a/strix/tools/api_probe/api_probe_actions_schema.xml b/strix/tools/api_probe/api_probe_actions_schema.xml new file mode 100644 index 00000000..a7798ca0 --- /dev/null +++ b/strix/tools/api_probe/api_probe_actions_schema.xml @@ -0,0 +1,24 @@ + + + Load an OpenAPI/Swagger specification (JSON or YAML) and extract endpoints, methods, and basic parameter info for planning probes. + + + Path to the OpenAPI/Swagger file (.json/.yaml/.yml). + + + + Endpoints array with path, method, summary, and parameters (name/location/required/type). + + + + Generate lightweight fuzzing suggestions for endpoints based on their parameters (e.g., injection strings). + + + Endpoints as returned by load_openapi_spec. + + + + Fuzz suggestions per endpoint with placeholder payloads. + + + diff --git a/strix/tools/browser/browser_actions.py b/strix/tools/browser/browser_actions.py index ca7a26a1..c1b241b9 100644 --- a/strix/tools/browser/browser_actions.py +++ b/strix/tools/browser/browser_actions.py @@ -24,6 +24,8 @@ "press_key", "save_pdf", "get_console_logs", + "get_network_events", + "capture_screenshot_diff", "view_source", "close", "list_tabs", @@ -156,6 +158,7 @@ def _handle_utility_actions( file_path: str | None = None, tab_id: str | None = None, clear: bool = False, + limit: int | None = None, ) -> dict[str, Any]: if action == "wait": _validate_duration(action, duration) @@ -171,6 +174,10 @@ def _handle_utility_actions( return manager.save_pdf(file_path, tab_id) if action == "get_console_logs": return manager.get_console_logs(tab_id, clear) + if action == "get_network_events": + return manager.get_network_events(tab_id, limit or 50, clear) + if action == "capture_screenshot_diff": + return manager.capture_screenshot_diff(tab_id) if action == "view_source": return manager.view_source(tab_id) if action == "close": @@ 
-190,6 +197,7 @@ def browser_action( key: str | None = None, file_path: str | None = None, clear: bool = False, + limit: int | None = None, ) -> dict[str, Any]: manager = get_browser_tab_manager() @@ -210,6 +218,8 @@ def browser_action( "execute_js", "save_pdf", "get_console_logs", + "get_network_events", + "capture_screenshot_diff", "view_source", "close", } @@ -222,7 +232,7 @@ def browser_action( return _handle_tab_actions(manager, action, url, tab_id) if action in utility_actions: return _handle_utility_actions( - manager, action, duration, js_code, file_path, tab_id, clear + manager, action, duration, js_code, file_path, tab_id, clear, limit ) _raise_unknown_action(action) diff --git a/strix/tools/browser/browser_actions_schema.xml b/strix/tools/browser/browser_actions_schema.xml index b6fdfc64..e93c9590 100644 --- a/strix/tools/browser/browser_actions_schema.xml +++ b/strix/tools/browser/browser_actions_schema.xml @@ -34,9 +34,12 @@ For 'get_console_logs' action: whether to clear console logs after retrieving them. Default is False (keep logs). + + Optional limit when fetching network events (default 50; max stored 200). + - Response containing: - screenshot: Base64 encoded PNG of the current page state - url: Current page URL - title: Current page title - viewport: Current browser viewport dimensions - tab_id: ID of the current active tab - all_tabs: Dict of all open tab IDs and their URLs - message: Status message about the action performed - js_result: Result of JavaScript execution (for execute_js action) - pdf_saved: File path of saved PDF (for save_pdf action) - console_logs: Array of console messages (for get_console_logs action) Limited to 50KB total and 200 most recent logs. Individual messages truncated at 1KB. - page_source: HTML source code (for view_source action) Large pages are truncated to 100KB (keeping beginning and end sections). 
+ Response containing: - screenshot: Base64 encoded PNG of the current page state - screenshot_changed: Whether the latest screenshot differs from prior capture - url: Current page URL - title: Current page title - viewport: Current browser viewport dimensions - tab_id: ID of the current active tab - all_tabs: Dict of all open tab IDs and their URLs - message: Status message about the action performed - js_result: Result of JavaScript execution (for execute_js action) - pdf_saved: File path of saved PDF (for save_pdf action) - console_logs: Array of console messages (for get_console_logs action) Limited to 50KB total and 200 most recent logs. Individual messages truncated at 1KB. - network_events: Recent requests/responses with status, method, duration_ms (for get_network_events action) - page_source: HTML source code (for view_source action) Large pages are truncated to 100KB (keeping beginning and end sections). Important usage rules: diff --git a/strix/tools/browser/browser_instance.py b/strix/tools/browser/browser_instance.py index 3e756f67..30ec9a3b 100644 --- a/strix/tools/browser/browser_instance.py +++ b/strix/tools/browser/browser_instance.py @@ -30,6 +30,9 @@ def __init__(self) -> None: self._next_tab_id = 1 self.console_logs: dict[str, list[dict[str, Any]]] = {} + self.network_events: dict[str, list[dict[str, Any]]] = {} + self._request_start: dict[int, float] = {} + self._last_screenshots: dict[str, bytes] = {} self._loop: asyncio.AbstractEventLoop | None = None self._loop_thread: threading.Thread | None = None @@ -77,6 +80,32 @@ def handle_console(msg: Any) -> None: page.on("console", handle_console) + async def _setup_network_logging(self, page: Page, tab_id: str) -> None: + self.network_events[tab_id] = [] + + def handle_request(request: Any) -> None: + self._request_start[id(request)] = asyncio.get_event_loop().time() + + def handle_response(response: Any) -> None: + start = self._request_start.pop(id(response.request), None) + duration_ms = None + 
if start is not None: + duration_ms = round((asyncio.get_event_loop().time() - start) * 1000, 2) + + event = { + "url": response.url, + "method": response.request.method, + "status": response.status, + "resource_type": response.request.resource_type, + "duration_ms": duration_ms, + } + events = self.network_events.get(tab_id, []) + events.append(event) + self.network_events[tab_id] = events[-200:] + + page.on("request", handle_request) + page.on("response", handle_response) + async def _launch_browser(self, url: str | None = None) -> dict[str, Any]: self.playwright = await async_playwright().start() @@ -106,6 +135,7 @@ async def _launch_browser(self, url: str | None = None) -> dict[str, Any]: self.current_page_id = tab_id await self._setup_console_logging(page, tab_id) + await self._setup_network_logging(page, tab_id) if url: await page.goto(url, wait_until="domcontentloaded") @@ -125,6 +155,9 @@ async def _get_page_state(self, tab_id: str | None = None) -> dict[str, Any]: screenshot_bytes = await page.screenshot(type="png", full_page=False) screenshot_b64 = base64.b64encode(screenshot_bytes).decode("utf-8") + previous = self._last_screenshots.get(tab_id) + screenshot_changed = previous is None or previous != screenshot_bytes + self._last_screenshots[tab_id] = screenshot_bytes url = page.url title = await page.title() @@ -144,6 +177,7 @@ async def _get_page_state(self, tab_id: str | None = None) -> dict[str, Any]: "viewport": viewport, "tab_id": tab_id, "all_tabs": all_tabs, + "screenshot_changed": screenshot_changed, } def launch(self, url: str | None = None) -> dict[str, Any]: @@ -275,6 +309,7 @@ async def _new_tab(self, url: str | None = None) -> dict[str, Any]: self.current_page_id = tab_id await self._setup_console_logging(page, tab_id) + await self._setup_network_logging(page, tab_id) if url: await page.goto(url, wait_until="domcontentloaded") @@ -401,6 +436,42 @@ async def _get_console_logs( state["console_logs"] = logs return state + def get_network_events( 
+ self, tab_id: str | None = None, limit: int = 50, clear: bool = False + ) -> dict[str, Any]: + with self._execution_lock: + return self._run_async(self._get_network_events(tab_id, limit, clear)) + + async def _get_network_events( + self, tab_id: str | None = None, limit: int = 50, clear: bool = False + ) -> dict[str, Any]: + if not tab_id: + tab_id = self.current_page_id + + if not tab_id or tab_id not in self.pages: + raise ValueError(f"Tab '{tab_id}' not found") + + events = self.network_events.get(tab_id, []) + limited = events[-limit:] + if clear: + self.network_events[tab_id] = [] + + state = await self._get_page_state(tab_id) + state["network_events"] = limited + return state + + def capture_screenshot_diff(self, tab_id: str | None = None) -> dict[str, Any]: + with self._execution_lock: + return self._run_async(self._capture_screenshot_diff(tab_id)) + + async def _capture_screenshot_diff(self, tab_id: str | None = None) -> dict[str, Any]: + state = await self._get_page_state(tab_id) + return { + "tab_id": state["tab_id"], + "screenshot": state["screenshot"], + "screenshot_changed": state.get("screenshot_changed", False), + } + def view_source(self, tab_id: str | None = None) -> dict[str, Any]: with self._execution_lock: return self._run_async(self._view_source(tab_id)) diff --git a/strix/tools/browser/tab_manager.py b/strix/tools/browser/tab_manager.py index 3b4b674f..42d8a2e1 100644 --- a/strix/tools/browser/tab_manager.py +++ b/strix/tools/browser/tab_manager.py @@ -248,6 +248,39 @@ def get_console_logs(self, tab_id: str | None = None, clear: bool = False) -> di else: return result + def get_network_events( + self, tab_id: str | None = None, limit: int = 50, clear: bool = False + ) -> dict[str, Any]: + with self._lock: + if self.browser_instance is None: + raise ValueError("Browser not launched") + + try: + result = self.browser_instance.get_network_events(tab_id, limit, clear) + result["message"] = ( + f"Network events retrieved for tab 
{result.get('tab_id', 'current')}" + ) + except (OSError, ValueError, RuntimeError) as e: + raise RuntimeError(f"Failed to get network events: {e}") from e + else: + return result + + def capture_screenshot_diff(self, tab_id: str | None = None) -> dict[str, Any]: + with self._lock: + if self.browser_instance is None: + raise ValueError("Browser not launched") + + try: + result = self.browser_instance.capture_screenshot_diff(tab_id) + result["message"] = ( + f"Screenshot captured for tab {result.get('tab_id', 'current')} " + f"({'changed' if result.get('screenshot_changed') else 'unchanged'})" + ) + except (OSError, ValueError, RuntimeError) as e: + raise RuntimeError(f"Failed to capture screenshot diff: {e}") from e + else: + return result + def view_source(self, tab_id: str | None = None) -> dict[str, Any]: with self._lock: if self.browser_instance is None: diff --git a/strix/tools/cache/__init__.py b/strix/tools/cache/__init__.py new file mode 100644 index 00000000..66c23474 --- /dev/null +++ b/strix/tools/cache/__init__.py @@ -0,0 +1,3 @@ +from .cache_actions import cache_result, get_cached_result + +__all__ = ["cache_result", "get_cached_result"] diff --git a/strix/tools/cache/cache_actions.py b/strix/tools/cache/cache_actions.py new file mode 100644 index 00000000..6b514435 --- /dev/null +++ b/strix/tools/cache/cache_actions.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +from strix.tools.registry import register_tool + + +CACHE_DIR = Path.cwd() / "strix_cache" + + +def _cache_path(target: str, action: str) -> Path: + safe_target = target.replace("/", "_").replace(":", "_") + safe_action = action.replace("/", "_") + return CACHE_DIR / f"{safe_target}__{safe_action}.json" + + +@register_tool(sandbox_execution=False) +def cache_result(target: str, action: str, result: str) -> dict[str, Any]: + CACHE_DIR.mkdir(exist_ok=True) + path = _cache_path(target, action) + path.write_text(result, 
encoding="utf-8") + return {"success": True, "cached_path": str(path)} + + +@register_tool(sandbox_execution=False) +def get_cached_result(target: str, action: str) -> dict[str, Any]: + path = _cache_path(target, action) + if not path.exists(): + return {"success": False, "cached": False} + try: + data = json.loads(path.read_text(encoding="utf-8")) + except json.JSONDecodeError: + data = path.read_text(encoding="utf-8") + return {"success": True, "cached": True, "result": data} diff --git a/strix/tools/cache/cache_actions_schema.xml b/strix/tools/cache/cache_actions_schema.xml new file mode 100644 index 00000000..9a8a9bb8 --- /dev/null +++ b/strix/tools/cache/cache_actions_schema.xml @@ -0,0 +1,33 @@ + + + Store a tool result keyed by target and action fingerprint. + + + Target identifier (e.g., URL or repo). + + + Action name or tool invocation fingerprint. + + + Serialized result payload to store. + + + + Confirmation of cache write. + + + + Retrieve a cached result by target/action fingerprint. + + + Target identifier. + + + Action name or tool invocation fingerprint. + + + + Cached result payload if present. 
+ + + diff --git a/strix/tools/reporting/reporting_actions.py b/strix/tools/reporting/reporting_actions.py index dd98d6db..772aaaff 100644 --- a/strix/tools/reporting/reporting_actions.py +++ b/strix/tools/reporting/reporting_actions.py @@ -8,6 +8,10 @@ def create_vulnerability_report( title: str, content: str, severity: str, + cvss_score: float | None = None, + references: list[str] | None = None, + fix_recommendation: str | None = None, + cwe: list[str] | None = None, ) -> dict[str, Any]: validation_error = None if not title or not title.strip(): @@ -35,6 +39,10 @@ def create_vulnerability_report( title=title, content=content, severity=severity, + cvss_score=cvss_score, + references=references, + fix_recommendation=fix_recommendation, + cwe=cwe, ) return { @@ -42,6 +50,7 @@ def create_vulnerability_report( "message": f"Vulnerability report '{title}' created successfully", "report_id": report_id, "severity": severity.lower(), + "cvss_score": cvss_score, } import logging diff --git a/strix/tools/reporting/reporting_actions_schema.xml b/strix/tools/reporting/reporting_actions_schema.xml index 2e47d60d..5891e169 100644 --- a/strix/tools/reporting/reporting_actions_schema.xml +++ b/strix/tools/reporting/reporting_actions_schema.xml @@ -22,6 +22,18 @@ DO NOT USE: Severity level: critical, high, medium, low, or info + + Optional CVSS base score estimate (0.0 - 10.0) + + + Optional list of references (CWE, OWASP, docs) relevant to the finding + + + Optional concise fix recommendation to include in the report + + + Optional list of CWE identifiers + Response containing success status and message diff --git a/strix/tools/sast/__init__.py b/strix/tools/sast/__init__.py new file mode 100644 index 00000000..530b7bee --- /dev/null +++ b/strix/tools/sast/__init__.py @@ -0,0 +1,3 @@ +from .sast_actions import run_sast_scan, scan_dependencies + +__all__ = ["run_sast_scan", "scan_dependencies"] diff --git a/strix/tools/sast/sast_actions.py b/strix/tools/sast/sast_actions.py new 
file mode 100644 index 00000000..b42254d9 --- /dev/null +++ b/strix/tools/sast/sast_actions.py @@ -0,0 +1,129 @@ +from __future__ import annotations + +import re +from pathlib import Path +from typing import Any + +from strix.tools.registry import register_tool + + +_DANGEROUS_PATTERNS = [ + { + "id": "PY001", + "regex": r"\beval\(", + "severity": "high", + "message": "Use of eval() can lead to code execution", + }, + { + "id": "PY002", + "regex": r"\bexec\(", + "severity": "high", + "message": "Use of exec() can lead to code execution", + }, + { + "id": "PY003", + "regex": r"subprocess\.(run|Popen)\([^)]*shell\s*=\s*True", + "severity": "high", + "message": "subprocess with shell=True can lead to command injection", + }, + { + "id": "PY004", + "regex": r"random\.(randrange|randint|random)\(", + "severity": "medium", + "message": "Insecure randomness; prefer secrets module for security tokens", + }, +] + +_SKIP_DIRS = {".git", ".venv", "venv", "__pycache__", "node_modules", ".tox", ".ruff_cache"} + + +def _iter_code_files(base: Path, max_files: int) -> list[Path]: + files: list[Path] = [] + for path in base.rglob("*.py"): + if any(part in _SKIP_DIRS for part in path.parts): + continue + files.append(path) + if len(files) >= max_files: + break + return files + + +@register_tool(sandbox_execution=False) +def run_sast_scan(target_path: str | None = None, max_files: int = 200) -> dict[str, Any]: + base = Path(target_path or ".").resolve() + findings: list[dict[str, Any]] = [] + + for file_path in _iter_code_files(base, max_files): + try: + text = file_path.read_text(encoding="utf-8", errors="ignore") + except OSError: + continue + + lines = text.splitlines() + for idx, line in enumerate(lines, start=1): + for pattern in _DANGEROUS_PATTERNS: + if re.search(pattern["regex"], line): + findings.append( + { + "file": str(file_path), + "line": idx, + "rule_id": pattern["id"], + "severity": pattern["severity"], + "message": pattern["message"], + "snippet": line.strip(), + } + 
) + return {"success": True, "findings": {"static": findings}} + + +@register_tool(sandbox_execution=False) +def scan_dependencies(target_path: str | None = None) -> dict[str, Any]: + base = Path(target_path or ".").resolve() + findings: list[dict[str, Any]] = [] + + req_file = base / "requirements.txt" + if req_file.exists(): + for line in req_file.read_text(encoding="utf-8", errors="ignore").splitlines(): + stripped = line.strip() + if not stripped or stripped.startswith("#"): + continue + if "==" not in stripped and ">=" not in stripped and "<=" not in stripped: + findings.append( + { + "package": stripped, + "spec": "unpinned", + "severity": "medium", + "reason": "Dependency is not pinned; prefer exact versions", + } + ) + + pyproject = base / "pyproject.toml" + if pyproject.exists(): + try: + import tomllib + + data = tomllib.loads(pyproject.read_text(encoding="utf-8")) + deps = data.get("tool", {}).get("poetry", {}).get("dependencies", {}) + for pkg, spec in deps.items(): + if pkg == "python": + continue + if spec == "*" or spec == "^0.0.0": + findings.append( + { + "package": pkg, + "spec": str(spec), + "severity": "medium", + "reason": "Wildcard dependency version detected", + } + ) + except Exception: + findings.append( + { + "package": "unknown", + "spec": "parse_error", + "severity": "low", + "reason": "Unable to parse pyproject.toml for dependency checks", + } + ) + + return {"success": True, "findings": {"dependencies": findings}} diff --git a/strix/tools/sast/sast_actions_schema.xml b/strix/tools/sast/sast_actions_schema.xml new file mode 100644 index 00000000..df64fd13 --- /dev/null +++ b/strix/tools/sast/sast_actions_schema.xml @@ -0,0 +1,27 @@ + + + Perform a lightweight static scan of Python files to flag risky constructs (eval/exec, shell=True, weak randomness) for quick triage. + + + Path to scan (defaults to current working directory). + + + Optional cap on number of files to scan (default 200). 
+ + + + Findings grouped under 'static' with file, line, rule_id, severity, and message. + + + + Collect Python dependencies from requirements.txt or pyproject.toml and flag unpinned or wildcard versions. + + + Path to scan (defaults to current working directory). + + + + Dependency findings under 'dependencies' with package, spec, severity, and reason. + + + diff --git a/tests/agents/test_graph_builder.py b/tests/agents/test_graph_builder.py new file mode 100644 index 00000000..2bbf8097 --- /dev/null +++ b/tests/agents/test_graph_builder.py @@ -0,0 +1,61 @@ +import json +from pathlib import Path + +import pytest + +from strix.agents.graph_builder import ( + AgentGraphSpec, + GraphBuilderError, + load_graph_spec, + parse_graph_spec, +) + + +def test_parse_graph_spec_validates_and_returns_root() -> None: + raw = { + "agents": [ + {"id": "root", "name": "Root", "task": "Root task"}, + {"id": "child", "name": "Child", "task": "Child task", "parent_id": "root"}, + ] + } + + spec = parse_graph_spec(raw) + + assert isinstance(spec, AgentGraphSpec) + assert spec.root.id == "root" + graph_dict = spec.as_graph_dict() + assert len(graph_dict["nodes"]) == 2 + assert graph_dict["edges"] == [{"from": "root", "to": "child", "type": "delegation"}] + + +def test_parse_graph_spec_rejects_invalid_parent() -> None: + raw = {"agents": [{"id": "orphan", "name": "Orphan", "task": "Task", "parent_id": "missing"}]} + with pytest.raises(GraphBuilderError): + parse_graph_spec(raw) + + +def test_load_graph_spec_reads_json(tmp_path: Path) -> None: + path = tmp_path / "graph.json" + path.write_text( + json.dumps( + { + "agents": [ + { + "id": "root", + "name": "Root", + "task": "Root task", + "prompt_modules": ["root_agent"], + "max_iterations": 123, + } + ] + } + ), + encoding="utf-8", + ) + + spec = load_graph_spec(path) + configs = spec.build_agent_configs({"llm_prompt_modules": ["default"]}) + + assert configs[0]["agent_id"] == "root" + assert configs[0]["max_iterations"] == 123 + assert 
configs[0]["llm_prompt_modules"] == ["root_agent"] diff --git a/tests/agents/test_iteration_policy.py b/tests/agents/test_iteration_policy.py new file mode 100644 index 00000000..47c21cad --- /dev/null +++ b/tests/agents/test_iteration_policy.py @@ -0,0 +1,23 @@ +from strix.agents.iteration_policy import calculate_iteration_budget + + +def test_calculate_iteration_budget_scales_with_targets() -> None: + targets = [ + {"type": "repository", "details": {}}, + {"type": "web_application", "details": {}}, + {"type": "local_code", "details": {}}, + ] + + result = calculate_iteration_budget(targets, llm_timeout=700, base=300) + + assert result["max_iterations"] >= 300 + assert result["inputs"]["target_weight"] == 5 + assert result["inputs"]["latency_adjustment"] > 0 + + +def test_calculate_iteration_budget_bounds() -> None: + result = calculate_iteration_budget([], llm_timeout=None, base=50) + assert result["max_iterations"] >= 180 + + result = calculate_iteration_budget([{"type": "repository"}] * 20, llm_timeout=2000, base=500) + assert result["max_iterations"] <= 600 diff --git a/tests/agents/test_state_persistence.py b/tests/agents/test_state_persistence.py new file mode 100644 index 00000000..c9ca9b19 --- /dev/null +++ b/tests/agents/test_state_persistence.py @@ -0,0 +1,17 @@ +from pathlib import Path + +from strix.agents.state import AgentState + + +def test_state_save_and_load_round_trip(tmp_path: Path) -> None: + state = AgentState(agent_name="Tester", task="Do work", iteration=5) + state.add_message("user", "hello") + path = tmp_path / "state.json" + + saved_path = state.save_to_path(path) + loaded = AgentState.load_from_path(saved_path) + + assert loaded.agent_name == "Tester" + assert loaded.task == "Do work" + assert loaded.messages[-1]["content"] == "hello" + assert loaded.iteration == 5 diff --git a/tests/llm/test_router.py b/tests/llm/test_router.py new file mode 100644 index 00000000..50ea1850 --- /dev/null +++ b/tests/llm/test_router.py @@ -0,0 +1,39 @@ 
+import asyncio + +import pytest + +from strix.llm.router import MultiplexingLLM + + +class DummyLLM: + def __init__(self, should_fail: bool = False): + self.should_fail = should_fail + self.calls = 0 + + async def generate(self, *args, **kwargs): # type: ignore[override] + self.calls += 1 + if self.should_fail: + raise RuntimeError("failure") + return "ok" + + +@pytest.mark.asyncio +async def test_multiplexing_llm_fallbacks() -> None: + primary = DummyLLM(should_fail=True) + fallback = DummyLLM() + router = MultiplexingLLM(primary, fallback) + + result = await router.generate("msg") + + assert result == "ok" + assert primary.calls == 1 + assert fallback.calls == 1 + + +@pytest.mark.asyncio +async def test_multiplexing_llm_raises_without_fallback() -> None: + primary = DummyLLM(should_fail=True) + router = MultiplexingLLM(primary, None) + + with pytest.raises(RuntimeError): + await router.generate("msg") diff --git a/tests/prompts/test_auth_playbook_prompt.py b/tests/prompts/test_auth_playbook_prompt.py new file mode 100644 index 00000000..b19bbed2 --- /dev/null +++ b/tests/prompts/test_auth_playbook_prompt.py @@ -0,0 +1,25 @@ +from pathlib import Path + +from jinja2 import Environment, FileSystemLoader, select_autoescape + +from strix.prompts import get_all_module_names, load_prompt_modules + + +def _jinja_env() -> Environment: + prompts_dir = Path(__file__).parents[2] / "strix" / "prompts" + return Environment( + loader=FileSystemLoader(prompts_dir), + autoescape=select_autoescape(enabled_extensions=(), default_for_string=False), + ) + + +def test_auth_playbook_module_available() -> None: + modules = get_all_module_names() + assert "oidc_saml_sso" in modules + + +def test_auth_playbook_renders() -> None: + env = _jinja_env() + content = load_prompt_modules(["oidc_saml_sso"], env) + assert "oidc_saml_sso" in content + assert "OIDC" in content["oidc_saml_sso"] diff --git a/tests/runtime/test_benchmark.py b/tests/runtime/test_benchmark.py new file mode 100644 index 
00000000..834cb807 --- /dev/null +++ b/tests/runtime/test_benchmark.py @@ -0,0 +1,11 @@ +from strix.runtime.benchmark import run_benchmark + + +def test_run_benchmark_records_duration() -> None: + def sample(): + return "ok" + + result = run_benchmark("sample", sample) + assert result["name"] == "sample" + assert result["duration_ms"] >= 0 + assert result["result"] == "ok" diff --git a/tests/runtime/test_tool_pool.py b/tests/runtime/test_tool_pool.py new file mode 100644 index 00000000..93d316ad --- /dev/null +++ b/tests/runtime/test_tool_pool.py @@ -0,0 +1,20 @@ +import asyncio + +import pytest + +from strix.runtime.tool_pool import ToolServerPool + + +def make_stub(): + return object() + + +@pytest.mark.asyncio +async def test_tool_pool_spawns_and_reuses() -> None: + pool = ToolServerPool(make_stub, max_instances=1) + inst1 = await pool.get_instance() + inst2 = await pool.get_instance() + assert inst1 is inst2 + await pool.mark_unhealthy(inst1) + health = await pool.get_health() + assert health[id(inst1)] == "unhealthy" diff --git a/tests/tools/test_api_probe_tool.py b/tests/tools/test_api_probe_tool.py new file mode 100644 index 00000000..c9abf6d8 --- /dev/null +++ b/tests/tools/test_api_probe_tool.py @@ -0,0 +1,36 @@ +from pathlib import Path + +from strix.tools.api_probe.api_probe_actions import load_openapi_spec, suggest_api_fuzz_cases + + +def test_load_openapi_spec_parses_endpoints(tmp_path: Path) -> None: + spec = { + "openapi": "3.0.0", + "paths": { + "/users": { + "get": { + "summary": "List users", + "parameters": [ + {"name": "limit", "in": "query", "required": False, "schema": {"type": "integer"}} + ], + } + } + }, + } + spec_path = tmp_path / "spec.json" + spec_path.write_text(__import__("json").dumps(spec), encoding="utf-8") + + result = load_openapi_spec(str(spec_path)) + endpoints = result["endpoints"] + + assert endpoints[0]["path"] == "/users" + assert endpoints[0]["method"] == "GET" + assert endpoints[0]["params"][0]["name"] == "limit" + + +def 
test_suggest_api_fuzz_cases_generates_payloads() -> None: + endpoints = [{"path": "/users", "method": "GET", "params": [{"name": "id"}]}] + + result = suggest_api_fuzz_cases(endpoints) + + assert result["suggestions"][0]["payloads"][0]["name"] == "id" diff --git a/tests/tools/test_reporting_enrichment.py b/tests/tools/test_reporting_enrichment.py new file mode 100644 index 00000000..23b71413 --- /dev/null +++ b/tests/tools/test_reporting_enrichment.py @@ -0,0 +1,18 @@ +from strix.telemetry.tracer import Tracer + + +def test_report_includes_cvss_and_refs() -> None: + tracer = Tracer("test-run") + tracer.add_vulnerability_report( + title="Test vuln", + content="Issue details", + severity="high", + cvss_score=7.5, + references=["CWE-79", "OWASP-A01"], + fix_recommendation="Sanitize inputs", + cwe=["CWE-79"], + ) + + report = tracer.vulnerability_reports[0] + assert report["cvss_score"] == 7.5 + assert "CWE-79" in report["references"] diff --git a/tests/tools/test_sast_tool.py b/tests/tools/test_sast_tool.py new file mode 100644 index 00000000..9e1ebf46 --- /dev/null +++ b/tests/tools/test_sast_tool.py @@ -0,0 +1,44 @@ +from pathlib import Path + +from strix.tools.sast.sast_actions import run_sast_scan, scan_dependencies + + +def test_run_sast_scan_flags_eval(tmp_path: Path) -> None: + suspicious = tmp_path / "app.py" + suspicious.write_text("def run():\n return eval('1+1')\n", encoding="utf-8") + + result = run_sast_scan(str(tmp_path)) + + findings = result["findings"]["static"] + assert any(f["rule_id"] == "PY001" for f in findings) + assert findings[0]["file"].endswith("app.py") + + +def test_scan_dependencies_flags_unpinned_requirement(tmp_path: Path) -> None: + req = tmp_path / "requirements.txt" + req.write_text("flask\nrequests>=2.0.0\n", encoding="utf-8") + + result = scan_dependencies(str(tmp_path)) + + findings = result["findings"]["dependencies"] + assert any(f["package"] == "flask" and f["spec"] == "unpinned" for f in findings) + + +def 
test_scan_dependencies_flags_wildcard_pyproject(tmp_path: Path) -> None: + pyproject = tmp_path / "pyproject.toml" + pyproject.write_text( + """ +[tool.poetry] +name = "sample" +version = "0.1.0" + +[tool.poetry.dependencies] +python = "^3.12" +flask = "*" +""", + encoding="utf-8", + ) + + result = scan_dependencies(str(tmp_path)) + findings = result["findings"]["dependencies"] + assert any(f["package"] == "flask" for f in findings)